Compare commits

..

1 Commits

Author SHA1 Message Date
ccfd175417 chore(deps): update helm release fluent-bit to v0.48.6 2025-02-12 18:13:56 +00:00
183 changed files with 3270 additions and 5179 deletions

View File

@ -5,8 +5,8 @@ FROM docker.io/alpine:${ALPINE_VERSION}
ARG ALPINE_VERSION
ARG KUBE_VERSION=1.31
ARG SOPS_VERSION="3.9.4"
ARG VALS_VERSION="0.39.1"
ARG SOPS_VERSION="3.9.1"
ARG VALS_VERSION="0.37.6"
ARG HELM_SECRETS_VERSION="4.6.2"
RUN cd /etc/apk/keys && \

View File

@ -49,6 +49,7 @@ function cert-manager-post() {
wait_for "kubectl get deployment -n $namespace cert-manager-webhook"
kubectl rollout status deployment -n $namespace cert-manager-webhook
wait_for 'kubectl get validatingwebhookconfigurations -o yaml | grep "caBundle: LS0"'
apply
fi
wait_for "kubectl get ClusterIssuer -n $namespace kubezero-local-ca-issuer"
@ -81,11 +82,11 @@ function metrics-pre() {
get_kubezero_values $ARGOCD
# Always use embedded kubezero chart
helm template $CHARTS/kubezero -f $WORKDIR/kubezero-values.yaml --kube-version $KUBE_VERSION --name-template kubezero --version ~$KUBE_VERSION --devel --output-dir $WORKDIR
helm template $CHARTS/kubezero -f $WORKDIR/kubezero-values.yaml --kube-version $KUBE_VERSION --version ~$KUBE_VERSION --devel --output-dir $WORKDIR
# Root KubeZero apply directly and exit
if [ ${ARTIFACTS[0]} == "kubezero" ]; then
kubectl replace -f $WORKDIR/kubezero/templates
kubectl apply -f $WORKDIR/kubezero/templates
exit $?
# "catch all" apply all enabled modules

View File

@ -7,8 +7,8 @@ pre_control_plane_upgrade_cluster() {
kubectl label node $n 'node.kubernetes.io/kubezero.version=v1.30.6' || true
done
# patch aws-iam-authenticator DS to NOT run pods on 1.31 controllers
kubectl patch ds aws-iam-authenticator -n kube-system -p '{"spec": {"template": {"spec": {"nodeSelector": {"node.kubernetes.io/kubezero.version": "v1.30.6"}}}}}' || true
# patch aws-iam-authenticator DS to NOT run pods on 1.31 controllers
kubectl patch ds aws-iam-authentiator -p '{"spec": {"template": {"spec": {"nodeSelector": {"node.kubernetes.io/kubezero.version": "v1.30.6"}}}}}' || true
}
@ -20,28 +20,20 @@ post_control_plane_upgrade_cluster() {
# All things AFTER all controllers are on the new version
pre_cluster_upgrade_final() {
set +e
if [ "$PLATFORM" == "aws" ];then
# cleanup aws-iam-authenticator
kubectl delete clusterrolebinding aws-iam-authenticator
kubectl delete clusterrole aws-iam-authenticator
kubectl delete serviceaccount aws-iam-authenticator -n kube-system
kubectl delete cm aws-iam-authenticator -n kube-system
kubectl delete ds aws-iam-authenticator -n kube-system
kubectl delete IAMIdentityMapping kubezero-worker-nodes
kubectl delete IAMIdentityMapping kubernetes-admin
kubectl delete crd iamidentitymappings.iamauthenticator.k8s.aws
kubectl delete secret aws-iam-certs -n kube-system
# cleanup aws-iam-authenticator
kubectl delete clusterrolebinding aws-iam-authentiator || true
kubectl delete clusterrole aws-iam-authentiator || true
kubectl delete serviceaccount aws-iam-authentiator -n kube-system || true
kubectl delete cm aws-iam-authentiator -n kube-system || true
kubectl delete ds aws-iam-authentiator -n kube-system || true
kubectl delete IAMIdentityMapping kubezero-worker-nodes || true
kubectl delete IAMIdentityMapping kubernetes-admin || true
kubectl delete crd iamidentitymappings.iamauthenticator.k8s.aws || true
kubectl delete secret aws-iam-certs -n kube-system || true
fi
# Remove any helm hook related resources
kubectl delete rolebinding argo-argocd-redis-secret-init -n argocd
kubectl delete sa argo-argocd-redis-secret-init -n argocd
kubectl delete role argo-argocd-redis-secret-init -n argocd
kubectl delete job argo-argocd-redis-secret-init -n argocd
set -e
}

View File

@ -97,7 +97,6 @@ pre_kubeadm() {
cp -r ${WORKDIR}/kubeadm/templates/apiserver ${HOSTFS}/etc/kubernetes
# copy patches to host to make --rootfs of kubeadm work
rm -f ${HOSTFS}/etc/kubernetes/patches/*
cp -r ${WORKDIR}/kubeadm/templates/patches ${HOSTFS}/etc/kubernetes
}
@ -122,18 +121,23 @@ control_plane_upgrade() {
get_kubezero_values $ARGOCD
# run the current values through migrate_argo_values.py to produce the migrated config
migrate_argo_values.py < "$WORKDIR"/kubezero-values.yaml > "$WORKDIR"/new-kubezero-values.yaml \
&& mv "$WORKDIR"/new-kubezero-values.yaml "$WORKDIR"/kubezero-values.yaml
migrate_argo_values.py < "$WORKDIR"/kubezero-values.yaml > "$WORKDIR"/new-kubezero-values.yaml
update_kubezero_cm
# Update kubezero-values CM
kubectl get cm -n kubezero kubezero-values -o=yaml | \
yq e '.data."values.yaml" |= load_str("/tmp/kubezero/new-kubezero-values.yaml")' | \
kubectl apply --server-side --force-conflicts -f -
if [ "$ARGOCD" == "True" ]; then
# update argo app
export kubezero_chart_version=$(yq .version $CHARTS/kubezero/Chart.yaml)
kubectl get application kubezero -n argocd -o yaml | \
yq ".spec.source.helm.valuesObject |= load(\"$WORKDIR/kubezero-values.yaml\") | .spec.source.targetRevision = strenv(kubezero_chart_version)" \
yq '.spec.source.helm.valuesObject |= load("/tmp/kubezero/new-kubezero-values.yaml") | .spec.source.targetRevision = strenv(kubezero_chart_version)' \
> $WORKDIR/new-argocd-app.yaml
kubectl replace -f $WORKDIR/new-argocd-app.yaml
kubectl apply --server-side --force-conflicts -f $WORKDIR/new-argocd-app.yaml
# finally remove annotation to allow argo to sync again
kubectl patch app kubezero -n argocd --type json -p='[{"op": "remove", "path": "/metadata/annotations"}]' || true
fi
pre_kubeadm

View File

@ -46,17 +46,13 @@ function get_kubezero_values() {
}
# Overwrite kubezero-values CM with file
# Update kubezero-values CM
function update_kubezero_cm() {
kubectl get cm -n kubezero kubezero-values -o=yaml | \
yq e ".data.\"values.yaml\" |= load_str(\"$WORKDIR/kubezero-values.yaml\")" | \
kubectl replace -f -
}
kubectl get application kubezero -n argocd -o yaml | yq .spec.source.helm.valuesObject > ${WORKDIR}/kubezero-values.yaml
# sync kubezero-values CM from ArgoCD app
function sync_kubezero_cm_from_argo() {
get_kubezero_values True
update_kubezero_cm
kubectl get cm -n kubezero kubezero-values -o=yaml | \
yq e '.data."values.yaml" |= load_str("/tmp/kubezero/kubezero-values.yaml")' | \
kubectl apply --server-side --force-conflicts -f -
}

View File

@ -8,13 +8,6 @@ import yaml
def migrate(values):
"""Actual changes here"""
# remove syncPolicy from root app
try:
if values["kubezero"]["syncPolicy"]:
values["kubezero"].pop("syncPolicy")
except KeyError:
pass
return values

View File

@ -14,6 +14,6 @@ maintainers:
email: stefan@zero-downtime.net
dependencies:
- name: kubezero-lib
version: 0.2.1
version: ">= 0.1.6"
repository: https://cdn.zero-downtime.net/charts/
kubeVersion: ">= 1.26.0"

View File

@ -1,3 +0,0 @@
istioctl
istio
istio.zdt

View File

@ -1,32 +0,0 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
README.md.gotmpl
*.patch
*.sh
*.py
istioctl
istio
istio.zdt

View File

@ -1,19 +0,0 @@
apiVersion: v2
name: envoy-ratelimit
description: Envoy global ratelimiting service - part of KubeZero
type: application
version: 0.1.2
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
- kubezero
- envoy
- istio
maintainers:
- name: Stefan Reimer
email: stefan@zero-downtime.net
dependencies:
- name: kubezero-lib
version: 0.2.1
repository: https://cdn.zero-downtime.net/charts/
kubeVersion: ">= 1.31.0-0"

View File

@ -1,37 +0,0 @@
# envoy-ratelimit
![Version: 0.1.0](https://img.shields.io/badge/Version-0.1.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
Envoy global ratelimiting service - part of KubeZero
**Homepage:** <https://kubezero.com>
## Maintainers
| Name | Email | Url |
| ---- | ------ | --- |
| Stefan Reimer | <stefan@zero-downtime.net> | |
## Requirements
Kubernetes: `>= 1.31.0-0`
| Repository | Name | Version |
|------------|------|---------|
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | 0.2.1 |
## Values
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| descriptors.ingress[0].key | string | `"remote_address"` | |
| descriptors.ingress[0].rate_limit.requests_per_unit | int | `10` | |
| descriptors.ingress[0].rate_limit.unit | string | `"second"` | |
| descriptors.privateIngress[0].key | string | `"remote_address"` | |
| descriptors.privateIngress[0].rate_limit.requests_per_unit | int | `10` | |
| descriptors.privateIngress[0].rate_limit.unit | string | `"second"` | |
| failureModeDeny | bool | `false` | |
| localCacheSize | int | `1048576` | |
| log.format | string | `"json"` | |
| log.level | string | `"warn"` | |
| metrics.enabled | bool | `true` | |

View File

@ -1,16 +0,0 @@
{{ template "chart.header" . }}
{{ template "chart.deprecationWarning" . }}
{{ template "chart.versionBadge" . }}{{ template "chart.typeBadge" . }}{{ template "chart.appVersionBadge" . }}
{{ template "chart.description" . }}
{{ template "chart.homepageLine" . }}
{{ template "chart.maintainersSection" . }}
{{ template "chart.sourcesSection" . }}
{{ template "chart.requirementsSection" . }}
{{ template "chart.valuesSection" . }}

View File

@ -1,63 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: ratelimit
namespace: {{ .Release.Namespace }}
spec:
replicas: 1
selector:
matchLabels:
app: ratelimit
strategy:
type: Recreate
template:
metadata:
labels:
app: ratelimit
spec:
containers:
- image: {{ .Values.image.repository }}:{{ .Values.image.tag }}
imagePullPolicy: IfNotPresent
name: ratelimit
command: ["/bin/ratelimit"]
env:
- name: LOG_LEVEL
value: {{ default "WARN" .Values.log.level }}
- name: LOG_FORMAT
value: {{ default "text" .Values.log.format }}
- name: REDIS_SOCKET_TYPE
value: tcp
- name: REDIS_URL
value: ratelimit-valkey:6379
- name: USE_PROMETHEUS
value: "true"
- name: USE_STATSD
value: "false"
- name: RUNTIME_ROOT
value: /data
- name: RUNTIME_SUBDIRECTORY
value: ratelimit
- name: RUNTIME_WATCH_ROOT
value: "false"
- name: RUNTIME_IGNOREDOTFILES
value: "true"
- name: LOCAL_CACHE_SIZE_IN_BYTES
value: "{{ default 0 .Values.localCacheSize | int }}"
ports:
- containerPort: 8081
#- containerPort: 8080
#- containerPort: 6070
volumeMounts:
- name: ratelimit-config
mountPath: /data/ratelimit/config
resources:
requests:
cpu: 50m
memory: 32Mi
limits:
cpu: 1
memory: 256Mi
volumes:
- name: ratelimit-config
configMap:
name: ratelimit-config

View File

@ -1,27 +0,0 @@
apiVersion: v1
kind: Service
metadata:
name: ratelimit
namespace: {{ .Release.Namespace }}
labels:
app: ratelimit
spec:
ports:
#- name: http-port
# port: 8080
# targetPort: 8080
# protocol: TCP
- name: grpc-port
port: 8081
targetPort: 8081
protocol: TCP
#- name: http-debug
# port: 6070
# targetPort: 6070
# protocol: TCP
- name: http-monitoring
port: 9090
targetPort: 9090
protocol: TCP
selector:
app: ratelimit

View File

@ -1,24 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: ratelimit-valkey
namespace: {{ .Release.Namespace }}
spec:
replicas: 1
selector:
matchLabels:
app: ratelimit-valkey
template:
metadata:
labels:
app: ratelimit-valkey
spec:
containers:
- image: valkey/valkey:8.1-alpine3.21
imagePullPolicy: IfNotPresent
name: valkey
ports:
- name: valkey
containerPort: 6379
restartPolicy: Always
serviceAccountName: ""

View File

@ -1,13 +0,0 @@
apiVersion: v1
kind: Service
metadata:
name: ratelimit-valkey
namespace: {{ .Release.Namespace }}
labels:
app: ratelimit-valkey
spec:
ports:
- name: valkey
port: 6379
selector:
app: ratelimit-valkey

View File

@ -1,9 +0,0 @@
#!/bin/bash
set -ex
. ../../scripts/lib-update.sh
#login_ecr_public
update_helm
update_docs

View File

@ -1,38 +0,0 @@
image:
repository: envoyproxy/ratelimit
# see: https://hub.docker.com/r/envoyproxy/ratelimit/tags
tag: 80b15778
log:
level: warn
format: json
# 1MB local cache for already reached limits to reduce calls to Redis
localCacheSize: 1048576
# Whether to block requests if ratelimiting is down
failureModeDeny: false
# rate limit descriptors for each domain
# - slow: 1 req/s over a minute per sourceIP
descriptors:
ingress:
- key: speed
value: slow
descriptors:
- key: remote_address
rate_limit:
unit: minute
requests_per_unit: 60
privateIngress:
- key: speed
value: slow
descriptors:
- key: remote_address
rate_limit:
unit: minute
requests_per_unit: 60
metrics:
enabled: false

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: kubeadm
description: KubeZero Kubeadm cluster config
type: application
version: 1.31.6
version: 1.31.5
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
@ -11,4 +11,4 @@ keywords:
maintainers:
- name: Stefan Reimer
email: stefan@zero-downtime.net
kubeVersion: ">= 1.31.0-0"
kubeVersion: ">= 1.26.0"

View File

@ -14,7 +14,7 @@ KubeZero umbrella chart for various optional cluster addons
## Requirements
Kubernetes: `>= 1.30.0-0`
Kubernetes: `>= 1.26.0`
| Repository | Name | Version |
|------------|------|---------|
@ -94,8 +94,9 @@ Device plugin for [AWS Neuron](https://aws.amazon.com/machine-learning/neuron/)
| aws-node-termination-handler.managedTag | string | `"zdt:kubezero:nth:${ClusterName}"` | "zdt:kubezero:nth:${ClusterName}" |
| aws-node-termination-handler.metadataTries | int | `0` | |
| aws-node-termination-handler.nodeSelector."node-role.kubernetes.io/control-plane" | string | `""` | |
| aws-node-termination-handler.podMonitor.create | bool | `false` | |
| aws-node-termination-handler.queueURL | string | `""` | https://sqs.${AWS::Region}.amazonaws.com/${AWS::AccountId}/${ClusterName}_Nth |
| aws-node-termination-handler.serviceMonitor.create | bool | `false` | |
| aws-node-termination-handler.rbac.pspEnabled | bool | `false` | |
| aws-node-termination-handler.taintNode | bool | `true` | |
| aws-node-termination-handler.tolerations[0].effect | string | `"NoSchedule"` | |
| aws-node-termination-handler.tolerations[0].key | string | `"node-role.kubernetes.io/control-plane"` | |
@ -109,7 +110,7 @@ Device plugin for [AWS Neuron](https://aws.amazon.com/machine-learning/neuron/)
| cluster-autoscaler.extraArgs.scan-interval | string | `"30s"` | |
| cluster-autoscaler.extraArgs.skip-nodes-with-local-storage | bool | `false` | |
| cluster-autoscaler.image.repository | string | `"registry.k8s.io/autoscaling/cluster-autoscaler"` | |
| cluster-autoscaler.image.tag | string | `"v1.31.1"` | |
| cluster-autoscaler.image.tag | string | `"v1.30.2"` | |
| cluster-autoscaler.nodeSelector."node-role.kubernetes.io/control-plane" | string | `""` | |
| cluster-autoscaler.podDisruptionBudget | bool | `false` | |
| cluster-autoscaler.prometheusRule.enabled | bool | `false` | |
@ -158,9 +159,6 @@ Device plugin for [AWS Neuron](https://aws.amazon.com/machine-learning/neuron/)
| neuron-helm-chart.enabled | bool | `false` | |
| neuron-helm-chart.npd.enabled | bool | `false` | |
| nvidia-device-plugin.cdi.nvidiaHookPath | string | `"/usr/bin"` | |
| nvidia-device-plugin.config.default | string | `"default"` | |
| nvidia-device-plugin.config.map.default | string | `"version: v1\nflags:\n migStrategy: none"` | |
| nvidia-device-plugin.config.map.time-slice-4x | string | `"version: v1\nflags:\n migStrategy: none\nsharing:\n timeSlicing:\n resources:\n - name: nvidia.com/gpu\n replicas: 4"` | |
| nvidia-device-plugin.deviceDiscoveryStrategy | string | `"nvml"` | |
| nvidia-device-plugin.enabled | bool | `false` | |
| nvidia-device-plugin.runtimeClassName | string | `"nvidia"` | |

View File

@ -185,22 +185,6 @@ neuron-helm-chart:
nvidia-device-plugin:
enabled: false
config:
default: "default"
map:
default: |-
version: v1
flags:
migStrategy: none
time-slice-4x: |-
version: v1
flags:
migStrategy: none
sharing:
timeSlicing:
resources:
- name: nvidia.com/gpu
replicas: 4
cdi:
nvidiaHookPath: /usr/bin
deviceDiscoveryStrategy: nvml

View File

@ -1,7 +1,7 @@
apiVersion: v2
description: KubeZero Argo - Events, Workflow, CD
name: kubezero-argo
version: 0.2.8
version: 0.2.7
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
@ -15,14 +15,14 @@ maintainers:
# Url: https://github.com/argoproj/argo-helm/tree/main/charts
dependencies:
- name: kubezero-lib
version: 0.2.1
version: ">= 0.1.6"
repository: https://cdn.zero-downtime.net/charts/
- name: argo-events
version: 2.4.13
version: 2.4.9
repository: https://argoproj.github.io/argo-helm
condition: argo-events.enabled
- name: argo-cd
version: 7.8.2
version: 7.7.7
repository: https://argoproj.github.io/argo-helm
condition: argo-cd.enabled
- name: argocd-apps
@ -30,7 +30,7 @@ dependencies:
repository: https://argoproj.github.io/argo-helm
condition: argo-cd.enabled
- name: argocd-image-updater
version: 0.12.0
version: 0.11.2
repository: https://argoproj.github.io/argo-helm
condition: argocd-image-updater.enabled
kubeVersion: ">= 1.26.0-0"

View File

@ -1,6 +1,6 @@
# kubezero-argo
![Version: 0.2.8](https://img.shields.io/badge/Version-0.2.8-informational?style=flat-square)
![Version: 0.2.7](https://img.shields.io/badge/Version-0.2.7-informational?style=flat-square)
KubeZero Argo - Events, Workflow, CD
@ -18,17 +18,16 @@ Kubernetes: `>= 1.26.0-0`
| Repository | Name | Version |
|------------|------|---------|
| https://argoproj.github.io/argo-helm | argo-cd | 7.8.2 |
| https://argoproj.github.io/argo-helm | argo-events | 2.4.13 |
| https://argoproj.github.io/argo-helm | argo-cd | 7.7.7 |
| https://argoproj.github.io/argo-helm | argo-events | 2.4.9 |
| https://argoproj.github.io/argo-helm | argocd-apps | 2.0.2 |
| https://argoproj.github.io/argo-helm | argocd-image-updater | 0.12.0 |
| https://argoproj.github.io/argo-helm | argocd-image-updater | 0.11.2 |
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | >= 0.1.6 |
## Values
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| argo-cd.configs.cm."application.instanceLabelKey" | string | `nil` | |
| argo-cd.configs.cm."application.resourceTrackingMethod" | string | `"annotation"` | |
| argo-cd.configs.cm."resource.customizations" | string | `"cert-manager.io/Certificate:\n # Lua script for customizing the health status assessment\n health.lua: |\n hs = {}\n if obj.status ~= nil then\n if obj.status.conditions ~= nil then\n for i, condition in ipairs(obj.status.conditions) do\n if condition.type == \"Ready\" and condition.status == \"False\" then\n hs.status = \"Degraded\"\n hs.message = condition.message\n return hs\n end\n if condition.type == \"Ready\" and condition.status == \"True\" then\n hs.status = \"Healthy\"\n hs.message = condition.message\n return hs\n end\n end\n end\n end\n hs.status = \"Progressing\"\n hs.message = \"Waiting for certificate\"\n return hs\n"` | |
| argo-cd.configs.cm."timeout.reconciliation" | string | `"300s"` | |
@ -36,7 +35,6 @@ Kubernetes: `>= 1.26.0-0`
| argo-cd.configs.cm."ui.bannerpermanent" | string | `"true"` | |
| argo-cd.configs.cm."ui.bannerposition" | string | `"bottom"` | |
| argo-cd.configs.cm."ui.bannerurl" | string | `"https://kubezero.com/releases/v1.31"` | |
| argo-cd.configs.cm.installationID | string | `"KubeZero-ArgoCD"` | |
| argo-cd.configs.cm.url | string | `"https://argocd.example.com"` | |
| argo-cd.configs.params."controller.diff.server.side" | string | `"true"` | |
| argo-cd.configs.params."controller.operation.processors" | string | `"5"` | |
@ -96,7 +94,7 @@ Kubernetes: `>= 1.26.0-0`
| argo-events.configs.jetstream.streamConfig.maxMsgs | int | `1000000` | Maximum number of messages before expiring oldest message |
| argo-events.configs.jetstream.streamConfig.replicas | int | `1` | Number of replicas, defaults to 3 and requires minimal 3 |
| argo-events.configs.jetstream.versions[0].configReloaderImage | string | `"natsio/nats-server-config-reloader:0.14.1"` | |
| argo-events.configs.jetstream.versions[0].metricsExporterImage | string | `"natsio/prometheus-nats-exporter:0.16.0"` | |
| argo-events.configs.jetstream.versions[0].metricsExporterImage | string | `"natsio/prometheus-nats-exporter:0.14.0"` | |
| argo-events.configs.jetstream.versions[0].natsImage | string | `"nats:2.10.11-scratch"` | |
| argo-events.configs.jetstream.versions[0].startCommand | string | `"/nats-server"` | |
| argo-events.configs.jetstream.versions[0].version | string | `"2.10.11"` | |

View File

@ -45,7 +45,7 @@ argo-cd:
format: json
image:
repository: public.ecr.aws/zero-downtime/zdt-argocd
tag: v2.14.2
tag: v2.13.1
networkPolicy:
create: true
@ -106,12 +106,9 @@ argo-cd:
extraHosts: "git.zero-downtime.net ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC7UgK7Z4dDcuIW1uMOsuwhrqdkJCvYG/ZjHtLM7WaKFxVRnzNnNkQJNncWIGNDUQ1xxrbsoSNRZDtk0NlOjNtx2aApSWl4iWghkpXELvsZtOZ7I9FSC/E6ImLC3KWfK7P0mhZaF6kHPfpu8Y6pjUyLBTpV1AaVwr0I8onyqGazJOVotTFaBFEi/sT0O2FUk7agwZYfj61w3JGOy3c+fmBcK3lXf/QM90tosOpJNuJ7n5Vk5FDDLkl9rO4XR/+mXHFvITiWb8F5C50YAwjYcy36yWSSryUAAHAuqpgotwh65vSG6fZvFhmEwO2BrCkOV5+k8iRfhy/yZODJzZ5V/5cbMbdZrY6lm/p5/S1wv8BEyPekBGdseqQjEO0IQiQHcMrfgTrrQ7ndbZzVZRByZI+wbGFkBCzNSJcNsoiHjs2EblxYyuW0qUvvrBxLnySvaxyPm4BOukSAZAOEaUrajpQlnHdnY1CGcgbwxw0LNv3euKQ3tDJSUlKO0Wd8d85PRv1THW4Ui9Lhsmv+BPA2vJZDOkx/n0oyPFAB0oyd5JNM38eFxLCmPC2OE63gDP+WmzVO61YCVTnvhpQjEOLawEWVFsk0y25R5z5BboDqJaOFnZF6i517O96cn17z3Ls4hxw3+0rlKczYRoyfUHs7KQENa4mY8YlJweNTBgld//RMUQ=="
params:
controller.status.processors: 8
controller.operation.processors: 4
controller.kubectl.parallelism.limit: 8
controller.resource.health.persist: "false"
controller.status.processors: "10"
controller.operation.processors: "5"
controller.diff.server.side: "true"
controller.sync.timeout.seconds: 1800
server.insecure: true
server.enable.gzip: true
@ -180,9 +177,6 @@ argo-cd:
serviceMonitor:
enabled: true
redisSecretInit:
enabled: false
# redis:
# We might want to try to keep redis close to the controller
# affinity:

View File

@ -14,7 +14,7 @@ maintainers:
email: stefan@zero-downtime.net
dependencies:
- name: kubezero-lib
version: 0.2.1
version: ">= 0.1.6"
repository: https://cdn.zero-downtime.net/charts/
- name: keycloak
repository: "oci://registry-1.docker.io/bitnamicharts"

View File

@ -14,7 +14,7 @@ maintainers:
email: stefan@zero-downtime.net
dependencies:
- name: kubezero-lib
version: 0.2.1
version: ">= 0.2.1"
repository: https://cdn.zero-downtime.net/charts/
- name: redis
version: 20.0.3

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: kubezero-cert-manager
description: KubeZero Umbrella Chart for cert-manager
type: application
version: 0.9.12
version: 0.9.11
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
@ -13,9 +13,9 @@ maintainers:
email: stefan@zero-downtime.net
dependencies:
- name: kubezero-lib
version: 0.2.1
version: ">= 0.1.6"
repository: https://cdn.zero-downtime.net/charts/
- name: cert-manager
version: v1.17.1
version: v1.17.0
repository: https://charts.jetstack.io
kubeVersion: ">= 1.30.0-0"

View File

@ -1,6 +1,6 @@
# kubezero-cert-manager
![Version: 0.9.12](https://img.shields.io/badge/Version-0.9.12-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
![Version: 0.9.11](https://img.shields.io/badge/Version-0.9.11-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
KubeZero Umbrella Chart for cert-manager
@ -14,12 +14,12 @@ KubeZero Umbrella Chart for cert-manager
## Requirements
Kubernetes: `>= 1.30.0-0`
Kubernetes: `>= 1.26.0-0`
| Repository | Name | Version |
|------------|------|---------|
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | >= 0.1.6 |
| https://charts.jetstack.io | cert-manager | v1.17.1 |
| https://charts.jetstack.io | cert-manager | v1.17.0 |
## AWS - OIDC IAM roles

View File

@ -18,7 +18,7 @@
"subdir": "contrib/mixin"
}
},
"version": "eb7607bd8b3665d14aa40d50435ae8c9002d620c",
"version": "c0e7e8c873a6067f9ae9076c3c243a20fa713a58",
"sum": "XmXkOCriQIZmXwlIIFhqlJMa0e6qGWdxZD+ZDYaN0Po="
},
{
@ -78,7 +78,7 @@
"subdir": "grafana-builder"
}
},
"version": "ef841d571a704013b689368fe51e437810b6c935",
"version": "d6c38bb26f576b128cadca4137d73a037afdd872",
"sum": "yxqWcq/N3E/a/XreeU6EuE6X7kYPnG0AspAQFKOjASo="
},
{
@ -118,8 +118,8 @@
"subdir": ""
}
},
"version": "4ff562d5e8145940cf355f62cf2308895c4dca81",
"sum": "kiL19fTbXOtNglsmT62kOzIf/Xpu+YwoiMPAApDXhkE="
"version": "e27267571be06c2bdc3d2fd8dbd70161cd709cb4",
"sum": "je1RPCp2aFNefYs5Q57Q5wDm93p8pL4pdBtA5rC7jLA="
},
{
"source": {
@ -128,7 +128,7 @@
"subdir": "jsonnet/kube-state-metrics"
}
},
"version": "350a7c472e1801a2e13b9895ec8ef38876c96dd0",
"version": "2a95d4649b2fea55799032fb9c0b571c4ba7f776",
"sum": "3bioG7CfTfY9zeu5xU4yon6Zt3kYvNkyl492nOhQxnM="
},
{
@ -138,7 +138,7 @@
"subdir": "jsonnet/kube-state-metrics-mixin"
}
},
"version": "350a7c472e1801a2e13b9895ec8ef38876c96dd0",
"version": "2a95d4649b2fea55799032fb9c0b571c4ba7f776",
"sum": "qclI7LwucTjBef3PkGBkKxF0mfZPbHnn4rlNWKGtR4c="
},
{
@ -158,7 +158,7 @@
"subdir": "jsonnet/mixin"
}
},
"version": "7deab71d6d5921eeaf8c79e3ae8e31efe63783a9",
"version": "4da36fdd2377362c285aee3a96f7b0516f6e41bf",
"sum": "gi+knjdxs2T715iIQIntrimbHRgHnpM8IFBJDD1gYfs=",
"name": "prometheus-operator-mixin"
},
@ -169,8 +169,8 @@
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "7deab71d6d5921eeaf8c79e3ae8e31efe63783a9",
"sum": "LctDdofQostvviE5y8vpRKWGGO1ZKO3dgJe7P9xifW0="
"version": "4da36fdd2377362c285aee3a96f7b0516f6e41bf",
"sum": "tb5PzIT75Hv4m3kbOHXvmrlcplg+EbS4++NfTttDNOk="
},
{
"source": {
@ -190,8 +190,8 @@
"subdir": "docs/node-mixin"
}
},
"version": "02afa5c53c36123611533f2defea6ccd4546a9bb",
"sum": "8dNyJ4vpnKVBbCFN9YLsugp1IjlZjDCwdKMjKi0KTG4="
"version": "11365f97bef6cb0e6259d536a7e21c49e3f5c065",
"sum": "xYj6VYFT/eafsbleNlC+Z2VfLy1CndyYrJs9BcTmnX8="
},
{
"source": {
@ -200,7 +200,7 @@
"subdir": "documentation/prometheus-mixin"
}
},
"version": "a5ffa83be83be22e2ec9fd1d4765299d8d16119e",
"version": "906f6a33b60cec2596018ac8cc97ac41b16b06b7",
"sum": "2c+wttfee9TwuQJZIkNV7Tekem74Qgc7iZ842P28rNw=",
"name": "prometheus"
},

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: kubezero-ci
description: KubeZero umbrella chart for all things CI
type: application
version: 0.8.20
version: 0.8.19
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
@ -15,22 +15,22 @@ maintainers:
email: stefan@zero-downtime.net
dependencies:
- name: kubezero-lib
version: 0.2.1
version: ">= 0.1.6"
repository: https://cdn.zero-downtime.net/charts/
- name: gitea
version: 10.6.0
repository: https://dl.gitea.io/charts/
condition: gitea.enabled
- name: jenkins
version: 5.8.16
version: 5.7.15
repository: https://charts.jenkins.io
condition: jenkins.enabled
- name: trivy
version: 0.11.1
version: 0.9.0
repository: https://aquasecurity.github.io/helm-charts/
condition: trivy.enabled
- name: renovate
version: 39.180.2
version: 39.33.1
repository: https://docs.renovatebot.com/helm-charts
condition: renovate.enabled
kubeVersion: ">= 1.25.0"

View File

@ -1,6 +1,6 @@
# kubezero-ci
![Version: 0.8.20](https://img.shields.io/badge/Version-0.8.20-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
![Version: 0.8.19](https://img.shields.io/badge/Version-0.8.19-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
KubeZero umbrella chart for all things CI
@ -18,11 +18,11 @@ Kubernetes: `>= 1.25.0`
| Repository | Name | Version |
|------------|------|---------|
| https://aquasecurity.github.io/helm-charts/ | trivy | 0.11.1 |
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | 0.1.6 |
| https://charts.jenkins.io | jenkins | 5.8.16 |
| https://aquasecurity.github.io/helm-charts/ | trivy | 0.9.0 |
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | >= 0.1.6 |
| https://charts.jenkins.io | jenkins | 5.7.15 |
| https://dl.gitea.io/charts/ | gitea | 10.6.0 |
| https://docs.renovatebot.com/helm-charts | renovate | 39.180.2 |
| https://docs.renovatebot.com/helm-charts | renovate | 39.33.1 |
# Jenkins
- default build retention 10 builds, 32days
@ -68,7 +68,7 @@ Kubernetes: `>= 1.25.0`
| gitea.gitea.metrics.enabled | bool | `false` | |
| gitea.gitea.metrics.serviceMonitor.enabled | bool | `true` | |
| gitea.image.rootless | bool | `true` | |
| gitea.image.tag | string | `"1.23.4"` | |
| gitea.image.tag | string | `"1.22.3"` | |
| gitea.istio.enabled | bool | `false` | |
| gitea.istio.gateway | string | `"istio-ingress/private-ingressgateway"` | |
| gitea.istio.url | string | `"git.example.com"` | |
@ -90,7 +90,6 @@ Kubernetes: `>= 1.25.0`
| jenkins.agent.containerCap | int | `2` | |
| jenkins.agent.customJenkinsLabels[0] | string | `"podman-aws-trivy"` | |
| jenkins.agent.defaultsProviderTemplate | string | `"podman-aws"` | |
| jenkins.agent.garbageCollection.enabled | bool | `true` | |
| jenkins.agent.idleMinutes | int | `30` | |
| jenkins.agent.image.repository | string | `"public.ecr.aws/zero-downtime/jenkins-podman"` | |
| jenkins.agent.image.tag | string | `"v0.7.0"` | |
@ -161,8 +160,7 @@ Kubernetes: `>= 1.25.0`
| renovate.cronjob.successfulJobsHistoryLimit | int | `1` | |
| renovate.enabled | bool | `false` | |
| renovate.env.LOG_FORMAT | string | `"json"` | |
| renovate.renovate.config | string | `"{\n}\n"` | |
| renovate.securityContext.fsGroupChangePolicy | string | `"OnRootMismatch"` | |
| renovate.securityContext.fsGroup | int | `1000` | |
| trivy.enabled | bool | `false` | |
| trivy.persistence.enabled | bool | `true` | |
| trivy.persistence.size | string | `"1Gi"` | |

View File

@ -12,122 +12,6 @@ Use the following links to reference issues, PRs, and commits prior to v2.6.0.
The changelog until v1.5.7 was auto-generated based on git commits.
Those entries include a reference to the git commit to be able to get more details.
## 5.8.16
Update `docker.io/kiwigrid/k8s-sidecar` to version `1.30.1`
## 5.8.15
Update `kubernetes` to version `4313.va_9b_4fe2a_0e34`
## 5.8.14
Update `jenkins/inbound-agent` to version `3283.v92c105e0f819-9`
## 5.8.13
Fix `agentListenerPort` not being updated in `config.xml` when set via Helm values.
## 5.8.12
Update plugin count.
## 5.8.11
Update `jenkins/inbound-agent` to version `3283.v92c105e0f819-8`
## 5.8.10
Update `jenkins/jenkins` to version `2.492.1-jdk17`
## 5.8.9
Update `configuration-as-code` to version `1932.v75cb_b_f1b_698d`
## 5.8.8
Update `docker.io/kiwigrid/k8s-sidecar` to version `1.30.0`
## 5.8.7
Update `configuration-as-code` to version `1929.v036b_5a_e1f123`
## 5.8.6
Update `docker.io/kiwigrid/k8s-sidecar` to version `1.29.1`
## 5.8.5
Update `jenkins/inbound-agent` to version `3283.v92c105e0f819-7`
## 5.8.4
Allow setting [automountServiceAccountToken](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/#opt-out-of-api-credential-automounting)
## 5.8.3
Update `docker.io/kiwigrid/k8s-sidecar` to version `1.29.0`
## 5.8.2
Update `jenkins/jenkins` to version `2.479.3-jdk17`
## 5.8.1
Update `configuration-as-code` to version `1915.vcdd0a_d0d2625`
## 5.8.0
Add option to publish not-ready addresses in controller service.
## 5.7.27
Update `git` to version `5.7.0`
## 5.7.26
Update `configuration-as-code` to version `1909.vb_b_f59a_27d013`
## 5.7.25
Update `kubernetes` to version `4306.vc91e951ea_eb_d`
## 5.7.24
Update `kubernetes` to version `4304.v1b_39d4f98210`
## 5.7.23
Update `docker.io/kiwigrid/k8s-sidecar` to version `1.28.4`
## 5.7.22
Update `docker.io/kiwigrid/k8s-sidecar` to version `1.28.3`
## 5.7.21
Update `docker.io/kiwigrid/k8s-sidecar` to version `1.28.1`
## 5.7.20
Update `kubernetes` to version `4302.va_756e4b_67715`
## 5.7.19
Update `configuration-as-code` to version `1903.v004d55388f30`
## 5.7.18
Update `kubernetes` to version `4300.vd82c5692b_3a_e`
## 5.7.17
Update `docker.io/bats/bats` to version `1.11.1`
## 5.7.16
Add tpl support for persistence.storageClassName in home-pvc.yaml and tpl support in controller.ingress parameters(ingressClassName, annotations, hostname) in jenkins-controller-ingress.yaml
## 5.7.15
Update `jenkins/jenkins` to version `2.479.2-jdk17`

View File

@ -1,14 +1,14 @@
annotations:
artifacthub.io/category: integration-delivery
artifacthub.io/changes: |
- Update `docker.io/kiwigrid/k8s-sidecar` to version `1.30.1`
- Update `jenkins/jenkins` to version `2.479.2-jdk17`
artifacthub.io/images: |
- name: jenkins
image: docker.io/jenkins/jenkins:2.492.1-jdk17
image: docker.io/jenkins/jenkins:2.479.2-jdk17
- name: k8s-sidecar
image: docker.io/kiwigrid/k8s-sidecar:1.30.1
image: docker.io/kiwigrid/k8s-sidecar:1.28.0
- name: inbound-agent
image: jenkins/inbound-agent:3283.v92c105e0f819-9
image: jenkins/inbound-agent:3273.v4cfe589b_fd83-1
artifacthub.io/license: Apache-2.0
artifacthub.io/links: |
- name: Chart Source
@ -18,9 +18,9 @@ annotations:
- name: support
url: https://github.com/jenkinsci/helm-charts/issues
apiVersion: v2
appVersion: 2.492.1
appVersion: 2.479.2
description: 'Jenkins - Build great things at any scale! As the leading open source
automation server, Jenkins provides over 2000 plugins to support building, deploying
automation server, Jenkins provides over 1800 plugins to support building, deploying
and automating any project. '
home: https://www.jenkins.io/
icon: https://get.jenkins.io/art/jenkins-logo/logo.svg
@ -46,4 +46,4 @@ sources:
- https://github.com/maorfr/kube-tasks
- https://github.com/jenkinsci/configuration-as-code-plugin
type: application
version: 5.8.16
version: 5.7.15

View File

@ -5,7 +5,7 @@
[![Releases downloads](https://img.shields.io/github/downloads/jenkinsci/helm-charts/total.svg)](https://github.com/jenkinsci/helm-charts/releases)
[![Join the chat at https://app.gitter.im/#/room/#jenkins-ci:matrix.org](https://badges.gitter.im/badge.svg)](https://app.gitter.im/#/room/#jenkins-ci:matrix.org)
[Jenkins](https://www.jenkins.io/) is the leading open source automation server, Jenkins provides over 2000 plugins to support building, deploying and automating any project.
[Jenkins](https://www.jenkins.io/) is the leading open source automation server, Jenkins provides over 1800 plugins to support building, deploying and automating any project.
This chart installs a Jenkins server which spawns agents on [Kubernetes](http://kubernetes.io) utilizing the [Jenkins Kubernetes plugin](https://plugins.jenkins.io/kubernetes/).

View File

@ -8,164 +8,164 @@ The following tables list the configurable parameters of the Jenkins chart and t
| Key | Type | Description | Default |
|:----|:-----|:---------|:------------|
| [additionalAgents](./values.yaml#L1199) | object | Configure additional agents | `{}` |
| [additionalClouds](./values.yaml#L1224) | object | | `{}` |
| [agent.TTYEnabled](./values.yaml#L1105) | bool | Allocate pseudo tty to the side container | `false` |
| [agent.additionalContainers](./values.yaml#L1152) | list | Add additional containers to the agents | `[]` |
| [agent.alwaysPullImage](./values.yaml#L998) | bool | Always pull agent container image before build | `false` |
| [agent.annotations](./values.yaml#L1148) | object | Annotations to apply to the pod | `{}` |
| [agent.args](./values.yaml#L1099) | string | Arguments passed to command to execute | `"${computer.jnlpmac} ${computer.name}"` |
| [agent.command](./values.yaml#L1097) | string | Command to execute when side container starts | `nil` |
| [agent.componentName](./values.yaml#L966) | string | | `"jenkins-agent"` |
| [agent.connectTimeout](./values.yaml#L1146) | int | Timeout in seconds for an agent to be online | `100` |
| [agent.containerCap](./values.yaml#L1107) | int | Max number of agents to launch | `10` |
| [agent.customJenkinsLabels](./values.yaml#L963) | list | Append Jenkins labels to the agent | `[]` |
| [agent.defaultsProviderTemplate](./values.yaml#L917) | string | The name of the pod template to use for providing default values | `""` |
| [agent.directConnection](./values.yaml#L969) | bool | | `false` |
| [agent.disableDefaultAgent](./values.yaml#L1170) | bool | Disable the default Jenkins Agent configuration | `false` |
| [agent.enabled](./values.yaml#L915) | bool | Enable Kubernetes plugin jnlp-agent podTemplate | `true` |
| [agent.envVars](./values.yaml#L1080) | list | Environment variables for the agent Pod | `[]` |
| [agent.garbageCollection.enabled](./values.yaml#L1114) | bool | When enabled, Jenkins will periodically check for orphan pods that have not been touched for the given timeout period and delete them. | `false` |
| [agent.garbageCollection.namespaces](./values.yaml#L1116) | string | Namespaces to look at for garbage collection, in addition to the default namespace defined for the cloud. One namespace per line. | `""` |
| [agent.garbageCollection.timeout](./values.yaml#L1121) | int | Timeout value for orphaned pods | `300` |
| [agent.hostNetworking](./values.yaml#L977) | bool | Enables the agent to use the host network | `false` |
| [agent.idleMinutes](./values.yaml#L1124) | int | Allows the Pod to remain active for reuse until the configured number of minutes has passed since the last step was executed on it | `0` |
| [agent.image.repository](./values.yaml#L956) | string | Repository to pull the agent jnlp image from | `"jenkins/inbound-agent"` |
| [agent.image.tag](./values.yaml#L958) | string | Tag of the image to pull | `"3283.v92c105e0f819-9"` |
| [agent.imagePullSecretName](./values.yaml#L965) | string | Name of the secret to be used to pull the image | `nil` |
| [agent.inheritYamlMergeStrategy](./values.yaml#L1144) | bool | Controls whether the defined yaml merge strategy will be inherited if another defined pod template is configured to inherit from the current one | `false` |
| [agent.jenkinsTunnel](./values.yaml#L933) | string | Overrides the Kubernetes Jenkins tunnel | `nil` |
| [agent.jenkinsUrl](./values.yaml#L929) | string | Overrides the Kubernetes Jenkins URL | `nil` |
| [agent.jnlpregistry](./values.yaml#L953) | string | Custom registry used to pull the agent jnlp image from | `nil` |
| [agent.kubernetesConnectTimeout](./values.yaml#L939) | int | The connection timeout in seconds for connections to Kubernetes API. The minimum value is 5 | `5` |
| [agent.kubernetesReadTimeout](./values.yaml#L941) | int | The read timeout in seconds for connections to Kubernetes API. The minimum value is 15 | `15` |
| [agent.livenessProbe](./values.yaml#L988) | object | | `{}` |
| [agent.maxRequestsPerHostStr](./values.yaml#L943) | string | The maximum concurrent connections to Kubernetes API | `"32"` |
| [agent.namespace](./values.yaml#L949) | string | Namespace in which the Kubernetes agents should be launched | `nil` |
| [agent.nodeSelector](./values.yaml#L1091) | object | Node labels for pod assignment | `{}` |
| [agent.nodeUsageMode](./values.yaml#L961) | string | | `"NORMAL"` |
| [agent.podLabels](./values.yaml#L951) | object | Custom Pod labels (an object with `label-key: label-value` pairs) | `{}` |
| [agent.podName](./values.yaml#L1109) | string | Agent Pod base name | `"default"` |
| [agent.podRetention](./values.yaml#L1007) | string | | `"Never"` |
| [agent.podTemplates](./values.yaml#L1180) | object | Configures extra pod templates for the default kubernetes cloud | `{}` |
| [agent.privileged](./values.yaml#L971) | bool | Agent privileged container | `false` |
| [agent.resources](./values.yaml#L979) | object | Resources allocation (Requests and Limits) | `{"limits":{"cpu":"512m","memory":"512Mi"},"requests":{"cpu":"512m","memory":"512Mi"}}` |
| [agent.restrictedPssSecurityContext](./values.yaml#L1004) | bool | Set a restricted securityContext on jnlp containers | `false` |
| [agent.retentionTimeout](./values.yaml#L945) | int | Time in minutes after which the Kubernetes cloud plugin will clean up an idle worker that has not already terminated | `5` |
| [agent.runAsGroup](./values.yaml#L975) | string | Configure container group | `nil` |
| [agent.runAsUser](./values.yaml#L973) | string | Configure container user | `nil` |
| [agent.secretEnvVars](./values.yaml#L1084) | list | Mount a secret as environment variable | `[]` |
| [agent.serviceAccount](./values.yaml#L925) | string | Override the default service account | `serviceAccountAgent.name` if `agent.useDefaultServiceAccount` is `true` |
| [agent.showRawYaml](./values.yaml#L1011) | bool | | `true` |
| [agent.sideContainerName](./values.yaml#L1101) | string | Side container name | `"jnlp"` |
| [agent.skipTlsVerify](./values.yaml#L935) | bool | Disables the verification of the controller certificate on remote connection. This flag corresponds to the "Disable https certificate check" flag in the Kubernetes plugin UI | `false` |
| [agent.usageRestricted](./values.yaml#L937) | bool | Enable the possibility to restrict the usage of this agent to specific folders. This flag corresponds to the "Restrict pipeline support to authorized folders" flag in the Kubernetes plugin UI | `false` |
| [agent.useDefaultServiceAccount](./values.yaml#L921) | bool | Use `serviceAccountAgent.name` as the default value for defaults template `serviceAccount` | `true` |
| [agent.volumes](./values.yaml#L1018) | list | Additional volumes | `[]` |
| [agent.waitForPodSec](./values.yaml#L947) | int | Seconds to wait for pod to be running | `600` |
| [agent.websocket](./values.yaml#L968) | bool | Enables agent communication via websockets | `false` |
| [agent.workingDir](./values.yaml#L960) | string | Configure working directory for default agent | `"/home/jenkins/agent"` |
| [agent.workspaceVolume](./values.yaml#L1053) | object | Workspace volume (defaults to EmptyDir) | `{}` |
| [agent.yamlMergeStrategy](./values.yaml#L1142) | string | Defines how the raw yaml field gets merged with yaml definitions from inherited pod templates. Possible values: "merge" or "override" | `"override"` |
| [agent.yamlTemplate](./values.yaml#L1131) | string | The raw yaml of a Pod API Object to merge into the agent spec | `""` |
| [awsSecurityGroupPolicies.enabled](./values.yaml#L1356) | bool | | `false` |
| [awsSecurityGroupPolicies.policies[0].name](./values.yaml#L1358) | string | | `""` |
| [awsSecurityGroupPolicies.policies[0].podSelector](./values.yaml#L1360) | object | | `{}` |
| [awsSecurityGroupPolicies.policies[0].securityGroupIds](./values.yaml#L1359) | list | | `[]` |
| [checkDeprecation](./values.yaml#L1353) | bool | Checks if any deprecated values are used | `true` |
| [additionalAgents](./values.yaml#L1195) | object | Configure additional agents | `{}` |
| [additionalClouds](./values.yaml#L1220) | object | | `{}` |
| [agent.TTYEnabled](./values.yaml#L1101) | bool | Allocate pseudo tty to the side container | `false` |
| [agent.additionalContainers](./values.yaml#L1148) | list | Add additional containers to the agents | `[]` |
| [agent.alwaysPullImage](./values.yaml#L994) | bool | Always pull agent container image before build | `false` |
| [agent.annotations](./values.yaml#L1144) | object | Annotations to apply to the pod | `{}` |
| [agent.args](./values.yaml#L1095) | string | Arguments passed to command to execute | `"${computer.jnlpmac} ${computer.name}"` |
| [agent.command](./values.yaml#L1093) | string | Command to execute when side container starts | `nil` |
| [agent.componentName](./values.yaml#L962) | string | | `"jenkins-agent"` |
| [agent.connectTimeout](./values.yaml#L1142) | int | Timeout in seconds for an agent to be online | `100` |
| [agent.containerCap](./values.yaml#L1103) | int | Max number of agents to launch | `10` |
| [agent.customJenkinsLabels](./values.yaml#L959) | list | Append Jenkins labels to the agent | `[]` |
| [agent.defaultsProviderTemplate](./values.yaml#L913) | string | The name of the pod template to use for providing default values | `""` |
| [agent.directConnection](./values.yaml#L965) | bool | | `false` |
| [agent.disableDefaultAgent](./values.yaml#L1166) | bool | Disable the default Jenkins Agent configuration | `false` |
| [agent.enabled](./values.yaml#L911) | bool | Enable Kubernetes plugin jnlp-agent podTemplate | `true` |
| [agent.envVars](./values.yaml#L1076) | list | Environment variables for the agent Pod | `[]` |
| [agent.garbageCollection.enabled](./values.yaml#L1110) | bool | When enabled, Jenkins will periodically check for orphan pods that have not been touched for the given timeout period and delete them. | `false` |
| [agent.garbageCollection.namespaces](./values.yaml#L1112) | string | Namespaces to look at for garbage collection, in addition to the default namespace defined for the cloud. One namespace per line. | `""` |
| [agent.garbageCollection.timeout](./values.yaml#L1117) | int | Timeout value for orphaned pods | `300` |
| [agent.hostNetworking](./values.yaml#L973) | bool | Enables the agent to use the host network | `false` |
| [agent.idleMinutes](./values.yaml#L1120) | int | Allows the Pod to remain active for reuse until the configured number of minutes has passed since the last step was executed on it | `0` |
| [agent.image.repository](./values.yaml#L952) | string | Repository to pull the agent jnlp image from | `"jenkins/inbound-agent"` |
| [agent.image.tag](./values.yaml#L954) | string | Tag of the image to pull | `"3273.v4cfe589b_fd83-1"` |
| [agent.imagePullSecretName](./values.yaml#L961) | string | Name of the secret to be used to pull the image | `nil` |
| [agent.inheritYamlMergeStrategy](./values.yaml#L1140) | bool | Controls whether the defined yaml merge strategy will be inherited if another defined pod template is configured to inherit from the current one | `false` |
| [agent.jenkinsTunnel](./values.yaml#L929) | string | Overrides the Kubernetes Jenkins tunnel | `nil` |
| [agent.jenkinsUrl](./values.yaml#L925) | string | Overrides the Kubernetes Jenkins URL | `nil` |
| [agent.jnlpregistry](./values.yaml#L949) | string | Custom registry used to pull the agent jnlp image from | `nil` |
| [agent.kubernetesConnectTimeout](./values.yaml#L935) | int | The connection timeout in seconds for connections to Kubernetes API. The minimum value is 5 | `5` |
| [agent.kubernetesReadTimeout](./values.yaml#L937) | int | The read timeout in seconds for connections to Kubernetes API. The minimum value is 15 | `15` |
| [agent.livenessProbe](./values.yaml#L984) | object | | `{}` |
| [agent.maxRequestsPerHostStr](./values.yaml#L939) | string | The maximum concurrent connections to Kubernetes API | `"32"` |
| [agent.namespace](./values.yaml#L945) | string | Namespace in which the Kubernetes agents should be launched | `nil` |
| [agent.nodeSelector](./values.yaml#L1087) | object | Node labels for pod assignment | `{}` |
| [agent.nodeUsageMode](./values.yaml#L957) | string | | `"NORMAL"` |
| [agent.podLabels](./values.yaml#L947) | object | Custom Pod labels (an object with `label-key: label-value` pairs) | `{}` |
| [agent.podName](./values.yaml#L1105) | string | Agent Pod base name | `"default"` |
| [agent.podRetention](./values.yaml#L1003) | string | | `"Never"` |
| [agent.podTemplates](./values.yaml#L1176) | object | Configures extra pod templates for the default kubernetes cloud | `{}` |
| [agent.privileged](./values.yaml#L967) | bool | Agent privileged container | `false` |
| [agent.resources](./values.yaml#L975) | object | Resources allocation (Requests and Limits) | `{"limits":{"cpu":"512m","memory":"512Mi"},"requests":{"cpu":"512m","memory":"512Mi"}}` |
| [agent.restrictedPssSecurityContext](./values.yaml#L1000) | bool | Set a restricted securityContext on jnlp containers | `false` |
| [agent.retentionTimeout](./values.yaml#L941) | int | Time in minutes after which the Kubernetes cloud plugin will clean up an idle worker that has not already terminated | `5` |
| [agent.runAsGroup](./values.yaml#L971) | string | Configure container group | `nil` |
| [agent.runAsUser](./values.yaml#L969) | string | Configure container user | `nil` |
| [agent.secretEnvVars](./values.yaml#L1080) | list | Mount a secret as environment variable | `[]` |
| [agent.serviceAccount](./values.yaml#L921) | string | Override the default service account | `serviceAccountAgent.name` if `agent.useDefaultServiceAccount` is `true` |
| [agent.showRawYaml](./values.yaml#L1007) | bool | | `true` |
| [agent.sideContainerName](./values.yaml#L1097) | string | Side container name | `"jnlp"` |
| [agent.skipTlsVerify](./values.yaml#L931) | bool | Disables the verification of the controller certificate on remote connection. This flag corresponds to the "Disable https certificate check" flag in the Kubernetes plugin UI | `false` |
| [agent.usageRestricted](./values.yaml#L933) | bool | Enable the possibility to restrict the usage of this agent to specific folders. This flag corresponds to the "Restrict pipeline support to authorized folders" flag in the Kubernetes plugin UI | `false` |
| [agent.useDefaultServiceAccount](./values.yaml#L917) | bool | Use `serviceAccountAgent.name` as the default value for defaults template `serviceAccount` | `true` |
| [agent.volumes](./values.yaml#L1014) | list | Additional volumes | `[]` |
| [agent.waitForPodSec](./values.yaml#L943) | int | Seconds to wait for pod to be running | `600` |
| [agent.websocket](./values.yaml#L964) | bool | Enables agent communication via websockets | `false` |
| [agent.workingDir](./values.yaml#L956) | string | Configure working directory for default agent | `"/home/jenkins/agent"` |
| [agent.workspaceVolume](./values.yaml#L1049) | object | Workspace volume (defaults to EmptyDir) | `{}` |
| [agent.yamlMergeStrategy](./values.yaml#L1138) | string | Defines how the raw yaml field gets merged with yaml definitions from inherited pod templates. Possible values: "merge" or "override" | `"override"` |
| [agent.yamlTemplate](./values.yaml#L1127) | string | The raw yaml of a Pod API Object to merge into the agent spec | `""` |
| [awsSecurityGroupPolicies.enabled](./values.yaml#L1348) | bool | | `false` |
| [awsSecurityGroupPolicies.policies[0].name](./values.yaml#L1350) | string | | `""` |
| [awsSecurityGroupPolicies.policies[0].podSelector](./values.yaml#L1352) | object | | `{}` |
| [awsSecurityGroupPolicies.policies[0].securityGroupIds](./values.yaml#L1351) | list | | `[]` |
| [checkDeprecation](./values.yaml#L1345) | bool | Checks if any deprecated values are used | `true` |
| [clusterZone](./values.yaml#L21) | string | Override the cluster name for FQDN resolving | `"cluster.local"` |
| [controller.JCasC.authorizationStrategy](./values.yaml#L543) | string | Jenkins Config as Code Authorization Strategy-section | `"loggedInUsersCanDoAnything:\n allowAnonymousRead: false"` |
| [controller.JCasC.configMapAnnotations](./values.yaml#L548) | object | Annotations for the JCasC ConfigMap | `{}` |
| [controller.JCasC.configScripts](./values.yaml#L517) | object | List of Jenkins Config as Code scripts | `{}` |
| [controller.JCasC.configUrls](./values.yaml#L514) | list | Remote URLs for configuration files. | `[]` |
| [controller.JCasC.defaultConfig](./values.yaml#L508) | bool | Enables default Jenkins configuration via configuration as code plugin | `true` |
| [controller.JCasC.overwriteConfiguration](./values.yaml#L512) | bool | Whether Jenkins Config as Code should overwrite any existing configuration | `false` |
| [controller.JCasC.security](./values.yaml#L524) | object | Jenkins Config as Code security-section | `{"apiToken":{"creationOfLegacyTokenEnabled":false,"tokenGenerationOnCreationEnabled":false,"usageStatisticsEnabled":true}}` |
| [controller.JCasC.securityRealm](./values.yaml#L532) | string | Jenkins Config as Code Security Realm-section | `"local:\n allowsSignup: false\n enableCaptcha: false\n users:\n - id: \"${chart-admin-username}\"\n name: \"Jenkins Admin\"\n password: \"${chart-admin-password}\""` |
| [controller.additionalExistingSecrets](./values.yaml#L469) | list | List of additional existing secrets to mount | `[]` |
| [controller.additionalPlugins](./values.yaml#L419) | list | List of plugins to install in addition to those listed in controller.installPlugins | `[]` |
| [controller.additionalSecrets](./values.yaml#L478) | list | List of additional secrets to create and mount | `[]` |
| [controller.JCasC.authorizationStrategy](./values.yaml#L539) | string | Jenkins Config as Code Authorization Strategy-section | `"loggedInUsersCanDoAnything:\n allowAnonymousRead: false"` |
| [controller.JCasC.configMapAnnotations](./values.yaml#L544) | object | Annotations for the JCasC ConfigMap | `{}` |
| [controller.JCasC.configScripts](./values.yaml#L513) | object | List of Jenkins Config as Code scripts | `{}` |
| [controller.JCasC.configUrls](./values.yaml#L510) | list | Remote URLs for configuration files. | `[]` |
| [controller.JCasC.defaultConfig](./values.yaml#L504) | bool | Enables default Jenkins configuration via configuration as code plugin | `true` |
| [controller.JCasC.overwriteConfiguration](./values.yaml#L508) | bool | Whether Jenkins Config as Code should overwrite any existing configuration | `false` |
| [controller.JCasC.security](./values.yaml#L520) | object | Jenkins Config as Code security-section | `{"apiToken":{"creationOfLegacyTokenEnabled":false,"tokenGenerationOnCreationEnabled":false,"usageStatisticsEnabled":true}}` |
| [controller.JCasC.securityRealm](./values.yaml#L528) | string | Jenkins Config as Code Security Realm-section | `"local:\n allowsSignup: false\n enableCaptcha: false\n users:\n - id: \"${chart-admin-username}\"\n name: \"Jenkins Admin\"\n password: \"${chart-admin-password}\""` |
| [controller.additionalExistingSecrets](./values.yaml#L465) | list | List of additional existing secrets to mount | `[]` |
| [controller.additionalPlugins](./values.yaml#L415) | list | List of plugins to install in addition to those listed in controller.installPlugins | `[]` |
| [controller.additionalSecrets](./values.yaml#L474) | list | List of additional secrets to create and mount | `[]` |
| [controller.admin.createSecret](./values.yaml#L91) | bool | Create secret for admin user | `true` |
| [controller.admin.existingSecret](./values.yaml#L94) | string | The name of an existing secret containing the admin credentials | `""` |
| [controller.admin.password](./values.yaml#L81) | string | Admin password created as a secret if `controller.admin.createSecret` is true | `<random password>` |
| [controller.admin.passwordKey](./values.yaml#L86) | string | The key in the existing admin secret containing the password | `"jenkins-admin-password"` |
| [controller.admin.userKey](./values.yaml#L84) | string | The key in the existing admin secret containing the username | `"jenkins-admin-user"` |
| [controller.admin.username](./values.yaml#L78) | string | Admin username created as a secret if `controller.admin.createSecret` is true | `"admin"` |
| [controller.affinity](./values.yaml#L670) | object | Affinity settings | `{}` |
| [controller.agentListenerEnabled](./values.yaml#L328) | bool | Create Agent listener service | `true` |
| [controller.agentListenerExternalTrafficPolicy](./values.yaml#L338) | string | Traffic policy for the agentListener service | `nil` |
| [controller.agentListenerHostPort](./values.yaml#L332) | string | Host port to listen for agents | `nil` |
| [controller.agentListenerLoadBalancerIP](./values.yaml#L368) | string | Static IP for the agentListener LoadBalancer | `nil` |
| [controller.agentListenerLoadBalancerSourceRanges](./values.yaml#L340) | list | Allowed inbound IP for the agentListener service | `["0.0.0.0/0"]` |
| [controller.agentListenerNodePort](./values.yaml#L334) | string | Node port to listen for agents | `nil` |
| [controller.agentListenerPort](./values.yaml#L330) | int | Listening port for agents | `50000` |
| [controller.agentListenerServiceAnnotations](./values.yaml#L363) | object | Annotations for the agentListener service | `{}` |
| [controller.agentListenerServiceType](./values.yaml#L360) | string | Defines how to expose the agentListener service | `"ClusterIP"` |
| [controller.backendconfig.annotations](./values.yaml#L773) | object | backendconfig annotations | `{}` |
| [controller.backendconfig.apiVersion](./values.yaml#L767) | string | backendconfig API version | `"extensions/v1beta1"` |
| [controller.backendconfig.enabled](./values.yaml#L765) | bool | Enables backendconfig | `false` |
| [controller.backendconfig.labels](./values.yaml#L771) | object | backendconfig labels | `{}` |
| [controller.backendconfig.name](./values.yaml#L769) | string | backendconfig name | `nil` |
| [controller.backendconfig.spec](./values.yaml#L775) | object | backendconfig spec | `{}` |
| [controller.cloudName](./values.yaml#L497) | string | Name of default cloud configuration. | `"kubernetes"` |
| [controller.affinity](./values.yaml#L666) | object | Affinity settings | `{}` |
| [controller.agentListenerEnabled](./values.yaml#L324) | bool | Create Agent listener service | `true` |
| [controller.agentListenerExternalTrafficPolicy](./values.yaml#L334) | string | Traffic policy for the agentListener service | `nil` |
| [controller.agentListenerHostPort](./values.yaml#L328) | string | Host port to listen for agents | `nil` |
| [controller.agentListenerLoadBalancerIP](./values.yaml#L364) | string | Static IP for the agentListener LoadBalancer | `nil` |
| [controller.agentListenerLoadBalancerSourceRanges](./values.yaml#L336) | list | Allowed inbound IP for the agentListener service | `["0.0.0.0/0"]` |
| [controller.agentListenerNodePort](./values.yaml#L330) | string | Node port to listen for agents | `nil` |
| [controller.agentListenerPort](./values.yaml#L326) | int | Listening port for agents | `50000` |
| [controller.agentListenerServiceAnnotations](./values.yaml#L359) | object | Annotations for the agentListener service | `{}` |
| [controller.agentListenerServiceType](./values.yaml#L356) | string | Defines how to expose the agentListener service | `"ClusterIP"` |
| [controller.backendconfig.annotations](./values.yaml#L769) | object | backendconfig annotations | `{}` |
| [controller.backendconfig.apiVersion](./values.yaml#L763) | string | backendconfig API version | `"extensions/v1beta1"` |
| [controller.backendconfig.enabled](./values.yaml#L761) | bool | Enables backendconfig | `false` |
| [controller.backendconfig.labels](./values.yaml#L767) | object | backendconfig labels | `{}` |
| [controller.backendconfig.name](./values.yaml#L765) | string | backendconfig name | `nil` |
| [controller.backendconfig.spec](./values.yaml#L771) | object | backendconfig spec | `{}` |
| [controller.cloudName](./values.yaml#L493) | string | Name of default cloud configuration. | `"kubernetes"` |
| [controller.clusterIp](./values.yaml#L223) | string | k8s service clusterIP. Only used if serviceType is ClusterIP | `nil` |
| [controller.componentName](./values.yaml#L34) | string | Used for label app.kubernetes.io/component | `"jenkins-controller"` |
| [controller.containerEnv](./values.yaml#L156) | list | Environment variables for Jenkins Container | `[]` |
| [controller.containerEnvFrom](./values.yaml#L153) | list | Environment variable sources for Jenkins Container | `[]` |
| [controller.containerSecurityContext](./values.yaml#L211) | object | Allow controlling the securityContext for the jenkins container | `{"allowPrivilegeEscalation":false,"readOnlyRootFilesystem":true,"runAsGroup":1000,"runAsUser":1000}` |
| [controller.csrf.defaultCrumbIssuer.enabled](./values.yaml#L349) | bool | Enable the default CSRF Crumb issuer | `true` |
| [controller.csrf.defaultCrumbIssuer.proxyCompatability](./values.yaml#L351) | bool | Enable proxy compatibility | `true` |
| [controller.customInitContainers](./values.yaml#L551) | list | Custom init-container specification in raw-yaml format | `[]` |
| [controller.csrf.defaultCrumbIssuer.enabled](./values.yaml#L345) | bool | Enable the default CSRF Crumb issuer | `true` |
| [controller.csrf.defaultCrumbIssuer.proxyCompatability](./values.yaml#L347) | bool | Enable proxy compatibility | `true` |
| [controller.customInitContainers](./values.yaml#L547) | list | Custom init-container specification in raw-yaml format | `[]` |
| [controller.customJenkinsLabels](./values.yaml#L68) | list | Append Jenkins labels to the controller | `[]` |
| [controller.disableRememberMe](./values.yaml#L59) | bool | Disable use of remember me | `false` |
| [controller.disabledAgentProtocols](./values.yaml#L343) | list | Disabled agent protocols | `["JNLP-connect","JNLP2-connect"]` |
| [controller.enableRawHtmlMarkupFormatter](./values.yaml#L439) | bool | Enable HTML parsing using OWASP Markup Formatter Plugin (antisamy-markup-formatter) | `false` |
| [controller.disabledAgentProtocols](./values.yaml#L339) | list | Disabled agent protocols | `["JNLP-connect","JNLP2-connect"]` |
| [controller.enableRawHtmlMarkupFormatter](./values.yaml#L435) | bool | Enable HTML parsing using OWASP Markup Formatter Plugin (antisamy-markup-formatter) | `false` |
| [controller.enableServiceLinks](./values.yaml#L130) | bool | | `false` |
| [controller.executorMode](./values.yaml#L65) | string | Sets the executor mode of the Jenkins node. Possible values are "NORMAL" or "EXCLUSIVE" | `"NORMAL"` |
| [controller.existingSecret](./values.yaml#L466) | string | | `nil` |
| [controller.extraPorts](./values.yaml#L398) | list | Optionally configure other ports to expose in the controller container | `[]` |
| [controller.existingSecret](./values.yaml#L462) | string | | `nil` |
| [controller.extraPorts](./values.yaml#L394) | list | Optionally configure other ports to expose in the controller container | `[]` |
| [controller.fsGroup](./values.yaml#L192) | int | Deprecated in favor of `controller.podSecurityContextOverride`. uid that will be used for persistent volume. | `1000` |
| [controller.googlePodMonitor.enabled](./values.yaml#L836) | bool | | `false` |
| [controller.googlePodMonitor.scrapeEndpoint](./values.yaml#L841) | string | | `"/prometheus"` |
| [controller.googlePodMonitor.scrapeInterval](./values.yaml#L839) | string | | `"60s"` |
| [controller.healthProbes](./values.yaml#L258) | bool | Enable Kubernetes Probes configuration configured in `controller.probes` | `true` |
| [controller.hostAliases](./values.yaml#L789) | list | Allows for adding entries to Pod /etc/hosts | `[]` |
| [controller.googlePodMonitor.enabled](./values.yaml#L832) | bool | | `false` |
| [controller.googlePodMonitor.scrapeEndpoint](./values.yaml#L837) | string | | `"/prometheus"` |
| [controller.googlePodMonitor.scrapeInterval](./values.yaml#L835) | string | | `"60s"` |
| [controller.healthProbes](./values.yaml#L254) | bool | Enable Kubernetes Probes configuration configured in `controller.probes` | `true` |
| [controller.hostAliases](./values.yaml#L785) | list | Allows for adding entries to Pod /etc/hosts | `[]` |
| [controller.hostNetworking](./values.yaml#L70) | bool | | `false` |
| [controller.httpsKeyStore.disableSecretMount](./values.yaml#L857) | bool | | `false` |
| [controller.httpsKeyStore.enable](./values.yaml#L848) | bool | Enables HTTPS keystore on jenkins controller | `false` |
| [controller.httpsKeyStore.fileName](./values.yaml#L865) | string | Jenkins keystore filename which will appear under controller.httpsKeyStore.path | `"keystore.jks"` |
| [controller.httpsKeyStore.httpPort](./values.yaml#L861) | int | HTTP Port that Jenkins should listen to along with HTTPS, it also serves as the liveness and readiness probes port. | `8081` |
| [controller.httpsKeyStore.jenkinsHttpsJksPasswordSecretKey](./values.yaml#L856) | string | Name of the key in the secret that contains the JKS password | `"https-jks-password"` |
| [controller.httpsKeyStore.jenkinsHttpsJksPasswordSecretName](./values.yaml#L854) | string | Name of the secret that contains the JKS password, if it is not in the same secret as the JKS file | `""` |
| [controller.httpsKeyStore.jenkinsHttpsJksSecretKey](./values.yaml#L852) | string | Name of the key in the secret that already has ssl keystore | `"jenkins-jks-file"` |
| [controller.httpsKeyStore.jenkinsHttpsJksSecretName](./values.yaml#L850) | string | Name of the secret that already has ssl keystore | `""` |
| [controller.httpsKeyStore.jenkinsKeyStoreBase64Encoded](./values.yaml#L870) | string | Base64-encoded keystore content. The keystore must be converted to base64 and then pasted here | `nil` |
| [controller.httpsKeyStore.password](./values.yaml#L867) | string | Jenkins keystore password | `"password"` |
| [controller.httpsKeyStore.path](./values.yaml#L863) | string | Path of HTTPS keystore file | `"/var/jenkins_keystore"` |
| [controller.httpsKeyStore.disableSecretMount](./values.yaml#L853) | bool | | `false` |
| [controller.httpsKeyStore.enable](./values.yaml#L844) | bool | Enables HTTPS keystore on jenkins controller | `false` |
| [controller.httpsKeyStore.fileName](./values.yaml#L861) | string | Jenkins keystore filename which will appear under controller.httpsKeyStore.path | `"keystore.jks"` |
| [controller.httpsKeyStore.httpPort](./values.yaml#L857) | int | HTTP port that Jenkins should listen on alongside HTTPS; it also serves as the port for the liveness and readiness probes. | `8081` |
| [controller.httpsKeyStore.jenkinsHttpsJksPasswordSecretKey](./values.yaml#L852) | string | Name of the key in the secret that contains the JKS password | `"https-jks-password"` |
| [controller.httpsKeyStore.jenkinsHttpsJksPasswordSecretName](./values.yaml#L850) | string | Name of the secret that contains the JKS password, if it is not in the same secret as the JKS file | `""` |
| [controller.httpsKeyStore.jenkinsHttpsJksSecretKey](./values.yaml#L848) | string | Name of the key in the secret that already has ssl keystore | `"jenkins-jks-file"` |
| [controller.httpsKeyStore.jenkinsHttpsJksSecretName](./values.yaml#L846) | string | Name of the secret that already has ssl keystore | `""` |
| [controller.httpsKeyStore.jenkinsKeyStoreBase64Encoded](./values.yaml#L866) | string | Base64-encoded keystore content. The keystore must be converted to Base64 and then pasted here | `nil` |
| [controller.httpsKeyStore.password](./values.yaml#L863) | string | Jenkins keystore password | `"password"` |
| [controller.httpsKeyStore.path](./values.yaml#L859) | string | Path of HTTPS keystore file | `"/var/jenkins_keystore"` |
| [controller.image.pullPolicy](./values.yaml#L47) | string | Controller image pull policy | `"Always"` |
| [controller.image.registry](./values.yaml#L37) | string | Controller image registry | `"docker.io"` |
| [controller.image.repository](./values.yaml#L39) | string | Controller image repository | `"jenkins/jenkins"` |
| [controller.image.tag](./values.yaml#L42) | string | Controller image tag override; i.e., tag: "2.440.1-jdk17" | `nil` |
| [controller.image.tagLabel](./values.yaml#L45) | string | Controller image tag label | `"jdk17"` |
| [controller.imagePullSecretName](./values.yaml#L49) | string | Controller image pull secret | `nil` |
| [controller.ingress.annotations](./values.yaml#L712) | object | Ingress annotations | `{}` |
| [controller.ingress.apiVersion](./values.yaml#L708) | string | Ingress API version | `"extensions/v1beta1"` |
| [controller.ingress.enabled](./values.yaml#L691) | bool | Enables ingress | `false` |
| [controller.ingress.hostName](./values.yaml#L725) | string | Ingress hostname | `nil` |
| [controller.ingress.labels](./values.yaml#L710) | object | Ingress labels | `{}` |
| [controller.ingress.path](./values.yaml#L721) | string | Ingress path | `nil` |
| [controller.ingress.paths](./values.yaml#L695) | list | Override for the default Ingress paths | `[]` |
| [controller.ingress.resourceRootUrl](./values.yaml#L727) | string | Hostname to serve assets from | `nil` |
| [controller.ingress.tls](./values.yaml#L729) | list | Ingress TLS configuration | `[]` |
| [controller.initConfigMap](./values.yaml#L456) | string | Name of the existing ConfigMap that contains init scripts | `nil` |
| [controller.ingress.annotations](./values.yaml#L708) | object | Ingress annotations | `{}` |
| [controller.ingress.apiVersion](./values.yaml#L704) | string | Ingress API version | `"extensions/v1beta1"` |
| [controller.ingress.enabled](./values.yaml#L687) | bool | Enables ingress | `false` |
| [controller.ingress.hostName](./values.yaml#L721) | string | Ingress hostname | `nil` |
| [controller.ingress.labels](./values.yaml#L706) | object | Ingress labels | `{}` |
| [controller.ingress.path](./values.yaml#L717) | string | Ingress path | `nil` |
| [controller.ingress.paths](./values.yaml#L691) | list | Override for the default Ingress paths | `[]` |
| [controller.ingress.resourceRootUrl](./values.yaml#L723) | string | Hostname to serve assets from | `nil` |
| [controller.ingress.tls](./values.yaml#L725) | list | Ingress TLS configuration | `[]` |
| [controller.initConfigMap](./values.yaml#L452) | string | Name of the existing ConfigMap that contains init scripts | `nil` |
| [controller.initContainerEnv](./values.yaml#L147) | list | Environment variables for Init Container | `[]` |
| [controller.initContainerEnvFrom](./values.yaml#L143) | list | Environment variable sources for Init Container | `[]` |
| [controller.initContainerResources](./values.yaml#L134) | object | Resources allocation (Requests and Limits) for Init Container | `{}` |
| [controller.initScripts](./values.yaml#L452) | object | Map of groovy init scripts to be executed during Jenkins controller start | `{}` |
| [controller.initializeOnce](./values.yaml#L424) | bool | Initialize only on first installation. Ensures plugins do not get updated inadvertently. Requires `persistence.enabled` to be set to `true` | `false` |
| [controller.installLatestPlugins](./values.yaml#L413) | bool | Download the minimum required version or latest version of all dependencies | `true` |
| [controller.installLatestSpecifiedPlugins](./values.yaml#L416) | bool | Set to true to download the latest version of any plugin that is requested to have the latest version | `false` |
| [controller.installPlugins](./values.yaml#L405) | list | List of Jenkins plugins to install. If you don't want to install plugins, set it to `false` | `["kubernetes:4313.va_9b_4fe2a_0e34","workflow-aggregator:600.vb_57cdd26fdd7","git:5.7.0","configuration-as-code:1932.v75cb_b_f1b_698d"]` |
| [controller.initScripts](./values.yaml#L448) | object | Map of groovy init scripts to be executed during Jenkins controller start | `{}` |
| [controller.initializeOnce](./values.yaml#L420) | bool | Initialize only on first installation. Ensures plugins do not get updated inadvertently. Requires `persistence.enabled` to be set to `true` | `false` |
| [controller.installLatestPlugins](./values.yaml#L409) | bool | Download the minimum required version or latest version of all dependencies | `true` |
| [controller.installLatestSpecifiedPlugins](./values.yaml#L412) | bool | Set to true to download the latest version of any plugin that is requested to have the latest version | `false` |
| [controller.installPlugins](./values.yaml#L401) | list | List of Jenkins plugins to install. If you don't want to install plugins, set it to `false` | `["kubernetes:4296.v20a_7e4d77cf6","workflow-aggregator:600.vb_57cdd26fdd7","git:5.6.0","configuration-as-code:1897.v79281e066ea_7"]` |
| [controller.javaOpts](./values.yaml#L162) | string | Append to `JAVA_OPTS` env var | `nil` |
| [controller.jenkinsAdminEmail](./values.yaml#L96) | string | Email address for the administrator of the Jenkins instance | `nil` |
| [controller.jenkinsHome](./values.yaml#L101) | string | Custom Jenkins home path | `"/var/jenkins_home"` |
@ -175,147 +175,144 @@ The following tables list the configurable parameters of the Jenkins chart and t
| [controller.jenkinsUrl](./values.yaml#L174) | string | Set Jenkins URL if you are not using the ingress definitions provided by the chart | `nil` |
| [controller.jenkinsUrlProtocol](./values.yaml#L171) | string | Set protocol for Jenkins URL; `https` if `controller.ingress.tls`, `http` otherwise | `nil` |
| [controller.jenkinsWar](./values.yaml#L109) | string | | `"/usr/share/jenkins/jenkins.war"` |
| [controller.jmxPort](./values.yaml#L395) | string | Open a port, for JMX stats | `nil` |
| [controller.legacyRemotingSecurityEnabled](./values.yaml#L371) | bool | Whether legacy remoting security should be enabled | `false` |
| [controller.jmxPort](./values.yaml#L391) | string | Open a port, for JMX stats | `nil` |
| [controller.legacyRemotingSecurityEnabled](./values.yaml#L367) | bool | Whether legacy remoting security should be enabled | `false` |
| [controller.lifecycle](./values.yaml#L51) | object | Lifecycle specification for controller-container | `{}` |
| [controller.loadBalancerIP](./values.yaml#L386) | string | Optionally assign a known public LB IP | `nil` |
| [controller.loadBalancerSourceRanges](./values.yaml#L382) | list | Allowed inbound IP addresses | `["0.0.0.0/0"]` |
| [controller.markupFormatter](./values.yaml#L443) | string | Yaml of the markup formatter to use | `"plainText"` |
| [controller.loadBalancerIP](./values.yaml#L382) | string | Optionally assign a known public LB IP | `nil` |
| [controller.loadBalancerSourceRanges](./values.yaml#L378) | list | Allowed inbound IP addresses | `["0.0.0.0/0"]` |
| [controller.markupFormatter](./values.yaml#L439) | string | Yaml of the markup formatter to use | `"plainText"` |
| [controller.nodePort](./values.yaml#L229) | string | k8s node port. Only used if serviceType is NodePort | `nil` |
| [controller.nodeSelector](./values.yaml#L657) | object | Node labels for pod assignment | `{}` |
| [controller.nodeSelector](./values.yaml#L653) | object | Node labels for pod assignment | `{}` |
| [controller.numExecutors](./values.yaml#L62) | int | Set Number of executors | `0` |
| [controller.overwritePlugins](./values.yaml#L428) | bool | Overwrite installed plugins on start | `false` |
| [controller.overwritePluginsFromImage](./values.yaml#L432) | bool | Overwrite plugins that are already installed in the controller image | `true` |
| [controller.podAnnotations](./values.yaml#L678) | object | Annotations for controller pod | `{}` |
| [controller.podDisruptionBudget.annotations](./values.yaml#L322) | object | | `{}` |
| [controller.podDisruptionBudget.apiVersion](./values.yaml#L320) | string | Policy API version | `"policy/v1beta1"` |
| [controller.podDisruptionBudget.enabled](./values.yaml#L315) | bool | Enable Kubernetes Pod Disruption Budget configuration | `false` |
| [controller.podDisruptionBudget.labels](./values.yaml#L323) | object | | `{}` |
| [controller.podDisruptionBudget.maxUnavailable](./values.yaml#L325) | string | Number of pods that can be unavailable. Either an absolute number or a percentage | `"0"` |
| [controller.podLabels](./values.yaml#L251) | object | Custom Pod labels (an object with `label-key: label-value` pairs) | `{}` |
| [controller.overwritePlugins](./values.yaml#L424) | bool | Overwrite installed plugins on start | `false` |
| [controller.overwritePluginsFromImage](./values.yaml#L428) | bool | Overwrite plugins that are already installed in the controller image | `true` |
| [controller.podAnnotations](./values.yaml#L674) | object | Annotations for controller pod | `{}` |
| [controller.podDisruptionBudget.annotations](./values.yaml#L318) | object | | `{}` |
| [controller.podDisruptionBudget.apiVersion](./values.yaml#L316) | string | Policy API version | `"policy/v1beta1"` |
| [controller.podDisruptionBudget.enabled](./values.yaml#L311) | bool | Enable Kubernetes Pod Disruption Budget configuration | `false` |
| [controller.podDisruptionBudget.labels](./values.yaml#L319) | object | | `{}` |
| [controller.podDisruptionBudget.maxUnavailable](./values.yaml#L321) | string | Number of pods that can be unavailable. Either an absolute number or a percentage | `"0"` |
| [controller.podLabels](./values.yaml#L247) | object | Custom Pod labels (an object with `label-key: label-value` pairs) | `{}` |
| [controller.podSecurityContextOverride](./values.yaml#L208) | string | Completely overwrites the contents of the pod security context, ignoring the values provided for `runAsUser`, `fsGroup`, and `securityContextCapabilities` | `nil` |
| [controller.priorityClassName](./values.yaml#L675) | string | The name of a `priorityClass` to apply to the controller pod | `nil` |
| [controller.probes.livenessProbe.failureThreshold](./values.yaml#L276) | int | Set the failure threshold for the liveness probe | `5` |
| [controller.probes.livenessProbe.httpGet.path](./values.yaml#L279) | string | Set the Pod's HTTP path for the liveness probe | `"{{ default \"\" .Values.controller.jenkinsUriPrefix }}/login"` |
| [controller.probes.livenessProbe.httpGet.port](./values.yaml#L281) | string | Set the Pod's HTTP port to use for the liveness probe | `"http"` |
| [controller.probes.livenessProbe.initialDelaySeconds](./values.yaml#L290) | string | Set the initial delay for the liveness probe in seconds | `nil` |
| [controller.probes.livenessProbe.periodSeconds](./values.yaml#L283) | int | Set the time interval between two liveness probes executions in seconds | `10` |
| [controller.probes.livenessProbe.timeoutSeconds](./values.yaml#L285) | int | Set the timeout for the liveness probe in seconds | `5` |
| [controller.probes.readinessProbe.failureThreshold](./values.yaml#L294) | int | Set the failure threshold for the readiness probe | `3` |
| [controller.probes.readinessProbe.httpGet.path](./values.yaml#L297) | string | Set the Pod's HTTP path for the readiness probe | `"{{ default \"\" .Values.controller.jenkinsUriPrefix }}/login"` |
| [controller.probes.readinessProbe.httpGet.port](./values.yaml#L299) | string | Set the Pod's HTTP port to use for the readiness probe | `"http"` |
| [controller.probes.readinessProbe.initialDelaySeconds](./values.yaml#L308) | string | Set the initial delay for the readiness probe in seconds | `nil` |
| [controller.probes.readinessProbe.periodSeconds](./values.yaml#L301) | int | Set the time interval between two readiness probes executions in seconds | `10` |
| [controller.probes.readinessProbe.timeoutSeconds](./values.yaml#L303) | int | Set the timeout for the readiness probe in seconds | `5` |
| [controller.probes.startupProbe.failureThreshold](./values.yaml#L263) | int | Set the failure threshold for the startup probe | `12` |
| [controller.probes.startupProbe.httpGet.path](./values.yaml#L266) | string | Set the Pod's HTTP path for the startup probe | `"{{ default \"\" .Values.controller.jenkinsUriPrefix }}/login"` |
| [controller.probes.startupProbe.httpGet.port](./values.yaml#L268) | string | Set the Pod's HTTP port to use for the startup probe | `"http"` |
| [controller.probes.startupProbe.periodSeconds](./values.yaml#L270) | int | Set the time interval between two startup probes executions in seconds | `10` |
| [controller.probes.startupProbe.timeoutSeconds](./values.yaml#L272) | int | Set the timeout for the startup probe in seconds | `5` |
| [controller.projectNamingStrategy](./values.yaml#L435) | string | | `"standard"` |
| [controller.prometheus.alertingRulesAdditionalLabels](./values.yaml#L822) | object | Additional labels to add to the PrometheusRule object | `{}` |
| [controller.prometheus.alertingrules](./values.yaml#L820) | list | Array of prometheus alerting rules | `[]` |
| [controller.prometheus.enabled](./values.yaml#L805) | bool | Enables prometheus service monitor | `false` |
| [controller.prometheus.metricRelabelings](./values.yaml#L832) | list | | `[]` |
| [controller.prometheus.prometheusRuleNamespace](./values.yaml#L824) | string | Set a custom namespace where to deploy PrometheusRule resource | `""` |
| [controller.prometheus.relabelings](./values.yaml#L830) | list | | `[]` |
| [controller.prometheus.scrapeEndpoint](./values.yaml#L815) | string | The endpoint prometheus should get metrics from | `"/prometheus"` |
| [controller.prometheus.scrapeInterval](./values.yaml#L811) | string | How often prometheus should scrape metrics | `"60s"` |
| [controller.prometheus.serviceMonitorAdditionalLabels](./values.yaml#L807) | object | Additional labels to add to the service monitor object | `{}` |
| [controller.prometheus.serviceMonitorNamespace](./values.yaml#L809) | string | Set a custom namespace where to deploy ServiceMonitor resource | `nil` |
| [controller.publishNotReadyAddresses](./values.yaml#L237) | string | | `nil` |
| [controller.priorityClassName](./values.yaml#L671) | string | The name of a `priorityClass` to apply to the controller pod | `nil` |
| [controller.probes.livenessProbe.failureThreshold](./values.yaml#L272) | int | Set the failure threshold for the liveness probe | `5` |
| [controller.probes.livenessProbe.httpGet.path](./values.yaml#L275) | string | Set the Pod's HTTP path for the liveness probe | `"{{ default \"\" .Values.controller.jenkinsUriPrefix }}/login"` |
| [controller.probes.livenessProbe.httpGet.port](./values.yaml#L277) | string | Set the Pod's HTTP port to use for the liveness probe | `"http"` |
| [controller.probes.livenessProbe.initialDelaySeconds](./values.yaml#L286) | string | Set the initial delay for the liveness probe in seconds | `nil` |
| [controller.probes.livenessProbe.periodSeconds](./values.yaml#L279) | int | Set the time interval between two liveness probes executions in seconds | `10` |
| [controller.probes.livenessProbe.timeoutSeconds](./values.yaml#L281) | int | Set the timeout for the liveness probe in seconds | `5` |
| [controller.probes.readinessProbe.failureThreshold](./values.yaml#L290) | int | Set the failure threshold for the readiness probe | `3` |
| [controller.probes.readinessProbe.httpGet.path](./values.yaml#L293) | string | Set the Pod's HTTP path for the readiness probe | `"{{ default \"\" .Values.controller.jenkinsUriPrefix }}/login"` |
| [controller.probes.readinessProbe.httpGet.port](./values.yaml#L295) | string | Set the Pod's HTTP port to use for the readiness probe | `"http"` |
| [controller.probes.readinessProbe.initialDelaySeconds](./values.yaml#L304) | string | Set the initial delay for the readiness probe in seconds | `nil` |
| [controller.probes.readinessProbe.periodSeconds](./values.yaml#L297) | int | Set the time interval between two readiness probes executions in seconds | `10` |
| [controller.probes.readinessProbe.timeoutSeconds](./values.yaml#L299) | int | Set the timeout for the readiness probe in seconds | `5` |
| [controller.probes.startupProbe.failureThreshold](./values.yaml#L259) | int | Set the failure threshold for the startup probe | `12` |
| [controller.probes.startupProbe.httpGet.path](./values.yaml#L262) | string | Set the Pod's HTTP path for the startup probe | `"{{ default \"\" .Values.controller.jenkinsUriPrefix }}/login"` |
| [controller.probes.startupProbe.httpGet.port](./values.yaml#L264) | string | Set the Pod's HTTP port to use for the startup probe | `"http"` |
| [controller.probes.startupProbe.periodSeconds](./values.yaml#L266) | int | Set the time interval between two startup probes executions in seconds | `10` |
| [controller.probes.startupProbe.timeoutSeconds](./values.yaml#L268) | int | Set the timeout for the startup probe in seconds | `5` |
| [controller.projectNamingStrategy](./values.yaml#L431) | string | | `"standard"` |
| [controller.prometheus.alertingRulesAdditionalLabels](./values.yaml#L818) | object | Additional labels to add to the PrometheusRule object | `{}` |
| [controller.prometheus.alertingrules](./values.yaml#L816) | list | Array of prometheus alerting rules | `[]` |
| [controller.prometheus.enabled](./values.yaml#L801) | bool | Enables prometheus service monitor | `false` |
| [controller.prometheus.metricRelabelings](./values.yaml#L828) | list | | `[]` |
| [controller.prometheus.prometheusRuleNamespace](./values.yaml#L820) | string | Set a custom namespace where to deploy PrometheusRule resource | `""` |
| [controller.prometheus.relabelings](./values.yaml#L826) | list | | `[]` |
| [controller.prometheus.scrapeEndpoint](./values.yaml#L811) | string | The endpoint prometheus should get metrics from | `"/prometheus"` |
| [controller.prometheus.scrapeInterval](./values.yaml#L807) | string | How often prometheus should scrape metrics | `"60s"` |
| [controller.prometheus.serviceMonitorAdditionalLabels](./values.yaml#L803) | object | Additional labels to add to the service monitor object | `{}` |
| [controller.prometheus.serviceMonitorNamespace](./values.yaml#L805) | string | Set a custom namespace where to deploy ServiceMonitor resource | `nil` |
| [controller.resources](./values.yaml#L115) | object | Resource allocation (Requests and Limits) | `{"limits":{"cpu":"2000m","memory":"4096Mi"},"requests":{"cpu":"50m","memory":"256Mi"}}` |
| [controller.route.annotations](./values.yaml#L784) | object | Route annotations | `{}` |
| [controller.route.enabled](./values.yaml#L780) | bool | Enables openshift route | `false` |
| [controller.route.labels](./values.yaml#L782) | object | Route labels | `{}` |
| [controller.route.path](./values.yaml#L786) | string | Route path | `nil` |
| [controller.route.annotations](./values.yaml#L780) | object | Route annotations | `{}` |
| [controller.route.enabled](./values.yaml#L776) | bool | Enables openshift route | `false` |
| [controller.route.labels](./values.yaml#L778) | object | Route labels | `{}` |
| [controller.route.path](./values.yaml#L782) | string | Route path | `nil` |
| [controller.runAsUser](./values.yaml#L189) | int | Deprecated in favor of `controller.podSecurityContextOverride`. uid that jenkins runs with. | `1000` |
| [controller.schedulerName](./values.yaml#L653) | string | Name of the Kubernetes scheduler to use | `""` |
| [controller.scriptApproval](./values.yaml#L447) | list | List of groovy functions to approve | `[]` |
| [controller.secondaryingress.annotations](./values.yaml#L747) | object | | `{}` |
| [controller.secondaryingress.apiVersion](./values.yaml#L745) | string | | `"extensions/v1beta1"` |
| [controller.secondaryingress.enabled](./values.yaml#L739) | bool | | `false` |
| [controller.secondaryingress.hostName](./values.yaml#L754) | string | | `nil` |
| [controller.secondaryingress.labels](./values.yaml#L746) | object | | `{}` |
| [controller.secondaryingress.paths](./values.yaml#L742) | list | | `[]` |
| [controller.secondaryingress.tls](./values.yaml#L755) | string | | `nil` |
| [controller.secretClaims](./values.yaml#L490) | list | List of `SecretClaim` resources to create | `[]` |
| [controller.schedulerName](./values.yaml#L649) | string | Name of the Kubernetes scheduler to use | `""` |
| [controller.scriptApproval](./values.yaml#L443) | list | List of groovy functions to approve | `[]` |
| [controller.secondaryingress.annotations](./values.yaml#L743) | object | | `{}` |
| [controller.secondaryingress.apiVersion](./values.yaml#L741) | string | | `"extensions/v1beta1"` |
| [controller.secondaryingress.enabled](./values.yaml#L735) | bool | | `false` |
| [controller.secondaryingress.hostName](./values.yaml#L750) | string | | `nil` |
| [controller.secondaryingress.labels](./values.yaml#L742) | object | | `{}` |
| [controller.secondaryingress.paths](./values.yaml#L738) | list | | `[]` |
| [controller.secondaryingress.tls](./values.yaml#L751) | string | | `nil` |
| [controller.secretClaims](./values.yaml#L486) | list | List of `SecretClaim` resources to create | `[]` |
| [controller.securityContextCapabilities](./values.yaml#L198) | object | | `{}` |
| [controller.serviceAnnotations](./values.yaml#L240) | object | Jenkins controller service annotations | `{}` |
| [controller.serviceAnnotations](./values.yaml#L236) | object | Jenkins controller service annotations | `{}` |
| [controller.serviceExternalTrafficPolicy](./values.yaml#L233) | string | | `nil` |
| [controller.serviceLabels](./values.yaml#L246) | object | Labels for the Jenkins controller-service | `{}` |
| [controller.serviceLabels](./values.yaml#L242) | object | Labels for the Jenkins controller-service | `{}` |
| [controller.servicePort](./values.yaml#L225) | int | k8s service port | `8080` |
| [controller.serviceType](./values.yaml#L220) | string | k8s service type | `"ClusterIP"` |
| [controller.shareProcessNamespace](./values.yaml#L124) | bool | | `false` |
| [controller.sidecars.additionalSidecarContainers](./values.yaml#L635) | list | Configures additional sidecar container(s) for the Jenkins controller | `[]` |
| [controller.sidecars.configAutoReload.additionalVolumeMounts](./values.yaml#L581) | list | Enables additional volume mounts for the config auto-reload container | `[]` |
| [controller.sidecars.configAutoReload.containerSecurityContext](./values.yaml#L630) | object | Enable container security context | `{"allowPrivilegeEscalation":false,"readOnlyRootFilesystem":true}` |
| [controller.sidecars.configAutoReload.enabled](./values.yaml#L564) | bool | Enables Jenkins Config as Code auto-reload | `true` |
| [controller.sidecars.configAutoReload.env](./values.yaml#L612) | object | Environment variables for the Jenkins Config as Code auto-reload container | `{}` |
| [controller.sidecars.configAutoReload.envFrom](./values.yaml#L610) | list | Environment variable sources for the Jenkins Config as Code auto-reload container | `[]` |
| [controller.sidecars.configAutoReload.folder](./values.yaml#L623) | string | | `"/var/jenkins_home/casc_configs"` |
| [controller.sidecars.configAutoReload.image.registry](./values.yaml#L567) | string | Registry for the image that triggers the reload | `"docker.io"` |
| [controller.sidecars.configAutoReload.image.repository](./values.yaml#L569) | string | Repository of the image that triggers the reload | `"kiwigrid/k8s-sidecar"` |
| [controller.sidecars.configAutoReload.image.tag](./values.yaml#L571) | string | Tag for the image that triggers the reload | `"1.30.1"` |
| [controller.sidecars.configAutoReload.imagePullPolicy](./values.yaml#L572) | string | | `"IfNotPresent"` |
| [controller.sidecars.configAutoReload.logging](./values.yaml#L587) | object | Config auto-reload logging settings | `{"configuration":{"backupCount":3,"formatter":"JSON","logLevel":"INFO","logToConsole":true,"logToFile":false,"maxBytes":1024,"override":false}}` |
| [controller.sidecars.configAutoReload.logging.configuration.override](./values.yaml#L591) | bool | Enables custom log config using the settings below. | `false` |
| [controller.sidecars.configAutoReload.reqRetryConnect](./values.yaml#L605) | int | How many connection-related errors to retry on | `10` |
| [controller.sidecars.configAutoReload.resources](./values.yaml#L573) | object | | `{}` |
| [controller.sidecars.configAutoReload.scheme](./values.yaml#L600) | string | The scheme to use when connecting to the Jenkins configuration as code endpoint | `"http"` |
| [controller.sidecars.configAutoReload.skipTlsVerify](./values.yaml#L602) | bool | Skip TLS verification when connecting to the Jenkins configuration as code endpoint | `false` |
| [controller.sidecars.configAutoReload.sleepTime](./values.yaml#L607) | string | How many seconds to wait before updating config-maps/secrets (sets METHOD=SLEEP on the sidecar) | `nil` |
| [controller.sidecars.configAutoReload.sshTcpPort](./values.yaml#L621) | int | | `1044` |
| [controller.statefulSetAnnotations](./values.yaml#L680) | object | Annotations for controller StatefulSet | `{}` |
| [controller.statefulSetLabels](./values.yaml#L242) | object | Jenkins controller custom labels for the StatefulSet | `{}` |
| [controller.sidecars.additionalSidecarContainers](./values.yaml#L631) | list | Configures additional sidecar container(s) for the Jenkins controller | `[]` |
| [controller.sidecars.configAutoReload.additionalVolumeMounts](./values.yaml#L577) | list | Enables additional volume mounts for the config auto-reload container | `[]` |
| [controller.sidecars.configAutoReload.containerSecurityContext](./values.yaml#L626) | object | Enable container security context | `{"allowPrivilegeEscalation":false,"readOnlyRootFilesystem":true}` |
| [controller.sidecars.configAutoReload.enabled](./values.yaml#L560) | bool | Enables Jenkins Config as Code auto-reload | `true` |
| [controller.sidecars.configAutoReload.env](./values.yaml#L608) | object | Environment variables for the Jenkins Config as Code auto-reload container | `{}` |
| [controller.sidecars.configAutoReload.envFrom](./values.yaml#L606) | list | Environment variable sources for the Jenkins Config as Code auto-reload container | `[]` |
| [controller.sidecars.configAutoReload.folder](./values.yaml#L619) | string | | `"/var/jenkins_home/casc_configs"` |
| [controller.sidecars.configAutoReload.image.registry](./values.yaml#L563) | string | Registry for the image that triggers the reload | `"docker.io"` |
| [controller.sidecars.configAutoReload.image.repository](./values.yaml#L565) | string | Repository of the image that triggers the reload | `"kiwigrid/k8s-sidecar"` |
| [controller.sidecars.configAutoReload.image.tag](./values.yaml#L567) | string | Tag for the image that triggers the reload | `"1.28.0"` |
| [controller.sidecars.configAutoReload.imagePullPolicy](./values.yaml#L568) | string | | `"IfNotPresent"` |
| [controller.sidecars.configAutoReload.logging](./values.yaml#L583) | object | Config auto-reload logging settings | `{"configuration":{"backupCount":3,"formatter":"JSON","logLevel":"INFO","logToConsole":true,"logToFile":false,"maxBytes":1024,"override":false}}` |
| [controller.sidecars.configAutoReload.logging.configuration.override](./values.yaml#L587) | bool | Enables custom log config using the settings below. | `false` |
| [controller.sidecars.configAutoReload.reqRetryConnect](./values.yaml#L601) | int | How many connection-related errors to retry on | `10` |
| [controller.sidecars.configAutoReload.resources](./values.yaml#L569) | object | | `{}` |
| [controller.sidecars.configAutoReload.scheme](./values.yaml#L596) | string | The scheme to use when connecting to the Jenkins configuration as code endpoint | `"http"` |
| [controller.sidecars.configAutoReload.skipTlsVerify](./values.yaml#L598) | bool | Skip TLS verification when connecting to the Jenkins configuration as code endpoint | `false` |
| [controller.sidecars.configAutoReload.sleepTime](./values.yaml#L603) | string | How many seconds to wait before updating config-maps/secrets (sets METHOD=SLEEP on the sidecar) | `nil` |
| [controller.sidecars.configAutoReload.sshTcpPort](./values.yaml#L617) | int | | `1044` |
| [controller.statefulSetAnnotations](./values.yaml#L676) | object | Annotations for controller StatefulSet | `{}` |
| [controller.statefulSetLabels](./values.yaml#L238) | object | Jenkins controller custom labels for the StatefulSet | `{}` |
| [controller.targetPort](./values.yaml#L227) | int | k8s target port | `8080` |
| [controller.terminationGracePeriodSeconds](./values.yaml#L663) | string | Set TerminationGracePeriodSeconds | `nil` |
| [controller.terminationMessagePath](./values.yaml#L665) | string | Set the termination message path | `nil` |
| [controller.terminationMessagePolicy](./values.yaml#L667) | string | Set the termination message policy | `nil` |
| [controller.testEnabled](./values.yaml#L844) | bool | Can be used to disable rendering controller test resources when using helm template | `true` |
| [controller.tolerations](./values.yaml#L661) | list | Toleration labels for pod assignment | `[]` |
| [controller.topologySpreadConstraints](./values.yaml#L687) | object | Topology spread constraints | `{}` |
| [controller.updateStrategy](./values.yaml#L684) | object | Update strategy for StatefulSet | `{}` |
| [controller.terminationGracePeriodSeconds](./values.yaml#L659) | string | Set TerminationGracePeriodSeconds | `nil` |
| [controller.terminationMessagePath](./values.yaml#L661) | string | Set the termination message path | `nil` |
| [controller.terminationMessagePolicy](./values.yaml#L663) | string | Set the termination message policy | `nil` |
| [controller.testEnabled](./values.yaml#L840) | bool | Can be used to disable rendering controller test resources when using helm template | `true` |
| [controller.tolerations](./values.yaml#L657) | list | Toleration labels for pod assignment | `[]` |
| [controller.topologySpreadConstraints](./values.yaml#L683) | object | Topology spread constraints | `{}` |
| [controller.updateStrategy](./values.yaml#L680) | object | Update strategy for StatefulSet | `{}` |
| [controller.usePodSecurityContext](./values.yaml#L182) | bool | Enable pod security context (must be `true` if podSecurityContextOverride, runAsUser or fsGroup are set) | `true` |
| [credentialsId](./values.yaml#L27) | string | The Jenkins credentials to access the Kubernetes API server. For the default cluster it is not needed. | `nil` |
| [fullnameOverride](./values.yaml#L13) | string | Override the full resource names | `jenkins-(release-name)` or `jenkins` if the release-name is `jenkins` |
| [helmtest.bats.image.registry](./values.yaml#L1369) | string | Registry of the image used to test the framework | `"docker.io"` |
| [helmtest.bats.image.repository](./values.yaml#L1371) | string | Repository of the image used to test the framework | `"bats/bats"` |
| [helmtest.bats.image.tag](./values.yaml#L1373) | string | Tag of the image to test the framework | `"1.11.1"` |
| [helmtest.bats.image.registry](./values.yaml#L1361) | string | Registry of the image used to test the framework | `"docker.io"` |
| [helmtest.bats.image.repository](./values.yaml#L1363) | string | Repository of the image used to test the framework | `"bats/bats"` |
| [helmtest.bats.image.tag](./values.yaml#L1365) | string | Tag of the image to test the framework | `"1.11.0"` |
| [kubernetesURL](./values.yaml#L24) | string | The URL of the Kubernetes API server | `"https://kubernetes.default"` |
| [nameOverride](./values.yaml#L10) | string | Override the resource name prefix | `Chart.Name` |
| [namespaceOverride](./values.yaml#L16) | string | Override the deployment namespace | `Release.Namespace` |
| [networkPolicy.apiVersion](./values.yaml#L1293) | string | NetworkPolicy ApiVersion | `"networking.k8s.io/v1"` |
| [networkPolicy.enabled](./values.yaml#L1288) | bool | Enable the creation of NetworkPolicy resources | `false` |
| [networkPolicy.externalAgents.except](./values.yaml#L1307) | list | A list of IP sub-ranges to be excluded from the allowlisted IP range | `[]` |
| [networkPolicy.externalAgents.ipCIDR](./values.yaml#L1305) | string | The IP range from which external agents are allowed to connect to controller, i.e., 172.17.0.0/16 | `nil` |
| [networkPolicy.internalAgents.allowed](./values.yaml#L1297) | bool | Allow internal agents (from the same cluster) to connect to controller. Agent pods will be filtered based on PodLabels | `true` |
| [networkPolicy.internalAgents.namespaceLabels](./values.yaml#L1301) | object | A map of labels (keys/values) that agent namespaces must have to be able to connect to controller | `{}` |
| [networkPolicy.internalAgents.podLabels](./values.yaml#L1299) | object | A map of labels (keys/values) that agent pods must have to be able to connect to controller | `{}` |
| [persistence.accessMode](./values.yaml#L1263) | string | The PVC access mode | `"ReadWriteOnce"` |
| [persistence.annotations](./values.yaml#L1259) | object | Annotations for the PVC | `{}` |
| [persistence.dataSource](./values.yaml#L1269) | object | Existing data source to clone PVC from | `{}` |
| [persistence.enabled](./values.yaml#L1243) | bool | Enable the use of a Jenkins PVC | `true` |
| [persistence.existingClaim](./values.yaml#L1249) | string | Provide the name of a PVC | `nil` |
| [persistence.labels](./values.yaml#L1261) | object | Labels for the PVC | `{}` |
| [persistence.mounts](./values.yaml#L1281) | list | Additional mounts | `[]` |
| [persistence.size](./values.yaml#L1265) | string | The size of the PVC | `"8Gi"` |
| [persistence.storageClass](./values.yaml#L1257) | string | Storage class for the PVC | `nil` |
| [persistence.subPath](./values.yaml#L1274) | string | SubPath for jenkins-home mount | `nil` |
| [persistence.volumes](./values.yaml#L1276) | list | Additional volumes | `[]` |
| [rbac.create](./values.yaml#L1313) | bool | Whether RBAC resources are created | `true` |
| [rbac.readSecrets](./values.yaml#L1315) | bool | Whether the Jenkins service account should be able to read Kubernetes secrets | `false` |
| [rbac.useOpenShiftNonRootSCC](./values.yaml#L1317) | bool | Whether the Jenkins service account should be able to use the OpenShift "nonroot" Security Context Constraints | `false` |
| [networkPolicy.apiVersion](./values.yaml#L1289) | string | NetworkPolicy ApiVersion | `"networking.k8s.io/v1"` |
| [networkPolicy.enabled](./values.yaml#L1284) | bool | Enable the creation of NetworkPolicy resources | `false` |
| [networkPolicy.externalAgents.except](./values.yaml#L1303) | list | A list of IP sub-ranges to be excluded from the allowlisted IP range | `[]` |
| [networkPolicy.externalAgents.ipCIDR](./values.yaml#L1301) | string | The IP range from which external agents are allowed to connect to controller, i.e., 172.17.0.0/16 | `nil` |
| [networkPolicy.internalAgents.allowed](./values.yaml#L1293) | bool | Allow internal agents (from the same cluster) to connect to controller. Agent pods will be filtered based on PodLabels | `true` |
| [networkPolicy.internalAgents.namespaceLabels](./values.yaml#L1297) | object | A map of labels (keys/values) that agent namespaces must have to be able to connect to controller | `{}` |
| [networkPolicy.internalAgents.podLabels](./values.yaml#L1295) | object | A map of labels (keys/values) that agent pods must have to be able to connect to controller | `{}` |
| [persistence.accessMode](./values.yaml#L1259) | string | The PVC access mode | `"ReadWriteOnce"` |
| [persistence.annotations](./values.yaml#L1255) | object | Annotations for the PVC | `{}` |
| [persistence.dataSource](./values.yaml#L1265) | object | Existing data source to clone PVC from | `{}` |
| [persistence.enabled](./values.yaml#L1239) | bool | Enable the use of a Jenkins PVC | `true` |
| [persistence.existingClaim](./values.yaml#L1245) | string | Provide the name of a PVC | `nil` |
| [persistence.labels](./values.yaml#L1257) | object | Labels for the PVC | `{}` |
| [persistence.mounts](./values.yaml#L1277) | list | Additional mounts | `[]` |
| [persistence.size](./values.yaml#L1261) | string | The size of the PVC | `"8Gi"` |
| [persistence.storageClass](./values.yaml#L1253) | string | Storage class for the PVC | `nil` |
| [persistence.subPath](./values.yaml#L1270) | string | SubPath for jenkins-home mount | `nil` |
| [persistence.volumes](./values.yaml#L1272) | list | Additional volumes | `[]` |
| [rbac.create](./values.yaml#L1309) | bool | Whether RBAC resources are created | `true` |
| [rbac.readSecrets](./values.yaml#L1311) | bool | Whether the Jenkins service account should be able to read Kubernetes secrets | `false` |
| [rbac.useOpenShiftNonRootSCC](./values.yaml#L1313) | bool | Whether the Jenkins service account should be able to use the OpenShift "nonroot" Security Context Constraints | `false` |
| [renderHelmLabels](./values.yaml#L30) | bool | Enables rendering of the helm.sh/chart label to the annotations | `true` |
| [serviceAccount.annotations](./values.yaml#L1327) | object | Configures annotations for the ServiceAccount | `{}` |
| [serviceAccount.automountServiceAccountToken](./values.yaml#L1333) | bool | Auto-mount ServiceAccount token | `true` |
| [serviceAccount.create](./values.yaml#L1321) | bool | Configures if a ServiceAccount with this name should be created | `true` |
| [serviceAccount.extraLabels](./values.yaml#L1329) | object | Configures extra labels for the ServiceAccount | `{}` |
| [serviceAccount.imagePullSecretName](./values.yaml#L1331) | string | Controller ServiceAccount image pull secret | `nil` |
| [serviceAccount.name](./values.yaml#L1325) | string | | `nil` |
| [serviceAccountAgent.annotations](./values.yaml#L1344) | object | Configures annotations for the agent ServiceAccount | `{}` |
| [serviceAccountAgent.automountServiceAccountToken](./values.yaml#L1350) | bool | Auto-mount ServiceAccount token | `true` |
| [serviceAccountAgent.create](./values.yaml#L1338) | bool | Configures if an agent ServiceAccount should be created | `false` |
| [serviceAccountAgent.extraLabels](./values.yaml#L1346) | object | Configures extra labels for the agent ServiceAccount | `{}` |
| [serviceAccountAgent.imagePullSecretName](./values.yaml#L1348) | string | Agent ServiceAccount image pull secret | `nil` |
| [serviceAccountAgent.name](./values.yaml#L1342) | string | The name of the agent ServiceAccount to be used by access-controlled resources | `nil` |
| [serviceAccount.annotations](./values.yaml#L1323) | object | Configures annotations for the ServiceAccount | `{}` |
| [serviceAccount.create](./values.yaml#L1317) | bool | Configures if a ServiceAccount with this name should be created | `true` |
| [serviceAccount.extraLabels](./values.yaml#L1325) | object | Configures extra labels for the ServiceAccount | `{}` |
| [serviceAccount.imagePullSecretName](./values.yaml#L1327) | string | Controller ServiceAccount image pull secret | `nil` |
| [serviceAccount.name](./values.yaml#L1321) | string | | `nil` |
| [serviceAccountAgent.annotations](./values.yaml#L1338) | object | Configures annotations for the agent ServiceAccount | `{}` |
| [serviceAccountAgent.create](./values.yaml#L1332) | bool | Configures if an agent ServiceAccount should be created | `false` |
| [serviceAccountAgent.extraLabels](./values.yaml#L1340) | object | Configures extra labels for the agent ServiceAccount | `{}` |
| [serviceAccountAgent.imagePullSecretName](./values.yaml#L1342) | string | Agent ServiceAccount image pull secret | `nil` |
| [serviceAccountAgent.name](./values.yaml#L1336) | string | The name of the agent ServiceAccount to be used by access-controlled resources | `nil` |

View File

@ -309,7 +309,6 @@ jenkins:
{{- /* restore root */}}
{{- $_ := set $ "Values" $oldRoot.Values }}
{{- end }}
slaveAgentPort: {{ .Values.controller.agentListenerPort }}
{{- if .Values.controller.csrf.defaultCrumbIssuer.enabled }}
crumbIssuer:
standard:

View File

@ -34,7 +34,7 @@ spec:
{{- if (eq "-" .Values.persistence.storageClass) }}
storageClassName: ""
{{- else }}
storageClassName: "{{ tpl .Values.persistence.storageClass . }}"
storageClassName: "{{ .Values.persistence.storageClass }}"
{{- end }}
{{- end }}
{{- end }}

View File

@ -23,12 +23,12 @@ metadata:
{{- end }}
{{- if .Values.controller.ingress.annotations }}
annotations:
{{ tpl (toYaml .Values.controller.ingress.annotations) . | indent 4 }}
{{ toYaml .Values.controller.ingress.annotations | indent 4 }}
{{- end }}
name: {{ template "jenkins.fullname" . }}
spec:
{{- if .Values.controller.ingress.ingressClassName }}
ingressClassName: {{ tpl .Values.controller.ingress.ingressClassName . | quote }}
ingressClassName: {{ .Values.controller.ingress.ingressClassName | quote }}
{{- end }}
rules:
- http:

View File

@ -107,7 +107,6 @@ spec:
{{- end }}
{{- end }}
serviceAccountName: "{{ template "jenkins.serviceAccountName" . }}"
automountServiceAccountToken: {{ .Values.serviceAccount.automountServiceAccountToken }}
{{- if .Values.controller.hostNetworking }}
hostNetwork: true
dnsPolicy: ClusterFirstWithHostNet

View File

@ -41,9 +41,6 @@ spec:
targetPort: {{ $port.port }}
{{- end -}}
{{- end }}
{{- if .Values.controller.publishNotReadyAddresses }}
publishNotReadyAddresses: true
{{- end }}
selector:
"app.kubernetes.io/component": "{{ .Values.controller.componentName }}"
"app.kubernetes.io/instance": "{{ .Release.Name }}"

View File

@ -1,7 +1,6 @@
{{ if .Values.serviceAccountAgent.create }}
apiVersion: v1
kind: ServiceAccount
automountServiceAccountToken: {{ .Values.serviceAccountAgent.automountServiceAccountToken }}
metadata:
name: {{ include "jenkins.serviceAccountAgentName" . }}
namespace: {{ template "jenkins.agent.namespace" . }}

View File

@ -1,7 +1,6 @@
{{ if .Values.serviceAccount.create }}
apiVersion: v1
kind: ServiceAccount
automountServiceAccountToken: {{ .Values.serviceAccount.automountServiceAccountToken }}
metadata:
name: {{ include "jenkins.serviceAccountName" . }}
namespace: {{ template "jenkins.namespace" . }}

View File

@ -232,10 +232,6 @@ controller:
# but risks potentially imbalanced traffic spreading.
serviceExternalTrafficPolicy:
# If enabled, the controller is available through its service before its pods report ready. Makes startup screen and
# auto-reload on restart feature possible.
publishNotReadyAddresses:
# -- Jenkins controller service annotations
serviceAnnotations: {}
# -- Jenkins controller custom labels for the StatefulSet
@ -403,10 +399,10 @@ controller:
# Plugins will be installed during Jenkins controller start
# -- List of Jenkins plugins to install. If you don't want to install plugins, set it to `false`
installPlugins:
- kubernetes:4313.va_9b_4fe2a_0e34
- kubernetes:4296.v20a_7e4d77cf6
- workflow-aggregator:600.vb_57cdd26fdd7
- git:5.7.0
- configuration-as-code:1932.v75cb_b_f1b_698d
- git:5.6.0
- configuration-as-code:1897.v79281e066ea_7
# If set to false, Jenkins will download the minimum required version of all dependencies.
# -- Download the minimum required version or latest version of all dependencies
@ -568,7 +564,7 @@ controller:
# -- Repository of the image that triggers the reload
repository: kiwigrid/k8s-sidecar
# -- Tag for the image that triggers the reload
tag: 1.30.1
tag: 1.28.0
imagePullPolicy: IfNotPresent
resources: {}
# limits:
@ -955,7 +951,7 @@ agent:
# -- Repository to pull the agent jnlp image from
repository: "jenkins/inbound-agent"
# -- Tag of the image to pull
tag: "3283.v92c105e0f819-9"
tag: "3273.v4cfe589b_fd83-1"
# -- Configure working directory for default agent
workingDir: "/home/jenkins/agent"
nodeUsageMode: "NORMAL"
@ -1329,8 +1325,6 @@ serviceAccount:
extraLabels: {}
# -- Controller ServiceAccount image pull secret
imagePullSecretName:
# -- Auto-mount ServiceAccount token
automountServiceAccountToken: true
serviceAccountAgent:
@ -1346,8 +1340,6 @@ serviceAccountAgent:
extraLabels: {}
# -- Agent ServiceAccount image pull secret
imagePullSecretName:
# -- Auto-mount ServiceAccount token
automountServiceAccountToken: true
# -- Checks if any deprecated values are used
checkDeprecation: true
@ -1370,4 +1362,4 @@ helmtest:
# -- Repository of the image used to test the framework
repository: "bats/bats"
# -- Tag of the image to test the framework
tag: "1.11.1"
tag: "1.11.0"

View File

@ -2,7 +2,7 @@ gitea:
enabled: false
image:
tag: 1.23.4
tag: 1.22.6
rootless: true
# Number of Gitea replicas. The key must be spelled `replicaCount` to match the
# upstream Gitea chart; the previous spelling `repliaCount` was silently ignored.
replicaCount: 1
@ -16,10 +16,6 @@ gitea:
claimName: data-gitea-0
size: 4Gi
service:
http:
port: 80
securityContext:
allowPrivilegeEscalation: false
capabilities:
@ -293,18 +289,12 @@ trivy:
renovate:
enabled: false
renovate:
config: |
{
}
env:
LOG_FORMAT: json
cronjob:
concurrencyPolicy: Forbid
jobBackoffLimit: 2
jobBackoffLimit: 3
schedule: "0 3 * * *"
successfulJobsHistoryLimit: 1
securityContext:
fsGroupChangePolicy: OnRootMismatch
fsGroup: 1000

View File

@ -13,7 +13,7 @@ maintainers:
email: stefan@zero-downtime.net
dependencies:
- name: kubezero-lib
version: 0.2.1
version: ">= 0.1.6"
repository: https://cdn.zero-downtime.net/charts/
- name: falco
version: 4.2.5

View File

@ -13,7 +13,7 @@ maintainers:
email: stefan@zero-downtime.net
dependencies:
- name: kubezero-lib
version: 0.2.1
version: ">= 0.2.1"
repository: https://cdn.zero-downtime.net/charts/
- name: neo4j
version: 5.26.0

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: kubezero-istio-gateway
description: KubeZero Umbrella Chart for Istio gateways
type: application
version: 0.24.3
version: 0.24.2
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
@ -14,9 +14,9 @@ maintainers:
email: stefan@zero-downtime.net
dependencies:
- name: kubezero-lib
version: 0.2.1
version: ">= 0.1.6"
repository: https://cdn.zero-downtime.net/charts/
- name: gateway
version: 1.24.3
version: 1.24.2
repository: https://istio-release.storage.googleapis.com/charts
kubeVersion: ">= 1.30.0-0"

View File

@ -1,6 +1,6 @@
# kubezero-istio-gateway
![Version: 0.24.3](https://img.shields.io/badge/Version-0.24.3-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
![Version: 0.24.2](https://img.shields.io/badge/Version-0.24.2-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
KubeZero Umbrella Chart for Istio gateways
@ -20,8 +20,8 @@ Kubernetes: `>= 1.30.0-0`
| Repository | Name | Version |
|------------|------|---------|
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | 0.2.1 |
| https://istio-release.storage.googleapis.com/charts | gateway | 1.24.3 |
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | >= 0.1.6 |
| https://istio-release.storage.googleapis.com/charts | gateway | 1.24.2 |
## Values
@ -32,8 +32,8 @@ Kubernetes: `>= 1.30.0-0`
| gateway.autoscaling.maxReplicas | int | `4` | |
| gateway.autoscaling.minReplicas | int | `1` | |
| gateway.autoscaling.targetCPUUtilizationPercentage | int | `80` | |
| gateway.minReadySeconds | int | `10` | |
| gateway.podAnnotations."proxy.istio.io/config" | string | `"{ \"terminationDrainDuration\": \"90s\" }"` | |
| gateway.minReadySeconds | int | `120` | |
| gateway.podAnnotations."proxy.istio.io/config" | string | `"{ \"terminationDrainDuration\": \"20s\" }"` | |
| gateway.replicaCount | int | `1` | |
| gateway.resources.limits.memory | string | `"512Mi"` | |
| gateway.resources.requests.cpu | string | `"50m"` | |

View File

@ -1,5 +1,5 @@
apiVersion: v2
appVersion: 1.24.3
appVersion: 1.24.2
description: Helm chart for deploying Istio gateways
icon: https://istio.io/latest/favicons/android-192x192.png
keywords:
@ -9,4 +9,4 @@ name: gateway
sources:
- https://github.com/istio/istio
type: application
version: 1.24.3
version: 1.24.2

View File

@ -77,7 +77,7 @@ spec:
allowPrivilegeEscalation: false
privileged: false
readOnlyRootFilesystem: true
{{- if not (eq (.Values.platform | default "") "openshift") }}
{{- if not (eq .Values.platform "openshift") }}
runAsUser: 1337
runAsGroup: 1337
{{- end }}

View File

@ -49,7 +49,7 @@ Finally, we can set all of that under .Values so the chart behaves without aware
{{- $a := mustMergeOverwrite $defaults $profile }}
{{- end }}
# Flatten globals, if defined on a per-chart basis
{{- if true }}
{{- if false }}
{{- $a := mustMergeOverwrite $defaults ($profile.global) ($.Values.global | default dict) }}
{{- end }}
{{- $b := set $ "Values" (mustMergeOverwrite $defaults $.Values) }}

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: kubezero-istio
description: KubeZero Umbrella Chart for Istio
type: application
version: 0.24.3
version: 0.24.2
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
@ -13,20 +13,16 @@ maintainers:
email: stefan@zero-downtime.net
dependencies:
- name: kubezero-lib
version: 0.2.1
version: ">= 0.1.6"
repository: https://cdn.zero-downtime.net/charts/
- name: envoy-ratelimit
version: 0.1.2
repository: https://cdn.zero-downtime.net/charts/
condition: envoy-ratelimit.enabled
- name: base
version: 1.24.3
version: 1.24.2
repository: https://istio-release.storage.googleapis.com/charts
- name: istiod
version: 1.24.3
version: 1.24.2
repository: https://istio-release.storage.googleapis.com/charts
- name: kiali-server
version: "2.6.0"
version: "1.89.7"
repository: https://kiali.org/helm-charts
condition: kiali-server.enabled
kubeVersion: ">= 1.30.0-0"

View File

@ -1,6 +1,6 @@
# kubezero-istio
![Version: 0.24.3](https://img.shields.io/badge/Version-0.24.3-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
![Version: 0.24.2](https://img.shields.io/badge/Version-0.24.2-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
KubeZero Umbrella Chart for Istio
@ -20,27 +20,15 @@ Kubernetes: `>= 1.30.0-0`
| Repository | Name | Version |
|------------|------|---------|
| https://cdn.zero-downtime.net/charts/ | envoy-ratelimit | 0.1.2 |
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | 0.2.1 |
| https://istio-release.storage.googleapis.com/charts | base | 1.24.3 |
| https://istio-release.storage.googleapis.com/charts | istiod | 1.24.3 |
| https://kiali.org/helm-charts | kiali-server | 2.6.0 |
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | >= 0.1.6 |
| https://istio-release.storage.googleapis.com/charts | base | 1.24.2 |
| https://istio-release.storage.googleapis.com/charts | istiod | 1.24.2 |
| https://kiali.org/helm-charts | kiali-server | 1.89.7 |
## Values
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| envoy-ratelimit.descriptors.ingress[0].key | string | `"remote_address"` | |
| envoy-ratelimit.descriptors.ingress[0].rate_limit.requests_per_unit | int | `10` | |
| envoy-ratelimit.descriptors.ingress[0].rate_limit.unit | string | `"second"` | |
| envoy-ratelimit.descriptors.privateIngress[0].key | string | `"remote_address"` | |
| envoy-ratelimit.descriptors.privateIngress[0].rate_limit.requests_per_unit | int | `10` | |
| envoy-ratelimit.descriptors.privateIngress[0].rate_limit.unit | string | `"second"` | |
| envoy-ratelimit.enabled | bool | `false` | |
| envoy-ratelimit.failureModeDeny | bool | `false` | |
| envoy-ratelimit.localCacheSize | int | `1048576` | |
| envoy-ratelimit.log.format | string | `"json"` | |
| envoy-ratelimit.log.level | string | `"warn"` | |
| global.defaultPodDisruptionBudget.enabled | bool | `false` | |
| global.logAsJson | bool | `true` | |
| global.variant | string | `"distroless"` | |
@ -62,6 +50,17 @@ Kubernetes: `>= 1.30.0-0`
| kiali-server.istio.enabled | bool | `false` | |
| kiali-server.istio.gateway | string | `"istio-ingress/private-ingressgateway"` | |
| kiali-server.server.metrics_enabled | bool | `false` | |
| rateLimiting.descriptors.ingress[0].key | string | `"remote_address"` | |
| rateLimiting.descriptors.ingress[0].rate_limit.requests_per_unit | int | `10` | |
| rateLimiting.descriptors.ingress[0].rate_limit.unit | string | `"second"` | |
| rateLimiting.descriptors.privateIngress[0].key | string | `"remote_address"` | |
| rateLimiting.descriptors.privateIngress[0].rate_limit.requests_per_unit | int | `10` | |
| rateLimiting.descriptors.privateIngress[0].rate_limit.unit | string | `"second"` | |
| rateLimiting.enabled | bool | `false` | |
| rateLimiting.failureModeDeny | bool | `false` | |
| rateLimiting.localCacheSize | int | `1048576` | |
| rateLimiting.log.format | string | `"json"` | |
| rateLimiting.log.level | string | `"warn"` | |
## Resources

View File

@ -5,22 +5,22 @@ folder: Istio
condition: '.Values.istiod.telemetry.enabled'
dashboards:
- name: istio-control-plane
url: https://grafana.com/api/dashboards/7645/revisions/239/download
url: https://grafana.com/api/dashboards/7645/revisions/229/download
tags:
- Istio
- name: istio-mesh
url: https://grafana.com/api/dashboards/7639/revisions/239/download
url: https://grafana.com/api/dashboards/7639/revisions/229/download
tags:
- Istio
- name: istio-service
url: https://grafana.com/api/dashboards/7636/revisions/239/download
url: https://grafana.com/api/dashboards/7636/revisions/229/download
tags:
- Istio
- name: istio-workload
url: https://grafana.com/api/dashboards/7630/revisions/239/download
url: https://grafana.com/api/dashboards/7630/revisions/229/download
tags:
- Istio
- name: istio-performance
url: https://grafana.com/api/dashboards/11829/revisions/239/download
url: https://grafana.com/api/dashboards/11829/revisions/229/download
tags:
- Istio

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,106 @@
{{- if .Values.rateLimiting.enabled }}
# ConfigMap holding the statsd-exporter mapping file (key `config.yaml`).
# Only rendered when rateLimiting.enabled is set.
# The mappings rename the ratelimit service's dotted statsd metrics
# (ratelimit.service.rate_limit.<domain>.<key1>[.<key2>].<counter>, call errors,
# per-gRPC-method request totals/response times, config load results) into
# fixed metric names with the wildcard segments captured as labels.
# Metrics default to a 1m ttl (3m for the config_load_* counters); the final
# regex rule drops every metric that no earlier mapping matched.
apiVersion: v1
kind: ConfigMap
metadata:
name: ratelimit-statsd-exporter-config
namespace: {{ .Release.Namespace }}
labels:
{{- include "kubezero-lib.labels" . | nindent 4 }}
data:
config.yaml: |
defaults:
ttl: 1m # Resets the metrics every minute
mappings:
- match:
"ratelimit.service.rate_limit.*.*.near_limit"
name: "ratelimit_service_rate_limit_near_limit"
timer_type: "histogram"
labels:
domain: "$1"
key1: "$2"
- match:
"ratelimit.service.rate_limit.*.*.over_limit"
name: "ratelimit_service_rate_limit_over_limit"
timer_type: "histogram"
labels:
domain: "$1"
key1: "$2"
- match:
"ratelimit.service.rate_limit.*.*.total_hits"
name: "ratelimit_service_rate_limit_total_hits"
timer_type: "histogram"
labels:
domain: "$1"
key1: "$2"
- match:
"ratelimit.service.rate_limit.*.*.within_limit"
name: "ratelimit_service_rate_limit_within_limit"
timer_type: "histogram"
labels:
domain: "$1"
key1: "$2"
- match:
"ratelimit.service.rate_limit.*.*.*.near_limit"
name: "ratelimit_service_rate_limit_near_limit"
timer_type: "histogram"
labels:
domain: "$1"
key1: "$2"
key2: "$3"
- match:
"ratelimit.service.rate_limit.*.*.*.over_limit"
name: "ratelimit_service_rate_limit_over_limit"
timer_type: "histogram"
labels:
domain: "$1"
key1: "$2"
key2: "$3"
- match:
"ratelimit.service.rate_limit.*.*.*.total_hits"
name: "ratelimit_service_rate_limit_total_hits"
timer_type: "histogram"
labels:
domain: "$1"
key1: "$2"
key2: "$3"
- match:
"ratelimit.service.rate_limit.*.*.*.within_limit"
name: "ratelimit_service_rate_limit_within_limit"
timer_type: "histogram"
labels:
domain: "$1"
key1: "$2"
key2: "$3"
- match:
"ratelimit.service.call.should_rate_limit.*"
name: "ratelimit_service_should_rate_limit_error"
match_metric_type: counter
labels:
err_type: "$1"
- match:
"ratelimit_server.*.total_requests"
name: "ratelimit_service_total_requests"
match_metric_type: counter
labels:
grpc_method: "$1"
- match:
"ratelimit_server.*.response_time"
name: "ratelimit_service_response_time_seconds"
timer_type: histogram
labels:
grpc_method: "$1"
- match:
"ratelimit.service.config_load_success"
name: "ratelimit_service_config_load_success"
match_metric_type: counter
ttl: 3m
- match:
"ratelimit.service.config_load_error"
name: "ratelimit_service_config_load_error"
match_metric_type: counter
ttl: 3m
- match: "."
match_type: "regex"
action: "drop"
name: "dropped"
{{- end }}

View File

@ -1,3 +1,4 @@
{{- if .Values.rateLimiting.enabled }}
apiVersion: v1
kind: ConfigMap
metadata:
@ -9,9 +10,10 @@ data:
ingress.yaml: |
domain: ingress
descriptors:
{{- toYaml .Values.descriptors.ingress | nindent 4 }}
{{- toYaml .Values.rateLimiting.descriptors.ingress | nindent 4 }}
private-ingress.yaml: |
domain: private-ingress
descriptors:
{{- toYaml .Values.descriptors.privateIngress | nindent 4 }}
{{- toYaml .Values.rateLimiting.descriptors.privateIngress | nindent 4 }}
{{- end }}

View File

@ -1,3 +1,4 @@
{{- if .Values.rateLimiting.enabled }}
apiVersion: networking.istio.io/v1alpha3
kind: EnvoyFilter
metadata:
@ -26,7 +27,7 @@ spec:
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.ratelimit.v3.RateLimit
domain: ingress
failure_mode_deny: {{ .Values.failureModeDeny }}
failure_mode_deny: {{ .Values.rateLimiting.failureModeDeny }}
timeout: 0.5s
rate_limit_service:
grpc_service:
@ -84,7 +85,7 @@ spec:
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.ratelimit.v3.RateLimit
domain: private-ingress
failure_mode_deny: {{ .Values.failureModeDeny }}
failure_mode_deny: {{ .Values.rateLimiting.failureModeDeny }}
timeout: 0.5s
rate_limit_service:
grpc_service:
@ -112,3 +113,4 @@ spec:
socket_address:
address: ratelimit.istio-system
port_value: 8081
{{- end }}

View File

@ -0,0 +1,154 @@
{{- if .Values.rateLimiting.enabled }}
# Service exposing Redis on port 6379 (named `redis`) for pods labelled
# app=ratelimit-redis, in the release namespace.
apiVersion: v1
kind: Service
metadata:
name: ratelimit-redis
namespace: {{ .Release.Namespace }}
labels:
app: ratelimit-redis
spec:
ports:
- name: redis
port: 6379
selector:
app: ratelimit-redis
---
# Single-replica Redis (redis:6-alpine) Deployment, selected by the label
# app=ratelimit-redis. No volumes are declared in this spec.
apiVersion: apps/v1
kind: Deployment
metadata:
name: ratelimit-redis
namespace: {{ .Release.Namespace }}
spec:
replicas: 1
selector:
matchLabels:
app: ratelimit-redis
template:
metadata:
labels:
app: ratelimit-redis
spec:
containers:
- image: redis:6-alpine
imagePullPolicy: IfNotPresent
name: redis
ports:
- name: redis
containerPort: 6379
restartPolicy: Always
# NOTE(review): empty service account name - presumably falls back to the
# namespace default; confirm this is intended.
serviceAccountName: ""
---
# Service for the ratelimit pods (app=ratelimit). Only the gRPC port (8081)
# and the monitoring port (9102) are exposed; the HTTP (8080) and debug (6070)
# ports are left commented out.
apiVersion: v1
kind: Service
metadata:
name: ratelimit
namespace: {{ .Release.Namespace }}
labels:
app: ratelimit
spec:
ports:
#- name: http-port
# port: 8080
# targetPort: 8080
# protocol: TCP
- name: grpc-port
port: 8081
targetPort: 8081
protocol: TCP
#- name: http-debug
# port: 6070
# targetPort: 6070
# protocol: TCP
- name: http-monitoring
port: 9102
targetPort: 9102
protocol: TCP
selector:
app: ratelimit
---
# Deployment of the ratelimit service: one replica running two containers,
# `ratelimit` (envoyproxy/ratelimit) and a `statsd-exporter` sidecar.
# Log level/format and local cache size come from .Values.rateLimiting; the
# descriptor files come from the `ratelimit-config` ConfigMap and the exporter
# mappings from the `ratelimit-statsd-exporter-config` ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
name: ratelimit
namespace: {{ .Release.Namespace }}
spec:
replicas: 1
selector:
matchLabels:
app: ratelimit
strategy:
type: Recreate
template:
metadata:
labels:
app: ratelimit
spec:
containers:
- image: envoyproxy/ratelimit:b42701cb # 2021/08/12
imagePullPolicy: IfNotPresent
name: ratelimit
command: ["/bin/ratelimit"]
env:
# Falls back to "WARN" / "text" when the corresponding value is unset.
- name: LOG_LEVEL
value: {{ default "WARN" .Values.rateLimiting.log.level }}
- name: LOG_FORMAT
value: {{ default "text" .Values.rateLimiting.log.format }}
- name: REDIS_SOCKET_TYPE
value: tcp
# Host:port of the ratelimit-redis Service.
- name: REDIS_URL
value: ratelimit-redis:6379
# Stats are sent to localhost:9125, the port declared by the statsd-exporter
# container in this same pod.
- name: USE_STATSD
value: "true"
- name: STATSD_HOST
value: "localhost"
- name: STATSD_PORT
value: "9125"
# RUNTIME_ROOT + RUNTIME_SUBDIRECTORY give /data/ratelimit, the parent of the
# config volume mount below.
- name: RUNTIME_ROOT
value: /data
- name: RUNTIME_SUBDIRECTORY
value: ratelimit
- name: RUNTIME_WATCH_ROOT
value: "false"
- name: RUNTIME_IGNOREDOTFILES
value: "true"
# Rendered as 0 when rateLimiting.localCacheSize is unset.
- name: LOCAL_CACHE_SIZE_IN_BYTES
value: "{{ default 0 .Values.rateLimiting.localCacheSize | int }}"
ports:
#- containerPort: 8080
- containerPort: 8081
#- containerPort: 6070
volumeMounts:
- name: ratelimit-config
mountPath: /data/ratelimit/config
resources:
requests:
cpu: 50m
memory: 32Mi
limits:
cpu: 1
memory: 256Mi
- name: statsd-exporter
image: docker.io/prom/statsd-exporter:v0.21.0
# NOTE(review): `Always` with a pinned tag differs from the IfNotPresent used
# by the ratelimit container above - confirm this is intentional.
imagePullPolicy: Always
args: ["--statsd.mapping-config=/etc/statsd-exporter/config.yaml"]
ports:
- containerPort: 9125
# - containerPort: 9102
resources:
requests:
cpu: 50m
memory: 32Mi
limits:
cpu: 200m
memory: 64Mi
volumeMounts:
- name: statsd-exporter-config
mountPath: /etc/statsd-exporter
volumes:
- name: ratelimit-config
configMap:
name: ratelimit-config
- name: statsd-exporter-config
configMap:
name: ratelimit-statsd-exporter-config
{{- end }}

View File

@ -1,4 +1,4 @@
{{- if and .Values.metrics.enabled }}
{{- if and .Values.istiod.telemetry.enabled .Values.rateLimiting.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:

View File

@ -56,7 +56,29 @@ kiali-server:
#url: "kiali.example.com"
# for available options see envoy-ratelimit chart
envoy-ratelimit:
rateLimiting:
enabled: false
log:
level: warn
format: json
# 1MB local cache for already reached limits to reduce calls to Redis
localCacheSize: 1048576
# Whether to block requests if rate limiting is down
failureModeDeny: false
# rate limit descriptors for each domain; example: 10 req/s per source IP
descriptors:
ingress:
- key: remote_address
rate_limit:
unit: second
requests_per_unit: 10
privateIngress:
- key: remote_address
rate_limit:
unit: second
requests_per_unit: 10

View File

@ -10,4 +10,4 @@ keywords:
maintainers:
- name: Stefan Reimer
email: stefan@zero-downtime.net
kubeVersion: ">= 1.30.0-0"
kubeVersion: ">= 1.30.0"

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: kubezero-logging
description: KubeZero Umbrella Chart for complete EFK stack
type: application
version: 0.8.13
version: 0.8.14
appVersion: 1.6.0
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
@ -17,14 +17,14 @@ maintainers:
email: stefan@zero-downtime.net
dependencies:
- name: kubezero-lib
version: 0.2.1
version: ">= 0.1.6"
repository: https://cdn.zero-downtime.net/charts/
- name: fluentd
version: 0.5.2
repository: https://fluent.github.io/helm-charts
condition: fluentd.enabled
- name: fluent-bit
version: 0.47.10
version: 0.48.6
repository: https://fluent.github.io/helm-charts
condition: fluent-bit.enabled
kubeVersion: ">= 1.26.0"

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: kubezero-metrics
description: KubeZero Umbrella Chart for Prometheus, Grafana and Alertmanager as well as all Kubernetes integrations.
type: application
version: 0.11.0
version: 0.10.3
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
@ -16,17 +16,17 @@ maintainers:
# https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack
dependencies:
- name: kubezero-lib
version: 0.2.1
version: ">= 0.1.6"
repository: https://cdn.zero-downtime.net/charts/
- name: kube-prometheus-stack
version: 69.2.3
version: 66.7.1
repository: https://prometheus-community.github.io/helm-charts
- name: prometheus-adapter
version: 4.11.0
repository: https://prometheus-community.github.io/helm-charts
condition: prometheus-adapter.enabled
- name: prometheus-pushgateway
version: 3.0.0
version: 2.17.0
repository: https://prometheus-community.github.io/helm-charts
condition: prometheus-pushgateway.enabled
kubeVersion: ">= 1.30.0-0"
kubeVersion: ">= 1.26.0"

View File

@ -1,6 +1,6 @@
# kubezero-metrics
![Version: 0.10.4](https://img.shields.io/badge/Version-0.10.4-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
![Version: 0.10.2](https://img.shields.io/badge/Version-0.10.2-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
KubeZero Umbrella Chart for Prometheus, Grafana and Alertmanager as well as all Kubernetes integrations.
@ -14,14 +14,14 @@ KubeZero Umbrella Chart for Prometheus, Grafana and Alertmanager as well as all
## Requirements
Kubernetes: `>= 1.30.0-0`
Kubernetes: `>= 1.26.0`
| Repository | Name | Version |
|------------|------|---------|
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | >= 0.1.6 |
| https://prometheus-community.github.io/helm-charts | kube-prometheus-stack | 69.2.3 |
| https://prometheus-community.github.io/helm-charts | kube-prometheus-stack | 66.1.1 |
| https://prometheus-community.github.io/helm-charts | prometheus-adapter | 4.11.0 |
| https://prometheus-community.github.io/helm-charts | prometheus-pushgateway | 3.0.0 |
| https://prometheus-community.github.io/helm-charts | prometheus-pushgateway | 2.15.0 |
## Values

View File

@ -30,7 +30,7 @@
"options": {
"content": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only."
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"title": "Notice",
"type": "text"
},
@ -54,7 +54,7 @@
},
"id": 2,
"interval": "1m",
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -100,7 +100,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -134,7 +134,7 @@
},
"id": 4,
"interval": "1m",
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -232,7 +232,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -276,7 +276,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -319,7 +319,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -353,7 +353,7 @@
},
"id": 8,
"interval": "1m",
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -451,7 +451,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -495,7 +495,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -538,7 +538,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -581,7 +581,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -624,7 +624,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -670,7 +670,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -712,7 +712,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -755,7 +755,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -797,7 +797,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {

View File

@ -46,7 +46,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -92,7 +92,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -163,7 +163,7 @@
"y": 9
},
"id": 3,
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -328,7 +328,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -374,7 +374,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -420,7 +420,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -466,7 +466,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -512,7 +512,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -558,7 +558,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -604,7 +604,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -650,7 +650,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -696,7 +696,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -742,7 +742,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {

View File

@ -35,7 +35,7 @@
"options": {
"colorMode": "none"
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -86,7 +86,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -137,7 +137,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -188,7 +188,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -239,7 +239,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -314,15 +314,15 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, le))",
"legendFormat": "{{verb}}"
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
"legendFormat": "{{verb}} {{url}}"
}
],
"title": "Post Request Latency 99th Quantile",
@ -365,15 +365,15 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, le))",
"legendFormat": "{{verb}}"
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
"legendFormat": "{{verb}} {{url}}"
}
],
"title": "Get Request Latency 99th Quantile",
@ -416,7 +416,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -467,7 +467,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -518,7 +518,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {

View File

@ -35,7 +35,7 @@
"options": {
"colorMode": "none"
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -70,7 +70,7 @@
"options": {
"colorMode": "none"
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -105,7 +105,7 @@
"options": {
"colorMode": "none"
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -140,7 +140,7 @@
"options": {
"colorMode": "none"
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -175,7 +175,7 @@
"options": {
"colorMode": "none"
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -210,7 +210,7 @@
"options": {
"colorMode": "none"
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -260,7 +260,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -319,7 +319,7 @@
"y": 12
},
"id": 8,
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -476,7 +476,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -571,7 +571,7 @@
"y": 24
},
"id": 10,
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -748,7 +748,7 @@
"y": 30
},
"id": 11,
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -892,7 +892,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -943,7 +943,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -994,7 +994,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1045,7 +1045,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1096,7 +1096,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1147,7 +1147,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1198,7 +1198,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1249,7 +1249,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1300,7 +1300,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1351,7 +1351,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1422,7 +1422,7 @@
"y": 96
},
"id": 22,
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {

View File

@ -35,7 +35,7 @@
"options": {
"colorMode": "none"
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -70,7 +70,7 @@
"options": {
"colorMode": "none"
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -105,7 +105,7 @@
"options": {
"colorMode": "none"
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -140,7 +140,7 @@
"options": {
"colorMode": "none"
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -242,7 +242,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -317,7 +317,7 @@
"y": 14
},
"id": 6,
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -500,7 +500,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -578,7 +578,7 @@
"y": 28
},
"id": 8,
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -768,7 +768,7 @@
"y": 35
},
"id": 9,
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -912,7 +912,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -963,7 +963,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1014,7 +1014,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1065,7 +1065,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1116,7 +1116,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1167,7 +1167,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1218,7 +1218,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1269,7 +1269,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1340,7 +1340,7 @@
"y": 70
},
"id": 18,
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {

View File

@ -94,7 +94,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -161,7 +161,7 @@
"y": 6
},
"id": 2,
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -323,7 +323,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -342,109 +342,6 @@
"legendFormat": "{{pod}}"
}
],
"title": "Memory Usage (w/cache)",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": true,
"stacking": {
"mode": "normal"
}
},
"unit": "bytes"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "max capacity"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "red",
"mode": "fixed"
}
},
{
"id": "custom.stacking",
"value": {
"mode": "none"
}
},
{
"id": "custom.hideFrom",
"value": {
"legend": false,
"tooltip": true,
"viz": false
}
},
{
"id": "custom.lineStyle",
"value": {
"dash": [
10,
10
],
"fill": "dash"
}
}
]
}
]
},
"gridPos": {
"h": 6,
"w": 24,
"x": 0,
"y": 18
},
"id": 4,
"interval": "1m",
"options": {
"legend": {
"asTable": true,
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", job=\"kube-state-metrics\", node=~\"$node\", resource=\"memory\"})",
"legendFormat": "max capacity"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)",
"legendFormat": "{{pod}}"
}
],
"title": "Memory Usage (w/o cache)",
"type": "timeseries"
},
@ -493,10 +390,10 @@
"h": 6,
"w": 24,
"x": 0,
"y": 24
"y": 18
},
"id": 5,
"pluginVersion": "v11.4.0",
"id": 4,
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {

View File

@ -102,7 +102,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -208,7 +208,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -250,7 +250,7 @@
"y": 14
},
"id": 3,
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -433,7 +433,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -494,7 +494,7 @@
"y": 28
},
"id": 5,
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -664,7 +664,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -715,7 +715,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -766,7 +766,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -817,7 +817,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -868,7 +868,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -919,7 +919,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -970,7 +970,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1029,7 +1029,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1088,7 +1088,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1139,7 +1139,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1193,7 +1193,7 @@
"y": 70
},
"id": 16,
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {

View File

@ -1,675 +0,0 @@
{
"editable": false,
"panels": [
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"unit": "none"
}
},
"gridPos": {
"h": 3,
"w": 4,
"x": 0,
"y": 0
},
"id": 1,
"interval": "1m",
"options": {
"colorMode": "none"
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "1 - avg(rate(windows_cpu_time_total{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", mode=\"idle\"}[$__rate_interval]))",
"instant": true
}
],
"title": "CPU Utilisation",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"unit": "percentunit"
}
},
"gridPos": {
"h": 3,
"w": 4,
"x": 4,
"y": 0
},
"id": 2,
"interval": "1m",
"options": {
"colorMode": "none"
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\"}) / sum(node:windows_node_num_cpu:sum{cluster=\"$cluster\"})",
"instant": true
}
],
"title": "CPU Requests Commitment",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"unit": "percentunit"
}
},
"gridPos": {
"h": 3,
"w": 4,
"x": 8,
"y": 0
},
"id": 3,
"interval": "1m",
"options": {
"colorMode": "none"
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\"}) / sum(node:windows_node_num_cpu:sum{cluster=\"$cluster\"})",
"instant": true
}
],
"title": "CPU Limits Commitment",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"unit": "percentunit"
}
},
"gridPos": {
"h": 3,
"w": 4,
"x": 12,
"y": 0
},
"id": 4,
"interval": "1m",
"options": {
"colorMode": "none"
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "1 - sum(:windows_node_memory_MemFreeCached_bytes:sum{cluster=\"$cluster\"}) / sum(:windows_node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})",
"instant": true
}
],
"title": "Memory Utilisation",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"unit": "percentunit"
}
},
"gridPos": {
"h": 3,
"w": 4,
"x": 16,
"y": 0
},
"id": 5,
"interval": "1m",
"options": {
"colorMode": "none"
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\"}) / sum(:windows_node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})",
"instant": true
}
],
"title": "Memory Requests Commitment",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"unit": "percentunit"
}
},
"gridPos": {
"h": 3,
"w": 4,
"x": 20,
"y": 0
},
"id": 6,
"interval": "1m",
"options": {
"colorMode": "none"
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\"}) / sum(:windows_node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})",
"instant": true
}
],
"title": "Memory Limits Commitment",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": true
}
}
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 7
},
"id": 7,
"interval": "1m",
"options": {
"legend": {
"asTable": true,
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)",
"legendFormat": "__auto"
}
],
"title": "CPU Usage",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": "/%/"
},
"properties": [
{
"id": "unit",
"value": "percentunit"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Namespace"
},
"properties": [
{
"id": "links",
"value": [
{
"title": "Drill down to pods",
"url": "/d/490b402361724ab1d4c45666c1fa9b6f/k8s-resources-windows-namespace?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"
}
]
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 14
},
"id": 8,
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true
}
],
"title": "CPU Quota",
"transformations": [
{
"id": "joinByField",
"options": {
"byField": "namespace",
"mode": "outer"
}
},
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true,
"Time 1": true,
"Time 2": true,
"Time 3": true,
"Time 4": true,
"Time 5": true
},
"indexByName": {
"Time 1": 0,
"Time 2": 1,
"Time 3": 2,
"Time 4": 3,
"Time 5": 4,
"Value #A": 6,
"Value #B": 7,
"Value #C": 8,
"Value #D": 9,
"Value #E": 10,
"namespace": 5
},
"renameByName": {
"Value #A": "CPU Usage",
"Value #B": "CPU Requests",
"Value #C": "CPU Requests %",
"Value #D": "CPU Limits",
"Value #E": "CPU Limits %",
"namespace": "Namespace"
}
}
}
],
"type": "table"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": true
},
"unit": "decbytes"
}
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 21
},
"id": 9,
"interval": "1m",
"options": {
"legend": {
"asTable": true,
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace)",
"legendFormat": "__auto"
}
],
"title": "Memory Usage (Private Working Set)",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"unit": "bytes"
},
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": "/%/"
},
"properties": [
{
"id": "unit",
"value": "percentunit"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Memory Usage"
},
"properties": [
{
"id": "unit",
"value": "decbytes"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Memory Requests"
},
"properties": [
{
"id": "unit",
"value": "decbytes"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Memory Limits"
},
"properties": [
{
"id": "unit",
"value": "decbytes"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Namespace"
},
"properties": [
{
"id": "links",
"value": [
{
"title": "Drill down to pods",
"url": "/d/490b402361724ab1d4c45666c1fa9b6f/k8s-resources-windows-namespace?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"
}
]
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 28
},
"id": 10,
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true
}
],
"title": "Memory Requests by Namespace",
"transformations": [
{
"id": "joinByField",
"options": {
"byField": "namespace",
"mode": "outer"
}
},
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true,
"Time 1": true,
"Time 2": true,
"Time 3": true,
"Time 4": true,
"Time 5": true
},
"indexByName": {
"Time 1": 0,
"Time 2": 1,
"Time 3": 2,
"Time 4": 3,
"Time 5": 4,
"Value #A": 6,
"Value #B": 7,
"Value #C": 8,
"Value #D": 9,
"Value #E": 10,
"namespace": 5
},
"renameByName": {
"Value #A": "Memory Usage",
"Value #B": "Memory Requests",
"Value #C": "Memory Requests %",
"Value #D": "Memory Limits",
"Value #E": "Memory Limits %",
"namespace": "Namespace"
}
}
}
],
"type": "table"
}
],
"refresh": "10s",
"schemaVersion": 39,
"tags": [
"kubernetes"
],
"templating": {
"list": [
{
"current": {
"selected": true,
"text": "default",
"value": "default"
},
"hide": 0,
"label": "Data source",
"name": "datasource",
"query": "prometheus",
"regex": "",
"type": "datasource"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"hide": 2,
"label": "cluster",
"name": "cluster",
"query": "label_values(up{job=\"kubernetes-windows-exporter\"}, cluster)",
"refresh": 2,
"sort": 1,
"type": "query"
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timezone": "utc",
"title": "Compute Resources / Cluster(Windows)",
"uid": "4d08557fd9391b100730f2494bccac68"
}

View File

@ -1,442 +0,0 @@
{
"editable": false,
"panels": [
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": true
}
}
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 0
},
"id": 1,
"interval": "1m",
"options": {
"legend": {
"asTable": true,
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"legendFormat": "__auto"
}
],
"title": "CPU Usage",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": "/%/"
},
"properties": [
{
"id": "unit",
"value": "percentunit"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Pod"
},
"properties": [
{
"id": "links",
"value": [
{
"title": "Drill down to pods",
"url": "/d/40597a704a610e936dc6ed374a7ce023/k8s-resources-windows-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"
}
]
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 7
},
"id": 2,
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true
}
],
"title": "CPU Quota",
"transformations": [
{
"id": "joinByField",
"options": {
"byField": "pod",
"mode": "outer"
}
},
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true,
"Time 1": true,
"Time 2": true,
"Time 3": true,
"Time 4": true,
"Time 5": true
},
"indexByName": {
"Time 1": 0,
"Time 2": 1,
"Time 3": 2,
"Time 4": 3,
"Time 5": 4,
"Value #A": 6,
"Value #B": 7,
"Value #C": 8,
"Value #D": 9,
"Value #E": 10,
"pod": 5
},
"renameByName": {
"Value #A": "CPU Usage",
"Value #B": "CPU Requests",
"Value #C": "CPU Requests %",
"Value #D": "CPU Limits",
"Value #E": "CPU Limits %",
"pod": "Pod"
}
}
}
],
"type": "table"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": true
},
"unit": "decbytes"
}
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 14
},
"id": 3,
"interval": "1m",
"options": {
"legend": {
"asTable": true,
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"legendFormat": "__auto"
}
],
"title": "Memory Usage (Private Working Set)",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"unit": "bytes"
},
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": "/%/"
},
"properties": [
{
"id": "unit",
"value": "percentunit"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Pod"
},
"properties": [
{
"id": "links",
"value": [
{
"title": "Drill down to pods",
"url": "/d/40597a704a610e936dc6ed374a7ce023/k8s-resources-windows-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"
}
]
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 21
},
"id": 4,
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true
}
],
"title": "Memory Quota",
"transformations": [
{
"id": "joinByField",
"options": {
"byField": "pod",
"mode": "outer"
}
},
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true,
"Time 1": true,
"Time 2": true,
"Time 3": true,
"Time 4": true,
"Time 5": true
},
"indexByName": {
"Time 1": 0,
"Time 2": 1,
"Time 3": 2,
"Time 4": 3,
"Time 5": 4,
"Value #A": 6,
"Value #B": 7,
"Value #C": 8,
"Value #D": 9,
"Value #E": 10,
"pod": 5
},
"renameByName": {
"Value #A": "Memory Usage",
"Value #B": "Memory Requests",
"Value #C": "Memory Requests %",
"Value #D": "Memory Limits",
"Value #E": "Memory Limits %",
"pod": "Pod"
}
}
}
],
"type": "table"
}
],
"refresh": "10s",
"schemaVersion": 39,
"tags": [
"kubernetes"
],
"templating": {
"list": [
{
"current": {
"selected": true,
"text": "default",
"value": "default"
},
"hide": 0,
"label": "Data source",
"name": "datasource",
"query": "prometheus",
"regex": "",
"type": "datasource"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"hide": 2,
"label": "cluster",
"name": "cluster",
"query": "label_values(up{job=\"kubernetes-windows-exporter\"}, cluster)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"hide": 0,
"label": "namespace",
"name": "namespace",
"query": "label_values(windows_pod_container_available{cluster=\"$cluster\"}, namespace)",
"refresh": 2,
"sort": 1,
"type": "query"
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timezone": "utc",
"title": "Compute Resources / Namespace(Windows)",
"uid": "490b402361724ab1d4c45666c1fa9b6f"
}

View File

@ -1,497 +0,0 @@
{
"editable": false,
"panels": [
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": true
}
}
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 0
},
"id": 1,
"interval": "1m",
"options": {
"legend": {
"asTable": true,
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"legendFormat": "__auto"
}
],
"title": "CPU Usage",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": "/%/"
},
"properties": [
{
"id": "unit",
"value": "percentunit"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Namespace"
},
"properties": [
{
"id": "links",
"value": [
{
"title": "Drill down to pods",
"url": "/d/490b402361724ab1d4c45666c1fa9b6f/k8s-resources-windows-namespace?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"
}
]
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 7
},
"id": 2,
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true
}
],
"title": "CPU Quota",
"transformations": [
{
"id": "joinByField",
"options": {
"byField": "container",
"mode": "outer"
}
},
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true,
"Time 1": true,
"Time 2": true,
"Time 3": true,
"Time 4": true,
"Time 5": true
},
"indexByName": {
"Time 1": 0,
"Time 2": 1,
"Time 3": 2,
"Time 4": 3,
"Time 5": 4,
"Value #A": 6,
"Value #B": 7,
"Value #C": 8,
"Value #D": 9,
"Value #E": 10,
"container": 5
},
"renameByName": {
"Value #A": "CPU Usage",
"Value #B": "CPU Requests",
"Value #C": "CPU Requests %",
"Value #D": "CPU Limits",
"Value #E": "CPU Limits %",
"container": "Container"
}
}
}
],
"type": "table"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": true
},
"unit": "decbytes"
}
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 14
},
"id": 3,
"interval": "1m",
"options": {
"legend": {
"asTable": true,
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"legendFormat": "__auto"
}
],
"title": "Memory Usage",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"unit": "bytes"
},
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": "/%/"
},
"properties": [
{
"id": "unit",
"value": "percentunit"
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 21
},
"id": 4,
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true
}
],
"title": "Memory Quota",
"transformations": [
{
"id": "joinByField",
"options": {
"byField": "container",
"mode": "outer"
}
},
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true,
"Time 1": true,
"Time 2": true,
"Time 3": true,
"Time 4": true,
"Time 5": true
},
"indexByName": {
"Time 1": 0,
"Time 2": 1,
"Time 3": 2,
"Time 4": 3,
"Time 5": 4,
"Value #A": 6,
"Value #B": 7,
"Value #C": 8,
"Value #D": 9,
"Value #E": 10,
"container": 5
},
"renameByName": {
"Value #A": "Memory Usage",
"Value #B": "Memory Requests",
"Value #C": "Memory Requests %",
"Value #D": "Memory Limits",
"Value #E": "Memory Limits %",
"container": "Container"
}
}
}
],
"type": "table"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": true
},
"unit": "bytes"
}
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 28
},
"id": 5,
"interval": "1m",
"options": {
"legend": {
"asTable": true,
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sort_desc(sum by (container) (rate(windows_container_network_received_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))",
"legendFormat": "Received : {{ container }}"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sort_desc(sum by (container) (rate(windows_container_network_transmitted_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))",
"legendFormat": "Transmitted : {{ container }}"
}
],
"title": "Network I/O",
"type": "timeseries"
}
],
"refresh": "10s",
"schemaVersion": 39,
"tags": [
"kubernetes"
],
"templating": {
"list": [
{
"current": {
"selected": true,
"text": "default",
"value": "default"
},
"hide": 0,
"label": "Data source",
"name": "datasource",
"query": "prometheus",
"regex": "",
"type": "datasource"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"hide": 2,
"label": "cluster",
"name": "cluster",
"query": "label_values(up{job=\"kubernetes-windows-exporter\"}, cluster)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"hide": 0,
"label": "namespace",
"name": "namespace",
"query": "label_values(windows_pod_container_available{cluster=\"$cluster\"}, namespace)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"hide": 0,
"label": "pod",
"name": "pod",
"query": "label_values(windows_pod_container_available{cluster=\"$cluster\",namespace=\"$namespace\"}, pod)",
"refresh": 2,
"sort": 1,
"type": "query"
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timezone": "utc",
"title": "Compute Resources / Pod(Windows)",
"uid": "40597a704a610e936dc6ed374a7ce023"
}

View File

@ -50,7 +50,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -109,7 +109,7 @@
"y": 7
},
"id": 2,
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -240,7 +240,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -302,7 +302,7 @@
"y": 21
},
"id": 4,
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -453,7 +453,7 @@
"y": 28
},
"id": 5,
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -597,7 +597,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -648,7 +648,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -699,7 +699,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -750,7 +750,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -801,7 +801,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -852,7 +852,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -903,7 +903,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -954,7 +954,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {

View File

@ -102,7 +102,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -189,7 +189,7 @@
"y": 7
},
"id": 2,
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -397,7 +397,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -487,7 +487,7 @@
"y": 21
},
"id": 4,
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -663,7 +663,7 @@
"y": 28
},
"id": 5,
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -807,7 +807,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -858,7 +858,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -909,7 +909,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -960,7 +960,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1011,7 +1011,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1062,7 +1062,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1113,7 +1113,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1164,7 +1164,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {

View File

@ -1,404 +0,0 @@
{
"editable": false,
"panels": [
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": true
},
"unit": "percentunit"
}
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 0
},
"id": 1,
"interval": "1m",
"options": {
"legend": {
"asTable": true,
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "node:windows_node_cpu_utilisation:avg1m{cluster=\"$cluster\"} * node:windows_node_num_cpu:sum{cluster=\"$cluster\"} / scalar(sum(node:windows_node_num_cpu:sum{cluster=\"$cluster\"}))",
"legendFormat": "{{instance}}"
}
],
"title": "CPU Utilisation",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": true
},
"unit": "percentunit"
}
},
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 7
},
"id": 2,
"interval": "1m",
"options": {
"legend": {
"asTable": true,
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "node:windows_node_memory_utilisation:ratio{cluster=\"$cluster\"}",
"legendFormat": "{{instance}}"
}
],
"title": "Memory Utilisation",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": true
},
"unit": "short"
}
},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 7
},
"id": 3,
"interval": "1m",
"options": {
"legend": {
"asTable": true,
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "node:windows_node_memory_swap_io_pages:irate{cluster=\"$cluster\"}",
"legendFormat": "{{instance}}"
}
],
"title": "Memory Saturation (Swap I/O Pages)",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": true
},
"unit": "percentunit"
}
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 14
},
"id": 4,
"interval": "1m",
"options": {
"legend": {
"asTable": true,
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "node:windows_node_disk_utilisation:avg_irate{cluster=\"$cluster\"} / scalar(node:windows_node:sum{cluster=\"$cluster\"})",
"legendFormat": "{{instance}}"
}
],
"title": "Disk IO Utilisation",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": true
},
"unit": "Bps"
}
},
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 21
},
"id": 5,
"interval": "1m",
"options": {
"legend": {
"asTable": true,
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "node:windows_node_net_utilisation:sum_irate{cluster=\"$cluster\"}",
"legendFormat": "{{instance}}"
}
],
"title": "Net Utilisation (Transmitted)",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": true
},
"unit": "Bps"
}
},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 21
},
"id": 6,
"interval": "1m",
"options": {
"legend": {
"asTable": true,
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "node:windows_node_net_saturation:sum_irate{cluster=\"$cluster\"}",
"legendFormat": "{{instance}}"
}
],
"title": "Net Utilisation (Dropped)",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": true
},
"unit": "percentunit"
}
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 28
},
"id": 7,
"interval": "1m",
"options": {
"legend": {
"asTable": true,
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum by (instance)(node:windows_node_filesystem_usage:{cluster=\"$cluster\"})",
"legendFormat": "{{instance}}"
}
],
"title": "Disk Capacity",
"type": "timeseries"
}
],
"refresh": "10s",
"schemaVersion": 39,
"tags": [
"kubernetes"
],
"templating": {
"list": [
{
"current": {
"selected": true,
"text": "default",
"value": "default"
},
"hide": 0,
"label": "Data source",
"name": "datasource",
"query": "prometheus",
"regex": "",
"type": "datasource"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"hide": 2,
"label": "cluster",
"name": "cluster",
"query": "label_values(up{job=\"kubernetes-windows-exporter\"}, cluster)",
"refresh": 2,
"sort": 1,
"type": "query"
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timezone": "utc",
"title": "USE Method / Cluster(Windows)",
"uid": "53a43377ec9aaf2ff64dfc7a1f539334"
}

View File

@ -1,615 +0,0 @@
{
"editable": false,
"panels": [
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": true
},
"unit": "percentunit"
}
},
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 0
},
"id": 1,
"interval": "1m",
"options": {
"legend": {
"asTable": true,
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "node:windows_node_cpu_utilisation:avg1m{cluster=\"$cluster\", instance=\"$instance\"}",
"legendFormat": "Utilisation"
}
],
"title": "CPU Utilisation",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": true
},
"unit": "percentunit"
}
},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 0
},
"id": 2,
"interval": "1m",
"options": {
"legend": {
"asTable": true,
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "sum by (core) (irate(windows_cpu_time_total{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", mode!=\"idle\", instance=\"$instance\"}[$__rate_interval]))",
"legendFormat": "{{core}}"
}
],
"title": "CPU Usage Per Core",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": true
},
"unit": "percentunit"
}
},
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 7
},
"id": 3,
"interval": "1m",
"options": {
"legend": {
"asTable": true,
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "node:windows_node_memory_utilisation:{cluster=\"$cluster\", instance=\"$instance\"}",
"legendFormat": "Memory"
}
],
"title": "Memory Utilisation %",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": true
},
"unit": "bytes"
}
},
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 7
},
"id": 4,
"interval": "1m",
"options": {
"legend": {
"asTable": true,
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "max(\n windows_os_visible_memory_bytes{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", instance=\"$instance\"}\n - windows_memory_available_bytes{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", instance=\"$instance\"}\n)\n",
"legendFormat": "memory used"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "max(node:windows_node_memory_totalCached_bytes:sum{cluster=\"$cluster\", instance=\"$instance\"})",
"legendFormat": "memory cached"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "max(windows_memory_available_bytes{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", instance=\"$instance\"})",
"legendFormat": "memory free"
}
],
"title": "Memory Usage",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": true
},
"unit": "short"
}
},
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 7
},
"id": 5,
"interval": "1m",
"options": {
"legend": {
"asTable": true,
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "node:windows_node_memory_swap_io_pages:irate{cluster=\"$cluster\", instance=\"$instance\"}",
"legendFormat": "Swap IO"
}
],
"title": "Memory Saturation (Swap I/O) Pages",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": true
},
"unit": "percentunit"
}
},
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 14
},
"id": 6,
"interval": "1m",
"options": {
"legend": {
"asTable": true,
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "node:windows_node_disk_utilisation:avg_irate{cluster=\"$cluster\", instance=\"$instance\"}",
"legendFormat": "Utilisation"
}
],
"title": "Disk IO Utilisation",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": true
},
"unit": "bytes"
},
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": "/io time/"
},
"properties": [
{
"id": "unit",
"value": "ms"
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 14
},
"id": 7,
"interval": "1m",
"options": {
"legend": {
"asTable": true,
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "max(rate(windows_logical_disk_read_bytes_total{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", instance=\"$instance\"}[$__rate_interval]))",
"legendFormat": "read"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "max(rate(windows_logical_disk_write_bytes_total{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", instance=\"$instance\"}[$__rate_interval]))",
"legendFormat": "written"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "max(rate(windows_logical_disk_read_seconds_total{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", instance=\"$instance\"}[$__rate_interval]) + rate(windows_logical_disk_write_seconds_total{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", instance=\"$instance\"}[$__rate_interval]))",
"legendFormat": "io time"
}
],
"title": "Disk IO",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": true
},
"unit": "percentunit"
}
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 21
},
"id": 8,
"interval": "1m",
"options": {
"legend": {
"asTable": true,
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "node:windows_node_filesystem_usage:{cluster=\"$cluster\", instance=\"$instance\"}",
"legendFormat": "{{volume}}"
}
],
"title": "Disk Utilisation",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": true
},
"unit": "Bps"
}
},
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 28
},
"id": 9,
"interval": "1m",
"options": {
"legend": {
"asTable": true,
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "node:windows_node_net_utilisation:sum_irate{cluster=\"$cluster\", instance=\"$instance\"}",
"legendFormat": "Utilisation"
}
],
"title": "Net Utilisation (Transmitted)",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"showPoints": "never",
"spanNulls": true
},
"unit": "Bps"
}
},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 28
},
"id": 10,
"interval": "1m",
"options": {
"legend": {
"asTable": true,
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "node:windows_node_net_saturation:sum_irate{cluster=\"$cluster\", instance=\"$instance\"}",
"legendFormat": "Saturation"
}
],
"title": "Net Saturation (Dropped)",
"type": "timeseries"
}
],
"refresh": "10s",
"schemaVersion": 39,
"tags": [
"kubernetes"
],
"templating": {
"list": [
{
"current": {
"selected": true,
"text": "default",
"value": "default"
},
"hide": 0,
"label": "Data source",
"name": "datasource",
"query": "prometheus",
"regex": "",
"type": "datasource"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"hide": 2,
"label": "cluster",
"name": "cluster",
"query": "label_values(up{job=\"kubernetes-windows-exporter\"}, cluster)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"hide": 0,
"label": "instance",
"name": "instance",
"query": "label_values(windows_system_system_up_time{cluster=\"$cluster\"}, instance)",
"refresh": 2,
"type": "query"
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timezone": "utc",
"title": "USE Method / Node(Windows)",
"uid": "96e7484b0bb53b74fbc2bcb7723cd40b"
}

View File

@ -35,7 +35,7 @@
"options": {
"colorMode": "none"
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -70,7 +70,7 @@
"options": {
"colorMode": "none"
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -105,7 +105,7 @@
"options": {
"colorMode": "none"
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -140,7 +140,7 @@
"options": {
"colorMode": "none"
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -175,7 +175,7 @@
"options": {
"colorMode": "none"
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -210,7 +210,7 @@
"options": {
"colorMode": "none"
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -261,7 +261,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -312,7 +312,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -363,7 +363,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -414,7 +414,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -473,7 +473,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -532,7 +532,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -583,7 +583,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -634,7 +634,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -685,7 +685,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -736,7 +736,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -787,7 +787,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -838,7 +838,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -889,7 +889,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -940,7 +940,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1015,15 +1015,15 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, verb, le))",
"legendFormat": "{{instance}} {{verb}}"
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, verb, url, le))",
"legendFormat": "{{instance}} {{verb}} {{url}}"
}
],
"title": "Request duration 99th quantile",
@ -1066,7 +1066,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1117,7 +1117,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -1168,7 +1168,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {

View File

@ -53,7 +53,7 @@
"y": 0
},
"id": 1,
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -106,7 +106,7 @@
"y": 0
},
"id": 2,
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -177,7 +177,7 @@
"y": 9
},
"id": 3,
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -316,7 +316,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -362,7 +362,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -408,7 +408,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -454,7 +454,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -500,7 +500,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -546,7 +546,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {

View File

@ -39,7 +39,7 @@
"displayMode": "basic",
"showUnfilled": false
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -78,7 +78,7 @@
"displayMode": "basic",
"showUnfilled": false
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -149,7 +149,7 @@
"y": 9
},
"id": 3,
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -335,7 +335,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -386,7 +386,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -437,7 +437,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -488,7 +488,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -539,7 +539,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -590,7 +590,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -641,7 +641,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -692,7 +692,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {

View File

@ -50,7 +50,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -112,7 +112,7 @@
},
"id": 2,
"interval": "1m",
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -162,7 +162,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -224,7 +224,7 @@
},
"id": 4,
"interval": "1m",
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {

View File

@ -53,7 +53,7 @@
"y": 0
},
"id": 1,
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -106,7 +106,7 @@
"y": 0
},
"id": 2,
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -152,7 +152,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -198,7 +198,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -244,7 +244,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -290,7 +290,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -336,7 +336,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -382,7 +382,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {

View File

@ -35,7 +35,7 @@
"options": {
"colorMode": "none"
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -86,7 +86,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -137,7 +137,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -188,7 +188,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -239,7 +239,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -290,7 +290,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -365,15 +365,15 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[$__rate_interval])) by (verb, le))",
"legendFormat": "{{verb}}"
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
"legendFormat": "{{verb}} {{url}}"
}
],
"title": "Post Request Latency 99th Quantile",
@ -416,15 +416,15 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, le))",
"legendFormat": "{{verb}}"
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
"legendFormat": "{{verb}} {{url}}"
}
],
"title": "Get Request Latency 99th Quantile",
@ -467,7 +467,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -518,7 +518,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -569,7 +569,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {

View File

@ -35,7 +35,7 @@
"options": {
"colorMode": "none"
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -86,7 +86,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -161,7 +161,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -236,7 +236,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -311,15 +311,15 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, le))",
"legendFormat": "{{verb}}"
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
"legendFormat": "{{verb}} {{url}}"
}
],
"title": "Post Request Latency 99th Quantile",
@ -362,15 +362,15 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, le))",
"legendFormat": "{{verb}}"
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
"legendFormat": "{{verb}} {{url}}"
}
],
"title": "Get Request Latency 99th Quantile",
@ -413,7 +413,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -464,7 +464,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -515,7 +515,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {

View File

@ -39,7 +39,7 @@
"displayMode": "basic",
"showUnfilled": false
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -78,7 +78,7 @@
"displayMode": "basic",
"showUnfilled": false
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -117,7 +117,7 @@
"displayMode": "basic",
"showUnfilled": false
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -156,7 +156,7 @@
"displayMode": "basic",
"showUnfilled": false
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -207,7 +207,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -258,7 +258,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -309,7 +309,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -360,7 +360,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -411,7 +411,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {
@ -462,7 +462,7 @@
"mode": "single"
}
},
"pluginVersion": "v11.4.0",
"pluginVersion": "v11.1.0",
"targets": [
{
"datasource": {

View File

@ -18,8 +18,8 @@
"subdir": "contrib/mixin"
}
},
"version": "f30cbaac111aa01a310fe75360c759cdd4d9cd14",
"sum": "XmXkOCriQIZmXwlIIFhqlJMa0e6qGWdxZD+ZDYaN0Po="
"version": "2b323071a8bd4f02ddaf63e0dfa1fd98c221dccb",
"sum": "IXI3LQIT9NmTPJAk8WLUJd5+qZfcGpeNCyWIK7oEpws="
},
{
"source": {
@ -41,6 +41,16 @@
"version": "a1d61cce1da59c71409b99b5c7568511fec661ea",
"sum": "342u++/7rViR/zj2jeJOjshzglkZ1SY+hFNuyCBFMdc="
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet-lib.git",
"subdir": "grafonnet-7.0"
}
},
"version": "a1d61cce1da59c71409b99b5c7568511fec661ea",
"sum": "gCtR9s/4D5fxU9aKXg0Bru+/njZhA0YjLjPiASc61FM="
},
{
"source": {
"git": {
@ -48,8 +58,8 @@
"subdir": "gen/grafonnet-latest"
}
},
"version": "d20e609202733790caf5b554c9945d049f243ae3",
"sum": "V9vAj21qJOc2DlMPDgB1eEjSQU4A+sAA4AXuJ6bd4xc="
"version": "82a19822e54a0a12a51e24dbd48fcde717dc0864",
"sum": "64fMUPI3frXGj4X1FqFd1t7r04w3CUSmXaDcJ23EYbQ="
},
{
"source": {
@ -58,18 +68,18 @@
"subdir": "gen/grafonnet-v10.0.0"
}
},
"version": "d20e609202733790caf5b554c9945d049f243ae3",
"version": "82a19822e54a0a12a51e24dbd48fcde717dc0864",
"sum": "xdcrJPJlpkq4+5LpGwN4tPAuheNNLXZjE6tDcyvFjr0="
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet.git",
"subdir": "gen/grafonnet-v11.4.0"
"subdir": "gen/grafonnet-v11.1.0"
}
},
"version": "d20e609202733790caf5b554c9945d049f243ae3",
"sum": "aVAX09paQYNOoCSKVpuk1exVIyBoMt/C50QJI+Q/3nA="
"version": "82a19822e54a0a12a51e24dbd48fcde717dc0864",
"sum": "41w7p/rwrNsITqNHMXtGSJAfAyKmnflg6rFhKBduUxM="
},
{
"source": {
@ -78,7 +88,7 @@
"subdir": "grafana-builder"
}
},
"version": "d6c38bb26f576b128cadca4137d73a037afdd872",
"version": "767befa8fb46a07be516dec2777d7d89909a529d",
"sum": "yxqWcq/N3E/a/XreeU6EuE6X7kYPnG0AspAQFKOjASo="
},
{
@ -98,8 +108,8 @@
"subdir": ""
}
},
"version": "1199b50e9d2ff53d4bb5fb2304ad1fb69d38e609",
"sum": "LfbgcJbilu4uBdKYZSvmkoOTPwEAzg10L3/VqKAIWtA="
"version": "63d430b69a95741061c2f7fc9d84b1a778511d9c",
"sum": "qiZi3axUSXCVzKUF83zSAxklwrnitMmrDK4XAfjPMdE="
},
{
"source": {
@ -108,8 +118,8 @@
"subdir": ""
}
},
"version": "e27267571be06c2bdc3d2fd8dbd70161cd709cb4",
"sum": "je1RPCp2aFNefYs5Q57Q5wDm93p8pL4pdBtA5rC7jLA="
"version": "a3fbf21977deb89b7d843eb8371170c011ea6835",
"sum": "57zW2IGJ9zbYd8BI0qe6JkoWTRSMNiBUWC6+YcnEsWo="
},
{
"source": {
@ -118,8 +128,8 @@
"subdir": "jsonnet/kube-state-metrics"
}
},
"version": "2a95d4649b2fea55799032fb9c0b571c4ba7f776",
"sum": "3bioG7CfTfY9zeu5xU4yon6Zt3kYvNkyl492nOhQxnM="
"version": "32e7727ff4613b0f55dfc18aff15afb8c04d03c5",
"sum": "lO7jUSzAIy8Yk9pOWJIWgPRhubkWzVh56W6wtYfbVH4="
},
{
"source": {
@ -128,7 +138,7 @@
"subdir": "jsonnet/kube-state-metrics-mixin"
}
},
"version": "2a95d4649b2fea55799032fb9c0b571c4ba7f776",
"version": "32e7727ff4613b0f55dfc18aff15afb8c04d03c5",
"sum": "qclI7LwucTjBef3PkGBkKxF0mfZPbHnn4rlNWKGtR4c="
},
{
@ -138,8 +148,8 @@
"subdir": "jsonnet/kube-prometheus"
}
},
"version": "1eea946a1532f1e8cccfceea98d907bf3a10b1d9",
"sum": "17LhiwefVfoNDsF3DcFZw/UL4PMU7YpNNUaOdaYd1gE="
"version": "7e5a571a3fb735c78e17c76a637eb7e8bb5dd086",
"sum": "uTw/Mj+X91S+oqUpAX81xcfWPDlox0tdSZY/YBw7nGE="
},
{
"source": {
@ -148,7 +158,7 @@
"subdir": "jsonnet/mixin"
}
},
"version": "465bcbaf2a727c942e7f923aacfb9dff9af8d4a1",
"version": "a366602bacb2c8d773a9cee058b6971b8d2e3732",
"sum": "gi+knjdxs2T715iIQIntrimbHRgHnpM8IFBJDD1gYfs=",
"name": "prometheus-operator-mixin"
},
@ -159,8 +169,8 @@
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "465bcbaf2a727c942e7f923aacfb9dff9af8d4a1",
"sum": "LctDdofQostvviE5y8vpRKWGGO1ZKO3dgJe7P9xifW0="
"version": "a366602bacb2c8d773a9cee058b6971b8d2e3732",
"sum": "z0/lCiMusMHTqntsosMVGYkVcSZjCpyZBmUMVUsK5nA="
},
{
"source": {
@ -169,7 +179,7 @@
"subdir": "doc/alertmanager-mixin"
}
},
"version": "b5d1a64ad5bb0ff879705714d1e40cea82efbd5c",
"version": "0f65e8fa5fc72d2678655105c0213b416ca6f34c",
"sum": "Mf4h1BYLle2nrgjf/HXrBbl0Zk8N+xaoEM017o0BC+k=",
"name": "alertmanager"
},
@ -180,8 +190,8 @@
"subdir": "docs/node-mixin"
}
},
"version": "11365f97bef6cb0e6259d536a7e21c49e3f5c065",
"sum": "xYj6VYFT/eafsbleNlC+Z2VfLy1CndyYrJs9BcTmnX8="
"version": "cf8c6891cc610e54f70383addd4bb6079f0add35",
"sum": "cQCW+1N0Xae5yXecCWDK2oAlN0luBS/5GrwBYSlaFms="
},
{
"source": {
@ -190,8 +200,8 @@
"subdir": "documentation/prometheus-mixin"
}
},
"version": "509b978f0d675b4c9b3ccf8c0fc06961b0f03e8f",
"sum": "2c+wttfee9TwuQJZIkNV7Tekem74Qgc7iZ842P28rNw=",
"version": "b407c2930da4f50c0d17fc39404c6302a9eb740b",
"sum": "OYT5u3S8DbamuJV/v3gbWSteOvFzMeNwMj+u4Apk7jM=",
"name": "prometheus"
},
{
@ -212,7 +222,7 @@
"subdir": "mixin"
}
},
"version": "346d18bb0f8011c63d7106de494cf3b9253161a1",
"version": "7037331e6ea7dbe85a1b7af37bf8ea277a80663d",
"sum": "ieCD4eMgGbOlrI8GmckGPHBGQDcLasE1rULYq56W/bs=",
"name": "thanos-mixin"
}

View File

@ -6,5 +6,5 @@ dashboards:
url: https://grafana.com/api/dashboards/9578/revisions/4/download
tags: []
- name: Prometheus
url: https://grafana.com/api/dashboards/19105/revisions/7/download
url: https://grafana.com/api/dashboards/3662/revisions/2/download
tags: []

View File

@ -7,7 +7,7 @@
"app.kubernetes.io/instance": "main",
"app.kubernetes.io/name": "alertmanager",
"app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "0.28.0",
"app.kubernetes.io/version": "0.27.0",
"prometheus": "k8s",
"role": "alert-rules"
},

View File

@ -20,9 +20,9 @@
"summary": "etcd cluster members are down."
},
"expr": "max without (endpoint) (\n sum without (instance, pod) (up{job=~\".*etcd.*\"} == bool 0)\nor\n count without (To) (\n sum without (instance, pod) (rate(etcd_network_peer_sent_failures_total{job=~\".*etcd.*\"}[120s])) > 0.01\n )\n)\n> 0\n",
"for": "20m",
"for": "10m",
"labels": {
"severity": "warning"
"severity": "critical"
}
},
{

View File

@ -6,7 +6,7 @@
"app.kubernetes.io/component": "exporter",
"app.kubernetes.io/name": "kube-state-metrics",
"app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "2.15.0",
"app.kubernetes.io/version": "2.14.0",
"prometheus": "k8s",
"role": "alert-rules"
},

View File

@ -19,7 +19,7 @@
{
"alert": "KubePodCrashLooping",
"annotations": {
"description": "Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is in waiting state (reason: \"CrashLoopBackOff\") on cluster {{ $labels.cluster }}.",
"description": "Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is in waiting state (reason: \"CrashLoopBackOff\").",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping",
"summary": "Pod is crash looping."
},
@ -32,7 +32,7 @@
{
"alert": "KubePodNotReady",
"annotations": {
"description": "Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes on cluster {{ $labels.cluster }}.",
"description": "Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready",
"summary": "Pod has been in a non-ready state for more than 15 minutes."
},
@ -45,7 +45,7 @@
{
"alert": "KubeDeploymentGenerationMismatch",
"annotations": {
"description": "Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back on cluster {{ $labels.cluster }}.",
"description": "Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentgenerationmismatch",
"summary": "Deployment generation mismatch due to possible roll-back"
},
@ -58,7 +58,7 @@
{
"alert": "KubeDeploymentReplicasMismatch",
"annotations": {
"description": "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes on cluster {{ $labels.cluster }}.",
"description": "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch",
"summary": "Deployment has not matched the expected number of replicas."
},
@ -71,7 +71,7 @@
{
"alert": "KubeDeploymentRolloutStuck",
"annotations": {
"description": "Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment }} is not progressing for longer than 15 minutes on cluster {{ $labels.cluster }}.",
"description": "Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment }} is not progressing for longer than 15 minutes.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentrolloutstuck",
"summary": "Deployment rollout is not progressing."
},
@ -84,7 +84,7 @@
{
"alert": "KubeStatefulSetReplicasMismatch",
"annotations": {
"description": "StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes on cluster {{ $labels.cluster }}.",
"description": "StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetreplicasmismatch",
"summary": "StatefulSet has not matched the expected number of replicas."
},
@ -97,7 +97,7 @@
{
"alert": "KubeStatefulSetGenerationMismatch",
"annotations": {
"description": "StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back on cluster {{ $labels.cluster }}.",
"description": "StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetgenerationmismatch",
"summary": "StatefulSet generation mismatch due to possible roll-back"
},
@ -110,7 +110,7 @@
{
"alert": "KubeStatefulSetUpdateNotRolledOut",
"annotations": {
"description": "StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out on cluster {{ $labels.cluster }}.",
"description": "StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetupdatenotrolledout",
"summary": "StatefulSet update has not been rolled out."
},
@ -123,7 +123,7 @@
{
"alert": "KubeDaemonSetRolloutStuck",
"annotations": {
"description": "DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished or progressed for at least 15m on cluster {{ $labels.cluster }}.",
"description": "DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished or progressed for at least 15m.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck",
"summary": "DaemonSet rollout is stuck."
},
@ -136,7 +136,7 @@
{
"alert": "KubeContainerWaiting",
"annotations": {
"description": "pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container {{ $labels.container}} has been in waiting state for longer than 1 hour. (reason: \"{{ $labels.reason }}\") on cluster {{ $labels.cluster }}.",
"description": "pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container {{ $labels.container}} has been in waiting state for longer than 1 hour. (reason: \"{{ $labels.reason }}\").",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting",
"summary": "Pod container waiting longer than 1 hour"
},
@ -149,7 +149,7 @@
{
"alert": "KubeDaemonSetNotScheduled",
"annotations": {
"description": "{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled on cluster {{ $labels.cluster }}.",
"description": "{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetnotscheduled",
"summary": "DaemonSet pods are not scheduled."
},
@ -162,7 +162,7 @@
{
"alert": "KubeDaemonSetMisScheduled",
"annotations": {
"description": "{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run on cluster {{ $labels.cluster }}.",
"description": "{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetmisscheduled",
"summary": "DaemonSet pods are misscheduled."
},
@ -175,7 +175,7 @@
{
"alert": "KubeJobNotCompleted",
"annotations": {
"description": "Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than {{ \"43200\" | humanizeDuration }} to complete on cluster {{ $labels.cluster }}.",
"description": "Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than {{ \"43200\" | humanizeDuration }} to complete.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted",
"summary": "Job did not complete in time"
},
@ -187,7 +187,7 @@
{
"alert": "KubeJobFailed",
"annotations": {
"description": "Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete. Removing failed job after investigation should clear this alert on cluster {{ $labels.cluster }}.",
"description": "Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete. Removing failed job after investigation should clear this alert.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobfailed",
"summary": "Job failed to complete."
},
@ -200,7 +200,7 @@
{
"alert": "KubeHpaReplicasMismatch",
"annotations": {
"description": "HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has not matched the desired number of replicas for longer than 15 minutes on cluster {{ $labels.cluster }}.",
"description": "HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has not matched the desired number of replicas for longer than 15 minutes.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpareplicasmismatch",
"summary": "HPA has not matched desired number of replicas."
},
@ -213,7 +213,7 @@
{
"alert": "KubeHpaMaxedOut",
"annotations": {
"description": "HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has been running at max replicas for longer than 15 minutes on cluster {{ $labels.cluster }}.",
"description": "HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has been running at max replicas for longer than 15 minutes.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpamaxedout",
"summary": "HPA is running at max replicas"
},
@ -257,7 +257,7 @@
{
"alert": "KubeQuotaAlmostFull",
"annotations": {
"description": "Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota on cluster {{ $labels.cluster }}.",
"description": "Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaalmostfull",
"summary": "Namespace quota is going to be full."
},
@ -270,7 +270,7 @@
{
"alert": "KubeQuotaFullyUsed",
"annotations": {
"description": "Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota on cluster {{ $labels.cluster }}.",
"description": "Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotafullyused",
"summary": "Namespace quota is fully used."
},
@ -283,7 +283,7 @@
{
"alert": "KubeQuotaExceeded",
"annotations": {
"description": "Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota on cluster {{ $labels.cluster }}.",
"description": "Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaexceeded",
"summary": "Namespace quota has exceeded the limits."
},
@ -296,7 +296,7 @@
{
"alert": "CPUThrottlingHigh",
"annotations": {
"description": "{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }} on cluster {{ $labels.cluster }}.",
"description": "{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }}.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh",
"summary": "Processes experience elevated CPU throttling."
},
@ -396,7 +396,7 @@
{
"alert": "KubeVersionMismatch",
"annotations": {
"description": "There are {{ $value }} different semantic versions of Kubernetes components running on cluster {{ $labels.cluster }}.",
"description": "There are {{ $value }} different semantic versions of Kubernetes components running.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeversionmismatch",
"summary": "Different semantic versions of Kubernetes components running."
},
@ -409,7 +409,7 @@
{
"alert": "KubeClientErrors",
"annotations": {
"description": "Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors on cluster {{ $labels.cluster }}.",
"description": "Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors.'",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclienterrors",
"summary": "Kubernetes API server client is experiencing errors."
},
@ -427,7 +427,7 @@
{
"alert": "KubeAPIErrorBudgetBurn",
"annotations": {
"description": "The API server is burning too much error budget on cluster {{ $labels.cluster }}.",
"description": "The API server is burning too much error budget.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn",
"summary": "The API server is burning too much error budget."
},
@ -442,7 +442,7 @@
{
"alert": "KubeAPIErrorBudgetBurn",
"annotations": {
"description": "The API server is burning too much error budget on cluster {{ $labels.cluster }}.",
"description": "The API server is burning too much error budget.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn",
"summary": "The API server is burning too much error budget."
},
@ -457,7 +457,7 @@
{
"alert": "KubeAPIErrorBudgetBurn",
"annotations": {
"description": "The API server is burning too much error budget on cluster {{ $labels.cluster }}.",
"description": "The API server is burning too much error budget.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn",
"summary": "The API server is burning too much error budget."
},
@ -472,7 +472,7 @@
{
"alert": "KubeAPIErrorBudgetBurn",
"annotations": {
"description": "The API server is burning too much error budget on cluster {{ $labels.cluster }}.",
"description": "The API server is burning too much error budget.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn",
"summary": "The API server is burning too much error budget."
},
@ -518,12 +518,11 @@
{
"alert": "KubeAggregatedAPIErrors",
"annotations": {
"description": "Kubernetes aggregated API {{ $labels.instance }}/{{ $labels.name }} has reported {{ $labels.reason }} errors on cluster {{ $labels.cluster }}.",
"description": "Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. It has appeared unavailable {{ $value | humanize }} times averaged over the past 10m.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapierrors",
"summary": "Kubernetes aggregated API has reported errors."
},
"expr": "sum by(cluster, instance, name, reason)(increase(aggregator_unavailable_apiservice_total{job=\"apiserver\"}[1m])) > 0\n",
"for": "10m",
"expr": "sum by(name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total{job=\"apiserver\"}[10m])) > 4\n",
"labels": {
"severity": "warning"
}
@ -531,7 +530,7 @@
{
"alert": "KubeAggregatedAPIDown",
"annotations": {
"description": "Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m on cluster {{ $labels.cluster }}.",
"description": "Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapidown",
"summary": "Kubernetes aggregated API is down."
},
@ -557,7 +556,7 @@
{
"alert": "KubeAPITerminatedRequests",
"annotations": {
"description": "The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests on cluster {{ $labels.cluster }}.",
"description": "The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapiterminatedrequests",
"summary": "The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests."
},
@ -575,11 +574,11 @@
{
"alert": "KubeNodeNotReady",
"annotations": {
"description": "{{ $labels.node }} has been unready for more than 15 minutes on cluster {{ $labels.cluster }}.",
"description": "{{ $labels.node }} has been unready for more than 15 minutes.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodenotready",
"summary": "Node is not ready."
},
"expr": "kube_node_status_condition{job=\"kube-state-metrics\",condition=\"Ready\",status=\"true\"} == 0\nand on (cluster, node)\nkube_node_spec_unschedulable{job=\"kube-state-metrics\"} == 0\n",
"expr": "kube_node_status_condition{job=\"kube-state-metrics\",condition=\"Ready\",status=\"true\"} == 0\n",
"for": "15m",
"labels": {
"severity": "warning"
@ -588,7 +587,7 @@
{
"alert": "KubeNodeUnreachable",
"annotations": {
"description": "{{ $labels.node }} is unreachable and some workloads may be rescheduled on cluster {{ $labels.cluster }}.",
"description": "{{ $labels.node }} is unreachable and some workloads may be rescheduled.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodeunreachable",
"summary": "Node is unreachable."
},
@ -601,11 +600,11 @@
{
"alert": "KubeletTooManyPods",
"annotations": {
"description": "Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity on cluster {{ $labels.cluster }}.",
"description": "Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubelettoomanypods",
"summary": "Kubelet is running at capacity."
},
"expr": "(\n max by (cluster, instance) (\n kubelet_running_pods{job=\"kubelet\", metrics_path=\"/metrics\"} > 1\n )\n * on (cluster, instance) group_left(node)\n max by (cluster, instance, node) (\n kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"}\n )\n)\n/ on (cluster, node) group_left()\nmax by (cluster, node) (\n kube_node_status_capacity{job=\"kube-state-metrics\", resource=\"pods\"} != 1\n) > 0.95\n",
"expr": "count by(cluster, node) (\n (kube_pod_status_phase{job=\"kube-state-metrics\",phase=\"Running\"} == 1) * on(instance,pod,namespace,cluster) group_left(node) topk by(instance,pod,namespace,cluster) (1, kube_pod_info{job=\"kube-state-metrics\"})\n)\n/\nmax by(cluster, node) (\n kube_node_status_capacity{job=\"kube-state-metrics\",resource=\"pods\"} != 1\n) > 0.95\n",
"for": "15m",
"labels": {
"severity": "info"
@ -614,11 +613,11 @@
{
"alert": "KubeNodeReadinessFlapping",
"annotations": {
"description": "The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes on cluster {{ $labels.cluster }}.",
"description": "The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodereadinessflapping",
"summary": "Node readiness status is flapping."
},
"expr": "sum(changes(kube_node_status_condition{job=\"kube-state-metrics\",status=\"true\",condition=\"Ready\"}[15m])) by (cluster, node) > 2\nand on (cluster, node)\nkube_node_spec_unschedulable{job=\"kube-state-metrics\"} == 0\n",
"expr": "sum(changes(kube_node_status_condition{job=\"kube-state-metrics\",status=\"true\",condition=\"Ready\"}[15m])) by (cluster, node) > 2\n",
"for": "15m",
"labels": {
"severity": "warning"
@ -627,7 +626,7 @@
{
"alert": "KubeletPlegDurationHigh",
"annotations": {
"description": "The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }} on cluster {{ $labels.cluster }}.",
"description": "The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletplegdurationhigh",
"summary": "Kubelet Pod Lifecycle Event Generator is taking too long to relist."
},
@ -640,7 +639,7 @@
{
"alert": "KubeletPodStartUpLatencyHigh",
"annotations": {
"description": "Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }} on cluster {{ $labels.cluster }}.",
"description": "Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletpodstartuplatencyhigh",
"summary": "Kubelet Pod startup latency is too high."
},
@ -653,7 +652,7 @@
{
"alert": "KubeletClientCertificateExpiration",
"annotations": {
"description": "Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }} on cluster {{ $labels.cluster }}.",
"description": "Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration",
"summary": "Kubelet client certificate is about to expire."
},
@ -665,7 +664,7 @@
{
"alert": "KubeletClientCertificateExpiration",
"annotations": {
"description": "Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }} on cluster {{ $labels.cluster }}.",
"description": "Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration",
"summary": "Kubelet client certificate is about to expire."
},
@ -677,7 +676,7 @@
{
"alert": "KubeletServerCertificateExpiration",
"annotations": {
"description": "Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }} on cluster {{ $labels.cluster }}.",
"description": "Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration",
"summary": "Kubelet server certificate is about to expire."
},
@ -689,7 +688,7 @@
{
"alert": "KubeletServerCertificateExpiration",
"annotations": {
"description": "Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }} on cluster {{ $labels.cluster }}.",
"description": "Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration",
"summary": "Kubelet server certificate is about to expire."
},
@ -701,7 +700,7 @@
{
"alert": "KubeletClientCertificateRenewalErrors",
"annotations": {
"description": "Kubelet on node {{ $labels.node }} has failed to renew its client certificate ({{ $value | humanize }} errors in the last 5 minutes) on cluster {{ $labels.cluster }}.",
"description": "Kubelet on node {{ $labels.node }} has failed to renew its client certificate ({{ $value | humanize }} errors in the last 5 minutes).",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificaterenewalerrors",
"summary": "Kubelet has failed to renew its client certificate."
},
@ -714,7 +713,7 @@
{
"alert": "KubeletServerCertificateRenewalErrors",
"annotations": {
"description": "Kubelet on node {{ $labels.node }} has failed to renew its server certificate ({{ $value | humanize }} errors in the last 5 minutes) on cluster {{ $labels.cluster }}.",
"description": "Kubelet on node {{ $labels.node }} has failed to renew its server certificate ({{ $value | humanize }} errors in the last 5 minutes).",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificaterenewalerrors",
"summary": "Kubelet has failed to renew its server certificate."
},
@ -810,25 +809,25 @@
"record": "cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h"
},
{
"expr": "sum by (cluster, verb, scope) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{le=\"+Inf\"})\n",
"expr": "sum by (cluster, verb, scope) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{le=\"+Inf\"} * 24 * 30)\n",
"record": "cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d"
},
{
"expr": "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=~\"1(\\\\.0)?\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=~\"5(\\\\.0)?\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=~\"30(\\\\.0)?\"})\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n",
"expr": "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n",
"labels": {
"verb": "all"
},
"record": "apiserver_request:availability30d"
},
{
"expr": "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=~\"5(\\\\.0)?\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=~\"30(\\\\.0)?\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n",
"expr": "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:availability30d"
},
{
"expr": "1 - (\n (\n # too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=~\"1(\\\\.0)?\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n",
"expr": "1 - (\n (\n # too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n",
"labels": {
"verb": "write"
},
@ -870,98 +869,98 @@
"name": "kube-apiserver-burnrate.rules",
"rules": [
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate1d"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate1h"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate2h"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate30m"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate3d"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate5m"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate6h"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n",
"labels": {
"verb": "write"
},
"record": "apiserver_request:burnrate1d"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n",
"labels": {
"verb": "write"
},
"record": "apiserver_request:burnrate1h"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n",
"labels": {
"verb": "write"
},
"record": "apiserver_request:burnrate2h"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n",
"labels": {
"verb": "write"
},
"record": "apiserver_request:burnrate30m"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n",
"labels": {
"verb": "write"
},
"record": "apiserver_request:burnrate3d"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n",
"labels": {
"verb": "write"
},
"record": "apiserver_request:burnrate5m"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n",
"labels": {
"verb": "write"
},
@ -1238,144 +1237,6 @@
"record": "node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile"
}
]
},
{
"name": "windows.node.rules",
"rules": [
{
"expr": "count by (cluster) (\n windows_system_system_up_time{job=\"kubernetes-windows-exporter\"}\n)\n",
"record": "node:windows_node:sum"
},
{
"expr": "count by (cluster, instance) (sum by (cluster, instance, core) (\n windows_cpu_time_total{job=\"kubernetes-windows-exporter\"}\n))\n",
"record": "node:windows_node_num_cpu:sum"
},
{
"expr": "1 - avg by (cluster) (rate(windows_cpu_time_total{job=\"kubernetes-windows-exporter\",mode=\"idle\"}[1m]))\n",
"record": ":windows_node_cpu_utilisation:avg1m"
},
{
"expr": "1 - avg by (cluster, instance) (\n rate(windows_cpu_time_total{job=\"kubernetes-windows-exporter\",mode=\"idle\"}[1m])\n)\n",
"record": "node:windows_node_cpu_utilisation:avg1m"
},
{
"expr": "1 -\nsum by (cluster) (windows_memory_available_bytes{job=\"kubernetes-windows-exporter\"})\n/\nsum by (cluster) (windows_os_visible_memory_bytes{job=\"kubernetes-windows-exporter\"})\n",
"record": ":windows_node_memory_utilisation:"
},
{
"expr": "sum by (cluster) (windows_memory_available_bytes{job=\"kubernetes-windows-exporter\"} + windows_memory_cache_bytes{job=\"kubernetes-windows-exporter\"})\n",
"record": ":windows_node_memory_MemFreeCached_bytes:sum"
},
{
"expr": "(windows_memory_cache_bytes{job=\"kubernetes-windows-exporter\"} + windows_memory_modified_page_list_bytes{job=\"kubernetes-windows-exporter\"} + windows_memory_standby_cache_core_bytes{job=\"kubernetes-windows-exporter\"} + windows_memory_standby_cache_normal_priority_bytes{job=\"kubernetes-windows-exporter\"} + windows_memory_standby_cache_reserve_bytes{job=\"kubernetes-windows-exporter\"})\n",
"record": "node:windows_node_memory_totalCached_bytes:sum"
},
{
"expr": "sum by (cluster) (windows_os_visible_memory_bytes{job=\"kubernetes-windows-exporter\"})\n",
"record": ":windows_node_memory_MemTotal_bytes:sum"
},
{
"expr": "sum by (cluster, instance) (\n (windows_memory_available_bytes{job=\"kubernetes-windows-exporter\"})\n)\n",
"record": "node:windows_node_memory_bytes_available:sum"
},
{
"expr": "sum by (cluster, instance) (\n windows_os_visible_memory_bytes{job=\"kubernetes-windows-exporter\"}\n)\n",
"record": "node:windows_node_memory_bytes_total:sum"
},
{
"expr": "(node:windows_node_memory_bytes_total:sum - node:windows_node_memory_bytes_available:sum)\n/\nscalar(sum(node:windows_node_memory_bytes_total:sum))\n",
"record": "node:windows_node_memory_utilisation:ratio"
},
{
"expr": "1 - (node:windows_node_memory_bytes_available:sum / node:windows_node_memory_bytes_total:sum)\n",
"record": "node:windows_node_memory_utilisation:"
},
{
"expr": "irate(windows_memory_swap_page_operations_total{job=\"kubernetes-windows-exporter\"}[5m])\n",
"record": "node:windows_node_memory_swap_io_pages:irate"
},
{
"expr": "avg by (cluster) (irate(windows_logical_disk_read_seconds_total{job=\"kubernetes-windows-exporter\"}[1m]) +\n irate(windows_logical_disk_write_seconds_total{job=\"kubernetes-windows-exporter\"}[1m])\n )\n",
"record": ":windows_node_disk_utilisation:avg_irate"
},
{
"expr": "avg by (cluster, instance) (\n (irate(windows_logical_disk_read_seconds_total{job=\"kubernetes-windows-exporter\"}[1m]) +\n irate(windows_logical_disk_write_seconds_total{job=\"kubernetes-windows-exporter\"}[1m]))\n)\n",
"record": "node:windows_node_disk_utilisation:avg_irate"
},
{
"expr": "max by (cluster,instance,volume)(\n (windows_logical_disk_size_bytes{job=\"kubernetes-windows-exporter\"}\n- windows_logical_disk_free_bytes{job=\"kubernetes-windows-exporter\"})\n/ windows_logical_disk_size_bytes{job=\"kubernetes-windows-exporter\"}\n)\n",
"record": "node:windows_node_filesystem_usage:"
},
{
"expr": "max by (cluster, instance, volume) (windows_logical_disk_free_bytes{job=\"kubernetes-windows-exporter\"} / windows_logical_disk_size_bytes{job=\"kubernetes-windows-exporter\"})\n",
"record": "node:windows_node_filesystem_avail:"
},
{
"expr": "sum by (cluster) (irate(windows_net_bytes_total{job=\"kubernetes-windows-exporter\"}[1m]))\n",
"record": ":windows_node_net_utilisation:sum_irate"
},
{
"expr": "sum by (cluster, instance) (\n (irate(windows_net_bytes_total{job=\"kubernetes-windows-exporter\"}[1m]))\n)\n",
"record": "node:windows_node_net_utilisation:sum_irate"
},
{
"expr": "sum by (cluster) (irate(windows_net_packets_received_discarded_total{job=\"kubernetes-windows-exporter\"}[1m])) +\nsum by (cluster) (irate(windows_net_packets_outbound_discarded_total{job=\"kubernetes-windows-exporter\"}[1m]))\n",
"record": ":windows_node_net_saturation:sum_irate"
},
{
"expr": "sum by (cluster, instance) (\n (irate(windows_net_packets_received_discarded_total{job=\"kubernetes-windows-exporter\"}[1m]) +\n irate(windows_net_packets_outbound_discarded_total{job=\"kubernetes-windows-exporter\"}[1m]))\n)\n",
"record": "node:windows_node_net_saturation:sum_irate"
}
]
},
{
"name": "windows.pod.rules",
"rules": [
{
"expr": "windows_container_available{job=\"kubernetes-windows-exporter\", container_id != \"\"} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace, cluster)\n",
"record": "windows_pod_container_available"
},
{
"expr": "windows_container_cpu_usage_seconds_total{job=\"kubernetes-windows-exporter\", container_id != \"\"} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace, cluster)\n",
"record": "windows_container_total_runtime"
},
{
"expr": "windows_container_memory_usage_commit_bytes{job=\"kubernetes-windows-exporter\", container_id != \"\"} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace, cluster)\n",
"record": "windows_container_memory_usage"
},
{
"expr": "windows_container_memory_usage_private_working_set_bytes{job=\"kubernetes-windows-exporter\", container_id != \"\"} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace, cluster)\n",
"record": "windows_container_private_working_set_usage"
},
{
"expr": "windows_container_network_receive_bytes_total{job=\"kubernetes-windows-exporter\", container_id != \"\"} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace, cluster)\n",
"record": "windows_container_network_received_bytes_total"
},
{
"expr": "windows_container_network_transmit_bytes_total{job=\"kubernetes-windows-exporter\", container_id != \"\"} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace, cluster)\n",
"record": "windows_container_network_transmitted_bytes_total"
},
{
"expr": "max by (cluster, namespace, pod, container) (\n kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"}\n) * on(container,pod,namespace,cluster) (windows_pod_container_available)\n",
"record": "kube_pod_windows_container_resource_memory_request"
},
{
"expr": "kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} * on(container,pod,namespace,cluster) (windows_pod_container_available)\n",
"record": "kube_pod_windows_container_resource_memory_limit"
},
{
"expr": "max by (cluster, namespace, pod, container) (\n kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"}\n) * on(container,pod,namespace,cluster) (windows_pod_container_available)\n",
"record": "kube_pod_windows_container_resource_cpu_cores_request"
},
{
"expr": "kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} * on(container,pod,namespace,cluster) (windows_pod_container_available)\n",
"record": "kube_pod_windows_container_resource_cpu_cores_limit"
},
{
"expr": "sum by (cluster, namespace, pod, container) (\n rate(windows_container_total_runtime{}[5m])\n)\n",
"record": "namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate"
}
]
}
]
}

View File

@ -151,7 +151,7 @@
{
"alert": "NodeHighNumberConntrackEntriesUsed",
"annotations": {
"description": "{{ $labels.instance }} {{ $value | humanizePercentage }} of conntrack entries are used.",
"description": "{{ $value | humanizePercentage }} of conntrack entries are used.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/node/nodehighnumberconntrackentriesused",
"summary": "Number of conntrack are getting close to the limit."
},
@ -256,7 +256,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/node/nodecpuhighusage",
"summary": "High CPU usage."
},
"expr": "sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode!~\"idle|iowait\"}[2m]))) * 100 > 90\n",
"expr": "sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode!=\"idle\"}[2m]))) * 100 > 90\n",
"for": "15m",
"labels": {
"severity": "info"

View File

@ -6,7 +6,7 @@
"app.kubernetes.io/component": "controller",
"app.kubernetes.io/name": "prometheus-operator",
"app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "0.80.0",
"app.kubernetes.io/version": "0.78.2",
"prometheus": "k8s",
"role": "alert-rules"
},

View File

@ -7,7 +7,7 @@
"app.kubernetes.io/instance": "k8s",
"app.kubernetes.io/name": "prometheus",
"app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "3.1.0",
"app.kubernetes.io/version": "3.0.1",
"prometheus": "k8s",
"role": "alert-rules"
},

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -14,9 +14,9 @@ spec:
description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": members are down ({{`{{`}} $value {{`}}`}}).'
summary: etcd cluster members are down.
expr: "max without (endpoint) (\n sum without (instance, pod) (up{job=~\".*etcd.*\"} == bool 0)\nor\n count without (To) (\n sum without (instance, pod) (rate(etcd_network_peer_sent_failures_total{job=~\".*etcd.*\"}[120s])) > 0.01\n )\n)\n> 0\n"
for: 20m
for: 10m
labels:
severity: warning
severity: critical
- alert: etcdInsufficientMembers
annotations:
description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": insufficient members ({{`{{`}} $value {{`}}`}}).'

View File

@ -11,7 +11,7 @@ spec:
rules:
- alert: KubePodCrashLooping
annotations:
description: 'Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is in waiting state (reason: "CrashLoopBackOff") on cluster {{`{{`}} $labels.cluster {{`}}`}}.'
description: 'Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is in waiting state (reason: "CrashLoopBackOff").'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping
summary: Pod is crash looping.
expr: 'max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff", job="kube-state-metrics"}[5m]) >= 1
@ -22,7 +22,7 @@ spec:
severity: warning
- alert: KubePodNotReady
annotations:
description: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready
summary: Pod has been in a non-ready state for more than 15 minutes.
expr: "sum by (namespace, pod, cluster) (\n max by(namespace, pod, cluster) (\n kube_pod_status_phase{job=\"kube-state-metrics\", phase=~\"Pending|Unknown|Failed\"}\n ) * on(namespace, pod, cluster) group_left(owner_kind) topk by(namespace, pod, cluster) (\n 1, max by(namespace, pod, owner_kind, cluster) (kube_pod_owner{owner_kind!=\"Job\"})\n )\n) > 0\n"
@ -31,7 +31,7 @@ spec:
severity: warning
- alert: KubeDeploymentGenerationMismatch
annotations:
description: Deployment generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} does not match, this indicates that the Deployment has failed but has not been rolled back on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: Deployment generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} does not match, this indicates that the Deployment has failed but has not been rolled back.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentgenerationmismatch
summary: Deployment generation mismatch due to possible roll-back
expr: "kube_deployment_status_observed_generation{job=\"kube-state-metrics\"}\n !=\nkube_deployment_metadata_generation{job=\"kube-state-metrics\"}\n"
@ -40,7 +40,7 @@ spec:
severity: warning
- alert: KubeDeploymentReplicasMismatch
annotations:
description: Deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: Deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch
summary: Deployment has not matched the expected number of replicas.
expr: "(\n kube_deployment_spec_replicas{job=\"kube-state-metrics\"}\n >\n kube_deployment_status_replicas_available{job=\"kube-state-metrics\"}\n) and (\n changes(kube_deployment_status_replicas_updated{job=\"kube-state-metrics\"}[10m])\n ==\n 0\n)\n"
@ -49,7 +49,7 @@ spec:
severity: warning
- alert: KubeDeploymentRolloutStuck
annotations:
description: Rollout of deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} is not progressing for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: Rollout of deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} is not progressing for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentrolloutstuck
summary: Deployment rollout is not progressing.
expr: 'kube_deployment_status_condition{condition="Progressing", status="false",job="kube-state-metrics"}
@ -62,7 +62,7 @@ spec:
severity: warning
- alert: KubeStatefulSetReplicasMismatch
annotations:
description: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetreplicasmismatch
summary: StatefulSet has not matched the expected number of replicas.
expr: "(\n kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\"}\n !=\n kube_statefulset_status_replicas{job=\"kube-state-metrics\"}\n) and (\n changes(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\"}[10m])\n ==\n 0\n)\n"
@ -71,7 +71,7 @@ spec:
severity: warning
- alert: KubeStatefulSetGenerationMismatch
annotations:
description: StatefulSet generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} does not match, this indicates that the StatefulSet has failed but has not been rolled back on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: StatefulSet generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} does not match, this indicates that the StatefulSet has failed but has not been rolled back.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetgenerationmismatch
summary: StatefulSet generation mismatch due to possible roll-back
expr: "kube_statefulset_status_observed_generation{job=\"kube-state-metrics\"}\n !=\nkube_statefulset_metadata_generation{job=\"kube-state-metrics\"}\n"
@ -80,7 +80,7 @@ spec:
severity: warning
- alert: KubeStatefulSetUpdateNotRolledOut
annotations:
description: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} update has not been rolled out on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} update has not been rolled out.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetupdatenotrolledout
summary: StatefulSet update has not been rolled out.
expr: "(\n max by(namespace, statefulset, job, cluster) (\n kube_statefulset_status_current_revision{job=\"kube-state-metrics\"}\n unless\n kube_statefulset_status_update_revision{job=\"kube-state-metrics\"}\n )\n *\n (\n kube_statefulset_replicas{job=\"kube-state-metrics\"}\n !=\n kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\"}\n )\n) and (\n changes(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\"}[5m])\n ==\n 0\n)\n"
@ -89,7 +89,7 @@ spec:
severity: warning
- alert: KubeDaemonSetRolloutStuck
annotations:
description: DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} has not finished or progressed for at least 15m on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} has not finished or progressed for at least 15m.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck
summary: DaemonSet rollout is stuck.
expr: "(\n (\n kube_daemonset_status_current_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_misscheduled{job=\"kube-state-metrics\"}\n !=\n 0\n ) or (\n kube_daemonset_status_updated_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_available{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n )\n) and (\n changes(kube_daemonset_status_updated_number_scheduled{job=\"kube-state-metrics\"}[5m])\n ==\n 0\n)\n"
@ -98,7 +98,7 @@ spec:
severity: warning
- alert: KubeContainerWaiting
annotations:
description: 'pod/{{`{{`}} $labels.pod {{`}}`}} in namespace {{`{{`}} $labels.namespace {{`}}`}} on container {{`{{`}} $labels.container{{`}}`}} has been in waiting state for longer than 1 hour. (reason: "{{`{{`}} $labels.reason {{`}}`}}") on cluster {{`{{`}} $labels.cluster {{`}}`}}.'
description: 'pod/{{`{{`}} $labels.pod {{`}}`}} in namespace {{`{{`}} $labels.namespace {{`}}`}} on container {{`{{`}} $labels.container{{`}}`}} has been in waiting state for longer than 1 hour. (reason: "{{`{{`}} $labels.reason {{`}}`}}").'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting
summary: Pod container waiting longer than 1 hour
expr: 'kube_pod_container_status_waiting_reason{reason!="CrashLoopBackOff", job="kube-state-metrics"} > 0
@ -109,7 +109,7 @@ spec:
severity: warning
- alert: KubeDaemonSetNotScheduled
annotations:
description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are not scheduled on cluster {{`{{`}} $labels.cluster {{`}}`}}.'
description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are not scheduled.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetnotscheduled
summary: DaemonSet pods are not scheduled.
expr: "kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n -\nkube_daemonset_status_current_number_scheduled{job=\"kube-state-metrics\"} > 0\n"
@ -118,7 +118,7 @@ spec:
severity: warning
- alert: KubeDaemonSetMisScheduled
annotations:
description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are running where they are not supposed to run on cluster {{`{{`}} $labels.cluster {{`}}`}}.'
description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are running where they are not supposed to run.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetmisscheduled
summary: DaemonSet pods are misscheduled.
expr: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0
@ -129,7 +129,7 @@ spec:
severity: warning
- alert: KubeJobNotCompleted
annotations:
description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than {{`{{`}} "43200" | humanizeDuration {{`}}`}} to complete on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than {{`{{`}} "43200" | humanizeDuration {{`}}`}} to complete.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted
summary: Job did not complete in time
expr: "time() - max by(namespace, job_name, cluster) (kube_job_status_start_time{job=\"kube-state-metrics\"}\n and\nkube_job_status_active{job=\"kube-state-metrics\"} > 0) > 43200\n"
@ -137,7 +137,7 @@ spec:
severity: warning
- alert: KubeJobFailed
annotations:
description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} failed to complete. Removing failed job after investigation should clear this alert on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} failed to complete. Removing failed job after investigation should clear this alert.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobfailed
summary: Job failed to complete.
expr: 'kube_job_failed{job="kube-state-metrics"} > 0
@ -148,7 +148,7 @@ spec:
severity: warning
- alert: KubeHpaReplicasMismatch
annotations:
description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has not matched the desired number of replicas for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has not matched the desired number of replicas for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpareplicasmismatch
summary: HPA has not matched desired number of replicas.
expr: "(kube_horizontalpodautoscaler_status_desired_replicas{job=\"kube-state-metrics\"}\n !=\nkube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"})\n and\n(kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"}\n >\nkube_horizontalpodautoscaler_spec_min_replicas{job=\"kube-state-metrics\"})\n and\n(kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"}\n <\nkube_horizontalpodautoscaler_spec_max_replicas{job=\"kube-state-metrics\"})\n and\nchanges(kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"}[15m]) == 0\n"
@ -157,7 +157,7 @@ spec:
severity: warning
- alert: KubeHpaMaxedOut
annotations:
description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has been running at max replicas for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has been running at max replicas for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpamaxedout
summary: HPA is running at max replicas
expr: "kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"}\n ==\nkube_horizontalpodautoscaler_spec_max_replicas{job=\"kube-state-metrics\"}\n"
@ -186,7 +186,7 @@ spec:
severity: warning
- alert: KubeQuotaAlmostFull
annotations:
description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaalmostfull
summary: Namespace quota is going to be full.
expr: "kube_resourcequota{job=\"kube-state-metrics\", type=\"used\"}\n / ignoring(instance, job, type)\n(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\"} > 0)\n > 0.9 < 1\n"
@ -195,7 +195,7 @@ spec:
severity: info
- alert: KubeQuotaFullyUsed
annotations:
description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotafullyused
summary: Namespace quota is fully used.
expr: "kube_resourcequota{job=\"kube-state-metrics\", type=\"used\"}\n / ignoring(instance, job, type)\n(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\"} > 0)\n == 1\n"
@ -204,7 +204,7 @@ spec:
severity: info
- alert: KubeQuotaExceeded
annotations:
description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaexceeded
summary: Namespace quota has exceeded the limits.
expr: "kube_resourcequota{job=\"kube-state-metrics\", type=\"used\"}\n / ignoring(instance, job, type)\n(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\"} > 0)\n > 1\n"
@ -213,7 +213,7 @@ spec:
severity: warning
- alert: CPUThrottlingHigh
annotations:
description: '{{`{{`}} $value | humanizePercentage {{`}}`}} throttling of CPU in namespace {{`{{`}} $labels.namespace {{`}}`}} for container {{`{{`}} $labels.container {{`}}`}} in pod {{`{{`}} $labels.pod {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.'
description: '{{`{{`}} $value | humanizePercentage {{`}}`}} throttling of CPU in namespace {{`{{`}} $labels.namespace {{`}}`}} for container {{`{{`}} $labels.container {{`}}`}} in pod {{`{{`}} $labels.pod {{`}}`}}.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh
summary: Processes experience elevated CPU throttling.
expr: "sum(increase(container_cpu_cfs_throttled_periods_total{container!=\"\", job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", }[5m])) without (id, metrics_path, name, image, endpoint, job, node)\n /\nsum(increase(container_cpu_cfs_periods_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", }[5m])) without (id, metrics_path, name, image, endpoint, job, node)\n > ( 25 / 100 )\n"
@ -281,7 +281,7 @@ spec:
rules:
- alert: KubeVersionMismatch
annotations:
description: There are {{`{{`}} $value {{`}}`}} different semantic versions of Kubernetes components running on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: There are {{`{{`}} $value {{`}}`}} different semantic versions of Kubernetes components running.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeversionmismatch
summary: Different semantic versions of Kubernetes components running.
expr: 'count by (cluster) (count by (git_version, cluster) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"git_version","$1","git_version","(v[0-9]*.[0-9]*).*"))) > 1
@ -292,7 +292,7 @@ spec:
severity: warning
- alert: KubeClientErrors
annotations:
description: Kubernetes API server client '{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}' is experiencing {{`{{`}} $value | humanizePercentage {{`}}`}} errors on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: Kubernetes API server client '{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}' is experiencing {{`{{`}} $value | humanizePercentage {{`}}`}} errors.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclienterrors
summary: Kubernetes API server client is experiencing errors.
expr: "(sum(rate(rest_client_requests_total{job=\"apiserver\",code=~\"5..\"}[5m])) by (cluster, instance, job, namespace)\n /\nsum(rate(rest_client_requests_total{job=\"apiserver\"}[5m])) by (cluster, instance, job, namespace))\n> 0.01\n"
@ -303,7 +303,7 @@ spec:
rules:
- alert: KubeAPIErrorBudgetBurn
annotations:
description: The API server is burning too much error budget on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: The API server is burning too much error budget.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn
summary: The API server is burning too much error budget.
expr: 'sum by(cluster) (apiserver_request:burnrate1h) > (14.40 * 0.01000)
@ -320,7 +320,7 @@ spec:
short: 5m
- alert: KubeAPIErrorBudgetBurn
annotations:
description: The API server is burning too much error budget on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: The API server is burning too much error budget.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn
summary: The API server is burning too much error budget.
expr: 'sum by(cluster) (apiserver_request:burnrate6h) > (6.00 * 0.01000)
@ -337,7 +337,7 @@ spec:
short: 30m
- alert: KubeAPIErrorBudgetBurn
annotations:
description: The API server is burning too much error budget on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: The API server is burning too much error budget.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn
summary: The API server is burning too much error budget.
expr: 'sum by(cluster) (apiserver_request:burnrate1d) > (3.00 * 0.01000)
@ -354,7 +354,7 @@ spec:
short: 2h
- alert: KubeAPIErrorBudgetBurn
annotations:
description: The API server is burning too much error budget on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: The API server is burning too much error budget.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn
summary: The API server is burning too much error budget.
expr: 'sum by(cluster) (apiserver_request:burnrate3d) > (1.00 * 0.01000)
@ -403,18 +403,17 @@ spec:
severity: critical
- alert: KubeAggregatedAPIErrors
annotations:
description: Kubernetes aggregated API {{`{{`}} $labels.instance {{`}}`}}/{{`{{`}} $labels.name {{`}}`}} has reported {{`{{`}} $labels.reason {{`}}`}} errors on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has reported errors. It has appeared unavailable {{`{{`}} $value | humanize {{`}}`}} times averaged over the past 10m.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapierrors
summary: Kubernetes aggregated API has reported errors.
expr: 'sum by(cluster, instance, name, reason)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[1m])) > 0
expr: 'sum by(name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[10m])) > 4
'
for: 10m
labels:
severity: warning
- alert: KubeAggregatedAPIDown
annotations:
description: Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has been only {{`{{`}} $value | humanize {{`}}`}}% available over the last 10m on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has been only {{`{{`}} $value | humanize {{`}}`}}% available over the last 10m.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapidown
summary: Kubernetes aggregated API is down.
expr: '(1 - max by(name, namespace, cluster)(avg_over_time(aggregator_unavailable_apiservice{job="apiserver"}[10m]))) * 100 < 85
@ -436,7 +435,7 @@ spec:
severity: critical
- alert: KubeAPITerminatedRequests
annotations:
description: The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapiterminatedrequests
summary: The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.
expr: 'sum by(cluster) (rate(apiserver_request_terminations_total{job="apiserver"}[10m])) / ( sum by(cluster) (rate(apiserver_request_total{job="apiserver"}[10m])) + sum by(cluster) (rate(apiserver_request_terminations_total{job="apiserver"}[10m])) ) > 0.20
@ -449,22 +448,18 @@ spec:
rules:
- alert: KubeNodeNotReady
annotations:
description: '{{`{{`}} $labels.node {{`}}`}} has been unready for more than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.'
description: '{{`{{`}} $labels.node {{`}}`}} has been unready for more than 15 minutes.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodenotready
summary: Node is not ready.
expr: 'kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0
and on (cluster, node)
kube_node_spec_unschedulable{job="kube-state-metrics"} == 0
'
for: 15m
labels:
severity: warning
- alert: KubeNodeUnreachable
annotations:
description: '{{`{{`}} $labels.node {{`}}`}} is unreachable and some workloads may be rescheduled on cluster {{`{{`}} $labels.cluster {{`}}`}}.'
description: '{{`{{`}} $labels.node {{`}}`}} is unreachable and some workloads may be rescheduled.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodeunreachable
summary: Node is unreachable.
expr: '(kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} unless ignoring(key,value) kube_node_spec_taint{job="kube-state-metrics",key=~"ToBeDeletedByClusterAutoscaler|cloud.google.com/impending-node-termination|aws-node-termination-handler/spot-itn"}) == 1
@ -475,31 +470,27 @@ spec:
severity: warning
- alert: KubeletTooManyPods
annotations:
description: Kubelet '{{`{{`}} $labels.node {{`}}`}}' is running at {{`{{`}} $value | humanizePercentage {{`}}`}} of its Pod capacity on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: Kubelet '{{`{{`}} $labels.node {{`}}`}}' is running at {{`{{`}} $value | humanizePercentage {{`}}`}} of its Pod capacity.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubelettoomanypods
summary: Kubelet is running at capacity.
expr: "(\n max by (cluster, instance) (\n kubelet_running_pods{job=\"kubelet\", metrics_path=\"/metrics\"} > 1\n )\n * on (cluster, instance) group_left(node)\n max by (cluster, instance, node) (\n kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"}\n )\n)\n/ on (cluster, node) group_left()\nmax by (cluster, node) (\n kube_node_status_capacity{job=\"kube-state-metrics\", resource=\"pods\"} != 1\n) > 0.95\n"
expr: "count by(cluster, node) (\n (kube_pod_status_phase{job=\"kube-state-metrics\",phase=\"Running\"} == 1) * on(instance,pod,namespace,cluster) group_left(node) topk by(instance,pod,namespace,cluster) (1, kube_pod_info{job=\"kube-state-metrics\"})\n)\n/\nmax by(cluster, node) (\n kube_node_status_capacity{job=\"kube-state-metrics\",resource=\"pods\"} != 1\n) > 0.95\n"
for: 15m
labels:
severity: info
- alert: KubeNodeReadinessFlapping
annotations:
description: The readiness status of node {{`{{`}} $labels.node {{`}}`}} has changed {{`{{`}} $value {{`}}`}} times in the last 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: The readiness status of node {{`{{`}} $labels.node {{`}}`}} has changed {{`{{`}} $value {{`}}`}} times in the last 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodereadinessflapping
summary: Node readiness status is flapping.
expr: 'sum(changes(kube_node_status_condition{job="kube-state-metrics",status="true",condition="Ready"}[15m])) by (cluster, node) > 2
and on (cluster, node)
kube_node_spec_unschedulable{job="kube-state-metrics"} == 0
'
for: 15m
labels:
severity: warning
- alert: KubeletPlegDurationHigh
annotations:
description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}}.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletplegdurationhigh
summary: Kubelet Pod Lifecycle Event Generator is taking too long to relist.
expr: 'node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10
@ -510,7 +501,7 @@ spec:
severity: warning
- alert: KubeletPodStartUpLatencyHigh
annotations:
description: Kubelet Pod startup 99th percentile latency is {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: Kubelet Pod startup 99th percentile latency is {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}}.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletpodstartuplatencyhigh
summary: Kubelet Pod startup latency is too high.
expr: 'histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le)) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60
@ -521,7 +512,7 @@ spec:
severity: warning
- alert: KubeletClientCertificateExpiration
annotations:
description: Client certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: Client certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration
summary: Kubelet client certificate is about to expire.
expr: 'kubelet_certificate_manager_client_ttl_seconds < 604800
@ -531,7 +522,7 @@ spec:
severity: warning
- alert: KubeletClientCertificateExpiration
annotations:
description: Client certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: Client certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration
summary: Kubelet client certificate is about to expire.
expr: 'kubelet_certificate_manager_client_ttl_seconds < 86400
@ -541,7 +532,7 @@ spec:
severity: critical
- alert: KubeletServerCertificateExpiration
annotations:
description: Server certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: Server certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration
summary: Kubelet server certificate is about to expire.
expr: 'kubelet_certificate_manager_server_ttl_seconds < 604800
@ -551,7 +542,7 @@ spec:
severity: warning
- alert: KubeletServerCertificateExpiration
annotations:
description: Server certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: Server certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration
summary: Kubelet server certificate is about to expire.
expr: 'kubelet_certificate_manager_server_ttl_seconds < 86400
@ -561,7 +552,7 @@ spec:
severity: critical
- alert: KubeletClientCertificateRenewalErrors
annotations:
description: Kubelet on node {{`{{`}} $labels.node {{`}}`}} has failed to renew its client certificate ({{`{{`}} $value | humanize {{`}}`}} errors in the last 5 minutes) on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: Kubelet on node {{`{{`}} $labels.node {{`}}`}} has failed to renew its client certificate ({{`{{`}} $value | humanize {{`}}`}} errors in the last 5 minutes).
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificaterenewalerrors
summary: Kubelet has failed to renew its client certificate.
expr: 'increase(kubelet_certificate_manager_client_expiration_renew_errors[5m]) > 0
@ -572,7 +563,7 @@ spec:
severity: warning
- alert: KubeletServerCertificateRenewalErrors
annotations:
description: Kubelet on node {{`{{`}} $labels.node {{`}}`}} has failed to renew its server certificate ({{`{{`}} $value | humanize {{`}}`}} errors in the last 5 minutes) on cluster {{`{{`}} $labels.cluster {{`}}`}}.
description: Kubelet on node {{`{{`}} $labels.node {{`}}`}} has failed to renew its server certificate ({{`{{`}} $value | humanize {{`}}`}} errors in the last 5 minutes).
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificaterenewalerrors
summary: Kubelet has failed to renew its server certificate.
expr: 'increase(kubelet_server_expiration_renew_errors[5m]) > 0
@ -649,20 +640,20 @@ spec:
'
record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h
- expr: 'sum by (cluster, verb, scope) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{le="+Inf"})
- expr: 'sum by (cluster, verb, scope) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{le="+Inf"} * 24 * 30)
'
record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d
- expr: "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=~\"1(\\\\.0)?\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=~\"5(\\\\.0)?\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"\
LIST|GET\",scope=\"cluster\",le=~\"30(\\\\.0)?\"})\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n"
- expr: "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"\
cluster\",le=\"30\"})\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n"
labels:
verb: all
record: apiserver_request:availability30d
- expr: "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=~\"5(\\\\.0)?\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=~\"30(\\\\.0)?\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n"
- expr: "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n"
labels:
verb: read
record: apiserver_request:availability30d
- expr: "1 - (\n (\n # too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=~\"1(\\\\.0)?\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n"
- expr: "1 - (\n (\n # too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n"
labels:
verb: write
record: apiserver_request:availability30d
@ -696,66 +687,66 @@ spec:
record: code_verb:apiserver_request_total:increase1h
- name: kube-apiserver-burnrate.rules
rules:
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\
,verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n"
labels:
verb: read
record: apiserver_request:burnrate1d
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\
,verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n"
labels:
verb: read
record: apiserver_request:burnrate1h
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\
,verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n"
labels:
verb: read
record: apiserver_request:burnrate2h
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"\
apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"\
,code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n"
labels:
verb: read
record: apiserver_request:burnrate30m
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\
,verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n"
labels:
verb: read
record: apiserver_request:burnrate3d
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\
,verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n"
labels:
verb: read
record: apiserver_request:burnrate5m
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\
,verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n"
labels:
verb: read
record: apiserver_request:burnrate6h
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n"
labels:
verb: write
record: apiserver_request:burnrate1d
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n"
labels:
verb: write
record: apiserver_request:burnrate1h
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n"
labels:
verb: write
record: apiserver_request:burnrate2h
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n"
labels:
verb: write
record: apiserver_request:burnrate30m
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n"
labels:
verb: write
record: apiserver_request:burnrate3d
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n"
labels:
verb: write
record: apiserver_request:burnrate5m
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n"
labels:
verb: write
record: apiserver_request:burnrate6h
@ -925,122 +916,4 @@ spec:
labels:
quantile: '0.5'
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
- name: windows.node.rules
rules:
- expr: "count by (cluster) (\n windows_system_system_up_time{job=\"kubernetes-windows-exporter\"}\n)\n"
record: node:windows_node:sum
- expr: "count by (cluster, instance) (sum by (cluster, instance, core) (\n windows_cpu_time_total{job=\"kubernetes-windows-exporter\"}\n))\n"
record: node:windows_node_num_cpu:sum
- expr: '1 - avg by (cluster) (rate(windows_cpu_time_total{job="kubernetes-windows-exporter",mode="idle"}[1m]))
'
record: :windows_node_cpu_utilisation:avg1m
- expr: "1 - avg by (cluster, instance) (\n rate(windows_cpu_time_total{job=\"kubernetes-windows-exporter\",mode=\"idle\"}[1m])\n)\n"
record: node:windows_node_cpu_utilisation:avg1m
- expr: '1 -
sum by (cluster) (windows_memory_available_bytes{job="kubernetes-windows-exporter"})
/
sum by (cluster) (windows_os_visible_memory_bytes{job="kubernetes-windows-exporter"})
'
record: ':windows_node_memory_utilisation:'
- expr: 'sum by (cluster) (windows_memory_available_bytes{job="kubernetes-windows-exporter"} + windows_memory_cache_bytes{job="kubernetes-windows-exporter"})
'
record: :windows_node_memory_MemFreeCached_bytes:sum
- expr: '(windows_memory_cache_bytes{job="kubernetes-windows-exporter"} + windows_memory_modified_page_list_bytes{job="kubernetes-windows-exporter"} + windows_memory_standby_cache_core_bytes{job="kubernetes-windows-exporter"} + windows_memory_standby_cache_normal_priority_bytes{job="kubernetes-windows-exporter"} + windows_memory_standby_cache_reserve_bytes{job="kubernetes-windows-exporter"})
'
record: node:windows_node_memory_totalCached_bytes:sum
- expr: 'sum by (cluster) (windows_os_visible_memory_bytes{job="kubernetes-windows-exporter"})
'
record: :windows_node_memory_MemTotal_bytes:sum
- expr: "sum by (cluster, instance) (\n (windows_memory_available_bytes{job=\"kubernetes-windows-exporter\"})\n)\n"
record: node:windows_node_memory_bytes_available:sum
- expr: "sum by (cluster, instance) (\n windows_os_visible_memory_bytes{job=\"kubernetes-windows-exporter\"}\n)\n"
record: node:windows_node_memory_bytes_total:sum
- expr: '(node:windows_node_memory_bytes_total:sum - node:windows_node_memory_bytes_available:sum)
/
scalar(sum(node:windows_node_memory_bytes_total:sum))
'
record: node:windows_node_memory_utilisation:ratio
- expr: '1 - (node:windows_node_memory_bytes_available:sum / node:windows_node_memory_bytes_total:sum)
'
record: 'node:windows_node_memory_utilisation:'
- expr: 'irate(windows_memory_swap_page_operations_total{job="kubernetes-windows-exporter"}[5m])
'
record: node:windows_node_memory_swap_io_pages:irate
- expr: "avg by (cluster) (irate(windows_logical_disk_read_seconds_total{job=\"kubernetes-windows-exporter\"}[1m]) +\n irate(windows_logical_disk_write_seconds_total{job=\"kubernetes-windows-exporter\"}[1m])\n )\n"
record: :windows_node_disk_utilisation:avg_irate
- expr: "avg by (cluster, instance) (\n (irate(windows_logical_disk_read_seconds_total{job=\"kubernetes-windows-exporter\"}[1m]) +\n irate(windows_logical_disk_write_seconds_total{job=\"kubernetes-windows-exporter\"}[1m]))\n)\n"
record: node:windows_node_disk_utilisation:avg_irate
- expr: "max by (cluster,instance,volume)(\n (windows_logical_disk_size_bytes{job=\"kubernetes-windows-exporter\"}\n- windows_logical_disk_free_bytes{job=\"kubernetes-windows-exporter\"})\n/ windows_logical_disk_size_bytes{job=\"kubernetes-windows-exporter\"}\n)\n"
record: 'node:windows_node_filesystem_usage:'
- expr: 'max by (cluster, instance, volume) (windows_logical_disk_free_bytes{job="kubernetes-windows-exporter"} / windows_logical_disk_size_bytes{job="kubernetes-windows-exporter"})
'
record: 'node:windows_node_filesystem_avail:'
- expr: 'sum by (cluster) (irate(windows_net_bytes_total{job="kubernetes-windows-exporter"}[1m]))
'
record: :windows_node_net_utilisation:sum_irate
- expr: "sum by (cluster, instance) (\n (irate(windows_net_bytes_total{job=\"kubernetes-windows-exporter\"}[1m]))\n)\n"
record: node:windows_node_net_utilisation:sum_irate
- expr: 'sum by (cluster) (irate(windows_net_packets_received_discarded_total{job="kubernetes-windows-exporter"}[1m])) +
sum by (cluster) (irate(windows_net_packets_outbound_discarded_total{job="kubernetes-windows-exporter"}[1m]))
'
record: :windows_node_net_saturation:sum_irate
- expr: "sum by (cluster, instance) (\n (irate(windows_net_packets_received_discarded_total{job=\"kubernetes-windows-exporter\"}[1m]) +\n irate(windows_net_packets_outbound_discarded_total{job=\"kubernetes-windows-exporter\"}[1m]))\n)\n"
record: node:windows_node_net_saturation:sum_irate
- name: windows.pod.rules
rules:
- expr: 'windows_container_available{job="kubernetes-windows-exporter", container_id != ""} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics", container_id != ""}) by(container, container_id, pod, namespace, cluster)
'
record: windows_pod_container_available
- expr: 'windows_container_cpu_usage_seconds_total{job="kubernetes-windows-exporter", container_id != ""} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics", container_id != ""}) by(container, container_id, pod, namespace, cluster)
'
record: windows_container_total_runtime
- expr: 'windows_container_memory_usage_commit_bytes{job="kubernetes-windows-exporter", container_id != ""} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics", container_id != ""}) by(container, container_id, pod, namespace, cluster)
'
record: windows_container_memory_usage
- expr: 'windows_container_memory_usage_private_working_set_bytes{job="kubernetes-windows-exporter", container_id != ""} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics", container_id != ""}) by(container, container_id, pod, namespace, cluster)
'
record: windows_container_private_working_set_usage
- expr: 'windows_container_network_receive_bytes_total{job="kubernetes-windows-exporter", container_id != ""} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics", container_id != ""}) by(container, container_id, pod, namespace, cluster)
'
record: windows_container_network_received_bytes_total
- expr: 'windows_container_network_transmit_bytes_total{job="kubernetes-windows-exporter", container_id != ""} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics", container_id != ""}) by(container, container_id, pod, namespace, cluster)
'
record: windows_container_network_transmitted_bytes_total
- expr: "max by (cluster, namespace, pod, container) (\n kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"}\n) * on(container,pod,namespace,cluster) (windows_pod_container_available)\n"
record: kube_pod_windows_container_resource_memory_request
- expr: 'kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} * on(container,pod,namespace,cluster) (windows_pod_container_available)
'
record: kube_pod_windows_container_resource_memory_limit
- expr: "max by (cluster, namespace, pod, container) (\n kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"}\n) * on(container,pod,namespace,cluster) (windows_pod_container_available)\n"
record: kube_pod_windows_container_resource_cpu_cores_request
- expr: 'kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} * on(container,pod,namespace,cluster) (windows_pod_container_available)
'
record: kube_pod_windows_container_resource_cpu_cores_limit
- expr: "sum by (cluster, namespace, pod, container) (\n rate(windows_container_total_runtime{}[5m])\n)\n"
record: namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate

Some files were not shown because too many files have changed in this diff Show More