Metrics module version bump

This commit is contained in:
Stefan Reimer 2023-11-27 12:45:06 +00:00
parent f6336e5df5
commit a65b515f8c
97 changed files with 4744 additions and 1217 deletions

View File

@ -19,7 +19,7 @@ dependencies:
version: ">= 0.1.6"
repository: https://cdn.zero-downtime.net/charts/
- name: kube-prometheus-stack
version: 48.6.0
version: 54.2.2
repository: https://prometheus-community.github.io/helm-charts
- name: prometheus-adapter
version: 4.9.0

View File

@ -1,6 +1,6 @@
# kubezero-metrics
![Version: 0.9.2](https://img.shields.io/badge/Version-0.9.2-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
![Version: 0.9.5](https://img.shields.io/badge/Version-0.9.5-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
KubeZero Umbrella Chart for Prometheus, Grafana and Alertmanager as well as all Kubernetes integrations.
@ -14,14 +14,14 @@ KubeZero Umbrella Chart for Prometheus, Grafana and Alertmanager as well as all
## Requirements
Kubernetes: `>= 1.25.0`
Kubernetes: `>= 1.26.0`
| Repository | Name | Version |
|------------|------|---------|
| | kube-prometheus-stack | 45.27.2 |
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | >= 0.1.6 |
| https://prometheus-community.github.io/helm-charts | prometheus-adapter | 4.1.1 |
| https://prometheus-community.github.io/helm-charts | prometheus-pushgateway | 2.1.3 |
| https://prometheus-community.github.io/helm-charts | kube-prometheus-stack | 54.2.2 |
| https://prometheus-community.github.io/helm-charts | prometheus-adapter | 4.9.0 |
| https://prometheus-community.github.io/helm-charts | prometheus-pushgateway | 2.4.2 |
## Values
@ -31,16 +31,19 @@ Kubernetes: `>= 1.25.0`
| istio.alertmanager.enabled | bool | `false` | |
| istio.alertmanager.gateway | string | `"istio-ingress/ingressgateway"` | |
| istio.alertmanager.ipBlocks | list | `[]` | |
| istio.alertmanager.port | int | `9093` | |
| istio.alertmanager.url | string | `""` | |
| istio.grafana.destination | string | `"metrics-grafana"` | |
| istio.grafana.enabled | bool | `false` | |
| istio.grafana.gateway | string | `"istio-ingress/ingressgateway"` | |
| istio.grafana.ipBlocks | list | `[]` | |
| istio.grafana.port | int | `80` | |
| istio.grafana.url | string | `""` | |
| istio.prometheus.destination | string | `"metrics-kube-prometheus-st-prometheus"` | |
| istio.prometheus.enabled | bool | `false` | |
| istio.prometheus.gateway | string | `"istio-ingress/ingressgateway"` | |
| istio.prometheus.ipBlocks | list | `[]` | |
| istio.prometheus.port | int | `9090` | |
| istio.prometheus.url | string | `""` | |
| kube-prometheus-stack.alertmanager.alertmanagerSpec.containers[0].env[0].name | string | `"SNS_FORWARDER_ARN_PREFIX"` | |
| kube-prometheus-stack.alertmanager.alertmanagerSpec.containers[0].env[0].valueFrom.fieldRef.fieldPath | string | `"metadata.annotations['kubezero.com/sns_forwarder_ARN_PREFIX']"` | |
@ -162,7 +165,7 @@ Kubernetes: `>= 1.25.0`
| kube-prometheus-stack.prometheus.prometheusSpec.portName | string | `"http-prometheus"` | |
| kube-prometheus-stack.prometheus.prometheusSpec.resources.limits.memory | string | `"4Gi"` | |
| kube-prometheus-stack.prometheus.prometheusSpec.resources.requests.cpu | string | `"500m"` | |
| kube-prometheus-stack.prometheus.prometheusSpec.resources.requests.memory | string | `"512Mi"` | |
| kube-prometheus-stack.prometheus.prometheusSpec.resources.requests.memory | string | `"2Gi"` | |
| kube-prometheus-stack.prometheus.prometheusSpec.retention | string | `"8d"` | |
| kube-prometheus-stack.prometheus.prometheusSpec.ruleSelectorNilUsesHelmValues | bool | `false` | |
| kube-prometheus-stack.prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues | bool | `false` | |

View File

@ -7,7 +7,7 @@ annotations:
url: https://github.com/prometheus-operator/kube-prometheus
artifacthub.io/operator: "true"
apiVersion: v2
appVersion: v0.66.0
appVersion: v0.69.1
dependencies:
- condition: crds.enabled
name: crds
@ -16,15 +16,15 @@ dependencies:
- condition: kubeStateMetrics.enabled
name: kube-state-metrics
repository: https://prometheus-community.github.io/helm-charts
version: 5.10.*
version: 5.15.*
- condition: nodeExporter.enabled
name: prometheus-node-exporter
repository: https://prometheus-community.github.io/helm-charts
version: 4.21.*
version: 4.24.*
- condition: grafana.enabled
name: grafana
repository: https://grafana.github.io/helm-charts
version: 6.58.*
version: 7.0.*
- condition: windowsMonitoring.enabled
name: prometheus-windows-exporter
repository: https://prometheus-community.github.io/helm-charts
@ -39,7 +39,7 @@ keywords:
- operator
- prometheus
- kube-prometheus
kubeVersion: '>=1.16.0-0'
kubeVersion: '>=1.19.0-0'
maintainers:
- email: andrew@quadcorps.co.uk
name: andrewgkew
@ -60,4 +60,4 @@ sources:
- https://github.com/prometheus-community/helm-charts
- https://github.com/prometheus-operator/kube-prometheus
type: application
version: 48.3.3
version: 54.2.2

View File

@ -8,7 +8,7 @@ _Note: This chart was formerly named `prometheus-operator` chart, now renamed to
## Prerequisites
- Kubernetes 1.16+
- Kubernetes 1.19+
- Helm 3+
## Get Helm Repository Info
@ -82,6 +82,144 @@ _See [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/) for command documen
A major chart version change (like v1.2.3 -> v2.0.0) indicates that there is an incompatible breaking change needing manual actions.
### From 53.x to 54.x
The Grafana Helm chart has been bumped to version 7.
Please see the Grafana Helm chart [changelog](https://github.com/grafana/helm-charts/tree/main/charts/grafana#to-700) for details.
### From 52.x to 53.x
This version upgrades Prometheus-Operator to v0.69.1, Prometheus to 2.47.2
Run these commands to update the CRDs before applying the upgrade.
```console
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusagents.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_scrapeconfigs.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml
```
### From 51.x to 52.x
This version adds the ability to choose between using existing secrets and creating new secret objects for the various Thanos configurations. The defaults have not changed, but if you were setting:
- `thanosRuler.thanosRulerSpec.alertmanagersConfig` or
- `thanosRuler.thanosRulerSpec.objectStorageConfig` or
- `thanosRuler.thanosRulerSpec.queryConfig` or
- `prometheus.prometheusSpec.thanos.objectStorageConfig`
you will need to set `existingSecret` or `secret`, depending on your requirements.
For instance, the `thanosRuler.thanosRulerSpec.alertmanagersConfig` used to be configured as follows:
```yaml
thanosRuler:
thanosRulerSpec:
alertmanagersConfig:
alertmanagers:
- api_version: v2
http_config:
basic_auth:
username: some_user
password: some_pass
static_configs:
- alertmanager.thanos.io
scheme: http
timeout: 10s
```
But it has now moved to:
```yaml
thanosRuler:
thanosRulerSpec:
alertmanagersConfig:
secret:
alertmanagers:
- api_version: v2
http_config:
basic_auth:
username: some_user
password: some_pass
static_configs:
- alertmanager.thanos.io
scheme: http
timeout: 10s
```
or the `thanosRuler.thanosRulerSpec.objectStorageConfig` used to be configured as follows:
```yaml
thanosRuler:
thanosRulerSpec:
objectStorageConfig:
name: existing-secret-not-created-by-this-chart
key: object-storage-configs.yaml
```
But it has now moved to:
```yaml
thanosRuler:
thanosRulerSpec:
objectStorageConfig:
existingSecret:
name: existing-secret-not-created-by-this-chart
key: object-storage-configs.yaml
```
### From 50.x to 51.x
This version upgrades Prometheus-Operator to v0.68.0, Prometheus to 2.47.0 and Thanos to v0.32.2
Run these commands to update the CRDs before applying the upgrade.
```console
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.68.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.68.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.68.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.68.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.68.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusagents.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.68.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.68.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.68.0/example/prometheus-operator-crd/monitoring.coreos.com_scrapeconfigs.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.68.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.68.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml
```
### From 49.x to 50.x
This version requires Kubernetes 1.19+.
We do not expect any breaking changes in this version.
### From 48.x to 49.x
This version upgrades Prometheus-Operator to v0.67.1, Alertmanager to v0.26.0, Prometheus to 2.46.0 and Thanos to v0.32.0
Run these commands to update the CRDs before applying the upgrade.
```console
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.67.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.67.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.67.1/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.67.1/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.67.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusagents.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.67.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.67.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.67.1/example/prometheus-operator-crd/monitoring.coreos.com_scrapeconfigs.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.67.1/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.67.1/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml
```
### From 47.x to 48.x
This version moved all CRDs into a dedicated sub-chart. No new CRDs are introduced in this version.

View File

@ -1,10 +1,11 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.66.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.11.1
operator.prometheus.io/version: 0.69.1
argocd.argoproj.io/sync-options: ServerSideApply=true
creationTimestamp: null
name: alertmanagers.monitoring.coreos.com
@ -1029,8 +1030,8 @@ spec:
is only available starting from Alertmanager v0.22+.
properties:
credentials:
description: The secret's key that contains the credentials
of the request
description: Selects a key of a Secret in the namespace
that contains the credentials for authentication.
properties:
key:
description: The key of the secret to select from. Must
@ -1051,8 +1052,9 @@ spec:
type: object
x-kubernetes-map-type: atomic
type:
description: Set the authentication type. Defaults
to Bearer, Basic will cause an error
description: "Defines the authentication type. The
value is case-insensitive. \n \"Basic\" is not a
supported value. \n Default: \"Bearer\""
type: string
type: object
basicAuth:
@ -1425,6 +1427,88 @@ spec:
- key
type: object
x-kubernetes-map-type: atomic
smtp:
description: Configures global SMTP parameters.
properties:
authIdentity:
description: SMTP Auth using PLAIN
type: string
authPassword:
description: SMTP Auth using LOGIN and PLAIN.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
authSecret:
description: SMTP Auth using CRAM-MD5.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
authUsername:
description: SMTP Auth using CRAM-MD5, LOGIN and PLAIN.
If empty, Alertmanager doesn't authenticate to the SMTP
server.
type: string
from:
description: The default SMTP From header field.
type: string
hello:
description: The default hostname to identify to the SMTP
server.
type: string
requireTLS:
description: The default SMTP TLS requirement. Note that
Go does not support unencrypted connections to remote
SMTP endpoints.
type: boolean
smartHost:
description: The default SMTP smarthost used for sending
emails.
properties:
host:
description: Defines the host's address, it can be
a DNS name or a literal IP address.
minLength: 1
type: string
port:
description: Defines the host's port, it can be a
literal port number or a port name.
minLength: 1
type: string
required:
- host
- port
type: object
type: object
type: object
name:
description: The name of the AlertmanagerConfig resource which
@ -2362,6 +2446,25 @@ spec:
cannot exceed Limits. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
type: object
restartPolicy:
description: 'RestartPolicy defines the restart behavior of
individual containers in a pod. This field may only be set
for init containers, and the only allowed value is "Always".
For non-init containers or when this field is not specified,
the restart behavior is defined by the Pod''s restart policy
and the container type. Setting the RestartPolicy as "Always"
for the init container will have the following effect: this
init container will be continually restarted on exit until
all regular containers have terminated. Once all regular containers
have completed, all init containers with restartPolicy "Always"
will be shut down. This lifecycle differs from normal init
containers and is often referred to as a "sidecar" container.
Although this init container still starts in the init container
sequence, it does not wait for the container to complete before
proceeding to the next init container. Instead, the next init
container starts immediately after this init container is
started, or after any startupProbe has successfully completed.'
type: string
securityContext:
description: 'SecurityContext defines the security options the
container should be run with. If set, the fields of SecurityContext
@ -2483,8 +2586,8 @@ spec:
in a file on the node should be used. The profile
must be preconfigured on the node to work. Must be
a descending path, relative to the kubelet's configured
seccomp profile location. Must only be set if type
is "Localhost".
seccomp profile location. Must be set if type is "Localhost".
Must NOT be set for any other type.
type: string
type:
description: "type indicates which kind of seccomp profile
@ -2517,16 +2620,12 @@ spec:
type: string
hostProcess:
description: HostProcess determines if a container should
be run as a 'Host Process' container. This field is
alpha-level and will only be honored by components
that enable the WindowsHostProcessContainers feature
flag. Setting this field without the feature flag
will result in errors when validating the Pod. All
of a Pod's containers must have the same effective
HostProcess value (it is not allowed to have a mix
of HostProcess containers and non-HostProcess containers). In
addition, if HostProcess is true then HostNetwork
must also be set to true.
be run as a 'Host Process' container. All of a Pod's
containers must have the same effective HostProcess
value (it is not allowed to have a mix of HostProcess
containers and non-HostProcess containers). In addition,
if HostProcess is true then HostNetwork must also
be set to true.
type: boolean
runAsUserName:
description: The UserName in Windows to run the entrypoint
@ -3705,6 +3804,25 @@ spec:
cannot exceed Limits. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
type: object
restartPolicy:
description: 'RestartPolicy defines the restart behavior of
individual containers in a pod. This field may only be set
for init containers, and the only allowed value is "Always".
For non-init containers or when this field is not specified,
the restart behavior is defined by the Pod''s restart policy
and the container type. Setting the RestartPolicy as "Always"
for the init container will have the following effect: this
init container will be continually restarted on exit until
all regular containers have terminated. Once all regular containers
have completed, all init containers with restartPolicy "Always"
will be shut down. This lifecycle differs from normal init
containers and is often referred to as a "sidecar" container.
Although this init container still starts in the init container
sequence, it does not wait for the container to complete before
proceeding to the next init container. Instead, the next init
container starts immediately after this init container is
started, or after any startupProbe has successfully completed.'
type: string
securityContext:
description: 'SecurityContext defines the security options the
container should be run with. If set, the fields of SecurityContext
@ -3826,8 +3944,8 @@ spec:
in a file on the node should be used. The profile
must be preconfigured on the node to work. Must be
a descending path, relative to the kubelet's configured
seccomp profile location. Must only be set if type
is "Localhost".
seccomp profile location. Must be set if type is "Localhost".
Must NOT be set for any other type.
type: string
type:
description: "type indicates which kind of seccomp profile
@ -3860,16 +3978,12 @@ spec:
type: string
hostProcess:
description: HostProcess determines if a container should
be run as a 'Host Process' container. This field is
alpha-level and will only be honored by components
that enable the WindowsHostProcessContainers feature
flag. Setting this field without the feature flag
will result in errors when validating the Pod. All
of a Pod's containers must have the same effective
HostProcess value (it is not allowed to have a mix
of HostProcess containers and non-HostProcess containers). In
addition, if HostProcess is true then HostNetwork
must also be set to true.
be run as a 'Host Process' container. All of a Pod's
containers must have the same effective HostProcess
value (it is not allowed to have a mix of HostProcess
containers and non-HostProcess containers). In addition,
if HostProcess is true then HostNetwork must also
be set to true.
type: boolean
runAsUserName:
description: The UserName in Windows to run the entrypoint
@ -4188,8 +4302,15 @@ spec:
objects are not goint to be performed, except for delete actions.
type: boolean
podMetadata:
description: PodMetadata configures Labels and Annotations which are
propagated to the alertmanager pods.
description: "PodMetadata configures labels and annotations which
are propagated to the Alertmanager pods. \n The following items
are reserved and cannot be overridden: * \"alertmanager\" label,
set to the name of the Alertmanager instance. * \"app.kubernetes.io/instance\"
label, set to the name of the Alertmanager instance. * \"app.kubernetes.io/managed-by\"
label, set to \"prometheus-operator\". * \"app.kubernetes.io/name\"
label, set to \"alertmanager\". * \"app.kubernetes.io/version\"
label, set to the Alertmanager version. * \"kubectl.kubernetes.io/default-container\"
annotation, set to \"alertmanager\"."
properties:
annotations:
additionalProperties:
@ -4387,7 +4508,8 @@ spec:
in a file on the node should be used. The profile must be
preconfigured on the node to work. Must be a descending
path, relative to the kubelet's configured seccomp profile
location. Must only be set if type is "Localhost".
location. Must be set if type is "Localhost". Must NOT be
set for any other type.
type: string
type:
description: "type indicates which kind of seccomp profile
@ -4451,14 +4573,11 @@ spec:
type: string
hostProcess:
description: HostProcess determines if a container should
be run as a 'Host Process' container. This field is alpha-level
and will only be honored by components that enable the WindowsHostProcessContainers
feature flag. Setting this field without the feature flag
will result in errors when validating the Pod. All of a
Pod's containers must have the same effective HostProcess
value (it is not allowed to have a mix of HostProcess containers
and non-HostProcess containers). In addition, if HostProcess
is true then HostNetwork must also be set to true.
be run as a 'Host Process' container. All of a Pod's containers
must have the same effective HostProcess value (it is not
allowed to have a mix of HostProcess containers and non-HostProcess
containers). In addition, if HostProcess is true then HostNetwork
must also be set to true.
type: boolean
runAsUserName:
description: The UserName in Windows to run the entrypoint
@ -5046,6 +5165,51 @@ spec:
items:
type: string
type: array
allocatedResourceStatuses:
additionalProperties:
description: When a controller receives persistentvolume
claim update with ClaimResourceStatus for a resource
that it does not recognizes, then it should ignore
that update and let other controllers handle it.
type: string
description: "allocatedResourceStatuses stores status
of resource being resized for the given PVC. Key names
follow standard Kubernetes label syntax. Valid values
are either: * Un-prefixed keys: - storage - the capacity
of the volume. * Custom resources must use implementation-defined
prefixed names such as \"example.com/my-custom-resource\"
Apart from above values - keys that are unprefixed or
have kubernetes.io prefix are considered reserved and
hence may not be used. \n ClaimResourceStatus can be
in any of following states: - ControllerResizeInProgress:
State set when resize controller starts resizing the
volume in control-plane. - ControllerResizeFailed: State
set when resize has failed in resize controller with
a terminal error. - NodeResizePending: State set when
resize controller has finished resizing the volume but
further resizing of volume is needed on the node. -
NodeResizeInProgress: State set when kubelet starts
resizing the volume. - NodeResizeFailed: State set when
resizing has failed in kubelet with a terminal error.
Transient errors don't set NodeResizeFailed. For example:
if expanding a PVC for more capacity - this field can
be one of the following states: - pvc.status.allocatedResourceStatus['storage']
= \"ControllerResizeInProgress\" - pvc.status.allocatedResourceStatus['storage']
= \"ControllerResizeFailed\" - pvc.status.allocatedResourceStatus['storage']
= \"NodeResizePending\" - pvc.status.allocatedResourceStatus['storage']
= \"NodeResizeInProgress\" - pvc.status.allocatedResourceStatus['storage']
= \"NodeResizeFailed\" When this field is not set, it
means that no resize operation is in progress for the
given PVC. \n A controller that receives PVC update
with previously unknown resourceName or ClaimResourceStatus
should ignore the update for the purpose it was designed.
For example - a controller that only is responsible
for resizing capacity of the volume, should ignore PVC
updates that change other valid resources associated
with PVC. \n This is an alpha field and requires enabling
RecoverVolumeExpansionFailure feature."
type: object
x-kubernetes-map-type: granular
allocatedResources:
additionalProperties:
anyOf:
@ -5053,19 +5217,31 @@ spec:
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: allocatedResources is the storage resource
within AllocatedResources tracks the capacity allocated
to a PVC. It may be larger than the actual capacity
when a volume expansion operation is requested. For
storage quota, the larger value from allocatedResources
and PVC.spec.resources is used. If allocatedResources
is not set, PVC.spec.resources alone is used for quota
calculation. If a volume expansion capacity request
is lowered, allocatedResources is only lowered if there
are no expansion operations in progress and if the actual
volume capacity is equal or lower than the requested
capacity. This is an alpha field and requires enabling
RecoverVolumeExpansionFailure feature.
description: "allocatedResources tracks the resources
allocated to a PVC including its capacity. Key names
follow standard Kubernetes label syntax. Valid values
are either: * Un-prefixed keys: - storage - the capacity
of the volume. * Custom resources must use implementation-defined
prefixed names such as \"example.com/my-custom-resource\"
Apart from above values - keys that are unprefixed or
have kubernetes.io prefix are considered reserved and
hence may not be used. \n Capacity reported here may
be larger than the actual capacity when a volume expansion
operation is requested. For storage quota, the larger
value from allocatedResources and PVC.spec.resources
is used. If allocatedResources is not set, PVC.spec.resources
alone is used for quota calculation. If a volume expansion
capacity request is lowered, allocatedResources is only
lowered if there are no expansion operations in progress
and if the actual volume capacity is equal or lower
than the requested capacity. \n A controller that receives
PVC update with previously unknown resourceName should
ignore the update for the purpose it was designed. For
example - a controller that only is responsible for
resizing capacity of the volume, should ignore PVC updates
that change other valid resources associated with PVC.
\n This is an alpha field and requires enabling RecoverVolumeExpansionFailure
feature."
type: object
capacity:
additionalProperties:
@ -5120,13 +5296,6 @@ spec:
phase:
description: phase represents the current phase of PersistentVolumeClaim.
type: string
resizeStatus:
description: resizeStatus stores status of resize operation.
ResizeStatus is not set by default but when expansion
is complete resizeStatus is set to empty string by resize
controller or kubelet. This is an alpha field and requires
enabling RecoverVolumeExpansionFailure feature.
type: string
type: object
type: object
type: object

View File

@ -1,10 +1,11 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.66.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.11.1
operator.prometheus.io/version: 0.69.1
argocd.argoproj.io/sync-options: ServerSideApply=true
creationTimestamp: null
name: podmonitors.monitoring.coreos.com
@ -54,6 +55,12 @@ spec:
jobLabel:
description: The label to use to retrieve the job name from.
type: string
keepDroppedTargets:
description: "Per-scrape limit on the number of targets dropped by
relabeling that will be kept in memory. 0 means no limit. \n It
requires Prometheus >= v2.47.0."
format: int64
type: integer
labelLimit:
description: Per-scrape limit on number of labels that will be accepted
for a sample. Only valid in Prometheus versions 2.27.0 and newer.
@ -95,8 +102,8 @@ spec:
description: Authorization section for this endpoint
properties:
credentials:
description: The secret's key that contains the credentials
of the request
description: Selects a key of a Secret in the namespace
that contains the credentials for authentication.
properties:
key:
description: The key of the secret to select from. Must
@ -115,8 +122,9 @@ spec:
type: object
x-kubernetes-map-type: atomic
type:
description: Set the authentication type. Defaults to Bearer,
Basic will cause an error
description: "Defines the authentication type. The value
is case-insensitive. \n \"Basic\" is not a supported value.
\n Default: \"Bearer\""
type: string
type: object
basicAuth:
@ -213,16 +221,16 @@ spec:
description: MetricRelabelConfigs to apply to samples before
ingestion.
items:
description: 'RelabelConfig allows dynamic rewriting of the
label set, being applied to samples before ingestion. It
defines `<metric_relabel_configs>`-section of Prometheus
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
description: "RelabelConfig allows dynamic rewriting of the
label set for targets, alerts, scraped samples and remote
write samples. \n More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config"
properties:
action:
default: replace
description: Action to perform based on regex matching.
Default is 'replace'. uppercase and lowercase actions
require Prometheus >= 2.36.
description: "Action to perform based on the regex matching.
\n `Uppercase` and `Lowercase` actions require Prometheus
>= v2.36.0. `DropEqual` and `KeepEqual` actions require
Prometheus >= v2.41.0. \n Default: \"Replace\""
enum:
- replace
- Replace
@ -248,28 +256,29 @@ spec:
- DropEqual
type: string
modulus:
description: Modulus to take of the hash of the source
label values.
description: "Modulus to take of the hash of the source
label values. \n Only applicable when the action is
`HashMod`."
format: int64
type: integer
regex:
description: Regular expression against which the extracted
value is matched. Default is '(.*)'
value is matched.
type: string
replacement:
description: Replacement value against which a regex replace
is performed if the regular expression matches. Regex
capture groups are available. Default is '$1'
description: "Replacement value against which a Replace
action is performed if the regular expression matches.
\n Regex capture groups are available."
type: string
separator:
description: Separator placed between concatenated source
label values. default is ';'.
description: Separator is the string between concatenated
SourceLabels.
type: string
sourceLabels:
description: The source labels select values from existing
labels. Their content is concatenated using the configured
separator and matched against the configured regular
expression for the replace, keep, and drop actions.
Separator and matched against the configured regular
expression.
items:
description: LabelName is a valid Prometheus label name
which may only contain ASCII letters, numbers, as
@ -278,9 +287,10 @@ spec:
type: string
type: array
targetLabel:
description: Label to which the resulting value is written
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
description: "Label to which the resulting string is written
in a replacement. \n It is mandatory for `Replace`,
`HashMod`, `Lowercase`, `Uppercase`, `KeepEqual` and
`DropEqual` actions. \n Regex capture groups are available."
type: string
type: object
type: array
@ -397,16 +407,16 @@ spec:
is available via the `__tmp_prometheus_job_name` label. More
info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
items:
description: 'RelabelConfig allows dynamic rewriting of the
label set, being applied to samples before ingestion. It
defines `<metric_relabel_configs>`-section of Prometheus
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
description: "RelabelConfig allows dynamic rewriting of the
label set for targets, alerts, scraped samples and remote
write samples. \n More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config"
properties:
action:
default: replace
description: Action to perform based on regex matching.
Default is 'replace'. uppercase and lowercase actions
require Prometheus >= 2.36.
description: "Action to perform based on the regex matching.
\n `Uppercase` and `Lowercase` actions require Prometheus
>= v2.36.0. `DropEqual` and `KeepEqual` actions require
Prometheus >= v2.41.0. \n Default: \"Replace\""
enum:
- replace
- Replace
@ -432,28 +442,29 @@ spec:
- DropEqual
type: string
modulus:
description: Modulus to take of the hash of the source
label values.
description: "Modulus to take of the hash of the source
label values. \n Only applicable when the action is
`HashMod`."
format: int64
type: integer
regex:
description: Regular expression against which the extracted
value is matched. Default is '(.*)'
value is matched.
type: string
replacement:
description: Replacement value against which a regex replace
is performed if the regular expression matches. Regex
capture groups are available. Default is '$1'
description: "Replacement value against which a Replace
action is performed if the regular expression matches.
\n Regex capture groups are available."
type: string
separator:
description: Separator placed between concatenated source
label values. default is ';'.
description: Separator is the string between concatenated
SourceLabels.
type: string
sourceLabels:
description: The source labels select values from existing
labels. Their content is concatenated using the configured
separator and matched against the configured regular
expression for the replace, keep, and drop actions.
Separator and matched against the configured regular
expression.
items:
description: LabelName is a valid Prometheus label name
which may only contain ASCII letters, numbers, as
@ -462,9 +473,10 @@ spec:
type: string
type: array
targetLabel:
description: Label to which the resulting value is written
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
description: "Label to which the resulting string is written
in a replacement. \n It is mandatory for `Replace`,
`HashMod`, `Lowercase`, `Uppercase`, `KeepEqual` and
`DropEqual` actions. \n Regex capture groups are available."
type: string
type: object
type: array

View File

@ -1,10 +1,11 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.66.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.11.1
operator.prometheus.io/version: 0.69.1
argocd.argoproj.io/sync-options: ServerSideApply=true
creationTimestamp: null
name: probes.monitoring.coreos.com
@ -46,8 +47,8 @@ spec:
description: Authorization section for this endpoint
properties:
credentials:
description: The secret's key that contains the credentials of
the request
description: Selects a key of a Secret in the namespace that contains
the credentials for authentication.
properties:
key:
description: The key of the secret to select from. Must be
@ -66,8 +67,8 @@ spec:
type: object
x-kubernetes-map-type: atomic
type:
description: Set the authentication type. Defaults to Bearer,
Basic will cause an error
description: "Defines the authentication type. The value is case-insensitive.
\n \"Basic\" is not a supported value. \n Default: \"Bearer\""
type: string
type: object
basicAuth:
@ -143,6 +144,12 @@ spec:
jobName:
description: The job name assigned to scraped metrics by default.
type: string
keepDroppedTargets:
description: "Per-scrape limit on the number of targets dropped by
relabeling that will be kept in memory. 0 means no limit. \n It
requires Prometheus >= v2.47.0."
format: int64
type: integer
labelLimit:
description: Per-scrape limit on number of labels that will be accepted
for a sample. Only valid in Prometheus versions 2.27.0 and newer.
@ -163,15 +170,16 @@ spec:
metricRelabelings:
description: MetricRelabelConfigs to apply to samples before ingestion.
items:
description: 'RelabelConfig allows dynamic rewriting of the label
set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section
of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
description: "RelabelConfig allows dynamic rewriting of the label
set for targets, alerts, scraped samples and remote write samples.
\n More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config"
properties:
action:
default: replace
description: Action to perform based on regex matching. Default
is 'replace'. uppercase and lowercase actions require Prometheus
>= 2.36.
description: "Action to perform based on the regex matching.
\n `Uppercase` and `Lowercase` actions require Prometheus
>= v2.36.0. `DropEqual` and `KeepEqual` actions require Prometheus
>= v2.41.0. \n Default: \"Replace\""
enum:
- replace
- Replace
@ -197,28 +205,26 @@ spec:
- DropEqual
type: string
modulus:
description: Modulus to take of the hash of the source label
values.
description: "Modulus to take of the hash of the source label
values. \n Only applicable when the action is `HashMod`."
format: int64
type: integer
regex:
description: Regular expression against which the extracted
value is matched. Default is '(.*)'
value is matched.
type: string
replacement:
description: Replacement value against which a regex replace
is performed if the regular expression matches. Regex capture
groups are available. Default is '$1'
description: "Replacement value against which a Replace action
is performed if the regular expression matches. \n Regex capture
groups are available."
type: string
separator:
description: Separator placed between concatenated source label
values. default is ';'.
description: Separator is the string between concatenated SourceLabels.
type: string
sourceLabels:
description: The source labels select values from existing labels.
Their content is concatenated using the configured separator
and matched against the configured regular expression for
the replace, keep, and drop actions.
Their content is concatenated using the configured Separator
and matched against the configured regular expression.
items:
description: LabelName is a valid Prometheus label name which
may only contain ASCII letters, numbers, as well as underscores.
@ -226,9 +232,10 @@ spec:
type: string
type: array
targetLabel:
description: Label to which the resulting value is written in
a replace action. It is mandatory for replace actions. Regex
capture groups are available.
description: "Label to which the resulting string is written
in a replacement. \n It is mandatory for `Replace`, `HashMod`,
`Lowercase`, `Uppercase`, `KeepEqual` and `DropEqual` actions.
\n Regex capture groups are available."
type: string
type: object
type: array
@ -393,16 +400,16 @@ spec:
scrape job''s name is available via the `__tmp_prometheus_job_name`
label. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
items:
description: 'RelabelConfig allows dynamic rewriting of
the label set, being applied to samples before ingestion.
It defines `<metric_relabel_configs>`-section of Prometheus
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
description: "RelabelConfig allows dynamic rewriting of
the label set for targets, alerts, scraped samples and
remote write samples. \n More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config"
properties:
action:
default: replace
description: Action to perform based on regex matching.
Default is 'replace'. uppercase and lowercase actions
require Prometheus >= 2.36.
description: "Action to perform based on the regex matching.
\n `Uppercase` and `Lowercase` actions require Prometheus
>= v2.36.0. `DropEqual` and `KeepEqual` actions require
Prometheus >= v2.41.0. \n Default: \"Replace\""
enum:
- replace
- Replace
@ -428,28 +435,29 @@ spec:
- DropEqual
type: string
modulus:
description: Modulus to take of the hash of the source
label values.
description: "Modulus to take of the hash of the source
label values. \n Only applicable when the action is
`HashMod`."
format: int64
type: integer
regex:
description: Regular expression against which the extracted
value is matched. Default is '(.*)'
value is matched.
type: string
replacement:
description: Replacement value against which a regex
replace is performed if the regular expression matches.
Regex capture groups are available. Default is '$1'
description: "Replacement value against which a Replace
action is performed if the regular expression matches.
\n Regex capture groups are available."
type: string
separator:
description: Separator placed between concatenated source
label values. default is ';'.
description: Separator is the string between concatenated
SourceLabels.
type: string
sourceLabels:
description: The source labels select values from existing
labels. Their content is concatenated using the configured
separator and matched against the configured regular
expression for the replace, keep, and drop actions.
Separator and matched against the configured regular
expression.
items:
description: LabelName is a valid Prometheus label
name which may only contain ASCII letters, numbers,
@ -458,9 +466,10 @@ spec:
type: string
type: array
targetLabel:
description: Label to which the resulting value is written
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
description: "Label to which the resulting string is
written in a replacement. \n It is mandatory for `Replace`,
`HashMod`, `Lowercase`, `Uppercase`, `KeepEqual` and
`DropEqual` actions. \n Regex capture groups are available."
type: string
type: object
type: array
@ -526,16 +535,16 @@ spec:
description: 'RelabelConfigs to apply to the label set of
the targets before it gets scraped. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
items:
description: 'RelabelConfig allows dynamic rewriting of
the label set, being applied to samples before ingestion.
It defines `<metric_relabel_configs>`-section of Prometheus
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
description: "RelabelConfig allows dynamic rewriting of
the label set for targets, alerts, scraped samples and
remote write samples. \n More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config"
properties:
action:
default: replace
description: Action to perform based on regex matching.
Default is 'replace'. uppercase and lowercase actions
require Prometheus >= 2.36.
description: "Action to perform based on the regex matching.
\n `Uppercase` and `Lowercase` actions require Prometheus
>= v2.36.0. `DropEqual` and `KeepEqual` actions require
Prometheus >= v2.41.0. \n Default: \"Replace\""
enum:
- replace
- Replace
@ -561,28 +570,29 @@ spec:
- DropEqual
type: string
modulus:
description: Modulus to take of the hash of the source
label values.
description: "Modulus to take of the hash of the source
label values. \n Only applicable when the action is
`HashMod`."
format: int64
type: integer
regex:
description: Regular expression against which the extracted
value is matched. Default is '(.*)'
value is matched.
type: string
replacement:
description: Replacement value against which a regex
replace is performed if the regular expression matches.
Regex capture groups are available. Default is '$1'
description: "Replacement value against which a Replace
action is performed if the regular expression matches.
\n Regex capture groups are available."
type: string
separator:
description: Separator placed between concatenated source
label values. default is ';'.
description: Separator is the string between concatenated
SourceLabels.
type: string
sourceLabels:
description: The source labels select values from existing
labels. Their content is concatenated using the configured
separator and matched against the configured regular
expression for the replace, keep, and drop actions.
Separator and matched against the configured regular
expression.
items:
description: LabelName is a valid Prometheus label
name which may only contain ASCII letters, numbers,
@ -591,9 +601,10 @@ spec:
type: string
type: array
targetLabel:
description: Label to which the resulting value is written
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
description: "Label to which the resulting string is
written in a replacement. \n It is mandatory for `Replace`,
`HashMod`, `Lowercase`, `Uppercase`, `KeepEqual` and
`DropEqual` actions. \n Regex capture groups are available."
type: string
type: object
type: array

View File

@ -1,10 +1,11 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.66.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusagents.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusagents.yaml
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.11.1
operator.prometheus.io/version: 0.69.1
argocd.argoproj.io/sync-options: ServerSideApply=true
creationTimestamp: null
name: prometheusagents.monitoring.coreos.com
@ -957,11 +958,12 @@ spec:
and use the Pod''s CA certificate and bearer token file at /var/run/secrets/kubernetes.io/serviceaccount/.'
properties:
authorization:
description: Authorization section for accessing apiserver
description: "Authorization section for the API server. \n Cannot
be set at the same time as `basicAuth`, `bearerToken`, or `bearerTokenFile`."
properties:
credentials:
description: The secret's key that contains the credentials
of the request
description: Selects a key of a Secret in the namespace that
contains the credentials for authentication.
properties:
key:
description: The key of the secret to select from. Must
@ -981,16 +983,18 @@ spec:
x-kubernetes-map-type: atomic
credentialsFile:
description: File to read a secret from, mutually exclusive
with Credentials (from SafeAuthorization)
with `credentials`.
type: string
type:
description: Set the authentication type. Defaults to Bearer,
Basic will cause an error
description: "Defines the authentication type. The value is
case-insensitive. \n \"Basic\" is not a supported value.
\n Default: \"Bearer\""
type: string
type: object
basicAuth:
description: BasicAuth allow an endpoint to authenticate over
basic authentication
description: "BasicAuth configuration for the API server. \n Cannot
be set at the same time as `authorization`, `bearerToken`, or
`bearerTokenFile`."
properties:
password:
description: The secret in the service monitor namespace that
@ -1034,17 +1038,22 @@ spec:
x-kubernetes-map-type: atomic
type: object
bearerToken:
description: Bearer token for accessing apiserver.
description: "*Warning: this field shouldn't be used because the
token value appears in clear-text. Prefer using `authorization`.*
\n *Deprecated: this will be removed in a future release.*"
type: string
bearerTokenFile:
description: File to read bearer token for accessing apiserver.
description: "File to read bearer token for accessing apiserver.
\n Cannot be set at the same time as `basicAuth`, `authorization`,
or `bearerToken`. \n *Deprecated: this will be removed in a
future release. Prefer using `authorization`.*"
type: string
host:
description: Host of apiserver. A valid string consisting of a
hostname or IP followed by an optional port number
description: Kubernetes API address consisting of a hostname or
IP address followed by an optional port number.
type: string
tlsConfig:
description: TLS Config to use for accessing apiserver.
description: TLS Config to use for the API server.
properties:
ca:
description: Certificate authority used when verifying server
@ -1190,6 +1199,11 @@ spec:
deny:
type: boolean
type: object
bodySizeLimit:
description: BodySizeLimit defines the per-scrape limit on response body size.
Only valid in Prometheus versions 2.45.0 and newer.
pattern: (^0|([0-9]*[.])?[0-9]+((K|M|G|T|E|P)i?)?B)$
type: string
configMaps:
description: ConfigMaps is a list of ConfigMaps in the same namespace
as the Prometheus object, which shall be mounted into the Prometheus
@ -2035,6 +2049,25 @@ spec:
cannot exceed Limits. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
type: object
restartPolicy:
description: 'RestartPolicy defines the restart behavior of
individual containers in a pod. This field may only be set
for init containers, and the only allowed value is "Always".
For non-init containers or when this field is not specified,
the restart behavior is defined by the Pod''s restart policy
and the container type. Setting the RestartPolicy as "Always"
for the init container will have the following effect: this
init container will be continually restarted on exit until
all regular containers have terminated. Once all regular containers
have completed, all init containers with restartPolicy "Always"
will be shut down. This lifecycle differs from normal init
containers and is often referred to as a "sidecar" container.
Although this init container still starts in the init container
sequence, it does not wait for the container to complete before
proceeding to the next init container. Instead, the next init
container starts immediately after this init container is
started, or after any startupProbe has successfully completed.'
type: string
securityContext:
description: 'SecurityContext defines the security options the
container should be run with. If set, the fields of SecurityContext
@ -2156,8 +2189,8 @@ spec:
in a file on the node should be used. The profile
must be preconfigured on the node to work. Must be
a descending path, relative to the kubelet's configured
seccomp profile location. Must only be set if type
is "Localhost".
seccomp profile location. Must be set if type is "Localhost".
Must NOT be set for any other type.
type: string
type:
description: "type indicates which kind of seccomp profile
@ -2190,16 +2223,12 @@ spec:
type: string
hostProcess:
description: HostProcess determines if a container should
be run as a 'Host Process' container. This field is
alpha-level and will only be honored by components
that enable the WindowsHostProcessContainers feature
flag. Setting this field without the feature flag
will result in errors when validating the Pod. All
of a Pod's containers must have the same effective
HostProcess value (it is not allowed to have a mix
of HostProcess containers and non-HostProcess containers). In
addition, if HostProcess is true then HostNetwork
must also be set to true.
be run as a 'Host Process' container. All of a Pod's
containers must have the same effective HostProcess
value (it is not allowed to have a mix of HostProcess
containers and non-HostProcess containers). In addition,
if HostProcess is true then HostNetwork must also
be set to true.
type: boolean
runAsUserName:
description: The UserName in Windows to run the entrypoint
@ -2504,6 +2533,15 @@ spec:
v2.28.0."
pattern: (^0|([0-9]*[.])?[0-9]+((K|M|G|T|E|P)i?)?B)$
type: string
enforcedKeepDroppedTargets:
description: "When defined, enforcedKeepDroppedTargets specifies a
global limit on the number of targets dropped by relabeling that
will be kept in memory. The value overrides any `spec.keepDroppedTargets`
set by ServiceMonitor, PodMonitor, Probe objects unless `spec.keepDroppedTargets`
is greater than zero and less than `spec.enforcedKeepDroppedTargets`.
\n It requires Prometheus >= v2.47.0."
format: int64
type: integer
enforcedLabelLimit:
description: "When defined, enforcedLabelLimit specifies a global
limit on the number of labels per sample. The value overrides any
@ -2592,6 +2630,7 @@ spec:
- servicemonitors
- podmonitors
- probes
- scrapeconfigs
type: string
required:
- namespace
@ -2645,7 +2684,7 @@ spec:
description: When true, `spec.namespaceSelector` from all PodMonitor,
ServiceMonitor and Probe objects will be ignored. They will only
discover targets within the namespace of the PodMonitor, ServiceMonitor
and Probe objec.
and Probe object.
type: boolean
image:
description: "Container image name for Prometheus. If specified, it
@ -3517,6 +3556,25 @@ spec:
cannot exceed Limits. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
type: object
restartPolicy:
description: 'RestartPolicy defines the restart behavior of
individual containers in a pod. This field may only be set
for init containers, and the only allowed value is "Always".
For non-init containers or when this field is not specified,
the restart behavior is defined by the Pod''s restart policy
and the container type. Setting the RestartPolicy as "Always"
for the init container will have the following effect: this
init container will be continually restarted on exit until
all regular containers have terminated. Once all regular containers
have completed, all init containers with restartPolicy "Always"
will be shut down. This lifecycle differs from normal init
containers and is often referred to as a "sidecar" container.
Although this init container still starts in the init container
sequence, it does not wait for the container to complete before
proceeding to the next init container. Instead, the next init
container starts immediately after this init container is
started, or after any startupProbe has successfully completed.'
type: string
securityContext:
description: 'SecurityContext defines the security options the
container should be run with. If set, the fields of SecurityContext
@ -3638,8 +3696,8 @@ spec:
in a file on the node should be used. The profile
must be preconfigured on the node to work. Must be
a descending path, relative to the kubelet's configured
seccomp profile location. Must only be set if type
is "Localhost".
seccomp profile location. Must be set if type is "Localhost".
Must NOT be set for any other type.
type: string
type:
description: "type indicates which kind of seccomp profile
@ -3672,16 +3730,12 @@ spec:
type: string
hostProcess:
description: HostProcess determines if a container should
be run as a 'Host Process' container. This field is
alpha-level and will only be honored by components
that enable the WindowsHostProcessContainers feature
flag. Setting this field without the feature flag
will result in errors when validating the Pod. All
of a Pod's containers must have the same effective
HostProcess value (it is not allowed to have a mix
of HostProcess containers and non-HostProcess containers). In
addition, if HostProcess is true then HostNetwork
must also be set to true.
be run as a 'Host Process' container. All of a Pod's
containers must have the same effective HostProcess
value (it is not allowed to have a mix of HostProcess
containers and non-HostProcess containers). In addition,
if HostProcess is true then HostNetwork must also
be set to true.
type: boolean
runAsUserName:
description: The UserName in Windows to run the entrypoint
@ -3960,6 +4014,29 @@ spec:
- name
type: object
type: array
keepDroppedTargets:
description: "Per-scrape limit on the number of targets dropped by
relabeling that will be kept in memory. 0 means no limit. \n It
requires Prometheus >= v2.47.0."
format: int64
type: integer
labelLimit:
description: Per-scrape limit on number of labels that will be accepted
for a sample. Only valid in Prometheus versions 2.45.0 and newer.
format: int64
type: integer
labelNameLengthLimit:
description: Per-scrape limit on the length of label names that will be
accepted for a sample. Only valid in Prometheus versions 2.45.0
and newer.
format: int64
type: integer
labelValueLengthLimit:
description: Per-scrape limit on the length of label values that will
be accepted for a sample. Only valid in Prometheus versions 2.45.0
and newer.
format: int64
type: integer
listenLocal:
description: When true, the Prometheus server listens on the loopback
address instead of the Pod IP's address.
@ -4011,8 +4088,17 @@ spec:
for deletion will be performed on the underlying objects.
type: boolean
podMetadata:
description: PodMetadata configures labels and annotations which are
propagated to the Prometheus pods.
description: "PodMetadata configures labels and annotations which
are propagated to the Prometheus pods. \n The following items are
reserved and cannot be overridden: * \"prometheus\" label, set to
the name of the Prometheus object. * \"app.kubernetes.io/instance\"
label, set to the name of the Prometheus object. * \"app.kubernetes.io/managed-by\"
label, set to \"prometheus-operator\". * \"app.kubernetes.io/name\"
label, set to \"prometheus\". * \"app.kubernetes.io/version\" label,
set to the Prometheus version. * \"operator.prometheus.io/name\"
label, set to the name of the Prometheus object. * \"operator.prometheus.io/shard\"
label, set to the shard number of the Prometheus object. * \"kubectl.kubernetes.io/default-container\"
annotation, set to \"prometheus\"."
properties:
annotations:
additionalProperties:
@ -4271,11 +4357,11 @@ spec:
authorization:
description: "Authorization section for the URL. \n It requires
Prometheus >= v2.26.0. \n Cannot be set at the same time as
`sigv4`, `basicAuth`, or `oauth2`."
`sigv4`, `basicAuth`, `oauth2`, or `azureAd`."
properties:
credentials:
description: The secret's key that contains the credentials
of the request
description: Selects a key of a Secret in the namespace
that contains the credentials for authentication.
properties:
key:
description: The key of the secret to select from. Must
@ -4295,16 +4381,44 @@ spec:
x-kubernetes-map-type: atomic
credentialsFile:
description: File to read a secret from, mutually exclusive
with Credentials (from SafeAuthorization)
with `credentials`.
type: string
type:
description: Set the authentication type. Defaults to Bearer,
Basic will cause an error
description: "Defines the authentication type. The value
is case-insensitive. \n \"Basic\" is not a supported value.
\n Default: \"Bearer\""
type: string
type: object
azureAd:
description: "AzureAD for the URL. \n It requires Prometheus
>= v2.45.0. \n Cannot be set at the same time as `authorization`,
`basicAuth`, `oauth2`, or `sigv4`."
properties:
cloud:
description: The Azure Cloud. Options are 'AzurePublic',
'AzureChina', or 'AzureGovernment'.
enum:
- AzureChina
- AzureGovernment
- AzurePublic
type: string
managedIdentity:
description: ManagedIdentity defines the Azure User-assigned
Managed identity.
properties:
clientId:
description: The client id
type: string
required:
- clientId
type: object
required:
- managedIdentity
type: object
basicAuth:
description: "BasicAuth configuration for the URL. \n Cannot
be set at the same time as `sigv4`, `authorization`, or `oauth2`."
be set at the same time as `sigv4`, `authorization`, `oauth2`,
or `azureAd`."
properties:
password:
description: The secret in the service monitor namespace
@ -4348,8 +4462,8 @@ spec:
x-kubernetes-map-type: atomic
type: object
bearerToken:
description: "*Warning: this field shouldn't used because the
token value appears in clear-text. Prefer using `authorization`.*
description: "*Warning: this field shouldn't be used because
the token value appears in clear-text. Prefer using `authorization`.*
\n *Deprecated: this will be removed in a future release.*"
type: string
bearerTokenFile:
@ -4370,12 +4484,12 @@ spec:
metadata to the remote storage.
properties:
send:
description: Whether metric metadata is sent to the remote
storage or not.
description: Defines whether metric metadata is sent to
the remote storage or not.
type: boolean
sendInterval:
description: How frequently metric metadata is sent to the
remote storage.
description: Defines how frequently metric metadata is sent
to the remote storage.
pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
type: string
type: object
@ -4388,7 +4502,7 @@ spec:
oauth2:
description: "OAuth2 configuration for the URL. \n It requires
Prometheus >= v2.27.0. \n Cannot be set at the same time as
`sigv4`, `authorization`, or `basicAuth`."
`sigv4`, `authorization`, `basicAuth`, or `azureAd`."
properties:
clientId:
description: The secret or configmap containing the OAuth2
@ -4535,8 +4649,8 @@ spec:
sigv4:
description: "Sigv4 allows configuring AWS's Signature Version
4 for the URL. \n It requires Prometheus >= v2.26.0. \n Cannot
be set at the same time as `authorization`, `basicAuth`, or
`oauth2`."
be set at the same time as `authorization`, `basicAuth`, `oauth2`,
or `azureAd`."
properties:
accessKey:
description: AccessKey is the AWS API key. If not specified,
@ -4726,16 +4840,16 @@ spec:
writeRelabelConfigs:
description: The list of remote write relabel configurations.
items:
description: 'RelabelConfig allows dynamic rewriting of the
label set, being applied to samples before ingestion. It
defines `<metric_relabel_configs>`-section of Prometheus
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
description: "RelabelConfig allows dynamic rewriting of the
label set for targets, alerts, scraped samples and remote
write samples. \n More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config"
properties:
action:
default: replace
description: Action to perform based on regex matching.
Default is 'replace'. uppercase and lowercase actions
require Prometheus >= 2.36.
description: "Action to perform based on the regex matching.
\n `Uppercase` and `Lowercase` actions require Prometheus
>= v2.36.0. `DropEqual` and `KeepEqual` actions require
Prometheus >= v2.41.0. \n Default: \"Replace\""
enum:
- replace
- Replace
@ -4761,28 +4875,29 @@ spec:
- DropEqual
type: string
modulus:
description: Modulus to take of the hash of the source
label values.
description: "Modulus to take of the hash of the source
label values. \n Only applicable when the action is
`HashMod`."
format: int64
type: integer
regex:
description: Regular expression against which the extracted
value is matched. Default is '(.*)'
value is matched.
type: string
replacement:
description: Replacement value against which a regex replace
is performed if the regular expression matches. Regex
capture groups are available. Default is '$1'
description: "Replacement value against which a Replace
action is performed if the regular expression matches.
\n Regex capture groups are available."
type: string
separator:
description: Separator placed between concatenated source
label values. default is ';'.
description: Separator is the string between concatenated
SourceLabels.
type: string
sourceLabels:
description: The source labels select values from existing
labels. Their content is concatenated using the configured
separator and matched against the configured regular
expression for the replace, keep, and drop actions.
Separator and matched against the configured regular
expression.
items:
description: LabelName is a valid Prometheus label name
which may only contain ASCII letters, numbers, as
@ -4791,9 +4906,10 @@ spec:
type: string
type: array
targetLabel:
description: Label to which the resulting value is written
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
description: "Label to which the resulting string is written
in a replacement. \n It is mandatory for `Replace`,
`HashMod`, `Lowercase`, `Uppercase`, `KeepEqual` and
`DropEqual` actions. \n Regex capture groups are available."
type: string
type: object
type: array
@ -4868,6 +4984,12 @@ spec:
is still true, but the server serves requests under a different
route prefix. For example for use with `kubectl proxy`."
type: string
sampleLimit:
description: SampleLimit defines per-scrape limit on number of scraped
samples that will be accepted. Only valid in Prometheus versions
2.45.0 and newer.
format: int64
type: integer
scrapeConfigNamespaceSelector:
description: Namespaces to match for ScrapeConfig discovery. An empty
label selector matches all namespaces. A null label selector matches
@ -5076,7 +5198,8 @@ spec:
in a file on the node should be used. The profile must be
preconfigured on the node to work. Must be a descending
path, relative to the kubelet's configured seccomp profile
location. Must only be set if type is "Localhost".
location. Must be set if type is "Localhost". Must NOT be
set for any other type.
type: string
type:
description: "type indicates which kind of seccomp profile
@ -5140,14 +5263,11 @@ spec:
type: string
hostProcess:
description: HostProcess determines if a container should
be run as a 'Host Process' container. This field is alpha-level
and will only be honored by components that enable the WindowsHostProcessContainers
feature flag. Setting this field without the feature flag
will result in errors when validating the Pod. All of a
Pod's containers must have the same effective HostProcess
value (it is not allowed to have a mix of HostProcess containers
and non-HostProcess containers). In addition, if HostProcess
is true then HostNetwork must also be set to true.
be run as a 'Host Process' container. All of a Pod's containers
must have the same effective HostProcess value (it is not
allowed to have a mix of HostProcess containers and non-HostProcess
containers). In addition, if HostProcess is true then HostNetwork
must also be set to true.
type: boolean
runAsUserName:
description: The UserName in Windows to run the entrypoint
@ -5842,6 +5962,51 @@ spec:
items:
type: string
type: array
allocatedResourceStatuses:
additionalProperties:
description: When a controller receives a PersistentVolumeClaim
update with ClaimResourceStatus for a resource
that it does not recognize, then it should ignore
that update and let other controllers handle it.
type: string
description: "allocatedResourceStatuses stores status
of resource being resized for the given PVC. Key names
follow standard Kubernetes label syntax. Valid values
are either: * Un-prefixed keys: - storage - the capacity
of the volume. * Custom resources must use implementation-defined
prefixed names such as \"example.com/my-custom-resource\"
Apart from above values - keys that are unprefixed or
have kubernetes.io prefix are considered reserved and
hence may not be used. \n ClaimResourceStatus can be
in any of following states: - ControllerResizeInProgress:
State set when resize controller starts resizing the
volume in control-plane. - ControllerResizeFailed: State
set when resize has failed in resize controller with
a terminal error. - NodeResizePending: State set when
resize controller has finished resizing the volume but
further resizing of volume is needed on the node. -
NodeResizeInProgress: State set when kubelet starts
resizing the volume. - NodeResizeFailed: State set when
resizing has failed in kubelet with a terminal error.
Transient errors don't set NodeResizeFailed. For example:
if expanding a PVC for more capacity - this field can
be one of the following states: - pvc.status.allocatedResourceStatus['storage']
= \"ControllerResizeInProgress\" - pvc.status.allocatedResourceStatus['storage']
= \"ControllerResizeFailed\" - pvc.status.allocatedResourceStatus['storage']
= \"NodeResizePending\" - pvc.status.allocatedResourceStatus['storage']
= \"NodeResizeInProgress\" - pvc.status.allocatedResourceStatus['storage']
= \"NodeResizeFailed\" When this field is not set, it
means that no resize operation is in progress for the
given PVC. \n A controller that receives PVC update
with previously unknown resourceName or ClaimResourceStatus
should ignore the update for the purpose it was designed.
For example - a controller that only is responsible
for resizing capacity of the volume, should ignore PVC
updates that change other valid resources associated
with PVC. \n This is an alpha field and requires enabling
RecoverVolumeExpansionFailure feature."
type: object
x-kubernetes-map-type: granular
allocatedResources:
additionalProperties:
anyOf:
@ -5849,19 +6014,31 @@ spec:
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: allocatedResources is the storage resource
within AllocatedResources tracks the capacity allocated
to a PVC. It may be larger than the actual capacity
when a volume expansion operation is requested. For
storage quota, the larger value from allocatedResources
and PVC.spec.resources is used. If allocatedResources
is not set, PVC.spec.resources alone is used for quota
calculation. If a volume expansion capacity request
is lowered, allocatedResources is only lowered if there
are no expansion operations in progress and if the actual
volume capacity is equal or lower than the requested
capacity. This is an alpha field and requires enabling
RecoverVolumeExpansionFailure feature.
description: "allocatedResources tracks the resources
allocated to a PVC including its capacity. Key names
follow standard Kubernetes label syntax. Valid values
are either: * Un-prefixed keys: - storage - the capacity
of the volume. * Custom resources must use implementation-defined
prefixed names such as \"example.com/my-custom-resource\"
Apart from above values - keys that are unprefixed or
have kubernetes.io prefix are considered reserved and
hence may not be used. \n Capacity reported here may
be larger than the actual capacity when a volume expansion
operation is requested. For storage quota, the larger
value from allocatedResources and PVC.spec.resources
is used. If allocatedResources is not set, PVC.spec.resources
alone is used for quota calculation. If a volume expansion
capacity request is lowered, allocatedResources is only
lowered if there are no expansion operations in progress
and if the actual volume capacity is equal or lower
than the requested capacity. \n A controller that receives
PVC update with previously unknown resourceName should
ignore the update for the purpose it was designed. For
example - a controller that only is responsible for
resizing capacity of the volume, should ignore PVC updates
that change other valid resources associated with PVC.
\n This is an alpha field and requires enabling RecoverVolumeExpansionFailure
feature."
type: object
capacity:
additionalProperties:
@ -5916,16 +6093,15 @@ spec:
phase:
description: phase represents the current phase of PersistentVolumeClaim.
type: string
resizeStatus:
description: resizeStatus stores status of resize operation.
ResizeStatus is not set by default but when expansion
is complete resizeStatus is set to empty string by resize
controller or kubelet. This is an alpha field and requires
enabling RecoverVolumeExpansionFailure feature.
type: string
type: object
type: object
type: object
targetLimit:
description: TargetLimit defines a limit on the number of scraped
targets that will be accepted. Only valid in Prometheus versions
2.45.0 and newer.
format: int64
type: integer
tolerations:
description: Defines the Pods' tolerations if specified.
items:

View File

@ -1,10 +1,11 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.66.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.11.1
operator.prometheus.io/version: 0.69.1
argocd.argoproj.io/sync-options: ServerSideApply=true
creationTimestamp: null
name: prometheuses.monitoring.coreos.com
@ -1012,7 +1013,7 @@ spec:
against.
items:
description: AlertmanagerEndpoints defines a selection of a
single Endpoints object containing alertmanager IPs to fire
single Endpoints object containing Alertmanager IPs to fire
alerts against.
properties:
apiVersion:
@ -1020,12 +1021,13 @@ spec:
uses to send alerts. It can be "v1" or "v2".
type: string
authorization:
description: Authorization section for this alertmanager
endpoint
description: "Authorization section for Alertmanager. \n
Cannot be set at the same time as `basicAuth`, `bearerTokenFile`
or `sigv4`."
properties:
credentials:
description: The secret's key that contains the credentials
of the request
description: Selects a key of a Secret in the namespace
that contains the credentials for authentication.
properties:
key:
description: The key of the secret to select from. Must
@ -1045,13 +1047,15 @@ spec:
type: object
x-kubernetes-map-type: atomic
type:
description: Set the authentication type. Defaults to
Bearer, Basic will cause an error
description: "Defines the authentication type. The value
is case-insensitive. \n \"Basic\" is not a supported
value. \n Default: \"Bearer\""
type: string
type: object
basicAuth:
description: BasicAuth allow an endpoint to authenticate
over basic authentication
description: "BasicAuth configuration for Alertmanager.
\n Cannot be set at the same time as `bearerTokenFile`,
`authorization` or `sigv4`."
properties:
password:
description: The secret in the service monitor namespace
@ -1097,17 +1101,19 @@ spec:
x-kubernetes-map-type: atomic
type: object
bearerTokenFile:
description: BearerTokenFile to read from filesystem to
use when authenticating to Alertmanager.
description: "File to read bearer token for Alertmanager.
\n Cannot be set at the same time as `basicAuth`, `authorization`,
or `sigv4`. \n *Deprecated: this will be removed in a
future release. Prefer using `authorization`.*"
type: string
enableHttp2:
description: Whether to enable HTTP2.
type: boolean
name:
description: Name of Endpoints object in Namespace.
description: Name of the Endpoints object in the namespace.
type: string
namespace:
description: Namespace of Endpoints object.
description: Namespace of the Endpoints object.
type: string
pathPrefix:
description: Prefix for the HTTP path alerts are pushed
@ -1117,18 +1123,80 @@ spec:
anyOf:
- type: integer
- type: string
description: Port the Alertmanager API is exposed on.
description: Port on which the Alertmanager API is exposed.
x-kubernetes-int-or-string: true
scheme:
description: Scheme to use when firing alerts.
type: string
sigv4:
description: "Sigv4 allows configuring AWS's Signature
Version 4 for the URL. \n It requires Prometheus
>= v2.48.0. \n Cannot be set at the same time as `basicAuth`,
`bearerTokenFile` or `authorization`."
properties:
accessKey:
description: AccessKey is the AWS API key. If not specified,
the environment variable `AWS_ACCESS_KEY_ID` is used.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
profile:
description: Profile is the named AWS profile used to
authenticate.
type: string
region:
description: Region is the AWS region. If blank, the
region from the default credentials chain is used.
type: string
roleArn:
description: RoleArn is the AWS role ARN used to
authenticate.
type: string
secretKey:
description: SecretKey is the AWS API secret. If not
specified, the environment variable `AWS_SECRET_ACCESS_KEY`
is used.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
type: object
timeout:
description: Timeout is a per-target Alertmanager timeout
when pushing alerts.
pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
type: string
tlsConfig:
description: TLS Config to use for alertmanager connection.
description: TLS Config to use for Alertmanager.
properties:
ca:
description: Certificate authority used when verifying
@ -1288,11 +1356,12 @@ spec:
and use the Pod''s CA certificate and bearer token file at /var/run/secrets/kubernetes.io/serviceaccount/.'
properties:
authorization:
description: Authorization section for accessing apiserver
description: "Authorization section for the API server. \n Cannot
be set at the same time as `basicAuth`, `bearerToken`, or `bearerTokenFile`."
properties:
credentials:
description: The secret's key that contains the credentials
of the request
description: Selects a key of a Secret in the namespace that
contains the credentials for authentication.
properties:
key:
description: The key of the secret to select from. Must
@ -1312,16 +1381,18 @@ spec:
x-kubernetes-map-type: atomic
credentialsFile:
description: File to read a secret from, mutually exclusive
with Credentials (from SafeAuthorization)
with `credentials`.
type: string
type:
description: Set the authentication type. Defaults to Bearer,
Basic will cause an error
description: "Defines the authentication type. The value is
case-insensitive. \n \"Basic\" is not a supported value.
\n Default: \"Bearer\""
type: string
type: object
basicAuth:
description: BasicAuth allow an endpoint to authenticate over
basic authentication
description: "BasicAuth configuration for the API server. \n Cannot
be set at the same time as `authorization`, `bearerToken`, or
`bearerTokenFile`."
properties:
password:
description: The secret in the service monitor namespace that
@ -1365,17 +1436,22 @@ spec:
x-kubernetes-map-type: atomic
type: object
bearerToken:
description: Bearer token for accessing apiserver.
description: "*Warning: this field shouldn't be used because the
token value appears in clear-text. Prefer using `authorization`.*
\n *Deprecated: this will be removed in a future release.*"
type: string
bearerTokenFile:
description: File to read bearer token for accessing apiserver.
description: "File to read bearer token for accessing apiserver.
\n Cannot be set at the same time as `basicAuth`, `authorization`,
or `bearerToken`. \n *Deprecated: this will be removed in a
future release. Prefer using `authorization`.*"
type: string
host:
description: Host of apiserver. A valid string consisting of a
hostname or IP followed by an optional port number
description: Kubernetes API address consisting of a hostname or
IP address followed by an optional port number.
type: string
tlsConfig:
description: TLS Config to use for accessing apiserver.
description: TLS Config to use for the API server.
properties:
ca:
description: Certificate authority used when verifying server
@ -1524,6 +1600,11 @@ spec:
baseImage:
description: '*Deprecated: use ''spec.image'' instead.*'
type: string
bodySizeLimit:
description: BodySizeLimit defines a per-scrape limit on response body size.
Only valid in Prometheus versions 2.45.0 and newer.
pattern: (^0|([0-9]*[.])?[0-9]+((K|M|G|T|E|P)i?)?B)$
type: string
configMaps:
description: ConfigMaps is a list of ConfigMaps in the same namespace
as the Prometheus object, which shall be mounted into the Prometheus
@ -2369,6 +2450,25 @@ spec:
cannot exceed Limits. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
type: object
restartPolicy:
description: 'RestartPolicy defines the restart behavior of
individual containers in a pod. This field may only be set
for init containers, and the only allowed value is "Always".
For non-init containers or when this field is not specified,
the restart behavior is defined by the Pod''s restart policy
and the container type. Setting the RestartPolicy as "Always"
for the init container will have the following effect: this
init container will be continually restarted on exit until
all regular containers have terminated. Once all regular containers
have completed, all init containers with restartPolicy "Always"
will be shut down. This lifecycle differs from normal init
containers and is often referred to as a "sidecar" container.
Although this init container still starts in the init container
sequence, it does not wait for the container to complete before
proceeding to the next init container. Instead, the next init
container starts immediately after this init container is
started, or after any startupProbe has successfully completed.'
type: string
securityContext:
description: 'SecurityContext defines the security options the
container should be run with. If set, the fields of SecurityContext
@ -2490,8 +2590,8 @@ spec:
in a file on the node should be used. The profile
must be preconfigured on the node to work. Must be
a descending path, relative to the kubelet's configured
seccomp profile location. Must only be set if type
is "Localhost".
seccomp profile location. Must be set if type is "Localhost".
Must NOT be set for any other type.
type: string
type:
description: "type indicates which kind of seccomp profile
@ -2524,16 +2624,12 @@ spec:
type: string
hostProcess:
description: HostProcess determines if a container should
be run as a 'Host Process' container. This field is
alpha-level and will only be honored by components
that enable the WindowsHostProcessContainers feature
flag. Setting this field without the feature flag
will result in errors when validating the Pod. All
of a Pod's containers must have the same effective
HostProcess value (it is not allowed to have a mix
of HostProcess containers and non-HostProcess containers). In
addition, if HostProcess is true then HostNetwork
must also be set to true.
be run as a 'Host Process' container. All of a Pod's
containers must have the same effective HostProcess
value (it is not allowed to have a mix of HostProcess
containers and non-HostProcess containers). In addition,
if HostProcess is true then HostNetwork must also
be set to true.
type: boolean
runAsUserName:
description: The UserName in Windows to run the entrypoint
@ -2849,6 +2945,15 @@ spec:
v2.28.0."
pattern: (^0|([0-9]*[.])?[0-9]+((K|M|G|T|E|P)i?)?B)$
type: string
enforcedKeepDroppedTargets:
description: "When defined, enforcedKeepDroppedTargets specifies a
global limit on the number of targets dropped by relabeling that
will be kept in memory. The value overrides any `spec.keepDroppedTargets`
set by ServiceMonitor, PodMonitor, Probe objects unless `spec.keepDroppedTargets`
is greater than zero and less than `spec.enforcedKeepDroppedTargets`.
\n It requires Prometheus >= v2.47.0."
format: int64
type: integer
enforcedLabelLimit:
description: "When defined, enforcedLabelLimit specifies a global
limit on the number of labels per sample. The value overrides any
@ -2942,6 +3047,7 @@ spec:
- servicemonitors
- podmonitors
- probes
- scrapeconfigs
type: string
required:
- namespace
@ -2954,9 +3060,11 @@ spec:
effective.
properties:
maxSize:
description: Maximum number of exemplars stored in memory for
all series. If not set, Prometheus uses its default value. A
value of zero or less than zero disables the storage.
description: "Maximum number of exemplars stored in memory for
all series. \n exemplar-storage itself must be enabled using
the `spec.enableFeature` option for exemplars to be scraped
in the first place. \n If not set, Prometheus uses its default
value. A value of zero or less than zero disables the storage."
format: int64
type: integer
type: object
@ -3007,7 +3115,7 @@ spec:
description: When true, `spec.namespaceSelector` from all PodMonitor,
ServiceMonitor and Probe objects will be ignored. They will only
discover targets within the namespace of the PodMonitor, ServiceMonitor
and Probe objec.
and Probe object.
type: boolean
image:
description: "Container image name for Prometheus. If specified, it
@ -3879,6 +3987,25 @@ spec:
cannot exceed Limits. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
type: object
restartPolicy:
description: 'RestartPolicy defines the restart behavior of
individual containers in a pod. This field may only be set
for init containers, and the only allowed value is "Always".
For non-init containers or when this field is not specified,
the restart behavior is defined by the Pod''s restart policy
and the container type. Setting the RestartPolicy as "Always"
for the init container will have the following effect: this
init container will be continually restarted on exit until
all regular containers have terminated. Once all regular containers
have completed, all init containers with restartPolicy "Always"
will be shut down. This lifecycle differs from normal init
containers and is often referred to as a "sidecar" container.
Although this init container still starts in the init container
sequence, it does not wait for the container to complete before
proceeding to the next init container. Instead, the next init
container starts immediately after this init container is
started, or after any startupProbe has successfully completed.'
type: string
securityContext:
description: 'SecurityContext defines the security options the
container should be run with. If set, the fields of SecurityContext
@ -4000,8 +4127,8 @@ spec:
in a file on the node should be used. The profile
must be preconfigured on the node to work. Must be
a descending path, relative to the kubelet's configured
seccomp profile location. Must only be set if type
is "Localhost".
seccomp profile location. Must be set if type is "Localhost".
Must NOT be set for any other type.
type: string
type:
description: "type indicates which kind of seccomp profile
@ -4034,16 +4161,12 @@ spec:
type: string
hostProcess:
description: HostProcess determines if a container should
be run as a 'Host Process' container. This field is
alpha-level and will only be honored by components
that enable the WindowsHostProcessContainers feature
flag. Setting this field without the feature flag
will result in errors when validating the Pod. All
of a Pod's containers must have the same effective
HostProcess value (it is not allowed to have a mix
of HostProcess containers and non-HostProcess containers). In
addition, if HostProcess is true then HostNetwork
must also be set to true.
be run as a 'Host Process' container. All of a Pod's
containers must have the same effective HostProcess
value (it is not allowed to have a mix of HostProcess
containers and non-HostProcess containers). In addition,
if HostProcess is true then HostNetwork must also
be set to true.
type: boolean
runAsUserName:
description: The UserName in Windows to run the entrypoint
@ -4322,6 +4445,29 @@ spec:
- name
type: object
type: array
keepDroppedTargets:
description: "Per-scrape limit on the number of targets dropped by
relabeling that will be kept in memory. 0 means no limit. \n It
requires Prometheus >= v2.47.0."
format: int64
type: integer
labelLimit:
description: Per-scrape limit on number of labels that will be accepted
for a sample. Only valid in Prometheus versions 2.45.0 and newer.
format: int64
type: integer
labelNameLengthLimit:
description: Per-scrape limit on the length of label names that will be
accepted for a sample. Only valid in Prometheus versions 2.45.0
and newer.
format: int64
type: integer
labelValueLengthLimit:
description: Per-scrape limit on the length of label values that will
be accepted for a sample. Only valid in Prometheus versions 2.45.0
and newer.
format: int64
type: integer
listenLocal:
description: When true, the Prometheus server listens on the loopback
address instead of the Pod IP's address.
@ -4373,8 +4519,17 @@ spec:
for deletion will be performed on the underlying objects.
type: boolean
podMetadata:
description: PodMetadata configures labels and annotations which are
propagated to the Prometheus pods.
description: "PodMetadata configures labels and annotations which
are propagated to the Prometheus pods. \n The following items are
reserved and cannot be overridden: * \"prometheus\" label, set to
the name of the Prometheus object. * \"app.kubernetes.io/instance\"
label, set to the name of the Prometheus object. * \"app.kubernetes.io/managed-by\"
label, set to \"prometheus-operator\". * \"app.kubernetes.io/name\"
label, set to \"prometheus\". * \"app.kubernetes.io/version\" label,
set to the Prometheus version. * \"operator.prometheus.io/name\"
label, set to the name of the Prometheus object. * \"operator.prometheus.io/shard\"
label, set to the shard number of the Prometheus object. * \"kubectl.kubernetes.io/default-container\"
annotation, set to \"prometheus\"."
properties:
annotations:
additionalProperties:
@ -4693,8 +4848,8 @@ spec:
`basicAuth`, or `oauth2`."
properties:
credentials:
description: The secret's key that contains the credentials
of the request
description: Selects a key of a Secret in the namespace
that contains the credentials for authentication.
properties:
key:
description: The key of the secret to select from. Must
@ -4714,11 +4869,12 @@ spec:
x-kubernetes-map-type: atomic
credentialsFile:
description: File to read a secret from, mutually exclusive
with Credentials (from SafeAuthorization)
with `credentials`.
type: string
type:
description: Set the authentication type. Defaults to Bearer,
Basic will cause an error
description: "Defines the authentication type. The value
is case-insensitive. \n \"Basic\" is not a supported value.
\n Default: \"Bearer\""
type: string
type: object
basicAuth:
@ -4767,13 +4923,13 @@ spec:
x-kubernetes-map-type: atomic
type: object
bearerToken:
description: "*Warning: this field shouldn't used because the
token value appears in clear-text. Prefer using `authorization`.*
description: "*Warning: this field shouldn't be used because
the token value appears in clear-text. Prefer using `authorization`.*
\n *Deprecated: this will be removed in a future release.*"
type: string
bearerTokenFile:
description: "File from which to read bearer token for the URL.
\n *Deprecated: this will be removed in a future release.
description: "File from which to read the bearer token for the
URL. \n *Deprecated: this will be removed in a future release.
Prefer using `authorization`.*"
type: string
filterExternalLabels:
@ -5051,11 +5207,11 @@ spec:
authorization:
description: "Authorization section for the URL. \n It requires
Prometheus >= v2.26.0. \n Cannot be set at the same time as
`sigv4`, `basicAuth`, or `oauth2`."
`sigv4`, `basicAuth`, `oauth2`, or `azureAd`."
properties:
credentials:
description: The secret's key that contains the credentials
of the request
description: Selects a key of a Secret in the namespace
that contains the credentials for authentication.
properties:
key:
description: The key of the secret to select from. Must
@ -5075,16 +5231,44 @@ spec:
x-kubernetes-map-type: atomic
credentialsFile:
description: File to read a secret from, mutually exclusive
with Credentials (from SafeAuthorization)
with `credentials`.
type: string
type:
description: Set the authentication type. Defaults to Bearer,
Basic will cause an error
description: "Defines the authentication type. The value
is case-insensitive. \n \"Basic\" is not a supported value.
\n Default: \"Bearer\""
type: string
type: object
azureAd:
description: "AzureAD for the URL. \n It requires Prometheus
>= v2.45.0. \n Cannot be set at the same time as `authorization`,
`basicAuth`, `oauth2`, or `sigv4`."
properties:
cloud:
description: The Azure Cloud. Options are 'AzurePublic',
'AzureChina', or 'AzureGovernment'.
enum:
- AzureChina
- AzureGovernment
- AzurePublic
type: string
managedIdentity:
description: ManagedIdentity defines the Azure User-assigned
Managed identity.
properties:
clientId:
description: The client id
type: string
required:
- clientId
type: object
required:
- managedIdentity
type: object
basicAuth:
description: "BasicAuth configuration for the URL. \n Cannot
be set at the same time as `sigv4`, `authorization`, or `oauth2`."
be set at the same time as `sigv4`, `authorization`, `oauth2`,
or `azureAd`."
properties:
password:
description: The secret in the service monitor namespace
@ -5128,8 +5312,8 @@ spec:
x-kubernetes-map-type: atomic
type: object
bearerToken:
description: "*Warning: this field shouldn't used because the
token value appears in clear-text. Prefer using `authorization`.*
description: "*Warning: this field shouldn't be used because
the token value appears in clear-text. Prefer using `authorization`.*
\n *Deprecated: this will be removed in a future release.*"
type: string
bearerTokenFile:
@ -5150,12 +5334,12 @@ spec:
metadata to the remote storage.
properties:
send:
description: Whether metric metadata is sent to the remote
storage or not.
description: Defines whether metric metadata is sent to
the remote storage or not.
type: boolean
sendInterval:
description: How frequently metric metadata is sent to the
remote storage.
description: Defines how frequently metric metadata is sent
to the remote storage.
pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
type: string
type: object
@ -5168,7 +5352,7 @@ spec:
oauth2:
description: "OAuth2 configuration for the URL. \n It requires
Prometheus >= v2.27.0. \n Cannot be set at the same time as
`sigv4`, `authorization`, or `basicAuth`."
`sigv4`, `authorization`, `basicAuth`, or `azureAd`."
properties:
clientId:
description: The secret or configmap containing the OAuth2
@ -5315,8 +5499,8 @@ spec:
sigv4:
description: "Sigv4 allows configuring AWS's Signature Version
4 for the URL. \n It requires Prometheus >= v2.26.0. \n Cannot
be set at the same time as `authorization`, `basicAuth`, or
`oauth2`."
be set at the same time as `authorization`, `basicAuth`, `oauth2`,
or `azureAd`."
properties:
accessKey:
description: AccessKey is the AWS API key. If not specified,
@ -5506,16 +5690,16 @@ spec:
writeRelabelConfigs:
description: The list of remote write relabel configurations.
items:
description: 'RelabelConfig allows dynamic rewriting of the
label set, being applied to samples before ingestion. It
defines `<metric_relabel_configs>`-section of Prometheus
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
description: "RelabelConfig allows dynamic rewriting of the
label set for targets, alerts, scraped samples and remote
write samples. \n More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config"
properties:
action:
default: replace
description: Action to perform based on regex matching.
Default is 'replace'. uppercase and lowercase actions
require Prometheus >= 2.36.
description: "Action to perform based on the regex matching.
\n `Uppercase` and `Lowercase` actions require Prometheus
>= v2.36.0. `DropEqual` and `KeepEqual` actions require
Prometheus >= v2.41.0. \n Default: \"Replace\""
enum:
- replace
- Replace
@ -5541,28 +5725,29 @@ spec:
- DropEqual
type: string
modulus:
description: Modulus to take of the hash of the source
label values.
description: "Modulus to take of the hash of the source
label values. \n Only applicable when the action is
`HashMod`."
format: int64
type: integer
regex:
description: Regular expression against which the extracted
value is matched. Default is '(.*)'
value is matched.
type: string
replacement:
description: Replacement value against which a regex replace
is performed if the regular expression matches. Regex
capture groups are available. Default is '$1'
description: "Replacement value against which a Replace
action is performed if the regular expression matches.
\n Regex capture groups are available."
type: string
separator:
description: Separator placed between concatenated source
label values. default is ';'.
description: Separator is the string between concatenated
SourceLabels.
type: string
sourceLabels:
description: The source labels select values from existing
labels. Their content is concatenated using the configured
separator and matched against the configured regular
expression for the replace, keep, and drop actions.
Separator and matched against the configured regular
expression.
items:
description: LabelName is a valid Prometheus label name
which may only contain ASCII letters, numbers, as
@ -5571,9 +5756,10 @@ spec:
type: string
type: array
targetLabel:
description: Label to which the resulting value is written
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
description: "Label to which the resulting string is written
in a replacement. \n It is mandatory for `Replace`,
`HashMod`, `Lowercase`, `Uppercase`, `KeepEqual` and
`DropEqual` actions. \n Regex capture groups are available."
type: string
type: object
type: array
@ -5755,12 +5941,14 @@ spec:
description: Defines the configuration of the Prometheus rules' engine.
properties:
alert:
description: /--rules.alert.*/ command-line arguments
description: "Defines the parameters of the Prometheus rules'
engine. \n Any update to these parameters triggers a restart
of the pods."
properties:
forGracePeriod:
description: Minimum duration between alert and restored 'for'
state. This is maintained only for alerts with configured
'for' time greater than grace period.
description: "Minimum duration between alert and restored
'for' state. \n This is maintained only for alerts with
a configured 'for' time greater than the grace period."
type: string
forOutageTolerance:
description: Max time to tolerate prometheus outage for restoring
@ -5772,6 +5960,12 @@ spec:
type: string
type: object
type: object
sampleLimit:
description: SampleLimit defines per-scrape limit on number of scraped
samples that will be accepted. Only valid in Prometheus versions
2.45.0 and newer.
format: int64
type: integer
scrapeConfigNamespaceSelector:
description: Namespaces to match for ScrapeConfig discovery. An empty
label selector matches all namespaces. A null label selector matches
@ -5980,7 +6174,8 @@ spec:
in a file on the node should be used. The profile must be
preconfigured on the node to work. Must be a descending
path, relative to the kubelet's configured seccomp profile
location. Must only be set if type is "Localhost".
location. Must be set if type is "Localhost". Must NOT be
set for any other type.
type: string
type:
description: "type indicates which kind of seccomp profile
@ -6044,14 +6239,11 @@ spec:
type: string
hostProcess:
description: HostProcess determines if a container should
be run as a 'Host Process' container. This field is alpha-level
and will only be honored by components that enable the WindowsHostProcessContainers
feature flag. Setting this field without the feature flag
will result in errors when validating the Pod. All of a
Pod's containers must have the same effective HostProcess
value (it is not allowed to have a mix of HostProcess containers
and non-HostProcess containers). In addition, if HostProcess
is true then HostNetwork must also be set to true.
be run as a 'Host Process' container. All of a Pod's containers
must have the same effective HostProcess value (it is not
allowed to have a mix of HostProcess containers and non-HostProcess
containers). In addition, if HostProcess is true then HostNetwork
must also be set to true.
type: boolean
runAsUserName:
description: The UserName in Windows to run the entrypoint
@ -6750,6 +6942,51 @@ spec:
items:
type: string
type: array
allocatedResourceStatuses:
additionalProperties:
description: When a controller receives persistentvolume
claim update with ClaimResourceStatus for a resource
that it does not recognize, then it should ignore
that update and let other controllers handle it.
type: string
description: "allocatedResourceStatuses stores status
of resource being resized for the given PVC. Key names
follow standard Kubernetes label syntax. Valid values
are either: * Un-prefixed keys: - storage - the capacity
of the volume. * Custom resources must use implementation-defined
prefixed names such as \"example.com/my-custom-resource\"
Apart from above values - keys that are unprefixed or
have kubernetes.io prefix are considered reserved and
hence may not be used. \n ClaimResourceStatus can be
in any of the following states: - ControllerResizeInProgress:
State set when resize controller starts resizing the
volume in control-plane. - ControllerResizeFailed: State
set when resize has failed in resize controller with
a terminal error. - NodeResizePending: State set when
resize controller has finished resizing the volume but
further resizing of volume is needed on the node. -
NodeResizeInProgress: State set when kubelet starts
resizing the volume. - NodeResizeFailed: State set when
resizing has failed in kubelet with a terminal error.
Transient errors don't set NodeResizeFailed. For example:
if expanding a PVC for more capacity - this field can
be one of the following states: - pvc.status.allocatedResourceStatus['storage']
= \"ControllerResizeInProgress\" - pvc.status.allocatedResourceStatus['storage']
= \"ControllerResizeFailed\" - pvc.status.allocatedResourceStatus['storage']
= \"NodeResizePending\" - pvc.status.allocatedResourceStatus['storage']
= \"NodeResizeInProgress\" - pvc.status.allocatedResourceStatus['storage']
= \"NodeResizeFailed\" When this field is not set, it
means that no resize operation is in progress for the
given PVC. \n A controller that receives PVC update
with previously unknown resourceName or ClaimResourceStatus
should ignore the update for the purpose it was designed.
For example - a controller that only is responsible
for resizing capacity of the volume, should ignore PVC
updates that change other valid resources associated
with PVC. \n This is an alpha field and requires enabling
RecoverVolumeExpansionFailure feature."
type: object
x-kubernetes-map-type: granular
allocatedResources:
additionalProperties:
anyOf:
@ -6757,19 +6994,31 @@ spec:
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: allocatedResources is the storage resource
within AllocatedResources tracks the capacity allocated
to a PVC. It may be larger than the actual capacity
when a volume expansion operation is requested. For
storage quota, the larger value from allocatedResources
and PVC.spec.resources is used. If allocatedResources
is not set, PVC.spec.resources alone is used for quota
calculation. If a volume expansion capacity request
is lowered, allocatedResources is only lowered if there
are no expansion operations in progress and if the actual
volume capacity is equal or lower than the requested
capacity. This is an alpha field and requires enabling
RecoverVolumeExpansionFailure feature.
description: "allocatedResources tracks the resources
allocated to a PVC including its capacity. Key names
follow standard Kubernetes label syntax. Valid values
are either: * Un-prefixed keys: - storage - the capacity
of the volume. * Custom resources must use implementation-defined
prefixed names such as \"example.com/my-custom-resource\"
Apart from above values - keys that are unprefixed or
have kubernetes.io prefix are considered reserved and
hence may not be used. \n Capacity reported here may
be larger than the actual capacity when a volume expansion
operation is requested. For storage quota, the larger
value from allocatedResources and PVC.spec.resources
is used. If allocatedResources is not set, PVC.spec.resources
alone is used for quota calculation. If a volume expansion
capacity request is lowered, allocatedResources is only
lowered if there are no expansion operations in progress
and if the actual volume capacity is equal or lower
than the requested capacity. \n A controller that receives
PVC update with previously unknown resourceName should
ignore the update for the purpose it was designed. For
example - a controller that only is responsible for
resizing capacity of the volume, should ignore PVC updates
that change other valid resources associated with PVC.
\n This is an alpha field and requires enabling RecoverVolumeExpansionFailure
feature."
type: object
capacity:
additionalProperties:
@ -6824,13 +7073,6 @@ spec:
phase:
description: phase represents the current phase of PersistentVolumeClaim.
type: string
resizeStatus:
description: resizeStatus stores status of resize operation.
ResizeStatus is not set by default but when expansion
is complete resizeStatus is set to empty string by resize
controller or kubelet. This is an alpha field and requires
enabling RecoverVolumeExpansionFailure feature.
type: string
type: object
type: object
type: object
@ -6838,6 +7080,12 @@ spec:
description: '*Deprecated: use ''spec.image'' instead. The image''s
tag can be specified as part of the image name.*'
type: string
targetLimit:
description: TargetLimit defines a limit on the number of scraped
targets that will be accepted. Only valid in Prometheus versions
2.45.0 and newer.
format: int64
type: integer
thanos:
description: "Defines the configuration of the optional Thanos sidecar.
\n This section is experimental, it may change significantly without
@ -7635,12 +7883,12 @@ spec:
database (TSDB).
properties:
outOfOrderTimeWindow:
description: Configures how old an out-of-order/out-of-bounds
sample can be w.r.t. the TSDB max time. An out-of-order/out-of-bounds
description: "Configures how old an out-of-order/out-of-bounds
sample can be with respect to the TSDB max time. \n An out-of-order/out-of-bounds
sample is ingested into the TSDB as long as the timestamp of
the sample is >= (TSDB.MaxTime - outOfOrderTimeWindow). Out
of order ingestion is an experimental feature and requires Prometheus
>= v2.39.0.
the sample is >= (TSDB.MaxTime - outOfOrderTimeWindow). \n Out
of order ingestion is an experimental feature. \n It requires
Prometheus >= v2.39.0."
pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
type: string
type: object

View File

@ -1,10 +1,11 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.66.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.11.1
operator.prometheus.io/version: 0.69.1
argocd.argoproj.io/sync-options: ServerSideApply=true
creationTimestamp: null
name: prometheusrules.monitoring.coreos.com

View File

@ -1,10 +1,11 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.66.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.11.1
operator.prometheus.io/version: 0.69.1
argocd.argoproj.io/sync-options: ServerSideApply=true
creationTimestamp: null
name: servicemonitors.monitoring.coreos.com
@ -61,8 +62,8 @@ spec:
description: Authorization section for this endpoint
properties:
credentials:
description: The secret's key that contains the credentials
of the request
description: Selects a key of a Secret in the namespace
that contains the credentials for authentication.
properties:
key:
description: The key of the secret to select from. Must
@ -81,8 +82,9 @@ spec:
type: object
x-kubernetes-map-type: atomic
type:
description: Set the authentication type. Defaults to Bearer,
Basic will cause an error
description: "Defines the authentication type. The value
is case-insensitive. \n \"Basic\" is not a supported value.
\n Default: \"Bearer\""
type: string
type: object
basicAuth:
@ -182,16 +184,16 @@ spec:
description: MetricRelabelConfigs to apply to samples before
ingestion.
items:
description: 'RelabelConfig allows dynamic rewriting of the
label set, being applied to samples before ingestion. It
defines `<metric_relabel_configs>`-section of Prometheus
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
description: "RelabelConfig allows dynamic rewriting of the
label set for targets, alerts, scraped samples and remote
write samples. \n More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config"
properties:
action:
default: replace
description: Action to perform based on regex matching.
Default is 'replace'. uppercase and lowercase actions
require Prometheus >= 2.36.
description: "Action to perform based on the regex matching.
\n `Uppercase` and `Lowercase` actions require Prometheus
>= v2.36.0. `DropEqual` and `KeepEqual` actions require
Prometheus >= v2.41.0. \n Default: \"Replace\""
enum:
- replace
- Replace
@ -217,28 +219,29 @@ spec:
- DropEqual
type: string
modulus:
description: Modulus to take of the hash of the source
label values.
description: "Modulus to take of the hash of the source
label values. \n Only applicable when the action is
`HashMod`."
format: int64
type: integer
regex:
description: Regular expression against which the extracted
value is matched. Default is '(.*)'
value is matched.
type: string
replacement:
description: Replacement value against which a regex replace
is performed if the regular expression matches. Regex
capture groups are available. Default is '$1'
description: "Replacement value against which a Replace
action is performed if the regular expression matches.
\n Regex capture groups are available."
type: string
separator:
description: Separator placed between concatenated source
label values. default is ';'.
description: Separator is the string between concatenated
SourceLabels.
type: string
sourceLabels:
description: The source labels select values from existing
labels. Their content is concatenated using the configured
separator and matched against the configured regular
expression for the replace, keep, and drop actions.
Separator and matched against the configured regular
expression.
items:
description: LabelName is a valid Prometheus label name
which may only contain ASCII letters, numbers, as
@ -247,9 +250,10 @@ spec:
type: string
type: array
targetLabel:
description: Label to which the resulting value is written
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
description: "Label to which the resulting string is written
in a replacement. \n It is mandatory for `Replace`,
`HashMod`, `Lowercase`, `Uppercase`, `KeepEqual` and
`DropEqual` actions. \n Regex capture groups are available."
type: string
type: object
type: array
@ -366,16 +370,16 @@ spec:
is available via the `__tmp_prometheus_job_name` label. More
info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
items:
description: 'RelabelConfig allows dynamic rewriting of the
label set, being applied to samples before ingestion. It
defines `<metric_relabel_configs>`-section of Prometheus
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
description: "RelabelConfig allows dynamic rewriting of the
label set for targets, alerts, scraped samples and remote
write samples. \n More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config"
properties:
action:
default: replace
description: Action to perform based on regex matching.
Default is 'replace'. uppercase and lowercase actions
require Prometheus >= 2.36.
description: "Action to perform based on the regex matching.
\n `Uppercase` and `Lowercase` actions require Prometheus
>= v2.36.0. `DropEqual` and `KeepEqual` actions require
Prometheus >= v2.41.0. \n Default: \"Replace\""
enum:
- replace
- Replace
@ -401,28 +405,29 @@ spec:
- DropEqual
type: string
modulus:
description: Modulus to take of the hash of the source
label values.
description: "Modulus to take of the hash of the source
label values. \n Only applicable when the action is
`HashMod`."
format: int64
type: integer
regex:
description: Regular expression against which the extracted
value is matched. Default is '(.*)'
value is matched.
type: string
replacement:
description: Replacement value against which a regex replace
is performed if the regular expression matches. Regex
capture groups are available. Default is '$1'
description: "Replacement value against which a Replace
action is performed if the regular expression matches.
\n Regex capture groups are available."
type: string
separator:
description: Separator placed between concatenated source
label values. default is ';'.
description: Separator is the string between concatenated
SourceLabels.
type: string
sourceLabels:
description: The source labels select values from existing
labels. Their content is concatenated using the configured
separator and matched against the configured regular
expression for the replace, keep, and drop actions.
Separator and matched against the configured regular
expression.
items:
description: LabelName is a valid Prometheus label name
which may only contain ASCII letters, numbers, as
@ -431,9 +436,10 @@ spec:
type: string
type: array
targetLabel:
description: Label to which the resulting value is written
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
description: "Label to which the resulting string is written
in a replacement. \n It is mandatory for `Replace`,
`HashMod`, `Lowercase`, `Uppercase`, `KeepEqual` and
`DropEqual` actions. \n Regex capture groups are available."
type: string
type: object
type: array
@ -602,6 +608,12 @@ spec:
the given Service, the `job` label of the metrics defaults to the
name of the Kubernetes Service."
type: string
keepDroppedTargets:
description: "Per-scrape limit on the number of targets dropped by
relabeling that will be kept in memory. 0 means no limit. \n It
requires Prometheus >= v2.47.0."
format: int64
type: integer
labelLimit:
description: Per-scrape limit on number of labels that will be accepted
for a sample. Only valid in Prometheus versions 2.27.0 and newer.

View File

@ -1,10 +1,11 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.66.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.11.1
operator.prometheus.io/version: 0.69.1
argocd.argoproj.io/sync-options: ServerSideApply=true
creationTimestamp: null
name: thanosrulers.monitoring.coreos.com
@ -1823,6 +1824,25 @@ spec:
cannot exceed Limits. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
type: object
restartPolicy:
description: 'RestartPolicy defines the restart behavior of
individual containers in a pod. This field may only be set
for init containers, and the only allowed value is "Always".
For non-init containers or when this field is not specified,
the restart behavior is defined by the Pod''s restart policy
and the container type. Setting the RestartPolicy as "Always"
for the init container will have the following effect: this
init container will be continually restarted on exit until
all regular containers have terminated. Once all regular containers
have completed, all init containers with restartPolicy "Always"
will be shut down. This lifecycle differs from normal init
containers and is often referred to as a "sidecar" container.
Although this init container still starts in the init container
sequence, it does not wait for the container to complete before
proceeding to the next init container. Instead, the next init
container starts immediately after this init container is
started, or after any startupProbe has successfully completed.'
type: string
securityContext:
description: 'SecurityContext defines the security options the
container should be run with. If set, the fields of SecurityContext
@ -1944,8 +1964,8 @@ spec:
in a file on the node should be used. The profile
must be preconfigured on the node to work. Must be
a descending path, relative to the kubelet's configured
seccomp profile location. Must only be set if type
is "Localhost".
seccomp profile location. Must be set if type is "Localhost".
Must NOT be set for any other type.
type: string
type:
description: "type indicates which kind of seccomp profile
@ -1978,16 +1998,12 @@ spec:
type: string
hostProcess:
description: HostProcess determines if a container should
be run as a 'Host Process' container. This field is
alpha-level and will only be honored by components
that enable the WindowsHostProcessContainers feature
flag. Setting this field without the feature flag
will result in errors when validating the Pod. All
of a Pod's containers must have the same effective
HostProcess value (it is not allowed to have a mix
of HostProcess containers and non-HostProcess containers). In
addition, if HostProcess is true then HostNetwork
must also be set to true.
be run as a 'Host Process' container. All of a Pod's
containers must have the same effective HostProcess
value (it is not allowed to have a mix of HostProcess
containers and non-HostProcess containers). In addition,
if HostProcess is true then HostNetwork must also
be set to true.
type: boolean
runAsUserName:
description: The UserName in Windows to run the entrypoint
@ -2306,6 +2322,7 @@ spec:
- servicemonitors
- podmonitors
- probes
- scrapeconfigs
type: string
required:
- namespace
@ -3327,6 +3344,25 @@ spec:
cannot exceed Limits. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
type: object
restartPolicy:
description: 'RestartPolicy defines the restart behavior of
individual containers in a pod. This field may only be set
for init containers, and the only allowed value is "Always".
For non-init containers or when this field is not specified,
the restart behavior is defined by the Pod''s restart policy
and the container type. Setting the RestartPolicy as "Always"
for the init container will have the following effect: this
init container will be continually restarted on exit until
all regular containers have terminated. Once all regular containers
have completed, all init containers with restartPolicy "Always"
will be shut down. This lifecycle differs from normal init
containers and is often referred to as a "sidecar" container.
Although this init container still starts in the init container
sequence, it does not wait for the container to complete before
proceeding to the next init container. Instead, the next init
container starts immediately after this init container is
started, or after any startupProbe has successfully completed.'
type: string
securityContext:
description: 'SecurityContext defines the security options the
container should be run with. If set, the fields of SecurityContext
@ -3448,8 +3484,8 @@ spec:
in a file on the node should be used. The profile
must be preconfigured on the node to work. Must be
a descending path, relative to the kubelet's configured
seccomp profile location. Must only be set if type
is "Localhost".
seccomp profile location. Must be set if type is "Localhost".
Must NOT be set for any other type.
type: string
type:
description: "type indicates which kind of seccomp profile
@ -3482,16 +3518,12 @@ spec:
type: string
hostProcess:
description: HostProcess determines if a container should
be run as a 'Host Process' container. This field is
alpha-level and will only be honored by components
that enable the WindowsHostProcessContainers feature
flag. Setting this field without the feature flag
will result in errors when validating the Pod. All
of a Pod's containers must have the same effective
HostProcess value (it is not allowed to have a mix
of HostProcess containers and non-HostProcess containers). In
addition, if HostProcess is true then HostNetwork
must also be set to true.
be run as a 'Host Process' container. All of a Pod's
containers must have the same effective HostProcess
value (it is not allowed to have a mix of HostProcess
containers and non-HostProcess containers). In addition,
if HostProcess is true then HostNetwork must also
be set to true.
type: boolean
runAsUserName:
description: The UserName in Windows to run the entrypoint
@ -3841,8 +3873,14 @@ spec:
for deletion will be performed on the underlying objects.
type: boolean
podMetadata:
description: PodMetadata contains Labels and Annotations gets propagated
to the thanos ruler pods.
description: "PodMetadata configures labels and annotations which
are propagated to the ThanosRuler pods. \n The following items are
reserved and cannot be overridden: * \"app.kubernetes.io/name\"
label, set to \"thanos-ruler\". * \"app.kubernetes.io/managed-by\"
label, set to \"prometheus-operator\". * \"app.kubernetes.io/instance\"
label, set to the name of the ThanosRuler instance. * \"thanos-ruler\"
label, set to the name of the ThanosRuler instance. * \"kubectl.kubernetes.io/default-container\"
annotation, set to \"thanos-ruler\"."
properties:
annotations:
additionalProperties:
@ -4168,7 +4206,8 @@ spec:
in a file on the node should be used. The profile must be
preconfigured on the node to work. Must be a descending
path, relative to the kubelet's configured seccomp profile
location. Must only be set if type is "Localhost".
location. Must be set if type is "Localhost". Must NOT be
set for any other type.
type: string
type:
description: "type indicates which kind of seccomp profile
@ -4232,14 +4271,11 @@ spec:
type: string
hostProcess:
description: HostProcess determines if a container should
be run as a 'Host Process' container. This field is alpha-level
and will only be honored by components that enable the WindowsHostProcessContainers
feature flag. Setting this field without the feature flag
will result in errors when validating the Pod. All of a
Pod's containers must have the same effective HostProcess
value (it is not allowed to have a mix of HostProcess containers
and non-HostProcess containers). In addition, if HostProcess
is true then HostNetwork must also be set to true.
be run as a 'Host Process' container. All of a Pod's containers
must have the same effective HostProcess value (it is not
allowed to have a mix of HostProcess containers and non-HostProcess
containers). In addition, if HostProcess is true then HostNetwork
must also be set to true.
type: boolean
runAsUserName:
description: The UserName in Windows to run the entrypoint
@ -4819,6 +4855,51 @@ spec:
items:
type: string
type: array
allocatedResourceStatuses:
additionalProperties:
description: When a controller receives persistentvolume
claim update with ClaimResourceStatus for a resource
that it does not recognize, then it should ignore
that update and let other controllers handle it.
type: string
description: "allocatedResourceStatuses stores status
of resource being resized for the given PVC. Key names
follow standard Kubernetes label syntax. Valid values
are either: * Un-prefixed keys: - storage - the capacity
of the volume. * Custom resources must use implementation-defined
prefixed names such as \"example.com/my-custom-resource\"
Apart from above values - keys that are unprefixed or
have kubernetes.io prefix are considered reserved and
hence may not be used. \n ClaimResourceStatus can be
in any of the following states: - ControllerResizeInProgress:
State set when resize controller starts resizing the
volume in control-plane. - ControllerResizeFailed: State
set when resize has failed in resize controller with
a terminal error. - NodeResizePending: State set when
resize controller has finished resizing the volume but
further resizing of volume is needed on the node. -
NodeResizeInProgress: State set when kubelet starts
resizing the volume. - NodeResizeFailed: State set when
resizing has failed in kubelet with a terminal error.
Transient errors don't set NodeResizeFailed. For example:
if expanding a PVC for more capacity - this field can
be one of the following states: - pvc.status.allocatedResourceStatus['storage']
= \"ControllerResizeInProgress\" - pvc.status.allocatedResourceStatus['storage']
= \"ControllerResizeFailed\" - pvc.status.allocatedResourceStatus['storage']
= \"NodeResizePending\" - pvc.status.allocatedResourceStatus['storage']
= \"NodeResizeInProgress\" - pvc.status.allocatedResourceStatus['storage']
= \"NodeResizeFailed\" When this field is not set, it
means that no resize operation is in progress for the
given PVC. \n A controller that receives PVC update
with previously unknown resourceName or ClaimResourceStatus
should ignore the update for the purpose it was designed.
For example - a controller that only is responsible
for resizing capacity of the volume, should ignore PVC
updates that change other valid resources associated
with PVC. \n This is an alpha field and requires enabling
RecoverVolumeExpansionFailure feature."
type: object
x-kubernetes-map-type: granular
allocatedResources:
additionalProperties:
anyOf:
@ -4826,19 +4907,31 @@ spec:
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: allocatedResources is the storage resource
within AllocatedResources tracks the capacity allocated
to a PVC. It may be larger than the actual capacity
when a volume expansion operation is requested. For
storage quota, the larger value from allocatedResources
and PVC.spec.resources is used. If allocatedResources
is not set, PVC.spec.resources alone is used for quota
calculation. If a volume expansion capacity request
is lowered, allocatedResources is only lowered if there
are no expansion operations in progress and if the actual
volume capacity is equal or lower than the requested
capacity. This is an alpha field and requires enabling
RecoverVolumeExpansionFailure feature.
description: "allocatedResources tracks the resources
allocated to a PVC including its capacity. Key names
follow standard Kubernetes label syntax. Valid values
are either: * Un-prefixed keys: - storage - the capacity
of the volume. * Custom resources must use implementation-defined
prefixed names such as \"example.com/my-custom-resource\"
Apart from above values - keys that are unprefixed or
have kubernetes.io prefix are considered reserved and
hence may not be used. \n Capacity reported here may
be larger than the actual capacity when a volume expansion
operation is requested. For storage quota, the larger
value from allocatedResources and PVC.spec.resources
is used. If allocatedResources is not set, PVC.spec.resources
alone is used for quota calculation. If a volume expansion
capacity request is lowered, allocatedResources is only
lowered if there are no expansion operations in progress
and if the actual volume capacity is equal or lower
than the requested capacity. \n A controller that receives
PVC update with previously unknown resourceName should
ignore the update for the purpose it was designed. For
example - a controller that only is responsible for
resizing capacity of the volume, should ignore PVC updates
that change other valid resources associated with PVC.
\n This is an alpha field and requires enabling RecoverVolumeExpansionFailure
feature."
type: object
capacity:
additionalProperties:
@ -4893,13 +4986,6 @@ spec:
phase:
description: phase represents the current phase of PersistentVolumeClaim.
type: string
resizeStatus:
description: resizeStatus stores status of resize operation.
ResizeStatus is not set by default but when expansion
is complete resizeStatus is set to empty string by resize
controller or kubelet. This is an alpha field and requires
enabling RecoverVolumeExpansionFailure feature.
type: string
type: object
type: object
type: object

View File

@ -6,7 +6,7 @@ annotations:
- name: Upstream Project
url: https://github.com/grafana/grafana
apiVersion: v2
appVersion: 10.0.3
appVersion: 10.1.5
description: The leading tool for querying and visualizing time series and metrics.
home: https://grafana.net
icon: https://raw.githubusercontent.com/grafana/grafana/master/public/img/logo_transparent_400x.png
@ -30,4 +30,4 @@ sources:
- https://github.com/grafana/grafana
- https://github.com/grafana/helm-charts
type: application
version: 6.58.9
version: 7.0.8

View File

@ -46,6 +46,13 @@ You have to add --force to your helm upgrade command as the labels of the chart
This version requires Helm >= 3.1.0.
### To 7.0.0
For consistency with other Helm charts, the `global.image.registry` parameter was renamed
to `global.imageRegistry`. If you were not previously setting `global.image.registry`, no action
is required on upgrade. If you were previously setting `global.image.registry`, you will
need to instead set `global.imageRegistry`.
## Configuration
| Parameter | Description | Default |
@ -53,11 +60,13 @@ This version requires Helm >= 3.1.0.
| `replicas` | Number of nodes | `1` |
| `podDisruptionBudget.minAvailable` | Pod disruption minimum available | `nil` |
| `podDisruptionBudget.maxUnavailable` | Pod disruption maximum unavailable | `nil` |
| `podDisruptionBudget.apiVersion` | Pod disruption apiVersion | `nil` |
| `deploymentStrategy` | Deployment strategy | `{ "type": "RollingUpdate" }` |
| `livenessProbe` | Liveness Probe settings | `{ "httpGet": { "path": "/api/health", "port": 3000 }, "initialDelaySeconds": 60, "timeoutSeconds": 30, "failureThreshold": 10 }` |
| `readinessProbe` | Readiness Probe settings | `{ "httpGet": { "path": "/api/health", "port": 3000 } }`|
| `securityContext` | Deployment securityContext | `{"runAsUser": 472, "runAsGroup": 472, "fsGroup": 472}` |
| `priorityClassName` | Name of Priority Class to assign pods | `nil` |
| `image.registry` | Image registry | `docker.io` |
| `image.repository` | Image repository | `grafana/grafana` |
| `image.tag` | Overrides the Grafana image tag whose default is the chart appVersion (`Must be >= 5.0.0`) | `` |
| `image.sha` | Image sha (optional) | `` |
@ -76,6 +85,7 @@ This version requires Helm >= 3.1.0.
| `service.loadBalancerIP` | IP address to assign to load balancer (if supported) | `nil` |
| `service.loadBalancerSourceRanges` | list of IP CIDRs allowed access to lb (if supported) | `[]` |
| `service.externalIPs` | service external IP addresses | `[]` |
| `service.externalTrafficPolicy` | change the default externalTrafficPolicy | `nil` |
| `headlessService` | Create a headless service | `false` |
| `extraExposePorts` | Additional service ports for sidecar containers| `[]` |
| `hostAliases` | adds rules to the pod's /etc/hosts | `[]` |
@ -85,7 +95,7 @@ This version requires Helm >= 3.1.0.
| `ingress.path` | Ingress accepted path | `/` |
| `ingress.pathType` | Ingress type of path | `Prefix` |
| `ingress.hosts` | Ingress accepted hostnames | `["chart-example.local"]` |
| `ingress.extraPaths` | Ingress extra paths to prepend to every host configuration. Useful when configuring [custom actions with AWS ALB Ingress Controller](https://kubernetes-sigs.github.io/aws-alb-ingress-controller/guide/ingress/annotation/#actions). Requires `ingress.hosts` to have one or more host entries. | `[]` |
| `ingress.extraPaths` | Ingress extra paths to prepend to every host configuration. Useful when configuring [custom actions with AWS ALB Ingress Controller](https://kubernetes-sigs.github.io/aws-load-balancer-controller/v2.6/guide/ingress/annotations/#actions). Requires `ingress.hosts` to have one or more host entries. | `[]` |
| `ingress.tls` | Ingress TLS configuration | `[]` |
| `ingress.ingressClassName` | Ingress Class Name. MAY be required for Kubernetes versions >= 1.18 | `""` |
| `resources` | CPU/Memory resource requests/limits | `{}` |
@ -110,6 +120,7 @@ This version requires Helm >= 3.1.0.
| `persistence.inMemory.enabled` | If persistence is not enabled, whether to mount the local storage in-memory to improve performance | `false` |
| `persistence.inMemory.sizeLimit` | SizeLimit for the in-memory local storage | `nil` |
| `initChownData.enabled` | If false, don't reset data ownership at startup | true |
| `initChownData.image.registry` | init-chown-data container image registry | `docker.io` |
| `initChownData.image.repository` | init-chown-data container image repository | `busybox` |
| `initChownData.image.tag` | init-chown-data container image tag | `1.31.1` |
| `initChownData.image.sha` | init-chown-data container image sha (optional)| `""` |
@ -136,6 +147,7 @@ This version requires Helm >= 3.1.0.
| `dashboards` | Dashboards to import | `{}` |
| `dashboardsConfigMaps` | ConfigMaps reference that contains dashboards | `{}` |
| `grafana.ini` | Grafana's primary configuration | `{}` |
| `global.imageRegistry` | Global image pull registry for all images. | `null` |
| `global.imagePullSecrets` | Global image pull secrets (can be templated). Allows either an array of {name: pullSecret} maps (k8s-style), or an array of strings (more common helm-style). | `[]` |
| `ldap.enabled` | Enable LDAP authentication | `false` |
| `ldap.existingSecret` | The name of an existing secret containing the `ldap.toml` file, this must have the key `ldap-toml`. | `""` |
@ -146,7 +158,8 @@ This version requires Helm >= 3.1.0.
| `podLabels` | Pod labels | `{}` |
| `podPortName` | Name of the grafana port on the pod | `grafana` |
| `lifecycleHooks` | Lifecycle hooks for postStart and preStop [Example](https://kubernetes.io/docs/tasks/configure-pod-container/attach-handler-lifecycle-event/#define-poststart-and-prestop-handlers) | `{}` |
| `sidecar.image.repository` | Sidecar image repository | `quay.io/kiwigrid/k8s-sidecar` |
| `sidecar.image.registry` | Sidecar image registry | `quay.io` |
| `sidecar.image.repository` | Sidecar image repository | `kiwigrid/k8s-sidecar` |
| `sidecar.image.tag` | Sidecar image tag | `1.24.6` |
| `sidecar.image.sha` | Sidecar image sha (optional) | `""` |
| `sidecar.imagePullPolicy` | Sidecar image pull policy | `IfNotPresent` |
@ -161,7 +174,7 @@ This version requires Helm >= 3.1.0.
| `sidecar.alerts.resource` | Should the sidecar look into secrets, configmaps or both. | `both` |
| `sidecar.alerts.reloadURL` | Full URL of the alerting configuration reload API endpoint, to invoke after a config-map change | `"http://localhost:3000/api/admin/provisioning/alerting/reload"` |
| `sidecar.alerts.skipReload` | Enabling this omits defining the REQ_URL and REQ_METHOD environment variables | `false` |
| `sidecar.alerts.initDatasources` | Set to true to deploy the datasource sidecar as an initContainer in addition to a container. This is needed if skipReload is true, to load any alerts defined at startup time. | `false` |
| `sidecar.alerts.initAlerts` | Set to true to deploy the alerts sidecar as an initContainer. This is needed if skipReload is true, to load any alerts defined at startup time. | `false` |
| `sidecar.alerts.extraMounts` | Additional alerts sidecar volume mounts. | `[]` |
| `sidecar.dashboards.enabled` | Enables the cluster wide search for dashboards and adds/updates/deletes them in grafana | `false` |
| `sidecar.dashboards.SCProvider` | Enables creation of sidecar provider | `true` |
@ -225,14 +238,16 @@ This version requires Helm >= 3.1.0.
| `command` | Define command to be executed by grafana container at startup | `nil` |
| `args` | Define additional args if command is used | `nil` |
| `testFramework.enabled` | Whether to create test-related resources | `true` |
| `testFramework.image` | `test-framework` image repository. | `bats/bats` |
| `testFramework.tag` | `test-framework` image tag. | `v1.4.1` |
| `testFramework.image.registry` | `test-framework` image registry. | `docker.io` |
| `testFramework.image.repository` | `test-framework` image repository. | `bats/bats` |
| `testFramework.image.tag` | `test-framework` image tag. | `v1.4.1` |
| `testFramework.imagePullPolicy` | `test-framework` image pull policy. | `IfNotPresent` |
| `testFramework.securityContext` | `test-framework` securityContext | `{}` |
| `downloadDashboards.env` | Environment variables to be passed to the `download-dashboards` container | `{}` |
| `downloadDashboards.envFromSecret` | Name of a Kubernetes secret (must be manually created in the same namespace) containing values to be added to the environment. Can be templated | `""` |
| `downloadDashboards.resources` | Resources of `download-dashboards` container | `{}` |
| `downloadDashboardsImage.repository` | Curl docker image repo | `curlimages/curl` |
| `downloadDashboardsImage.registry` | Curl docker image registry | `docker.io` |
| `downloadDashboardsImage.repository` | Curl docker image repository | `curlimages/curl` |
| `downloadDashboardsImage.tag` | Curl docker image tag | `7.73.0` |
| `downloadDashboardsImage.sha` | Curl docker image sha (optional) | `""` |
| `downloadDashboardsImage.pullPolicy` | Curl docker image pull policy | `IfNotPresent` |
@ -249,6 +264,7 @@ This version requires Helm >= 3.1.0.
| `serviceMonitor.metricRelabelings` | MetricRelabelConfigs to apply to samples before ingestion. | `[]` |
| `revisionHistoryLimit` | Number of old ReplicaSets to retain | `10` |
| `imageRenderer.enabled` | Enable the image-renderer deployment & service | `false` |
| `imageRenderer.image.registry` | image-renderer Image registry | `docker.io` |
| `imageRenderer.image.repository` | image-renderer Image repository | `grafana/grafana-image-renderer` |
| `imageRenderer.image.tag` | image-renderer Image tag | `latest` |
| `imageRenderer.image.sha` | image-renderer Image sha (optional) | `""` |
@ -257,6 +273,7 @@ This version requires Helm >= 3.1.0.
| `imageRenderer.envValueFrom` | Environment variables for image-renderer from alternate sources. See the API docs on [EnvVarSource](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.17/#envvarsource-v1-core) for format details. Can be templated | `{}` |
| `imageRenderer.serviceAccountName` | image-renderer deployment serviceAccountName | `""` |
| `imageRenderer.securityContext` | image-renderer deployment securityContext | `{}` |
| `imageRenderer.podAnnotations` | image-renderer pod annotations | `{}` |
| `imageRenderer.hostAliases` | image-renderer deployment Host Aliases | `[]` |
| `imageRenderer.priorityClassName` | image-renderer deployment priority class | `''` |
| `imageRenderer.service.enabled` | Enable the image-renderer service | `true` |

View File

@ -158,7 +158,9 @@ Return the appropriate apiVersion for Horizontal Pod Autoscaler.
Return the appropriate apiVersion for podDisruptionBudget.
*/}}
{{- define "grafana.podDisruptionBudget.apiVersion" -}}
{{- if $.Capabilities.APIVersions.Has "policy/v1/PodDisruptionBudget" }}
{{- if $.Values.podDisruptionBudget.apiVersion }}
{{- print $.Values.podDisruptionBudget.apiVersion }}
{{- else if $.Capabilities.APIVersions.Has "policy/v1/PodDisruptionBudget" }}
{{- print "policy/v1" }}
{{- else }}
{{- print "policy/v1beta1" }}
@ -199,3 +201,27 @@ Formats imagePullSecrets. Input is (dict "root" . "imagePullSecrets" .{specific
{{- end }}
{{- end }}
{{- end }}
{{/*
Checks whether or not the configSecret secret has to be created.
Outputs "true" when any entry of .Values.datasources or .Values.notifiers has a
"secret" key, or when any entry of .Values.alerting has a "secret" or
"secretFile" key; otherwise outputs "false".
*/}}
{{- define "grafana.shouldCreateConfigSecret" -}}
{{- $secretFound := false -}}
{{- range $key, $value := .Values.datasources }}
{{- if hasKey $value "secret" }}
{{- $secretFound = true}}
{{- end }}
{{- end }}
{{- range $key, $value := .Values.notifiers }}
{{- if hasKey $value "secret" }}
{{- $secretFound = true}}
{{- end }}
{{- end }}
{{- range $key, $value := .Values.alerting }}
{{- if (or (hasKey $value "secret") (hasKey $value "secretFile")) }}
{{- $secretFound = true}}
{{- end }}
{{- end }}
{{- $secretFound}}
{{- end -}}

View File

@ -17,15 +17,16 @@ hostAliases:
{{- with .Values.priorityClassName }}
priorityClassName: {{ . }}
{{- end }}
{{- if ( or .Values.persistence.enabled .Values.dashboards .Values.extraInitContainers (and .Values.sidecar.datasources.enabled .Values.sidecar.datasources.initDatasources) (and .Values.sidecar.notifiers.enabled .Values.sidecar.notifiers.initNotifiers)) }}
{{- if ( or .Values.persistence.enabled .Values.dashboards .Values.extraInitContainers (and .Values.sidecar.alerts.enabled .Values.sidecar.alerts.initAlerts) (and .Values.sidecar.datasources.enabled .Values.sidecar.datasources.initDatasources) (and .Values.sidecar.notifiers.enabled .Values.sidecar.notifiers.initNotifiers)) }}
initContainers:
{{- end }}
{{- if ( and .Values.persistence.enabled .Values.initChownData.enabled ) }}
- name: init-chown-data
{{- $registry := .Values.global.imageRegistry | default .Values.initChownData.image.registry -}}
{{- if .Values.initChownData.image.sha }}
image: "{{ .Values.initChownData.image.repository }}:{{ .Values.initChownData.image.tag }}@sha256:{{ .Values.initChownData.image.sha }}"
image: "{{ $registry }}/{{ .Values.initChownData.image.repository }}:{{ .Values.initChownData.image.tag }}@sha256:{{ .Values.initChownData.image.sha }}"
{{- else }}
image: "{{ .Values.initChownData.image.repository }}:{{ .Values.initChownData.image.tag }}"
image: "{{ $registry }}/{{ .Values.initChownData.image.repository }}:{{ .Values.initChownData.image.tag }}"
{{- end }}
imagePullPolicy: {{ .Values.initChownData.image.pullPolicy }}
{{- with .Values.initChownData.securityContext }}
@ -50,10 +51,11 @@ initContainers:
{{- end }}
{{- if .Values.dashboards }}
- name: download-dashboards
{{- $registry := .Values.global.imageRegistry | default .Values.downloadDashboardsImage.registry -}}
{{- if .Values.downloadDashboardsImage.sha }}
image: "{{ .Values.downloadDashboardsImage.repository }}:{{ .Values.downloadDashboardsImage.tag }}@sha256:{{ .Values.downloadDashboardsImage.sha }}"
image: "{{ $registry }}/{{ .Values.downloadDashboardsImage.repository }}:{{ .Values.downloadDashboardsImage.tag }}@sha256:{{ .Values.downloadDashboardsImage.sha }}"
{{- else }}
image: "{{ .Values.downloadDashboardsImage.repository }}:{{ .Values.downloadDashboardsImage.tag }}"
image: "{{ $registry }}/{{ .Values.downloadDashboardsImage.repository }}:{{ .Values.downloadDashboardsImage.tag }}"
{{- end }}
imagePullPolicy: {{ .Values.downloadDashboardsImage.pullPolicy }}
command: ["/bin/sh"]
@ -96,12 +98,86 @@ initContainers:
readOnly: {{ .readOnly }}
{{- end }}
{{- end }}
{{- if and .Values.sidecar.alerts.enabled .Values.sidecar.alerts.initAlerts }}
- name: {{ include "grafana.name" . }}-init-sc-alerts
{{- $registry := .Values.global.imageRegistry | default .Values.sidecar.image.registry -}}
{{- if .Values.sidecar.image.sha }}
image: "{{ $registry }}/{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}"
{{- else }}
image: "{{ $registry }}/{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}"
{{- end }}
imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }}
env:
{{- range $key, $value := .Values.sidecar.alerts.env }}
- name: "{{ $key }}"
value: "{{ $value }}"
{{- end }}
{{- if .Values.sidecar.alerts.ignoreAlreadyProcessed }}
- name: IGNORE_ALREADY_PROCESSED
value: "true"
{{- end }}
- name: METHOD
value: "LIST"
- name: LABEL
value: "{{ .Values.sidecar.alerts.label }}"
{{- with .Values.sidecar.alerts.labelValue }}
- name: LABEL_VALUE
value: {{ quote . }}
{{- end }}
{{- if or .Values.sidecar.logLevel .Values.sidecar.alerts.logLevel }}
- name: LOG_LEVEL
value: {{ default .Values.sidecar.logLevel .Values.sidecar.alerts.logLevel }}
{{- end }}
- name: FOLDER
value: "/etc/grafana/provisioning/alerting"
- name: RESOURCE
value: {{ quote .Values.sidecar.alerts.resource }}
{{- with .Values.sidecar.enableUniqueFilenames }}
- name: UNIQUE_FILENAMES
value: "{{ . }}"
{{- end }}
{{- with .Values.sidecar.alerts.searchNamespace }}
- name: NAMESPACE
value: {{ . | join "," | quote }}
{{- end }}
{{- with .Values.sidecar.alerts.skipTlsVerify }}
- name: SKIP_TLS_VERIFY
value: {{ quote . }}
{{- end }}
{{- with .Values.sidecar.alerts.script }}
- name: SCRIPT
value: {{ quote . }}
{{- end }}
{{- with .Values.sidecar.livenessProbe }}
livenessProbe:
{{- toYaml . | nindent 6 }}
{{- end }}
{{- with .Values.sidecar.readinessProbe }}
readinessProbe:
{{- toYaml . | nindent 6 }}
{{- end }}
{{- with .Values.sidecar.resources }}
resources:
{{- toYaml . | nindent 6 }}
{{- end }}
{{- with .Values.sidecar.securityContext }}
securityContext:
{{- toYaml . | nindent 6 }}
{{- end }}
volumeMounts:
- name: sc-alerts-volume
mountPath: "/etc/grafana/provisioning/alerting"
{{- with .Values.sidecar.alerts.extraMounts }}
{{- toYaml . | trim | nindent 6 }}
{{- end }}
{{- end }}
{{- if and .Values.sidecar.datasources.enabled .Values.sidecar.datasources.initDatasources }}
- name: {{ include "grafana.name" . }}-init-sc-datasources
{{- $registry := .Values.global.imageRegistry | default .Values.sidecar.image.registry -}}
{{- if .Values.sidecar.image.sha }}
image: "{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}"
image: "{{ $registry }}/{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}"
{{- else }}
image: "{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}"
image: "{{ $registry }}/{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}"
{{- end }}
imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }}
env:
@ -155,10 +231,11 @@ initContainers:
{{- end }}
{{- if and .Values.sidecar.notifiers.enabled .Values.sidecar.notifiers.initNotifiers }}
- name: {{ include "grafana.name" . }}-init-sc-notifiers
{{- $registry := .Values.global.imageRegistry | default .Values.sidecar.image.registry -}}
{{- if .Values.sidecar.image.sha }}
image: "{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}"
image: "{{ $registry }}/{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}"
{{- else }}
image: "{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}"
image: "{{ $registry }}/{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}"
{{- end }}
imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }}
env:
@ -229,12 +306,13 @@ imagePullSecrets:
enableServiceLinks: {{ .Values.enableServiceLinks }}
{{- end }}
containers:
{{- if .Values.sidecar.alerts.enabled }}
{{- if and .Values.sidecar.alerts.enabled (not .Values.sidecar.alerts.initAlerts) }}
- name: {{ include "grafana.name" . }}-sc-alerts
{{- $registry := .Values.global.imageRegistry | default .Values.sidecar.image.registry -}}
{{- if .Values.sidecar.image.sha }}
image: "{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}"
image: "{{ $registry }}/{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}"
{{- else }}
image: "{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}"
image: "{{ $registry }}/{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}"
{{- end }}
imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }}
env:
@ -337,10 +415,11 @@ containers:
{{- end}}
{{- if .Values.sidecar.dashboards.enabled }}
- name: {{ include "grafana.name" . }}-sc-dashboard
{{- $registry := .Values.global.imageRegistry | default .Values.sidecar.image.registry -}}
{{- if .Values.sidecar.image.sha }}
image: "{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}"
image: "{{ $registry }}/{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}"
{{- else }}
image: "{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}"
image: "{{ $registry }}/{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}"
{{- end }}
imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }}
env:
@ -445,12 +524,13 @@ containers:
{{- toYaml . | trim | nindent 6 }}
{{- end }}
{{- end}}
{{- if .Values.sidecar.datasources.enabled }}
{{- if and .Values.sidecar.datasources.enabled (not .Values.sidecar.datasources.initDatasources) }}
- name: {{ include "grafana.name" . }}-sc-datasources
{{- $registry := .Values.global.imageRegistry | default .Values.sidecar.image.registry -}}
{{- if .Values.sidecar.image.sha }}
image: "{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}"
image: "{{ $registry }}/{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}"
{{- else }}
image: "{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}"
image: "{{ $registry }}/{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}"
{{- end }}
imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }}
env:
@ -550,10 +630,11 @@ containers:
{{- end}}
{{- if .Values.sidecar.notifiers.enabled }}
- name: {{ include "grafana.name" . }}-sc-notifiers
{{- $registry := .Values.global.imageRegistry | default .Values.sidecar.image.registry -}}
{{- if .Values.sidecar.image.sha }}
image: "{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}"
image: "{{ $registry }}/{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}"
{{- else }}
image: "{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}"
image: "{{ $registry }}/{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}"
{{- end }}
imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }}
env:
@ -653,10 +734,11 @@ containers:
{{- end}}
{{- if .Values.sidecar.plugins.enabled }}
- name: {{ include "grafana.name" . }}-sc-plugins
{{- $registry := .Values.global.imageRegistry | default .Values.sidecar.image.registry -}}
{{- if .Values.sidecar.image.sha }}
image: "{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}"
image: "{{ $registry }}/{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}"
{{- else }}
image: "{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}"
image: "{{ $registry }}/{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}"
{{- end }}
imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }}
env:
@ -755,10 +837,11 @@ containers:
mountPath: "/etc/grafana/provisioning/plugins"
{{- end}}
- name: {{ .Chart.Name }}
{{- $registry := .Values.global.imageRegistry | default .Values.image.registry -}}
{{- if .Values.image.sha }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}@sha256:{{ .Values.image.sha }}"
image: "{{ $registry }}/{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}@sha256:{{ .Values.image.sha }}"
{{- else }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
image: "{{ $registry }}/{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
{{- end }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
{{- if .Values.command }}
@ -1137,17 +1220,20 @@ volumes:
{{- toYaml .csi | nindent 6 }}
{{- end }}
{{- end }}
{{- range .Values.extraVolumeMounts }}
{{- range .Values.extraVolumes }}
- name: {{ .name }}
{{- if .existingClaim }}
persistentVolumeClaim:
claimName: {{ .existingClaim }}
{{- else if .hostPath }}
hostPath:
path: {{ .hostPath }}
{{ toYaml .hostPath | nindent 6 }}
{{- else if .csi }}
csi:
{{- toYaml .data | nindent 6 }}
{{- else if .configMap }}
configMap:
{{- toYaml .configMap | nindent 6 }}
{{- else }}
emptyDir: {}
{{- end }}
@ -1160,3 +1246,4 @@ volumes:
{{- tpl (toYaml .) $root | nindent 2 }}
{{- end }}
{{- end }}

View File

@ -1,4 +1,4 @@
{{- if and .Values.rbac.create (or (not .Values.rbac.namespaced) .Values.rbac.extraClusterRoleRules) (not .Values.rbac.useExistingRole) }}
{{- if and .Values.rbac.create (or (not .Values.rbac.namespaced) .Values.rbac.extraClusterRoleRules) (not .Values.rbac.useExistingClusterRole) }}
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:

View File

@ -15,8 +15,8 @@ subjects:
namespace: {{ include "grafana.namespace" . }}
roleRef:
kind: ClusterRole
{{- if .Values.rbac.useExistingRole }}
name: {{ .Values.rbac.useExistingRole }}
{{- if .Values.rbac.useExistingClusterRole }}
name: {{ .Values.rbac.useExistingClusterRole }}
{{- else }}
name: {{ include "grafana.fullname" . }}-clusterrole
{{- end }}

View File

@ -0,0 +1,43 @@
{{/*
Renders the "<fullname>-config-secret" Secret. It is only emitted when
.Values.createConfigmap is set and "grafana.shouldCreateConfigSecret" returns "true".
  data:       base64-encoded content of the chart file named by each alerting entry's "secretFile".
  stringData: datasources/notifiers entries that carry a "secret" key (the whole entry, passed
              through tpl) and the "secret" value of each alerting entry that has one.
*/}}
{{- $createConfigSecret := eq (include "grafana.shouldCreateConfigSecret" .) "true" -}}
{{- if and .Values.createConfigmap $createConfigSecret }}
{{- $files := .Files }}
{{- $root := . -}}
apiVersion: v1
kind: Secret
metadata:
name: "{{ include "grafana.fullname" . }}-config-secret"
namespace: {{ include "grafana.namespace" . }}
labels:
{{- include "grafana.labels" . | nindent 4 }}
{{- with .Values.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
data:
{{- range $key, $value := .Values.alerting }}
{{- if (hasKey $value "secretFile") }}
{{- $key | nindent 2 }}:
{{- toYaml ( $files.Get $value.secretFile ) | b64enc | nindent 4}}
{{/* as per https://helm.sh/docs/chart_template_guide/accessing_files/ this will only work if you fork this chart and add files to it */}}
{{- end }}
{{- end }}
stringData:
{{- range $key, $value := .Values.datasources }}
{{- if (hasKey $value "secret") }}
{{- $key | nindent 2 }}: |
{{- tpl (toYaml $value | nindent 4) $root }}
{{- end }}
{{- end }}
{{- range $key, $value := .Values.notifiers }}
{{- if (hasKey $value "secret") }}
{{- $key | nindent 2 }}: |
{{- tpl (toYaml $value | nindent 4) $root }}
{{- end }}
{{- end }}
{{- range $key, $value := .Values.alerting }}
{{ if (hasKey $value "secret") }}
{{- $key | nindent 2 }}: |
{{- tpl (toYaml $value.secret | nindent 4) $root }}
{{- end }}
{{- end }}
{{- end }}

View File

@ -44,19 +44,25 @@ data:
{{- end }}
{{- range $key, $value := .Values.datasources }}
{{- if not (hasKey $value "secret") }}
{{- $key | nindent 2 }}: |
{{- tpl (toYaml $value | nindent 4) $root }}
{{- end }}
{{- end }}
{{- range $key, $value := .Values.notifiers }}
{{- if not (hasKey $value "secret") }}
{{- $key | nindent 2 }}: |
{{- toYaml $value | nindent 4 }}
{{- end }}
{{- end }}
{{- range $key, $value := .Values.alerting }}
{{- if (hasKey $value "file") }}
{{- $key | nindent 2 }}:
{{- toYaml ( $files.Get $value.file ) | nindent 4}}
{{- else if (or (hasKey $value "secret") (hasKey $value "secretFile"))}}
{{/* will be stored inside secret generated by "configSecret.yaml"*/}}
{{- else }}
{{- $key | nindent 2 }}: |
{{- tpl (toYaml $value | nindent 4) $root }}

View File

@ -9,6 +9,9 @@ metadata:
labels:
{{- include "grafana.labels" $ | nindent 4 }}
dashboard-provider: {{ $provider }}
{{- if $.Values.sidecar.dashboards.enabled }}
{{ $.Values.sidecar.dashboards.label }}: {{ $.Values.sidecar.dashboards.labelValue | quote }}
{{- end }}
{{- if $dashboards }}
data:
{{- $dashboardFound := false }}

View File

@ -65,10 +65,11 @@ spec:
{{- end }}
containers:
- name: {{ .Chart.Name }}-image-renderer
{{- $registry := .Values.global.imageRegistry | default .Values.imageRenderer.image.registry -}}
{{- if .Values.imageRenderer.image.sha }}
image: "{{ .Values.imageRenderer.image.repository }}:{{ .Values.imageRenderer.image.tag }}@sha256:{{ .Values.imageRenderer.image.sha }}"
image: "{{ $registry }}/{{ .Values.imageRenderer.image.repository }}:{{ .Values.imageRenderer.image.tag }}@sha256:{{ .Values.imageRenderer.image.sha }}"
{{- else }}
image: "{{ .Values.imageRenderer.image.repository }}:{{ .Values.imageRenderer.image.tag }}"
image: "{{ $registry }}/{{ .Values.imageRenderer.image.repository }}:{{ .Values.imageRenderer.image.tag }}"
{{- end }}
imagePullPolicy: {{ .Values.imageRenderer.image.pullPolicy }}
{{- if .Values.imageRenderer.command }}

View File

@ -27,8 +27,17 @@ spec:
{{- if .Values.networkPolicy.egress.enabled }}
egress:
{{- if not .Values.networkPolicy.egress.blockDNSResolution }}
- ports:
- port: 53
protocol: UDP
{{- end }}
- ports:
{{ .Values.networkPolicy.egress.ports | toJson }}
{{- with .Values.networkPolicy.egress.to }}
to:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- end }}
{{- if .Values.networkPolicy.ingress }}
ingress:

View File

@ -1,5 +1,5 @@
{{- if and .Values.rbac.create (not .Values.rbac.useExistingRole) -}}
apiVersion: {{ include "grafana.rbac.apiVersion" . }}
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: {{ include "grafana.fullname" . }}

View File

@ -1,5 +1,5 @@
{{- if .Values.rbac.create }}
apiVersion: {{ include "grafana.rbac.apiVersion" . }}
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: {{ include "grafana.fullname" . }}

View File

@ -36,6 +36,9 @@ spec:
externalIPs:
{{- toYaml . | nindent 4 }}
{{- end }}
{{- with .Values.service.externalTrafficPolicy }}
externalTrafficPolicy: {{ . }}
{{- end }}
ports:
- name: {{ .Values.service.portName }}
port: {{ .Values.service.port }}

View File

@ -34,7 +34,7 @@ spec:
{{- end }}
containers:
- name: {{ .Release.Name }}-test
image: "{{ .Values.testFramework.image}}:{{ .Values.testFramework.tag }}"
image: "{{ .Values.global.imageRegistry | default .Values.testFramework.image.registry }}/{{ .Values.testFramework.image.repository }}:{{ .Values.testFramework.image.tag }}"
imagePullPolicy: "{{ .Values.testFramework.imagePullPolicy}}"
command: ["/opt/bats/bin/bats", "-t", "/tests/run.sh"]
volumeMounts:

View File

@ -1,4 +1,7 @@
global:
# -- Overrides the Docker registry globally for all images
imageRegistry: null
# To help compatibility with other charts which use global.imagePullSecrets.
# Allow either an array of {name: pullSecret} maps (k8s-style), or an array of strings (more common helm-style).
# Can be templated.
@ -16,7 +19,8 @@ global:
rbac:
create: true
## Use an existing ClusterRole/Role (depending on rbac.namespaced false/true)
# useExistingRole: name-of-some-(cluster)role
# useExistingRole: name-of-some-role
# useExistingClusterRole: name-of-some-clusterRole
pspEnabled: false
pspUseAppArmor: false
namespaced: false
@ -57,6 +61,7 @@ autoscaling:
## See `kubectl explain poddisruptionbudget.spec` for more
## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/
podDisruptionBudget: {}
# apiVersion: ""
# minAvailable: 1
# maxUnavailable: 1
@ -84,7 +89,10 @@ livenessProbe:
# schedulerName: "default-scheduler"
image:
repository: docker.io/grafana/grafana
# -- The Docker registry
registry: docker.io
# -- Docker image repository
repository: grafana/grafana
# Overrides the Grafana image tag whose default is the chart appVersion
tag: ""
sha: ""
@ -100,8 +108,11 @@ image:
testFramework:
enabled: true
image: docker.io/bats/bats
tag: "v1.4.1"
image:
# -- The Docker registry
registry: docker.io
repository: bats/bats
tag: "v1.4.1"
imagePullPolicy: IfNotPresent
securityContext: {}
@ -144,7 +155,9 @@ extraLabels: {}
# priorityClassName:
downloadDashboardsImage:
repository: docker.io/curlimages/curl
# -- The Docker registry
registry: docker.io
repository: curlimages/curl
tag: 7.85.0
sha: ""
pullPolicy: IfNotPresent
@ -202,7 +215,7 @@ serviceMonitor:
path: /metrics
# namespace: monitoring (defaults to use the namespace this chart is deployed to)
labels: {}
interval: 1m
interval: 30s
scheme: http
tlsConfig: {}
scrapeTimeout: 30s
@ -214,7 +227,6 @@ extraExposePorts: []
# - name: keycloak
# port: 8080
# targetPort: 8080
# type: ClusterIP
# overrides pod.spec.hostAliases in the grafana deployment's pods
hostAliases: []
@ -360,7 +372,9 @@ initChownData:
## initChownData container image
##
image:
repository: docker.io/library/busybox
# -- The Docker registry
registry: docker.io
repository: library/busybox
tag: "1.31.1"
sha: ""
pullPolicy: IfNotPresent
@ -617,21 +631,22 @@ alerting: {}
# labels:
# team: sre_team_1
# contactpoints.yaml:
# apiVersion: 1
# contactPoints:
# - orgId: 1
# name: cp_1
# receivers:
# - uid: first_uid
# type: pagerduty
# settings:
# integrationKey: XXX
# severity: critical
# class: ping failure
# component: Grafana
# group: app-stack
# summary: |
# {{ `{{ include "default.message" . }}` }}
# secret:
# apiVersion: 1
# contactPoints:
# - orgId: 1
# name: cp_1
# receivers:
# - uid: first_uid
# type: pagerduty
# settings:
# integrationKey: XXX
# severity: critical
# class: ping failure
# component: Grafana
# group: app-stack
# summary: |
# {{ `{{ include "default.message" . }}` }}
## Configure notifiers
## ref: http://docs.grafana.org/administration/provisioning/#alert-notification-channels
@ -793,8 +808,10 @@ smtp:
## Requires at least Grafana 5 to work and can't be used together with parameters dashboardProviders, datasources and dashboards
sidecar:
image:
repository: quay.io/kiwigrid/k8s-sidecar
tag: 1.24.6
# -- The Docker registry
registry: quay.io
repository: kiwigrid/k8s-sidecar
tag: 1.25.2
sha: ""
imagePullPolicy: IfNotPresent
resources: {}
@ -853,7 +870,9 @@ sidecar:
# Absolute path to shell script to execute after an alert got reloaded
script: null
skipReload: false
# Deploy the alert sidecar as an initContainer in addition to a container.
# This is needed if skipReload is true, to load any alerts defined at startup time.
# Deploy the alert sidecar as an initContainer.
initAlerts: false
# Additional alert sidecar volume mounts
extraMounts: []
# Sets the size limit of the alert sidecar emptyDir volume
@ -1064,8 +1083,10 @@ imageRenderer:
targetMemory: ""
behavior: {}
image:
# -- The Docker registry
registry: docker.io
# image-renderer Image repository
repository: docker.io/grafana/grafana-image-renderer
repository: grafana/grafana-image-renderer
# image-renderer Image tag
tag: latest
# image-renderer Image sha (optional)
@ -1105,6 +1126,8 @@ imageRenderer:
drop: ['ALL']
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
## image-renderer pod annotation
podAnnotations: {}
# image-renderer deployment Host Aliases
hostAliases: []
# image-renderer deployment priority class
@ -1218,14 +1241,25 @@ networkPolicy:
## created allowing grafana to connect to external data sources from kubernetes cluster.
enabled: false
##
## @param networkPolicy.egress.blockDNSResolution When enabled, DNS resolution will be blocked
## for all pods in the grafana namespace.
blockDNSResolution: false
##
## @param networkPolicy.egress.ports Add individual ports to be allowed by the egress
ports: []
## Add ports to the egress by specifying - port: <port number>
## E.X.
## ports:
## - port: 80
## - port: 443
##
## - port: 80
## - port: 443
##
## @param networkPolicy.egress.to Allow egress traffic to specific destinations
to: []
## Add destinations to the egress by specifying - ipBlock: <CIDR>
## E.X.
## to:
## - namespaceSelector:
## matchExpressions:
## - {key: role, operator: In, values: [grafana]}
##
##
##

View File

@ -4,7 +4,7 @@ annotations:
- name: Chart Source
url: https://github.com/prometheus-community/helm-charts
apiVersion: v2
appVersion: 2.9.2
appVersion: 2.10.1
description: Install kube-state-metrics to generate and expose cluster-level metrics
home: https://github.com/kubernetes/kube-state-metrics/
keywords:
@ -23,4 +23,4 @@ name: kube-state-metrics
sources:
- https://github.com/kubernetes/kube-state-metrics/
type: application
version: 5.10.1
version: 5.15.2

View File

@ -3,6 +3,13 @@ apiVersion: v1
kind: ConfigMap
metadata:
name: {{ template "kube-state-metrics.fullname" . }}-customresourcestate-config
namespace: {{ template "kube-state-metrics.namespace" . }}
labels:
{{- include "kube-state-metrics.labels" . | indent 4 }}
{{- if .Values.annotations }}
annotations:
{{ toYaml .Values.annotations | nindent 4 }}
{{- end }}
data:
config.yaml: |
{{- toYaml .Values.customResourceState.config | nindent 4 }}

View File

@ -18,6 +18,11 @@ spec:
matchLabels:
{{- include "kube-state-metrics.selectorLabels" . | indent 6 }}
replicas: {{ .Values.replicas }}
{{- if not .Values.autosharding.enabled }}
strategy:
type: {{ .Values.updateStrategy | default "RollingUpdate" }}
{{- end }}
revisionHistoryLimit: {{ .Values.revisionHistoryLimit }}
{{- if .Values.autosharding.enabled }}
serviceName: {{ template "kube-state-metrics.fullname" . }}
volumeClaimTemplates: []
@ -39,6 +44,10 @@ spec:
{{- if .Values.priorityClassName }}
priorityClassName: {{ .Values.priorityClassName }}
{{- end }}
{{- with .Values.initContainers }}
initContainers:
{{- toYaml . | nindent 6 }}
{{- end }}
containers:
{{- $httpPort := ternary 9090 (.Values.service.port | default 8080) .Values.kubeRBACProxy.enabled}}
{{- $telemetryPort := ternary 9091 (.Values.selfMonitor.telemetryPort | default 8081) .Values.kubeRBACProxy.enabled}}
@ -235,6 +244,9 @@ spec:
{{- end }}
{{- end }}
{{- end }}
{{- with .Values.containers }}
{{- toYaml . | nindent 6 }}
{{- end }}
{{- if or .Values.imagePullSecrets .Values.global.imagePullSecrets }}
imagePullSecrets:
{{- include "kube-state-metrics.imagePullSecrets" (dict "Values" .Values "imagePullSecrets" .Values.imagePullSecrets) | indent 8 }}

View File

@ -4,6 +4,12 @@ kind: ConfigMap
metadata:
name: {{ template "kube-state-metrics.fullname" . }}-rbac-config
namespace: {{ template "kube-state-metrics.namespace" . }}
labels:
{{- include "kube-state-metrics.labels" . | indent 4 }}
{{- if .Values.annotations }}
annotations:
{{ toYaml .Values.annotations | nindent 4 }}
{{- end }}
data:
config-file.yaml: |+
authorization:
@ -13,4 +19,4 @@ data:
resource: services
subresource: {{ template "kube-state-metrics.fullname" . }}
name: {{ template "kube-state-metrics.fullname" . }}
{{- end }}
{{- end }}

View File

@ -7,11 +7,11 @@ metadata:
labels:
{{- include "kube-state-metrics.labels" . | indent 4 }}
{{- with .Values.prometheus.monitor.additionalLabels }}
{{- toYaml . | nindent 4 }}
{{- tpl (toYaml . | nindent 4) $ }}
{{- end }}
{{- with .Values.prometheus.monitor.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- tpl (toYaml . | nindent 4) $ }}
{{- end }}
spec:
jobLabel: {{ default "app.kubernetes.io/name" .Values.prometheus.monitor.jobLabel }}
@ -24,6 +24,13 @@ spec:
{{- toYaml . | trim | nindent 4 }}
{{- end }}
{{- include "servicemonitor.scrapeLimits" .Values.prometheus.monitor | indent 2 }}
{{- if .Values.prometheus.monitor.namespaceSelector }}
namespaceSelector:
matchNames:
{{- with .Values.prometheus.monitor.namespaceSelector }}
{{- toYaml . | nindent 6 }}
{{- end }}
{{- end }}
selector:
matchLabels:
{{- with .Values.prometheus.monitor.selectorOverride }}
@ -42,6 +49,9 @@ spec:
{{- if .Values.prometheus.monitor.proxyUrl }}
proxyUrl: {{ .Values.prometheus.monitor.proxyUrl}}
{{- end }}
{{- /* Prometheus enables HTTP/2 for scrapes by default, so an explicit `false` must still be rendered. Test for a boolean value instead of truthiness; an unset key stays omitted. */}}
{{- if kindIs "bool" .Values.prometheus.monitor.enableHttp2 }}
enableHttp2: {{ .Values.prometheus.monitor.enableHttp2 }}
{{- end }}
{{- if .Values.prometheus.monitor.honorLabels }}
honorLabels: true
{{- end }}
@ -78,6 +88,9 @@ spec:
{{- if .Values.prometheus.monitor.proxyUrl }}
proxyUrl: {{ .Values.prometheus.monitor.proxyUrl}}
{{- end }}
{{- /* Prometheus enables HTTP/2 for scrapes by default, so an explicit `false` must still be rendered. Test for a boolean value instead of truthiness; an unset key stays omitted. */}}
{{- if kindIs "bool" .Values.prometheus.monitor.enableHttp2 }}
enableHttp2: {{ .Values.prometheus.monitor.enableHttp2 }}
{{- end }}
{{- if .Values.prometheus.monitor.honorLabels }}
honorLabels: true
{{- end }}

View File

@ -37,6 +37,13 @@ autosharding:
replicas: 1
# Change the deployment strategy when autosharding is disabled
# updateStrategy: Recreate
# Number of old revisions to retain to allow rollback
# Default Kubernetes value is set to 10
revisionHistoryLimit: 10
# List of additional cli arguments to configure kube-state-metrics
# for example: --enable-gzip-encoding, --log-file, etc.
# all the possible args can be found here: https://github.com/kubernetes/kube-state-metrics/blob/master/docs/cli-arguments.md
@ -142,6 +149,7 @@ prometheus:
annotations: {}
additionalLabels: {}
namespace: ""
namespaceSelector: []
jobLabel: ""
targetLabels: []
podTargetLabels: []
@ -167,6 +175,8 @@ prometheus:
labelValueLengthLimit: 0
scrapeTimeout: ""
proxyUrl: ""
## Whether to enable HTTP2 for servicemonitor
# enableHttp2: false
selectorOverride: {}
honorLabels: false
metricRelabelings: []
@ -434,3 +444,13 @@ extraManifests: []
# name: prometheus-extra
# data:
# extra-data: "value"
## Containers allows injecting additional containers.
containers: []
# - name: crd-init
# image: kiwigrid/k8s-sidecar:latest
## InitContainers allows injecting additional initContainers.
initContainers: []
# - name: crd-sidecar
# image: kiwigrid/k8s-sidecar:latest

View File

@ -4,7 +4,7 @@ annotations:
- name: Chart Source
url: https://github.com/prometheus-community/helm-charts
apiVersion: v2
appVersion: 1.6.0
appVersion: 1.7.0
description: A Helm chart for prometheus node-exporter
home: https://github.com/prometheus/node_exporter/
keywords:
@ -22,4 +22,4 @@ name: prometheus-node-exporter
sources:
- https://github.com/prometheus/node_exporter/
type: application
version: 4.21.0
version: 4.24.0

View File

@ -1,18 +1,18 @@
# Prometheus `Node Exporter`
# Prometheus Node Exporter
Prometheus exporter for hardware and OS metrics exposed by *NIX kernels, written in Go with pluggable metric collectors.
This chart bootstraps a prometheus [`Node Exporter`](http://github.com/prometheus/node_exporter) daemonset on a [Kubernetes](http://kubernetes.io) cluster using the [Helm](https://helm.sh) package manager.
This chart bootstraps a Prometheus [Node Exporter](http://github.com/prometheus/node_exporter) daemonset on a [Kubernetes](http://kubernetes.io) cluster using the [Helm](https://helm.sh) package manager.
## Get Repository Info
<!-- textlint-disable terminology -->
```console
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update
```
_See [`helm repo`](https://helm.sh/docs/helm/helm_repo/) for command documentation._
_See [helm repo](https://helm.sh/docs/helm/helm_repo/) for command documentation._
<!-- textlint-enable -->
## Install Chart
```console
@ -36,15 +36,11 @@ _See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall/) for command doc
## Upgrading Chart
```console
helm upgrade [RELEASE_NAME] [CHART] --install
helm upgrade [RELEASE_NAME] prometheus-community/prometheus-node-exporter --install
```
_See [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/) for command documentation._
### 4.16 to 4.17+
`containerSecurityContext.readOnlyRootFilesystem` is set to `true` by default.
### 3.x to 4.x
Starting from version 4.0.0, the `node exporter` chart is using the [Kubernetes recommended labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/common-labels/). Therefore you have to delete the daemonset before you upgrade.

View File

@ -3,7 +3,6 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: {{ include "prometheus-node-exporter.fullname" . }}
namespace: {{ include "prometheus-node-exporter.namespace" . }}
labels:
{{- include "prometheus-node-exporter.labels" . | nindent 4 }}
rules:

View File

@ -13,6 +13,7 @@ spec:
selector:
matchLabels:
{{- include "prometheus-node-exporter.selectorLabels" . | nindent 6 }}
revisionHistoryLimit: {{ .Values.revisionHistoryLimit }}
{{- with .Values.updateStrategy }}
updateStrategy:
{{- toYaml . | nindent 4 }}

View File

@ -1,4 +1,4 @@
{{ range .Values.extraManifests }}
---
{{ tpl (toYaml .) $ }}
{{ tpl . $ }}
{{ end }}

View File

@ -1,3 +1,4 @@
{{- if .Values.service.enabled }}
apiVersion: v1
kind: Service
metadata:
@ -25,3 +26,4 @@ spec:
name: {{ .Values.service.portName }}
selector:
{{- include "prometheus-node-exporter.selectorLabels" . | nindent 4 }}
{{- end }}

View File

@ -14,6 +14,10 @@ imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""
# Number of old revisions to retain to allow rollback
# Default Kubernetes value is set to 10
revisionHistoryLimit: 10
global:
# To help compatibility with other charts which use global.imagePullSecrets.
# Allow either an array of {name: pullSecret} maps (k8s-style), or an array of strings (more common helm-style).
@ -38,7 +42,7 @@ kubeRBACProxy:
image:
registry: quay.io
repository: brancz/kube-rbac-proxy
tag: v0.14.0
tag: v0.15.0
sha: ""
pullPolicy: IfNotPresent
@ -65,6 +69,7 @@ kubeRBACProxy:
# memory: 32Mi
service:
enabled: true
type: ClusterIP
port: 9100
targetPort: 9100
@ -466,7 +471,8 @@ verticalPodAutoscaler:
# Extra manifests to deploy as an array
extraManifests: []
# - apiVersion: v1
# - |
# apiVersion: v1
# kind: ConfigMap
# metadata:
# name: prometheus-extra

View File

@ -14,4 +14,4 @@ name: prometheus-windows-exporter
sources:
- https://github.com/prometheus-community/windows_exporter/
type: application
version: 0.1.1
version: 0.1.2

View File

@ -12,7 +12,7 @@ image:
config: |-
collectors:
enabled: '[defaults],container'
enabled: '[defaults],memory,container'
imagePullSecrets: []
# - name: "image-pull-secret"

View File

@ -179,4 +179,10 @@ spec:
{{- if .Values.alertmanager.alertmanagerSpec.minReadySeconds }}
minReadySeconds: {{ .Values.alertmanager.alertmanagerSpec.minReadySeconds }}
{{- end }}
{{- with .Values.alertmanager.alertmanagerSpec.additionalConfig }}
{{- tpl (toYaml .) $ | nindent 2 }}
{{- end }}
{{- with .Values.alertmanager.alertmanagerSpec.additionalConfigString }}
{{- tpl . $ | nindent 2 }}
{{- end }}
{{- end }}

View File

@ -1,3 +1,4 @@
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if .Values.alertmanager.enabled }}
apiVersion: v1
kind: Service
@ -44,6 +45,9 @@ spec:
targetPort: {{ .Values.alertmanager.service.targetPort }}
protocol: TCP
- name: reloader-web
{{- if semverCompare ">=1.20.0-0" $kubeTargetVersion }}
appProtocol: http
{{- end }}
port: 8080
targetPort: reloader-web
{{- if .Values.alertmanager.service.additionalPorts }}

View File

@ -36,17 +36,15 @@ spec:
bearerTokenFile: {{ .Values.alertmanager.serviceMonitor.bearerTokenFile }}
{{- end }}
{{- if .Values.alertmanager.serviceMonitor.tlsConfig }}
tlsConfig: {{ toYaml .Values.alertmanager.serviceMonitor.tlsConfig | nindent 6 }}
tlsConfig: {{- toYaml .Values.alertmanager.serviceMonitor.tlsConfig | nindent 6 }}
{{- end }}
path: "{{ trimSuffix "/" .Values.alertmanager.alertmanagerSpec.routePrefix }}/metrics"
{{- if .Values.alertmanager.serviceMonitor.metricRelabelings }}
metricRelabelings:
{{ tpl (toYaml .Values.alertmanager.serviceMonitor.metricRelabelings | indent 6) . }}
{{- end }}
{{- if .Values.alertmanager.serviceMonitor.relabelings }}
relabelings:
{{ toYaml .Values.alertmanager.serviceMonitor.relabelings | indent 6 }}
{{- end }}
{{- if .Values.alertmanager.serviceMonitor.metricRelabelings }}
metricRelabelings: {{- tpl (toYaml .Values.alertmanager.serviceMonitor.metricRelabelings | nindent 6) . }}
{{- end }}
{{- if .Values.alertmanager.serviceMonitor.relabelings }}
relabelings: {{- toYaml .Values.alertmanager.serviceMonitor.relabelings | nindent 6 }}
{{- end }}
- port: reloader-web
{{- if .Values.alertmanager.serviceMonitor.interval }}
interval: {{ .Values.alertmanager.serviceMonitor.interval }}
@ -54,22 +52,37 @@ spec:
{{- if .Values.alertmanager.serviceMonitor.proxyUrl }}
proxyUrl: {{ .Values.alertmanager.serviceMonitor.proxyUrl}}
{{- end }}
{{- if .Values.alertmanager.serviceMonitor.scheme }}
scheme: {{ .Values.alertmanager.serviceMonitor.scheme }}
{{- end }}
{{- if .Values.alertmanager.serviceMonitor.bearerTokenFile }}
bearerTokenFile: {{ .Values.alertmanager.serviceMonitor.bearerTokenFile }}
{{- end }}
{{- if .Values.alertmanager.serviceMonitor.tlsConfig }}
tlsConfig: {{ toYaml .Values.alertmanager.serviceMonitor.tlsConfig | nindent 6 }}
{{- end }}
scheme: http
path: "/metrics"
{{- if .Values.alertmanager.serviceMonitor.metricRelabelings }}
metricRelabelings:
{{ tpl (toYaml .Values.alertmanager.serviceMonitor.metricRelabelings | indent 6) . }}
{{- end }}
{{- if .Values.alertmanager.serviceMonitor.relabelings }}
relabelings:
{{ toYaml .Values.alertmanager.serviceMonitor.relabelings | indent 6 }}
{{- end }}
{{- if .Values.alertmanager.serviceMonitor.metricRelabelings }}
metricRelabelings: {{- tpl (toYaml .Values.alertmanager.serviceMonitor.metricRelabelings | nindent 6) . }}
{{- end }}
{{- if .Values.alertmanager.serviceMonitor.relabelings }}
relabelings: {{- toYaml .Values.alertmanager.serviceMonitor.relabelings | nindent 6 }}
{{- end }}
{{- range .Values.alertmanager.serviceMonitor.additionalEndpoints }}
- port: {{ .port }}
{{- if or $.Values.alertmanager.serviceMonitor.interval .interval }}
interval: {{ default $.Values.alertmanager.serviceMonitor.interval .interval }}
{{- end }}
{{- if or $.Values.alertmanager.serviceMonitor.proxyUrl .proxyUrl }}
proxyUrl: {{ default $.Values.alertmanager.serviceMonitor.proxyUrl .proxyUrl }}
{{- end }}
{{- if or $.Values.alertmanager.serviceMonitor.scheme .scheme }}
scheme: {{ default $.Values.alertmanager.serviceMonitor.scheme .scheme }}
{{- end }}
{{- if or $.Values.alertmanager.serviceMonitor.bearerTokenFile .bearerTokenFile }}
bearerTokenFile: {{ default $.Values.alertmanager.serviceMonitor.bearerTokenFile .bearerTokenFile }}
{{- end }}
{{- if or $.Values.alertmanager.serviceMonitor.tlsConfig .tlsConfig }}
tlsConfig: {{- default $.Values.alertmanager.serviceMonitor.tlsConfig .tlsConfig | toYaml | nindent 6 }}
{{- end }}
path: {{ .path }}
{{- /* Inside `range`, `.` is the current additionalEndpoints item, so `tpl` must be given the root context `$` for templated relabelings to resolve .Values/.Release. The per-endpoint value takes precedence over the serviceMonitor-wide one. */}}
{{- if or $.Values.alertmanager.serviceMonitor.metricRelabelings .metricRelabelings }}
metricRelabelings: {{- tpl (default $.Values.alertmanager.serviceMonitor.metricRelabelings .metricRelabelings | toYaml | nindent 6) $ }}
{{- end }}
{{- if or $.Values.alertmanager.serviceMonitor.relabelings .relabelings }}
relabelings: {{- default $.Values.alertmanager.serviceMonitor.relabelings .relabelings | toYaml | nindent 6 }}
{{- end }}
{{- end }}
{{- end }}

View File

@ -12,6 +12,10 @@ metadata:
{{- include "kube-prometheus-stack.labels" . | indent 4 }}
spec:
{{- include "servicemonitor.scrapeLimits" .Values.kubelet.serviceMonitor | nindent 2 }}
{{- with .Values.kubelet.serviceMonitor.attachMetadata }}
attachMetadata:
{{- toYaml . | nindent 4 }}
{{- end }}
endpoints:
{{- if .Values.kubelet.serviceMonitor.https }}
- port: https-metrics
@ -29,7 +33,8 @@ spec:
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecureSkipVerify: true
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
honorLabels: true
honorLabels: {{ .Values.kubelet.serviceMonitor.honorLabels }}
honorTimestamps: {{ .Values.kubelet.serviceMonitor.honorTimestamps }}
{{- if .Values.kubelet.serviceMonitor.metricRelabelings }}
metricRelabelings:
{{ tpl (toYaml .Values.kubelet.serviceMonitor.metricRelabelings | indent 4) . }}
@ -51,7 +56,8 @@ spec:
{{- if .Values.kubelet.serviceMonitor.scrapeTimeout }}
scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }}
{{- end }}
honorLabels: true
honorLabels: {{ .Values.kubelet.serviceMonitor.honorLabels }}
honorTimestamps: {{ .Values.kubelet.serviceMonitor.honorTimestamps }}
tlsConfig:
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecureSkipVerify: true
@ -78,7 +84,8 @@ spec:
{{- if .Values.kubelet.serviceMonitor.scrapeTimeout }}
scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }}
{{- end }}
honorLabels: true
honorLabels: {{ .Values.kubelet.serviceMonitor.honorLabels }}
honorTimestamps: {{ .Values.kubelet.serviceMonitor.honorTimestamps }}
tlsConfig:
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecureSkipVerify: true
@ -105,7 +112,8 @@ spec:
{{- if .Values.kubelet.serviceMonitor.scrapeTimeout }}
scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }}
{{- end }}
honorLabels: true
honorLabels: {{ .Values.kubelet.serviceMonitor.honorLabels }}
honorTimestamps: {{ .Values.kubelet.serviceMonitor.honorTimestamps }}
tlsConfig:
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecureSkipVerify: true
@ -130,7 +138,8 @@ spec:
{{- if .Values.kubelet.serviceMonitor.scrapeTimeout }}
scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }}
{{- end }}
honorLabels: true
honorLabels: {{ .Values.kubelet.serviceMonitor.honorLabels }}
honorTimestamps: {{ .Values.kubelet.serviceMonitor.honorTimestamps }}
{{- if .Values.kubelet.serviceMonitor.metricRelabelings }}
metricRelabelings:
{{ tpl (toYaml .Values.kubelet.serviceMonitor.metricRelabelings | indent 4) . }}
@ -151,7 +160,8 @@ spec:
{{- if .Values.kubelet.serviceMonitor.scrapeTimeout }}
scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }}
{{- end }}
honorLabels: true
honorLabels: {{ .Values.kubelet.serviceMonitor.honorLabels }}
honorTimestamps: {{ .Values.kubelet.serviceMonitor.honorTimestamps }}
{{- if .Values.kubelet.serviceMonitor.cAdvisorMetricRelabelings }}
metricRelabelings:
{{ tpl (toYaml .Values.kubelet.serviceMonitor.cAdvisorMetricRelabelings | indent 4) . }}
@ -172,7 +182,8 @@ spec:
{{- if .Values.kubelet.serviceMonitor.scrapeTimeout }}
scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }}
{{- end }}
honorLabels: true
honorLabels: {{ .Values.kubelet.serviceMonitor.honorLabels }}
honorTimestamps: {{ .Values.kubelet.serviceMonitor.honorTimestamps }}
{{- if .Values.kubelet.serviceMonitor.probesMetricRelabelings }}
metricRelabelings:
{{ tpl (toYaml .Values.kubelet.serviceMonitor.probesMetricRelabelings | indent 4) . }}
@ -194,7 +205,8 @@ spec:
{{- if .Values.kubelet.serviceMonitor.scrapeTimeout }}
scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }}
{{- end }}
honorLabels: true
honorLabels: {{ .Values.kubelet.serviceMonitor.honorLabels }}
honorTimestamps: {{ .Values.kubelet.serviceMonitor.honorTimestamps }}
{{- if .Values.kubelet.serviceMonitor.resourceMetricRelabelings }}
metricRelabelings:
{{ tpl (toYaml .Values.kubelet.serviceMonitor.resourceMetricRelabelings | indent 4) . }}

View File

@ -55,8 +55,8 @@ data:
timeInterval: {{ $scrapeInterval }}
{{- if $.Values.grafana.sidecar.datasources.exemplarTraceIdDestinations }}
exemplarTraceIdDestinations:
- datasourceUid: {{ .Values.grafana.sidecar.datasources.exemplarTraceIdDestinations.datasourceUid }}
name: {{ .Values.grafana.sidecar.datasources.exemplarTraceIdDestinations.traceIdLabelName }}
- datasourceUid: {{ $.Values.grafana.sidecar.datasources.exemplarTraceIdDestinations.datasourceUid }}
name: {{ $.Values.grafana.sidecar.datasources.exemplarTraceIdDestinations.traceIdLabelName }}
{{- end }}
{{- end }}
{{- end }}

View File

@ -6,7 +6,7 @@ metadata:
name: {{ template "kube-prometheus-stack.fullname" . }}-admission-create
namespace: {{ template "kube-prometheus-stack.namespace" . }}
annotations:
helm.sh/hook: post-install,post-upgrade
helm.sh/hook: pre-install,pre-upgrade
helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
## Ensure this is run before the job
helm.sh/hook-weight: "-5"

View File

@ -32,8 +32,10 @@ spec:
- port: "8080"
{{- end }}
protocol: "TCP"
{{- if not .Values.prometheusOperator.tls.enabled }}
rules:
http:
- method: "GET"
path: "/metrics"
{{- end }}
{{- end }}

View File

@ -84,6 +84,12 @@ rules:
- get
- list
- watch
- apiGroups:
- storage.k8s.io
resources:
- storageclasses
verbs:
- get
{{- if .Capabilities.APIVersions.Has "discovery.k8s.io/v1/EndpointSlice" }}
- apiGroups:
- discovery.k8s.io

View File

@ -91,10 +91,10 @@ spec:
{{- else }}
- --prometheus-config-reloader={{ $configReloaderRegistry }}/{{ .Values.prometheusOperator.prometheusConfigReloader.image.repository }}:{{ .Values.prometheusOperator.prometheusConfigReloader.image.tag | default .Chart.AppVersion }}
{{- end }}
- --config-reloader-cpu-request={{ .Values.prometheusOperator.prometheusConfigReloader.resources.requests.cpu }}
- --config-reloader-cpu-limit={{ .Values.prometheusOperator.prometheusConfigReloader.resources.limits.cpu }}
- --config-reloader-memory-request={{ .Values.prometheusOperator.prometheusConfigReloader.resources.requests.memory }}
- --config-reloader-memory-limit={{ .Values.prometheusOperator.prometheusConfigReloader.resources.limits.memory }}
- --config-reloader-cpu-request={{ (((.Values.prometheusOperator.prometheusConfigReloader.resources).requests).cpu) | default 0 }}
- --config-reloader-cpu-limit={{ (((.Values.prometheusOperator.prometheusConfigReloader.resources).limits).cpu) | default 0 }}
- --config-reloader-memory-request={{ (((.Values.prometheusOperator.prometheusConfigReloader.resources).requests).memory) | default 0 }}
- --config-reloader-memory-limit={{ (((.Values.prometheusOperator.prometheusConfigReloader.resources).limits).memory) | default 0 }}
{{- if .Values.prometheusOperator.prometheusConfigReloader.enableProbe }}
- --enable-config-reloader-probes=true
{{- end }}

View File

@ -8,7 +8,13 @@ rules:
- "config-reloaders"
- "etcd"
- "general.rules"
- "k8s.rules"
- "k8s.rules.container_cpu_usage_seconds_total"
- "k8s.rules.container_memory_cache"
- "k8s.rules.container_memory_rss"
- "k8s.rules.container_memory_swap"
- "k8s.rules.container_memory_working_set_bytes"
- "k8s.rules.container_resource"
- "k8s.rules.pod_owner"
- "kube-apiserver-availability.rules"
- "kube-apiserver-burnrate.rules"
- "kube-apiserver-histogram.rules"
@ -33,4 +39,6 @@ rules:
- "node-network"
- "prometheus-operator"
- "prometheus"
- "windows.node.rules"
- "windows.pod.rules"
{{- end }}

View File

@ -1,6 +1,6 @@
{{- if and .Values.prometheus.enabled .Values.prometheus.thanosIngress.enabled }}
{{- $pathType := .Values.prometheus.thanosIngress.pathType | default "" }}
{{- $serviceName := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus" }}
{{- $serviceName := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "thanos-discovery" }}
{{- $thanosPort := .Values.prometheus.thanosIngress.servicePort -}}
{{- $routePrefix := list .Values.prometheus.prometheusSpec.routePrefix }}
{{- $paths := .Values.prometheus.thanosIngress.paths | default $routePrefix -}}

View File

@ -356,7 +356,18 @@ spec:
{{- if not .Values.prometheus.agentMode }}
{{- if .Values.prometheus.prometheusSpec.thanos }}
thanos:
{{ toYaml .Values.prometheus.prometheusSpec.thanos | indent 4 }}
{{- with (omit .Values.prometheus.prometheusSpec.thanos "objectStorageConfig")}}
{{ toYaml . | indent 4 }}
{{- end }}
{{- /* Parenthesised access is nil-safe: `thanos` may be set without `objectStorageConfig`, in which case a plain chained lookup aborts rendering with a nil-pointer error. An existing secret wins over an inline `secret`. */}}
{{- if ((.Values.prometheus.prometheusSpec.thanos.objectStorageConfig).existingSecret) }}
objectStorageConfig:
key: "{{.Values.prometheus.prometheusSpec.thanos.objectStorageConfig.existingSecret.key }}"
name: "{{.Values.prometheus.prometheusSpec.thanos.objectStorageConfig.existingSecret.name }}"
{{- else if ((.Values.prometheus.prometheusSpec.thanos.objectStorageConfig).secret) }}
objectStorageConfig:
key: object-storage-configs.yaml
name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus
{{- end }}
{{- end }}
{{- if .Values.prometheus.prometheusSpec.disableCompaction }}
disableCompaction: {{ .Values.prometheus.prometheusSpec.disableCompaction }}
@ -441,4 +452,10 @@ spec:
tracingConfig:
{{ toYaml .Values.prometheus.prometheusSpec.tracingConfig | indent 4 }}
{{- end }}
{{- with .Values.prometheus.prometheusSpec.additionalConfig }}
{{- tpl (toYaml .) $ | nindent 2 }}
{{- end }}
{{- with .Values.prometheus.prometheusSpec.additionalConfigString }}
{{- tpl . $ | nindent 2 }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,17 @@
{{- if .Values.prometheus.enabled }}
apiVersion: v1
kind: Secret
metadata:
name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}-prometheus
app.kubernetes.io/component: prometheus
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
data:
{{- with .Values.prometheus.prometheusSpec.thanos.objectStorageConfig }}
{{- if and .secret (not .existingSecret) }}
object-storage-configs.yaml: {{ toYaml .secret | b64enc | quote }}
{{- end }}
{{- end }}
{{- end }}

View File

@ -1,3 +1,4 @@
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if .Values.prometheus.enabled }}
apiVersion: v1
kind: Service
@ -43,6 +44,9 @@ spec:
port: {{ .Values.prometheus.service.port }}
targetPort: {{ .Values.prometheus.service.targetPort }}
- name: reloader-web
{{- /* Emit appProtocol only for target Kubernetes versions at or above 1.20.0-0; same constraint as the alertmanager service's reloader-web port. */}}
{{- if semverCompare ">=1.20.0-0" $kubeTargetVersion }}
appProtocol: http
{{- end }}
port: 8080
targetPort: reloader-web
{{- if .Values.prometheus.thanosIngress.enabled }}

View File

@ -29,40 +29,53 @@ spec:
scheme: {{ .Values.prometheus.serviceMonitor.scheme }}
{{- end }}
{{- if .Values.prometheus.serviceMonitor.tlsConfig }}
tlsConfig: {{ toYaml .Values.prometheus.serviceMonitor.tlsConfig | nindent 6 }}
tlsConfig: {{- toYaml .Values.prometheus.serviceMonitor.tlsConfig | nindent 6 }}
{{- end }}
{{- if .Values.prometheus.serviceMonitor.bearerTokenFile }}
bearerTokenFile: {{ .Values.prometheus.serviceMonitor.bearerTokenFile }}
{{- end }}
path: "{{ trimSuffix "/" .Values.prometheus.prometheusSpec.routePrefix }}/metrics"
{{- if .Values.prometheus.serviceMonitor.metricRelabelings }}
metricRelabelings:
{{ tpl (toYaml .Values.prometheus.serviceMonitor.metricRelabelings | indent 6) . }}
{{- end }}
{{- if .Values.prometheus.serviceMonitor.relabelings }}
relabelings:
{{ toYaml .Values.prometheus.serviceMonitor.relabelings | indent 6 }}
{{- end }}
{{- if .Values.prometheus.serviceMonitor.metricRelabelings }}
metricRelabelings: {{- tpl (toYaml .Values.prometheus.serviceMonitor.metricRelabelings | nindent 6) . }}
{{- end }}
{{- if .Values.prometheus.serviceMonitor.relabelings }}
relabelings: {{- toYaml .Values.prometheus.serviceMonitor.relabelings | nindent 6 }}
{{- end }}
- port: reloader-web
{{- if .Values.prometheus.serviceMonitor.interval }}
interval: {{ .Values.prometheus.serviceMonitor.interval }}
{{- end }}
{{- if .Values.prometheus.serviceMonitor.scheme }}
scheme: {{ .Values.prometheus.serviceMonitor.scheme }}
{{- end }}
{{- if .Values.prometheus.serviceMonitor.tlsConfig }}
tlsConfig: {{ toYaml .Values.prometheus.serviceMonitor.tlsConfig | nindent 6 }}
{{- end }}
{{- if .Values.prometheus.serviceMonitor.bearerTokenFile }}
bearerTokenFile: {{ .Values.prometheus.serviceMonitor.bearerTokenFile }}
{{- end }}
scheme: http
path: "/metrics"
{{- if .Values.prometheus.serviceMonitor.metricRelabelings }}
metricRelabelings:
{{ tpl (toYaml .Values.prometheus.serviceMonitor.metricRelabelings | indent 6) . }}
{{- end }}
{{- if .Values.prometheus.serviceMonitor.relabelings }}
relabelings:
{{ toYaml .Values.prometheus.serviceMonitor.relabelings | indent 6 }}
{{- end }}
{{- if .Values.prometheus.serviceMonitor.metricRelabelings }}
metricRelabelings: {{- tpl (toYaml .Values.prometheus.serviceMonitor.metricRelabelings | nindent 6) . }}
{{- end }}
{{- if .Values.prometheus.serviceMonitor.relabelings }}
relabelings: {{- toYaml .Values.prometheus.serviceMonitor.relabelings | nindent 6 }}
{{- end }}
{{- /*
Extra scrape endpoints for the Prometheus ServiceMonitor: one list entry per
item of prometheus.serviceMonitor.additionalEndpoints.

Inside the range "." is the current endpoint item and "$" is the chart root
context. "port" and "path" are always taken from the item. Every other field
is optional: the per-endpoint value is used when set, otherwise the
ServiceMonitor-wide value, and the field is omitted when neither is set.

metricRelabelings is passed through tpl with the root context ($), not the
range item, so templated relabelings can reference .Values/.Release and so
tpl receives a context that contains .Template.
*/}}
{{- range .Values.prometheus.serviceMonitor.additionalEndpoints }}
- port: {{ .port }}
{{- if or $.Values.prometheus.serviceMonitor.interval .interval }}
interval: {{ default $.Values.prometheus.serviceMonitor.interval .interval }}
{{- end }}
{{- if or $.Values.prometheus.serviceMonitor.proxyUrl .proxyUrl }}
proxyUrl: {{ default $.Values.prometheus.serviceMonitor.proxyUrl .proxyUrl }}
{{- end }}
{{- if or $.Values.prometheus.serviceMonitor.scheme .scheme }}
scheme: {{ default $.Values.prometheus.serviceMonitor.scheme .scheme }}
{{- end }}
{{- if or $.Values.prometheus.serviceMonitor.bearerTokenFile .bearerTokenFile }}
bearerTokenFile: {{ default $.Values.prometheus.serviceMonitor.bearerTokenFile .bearerTokenFile }}
{{- end }}
{{- if or $.Values.prometheus.serviceMonitor.tlsConfig .tlsConfig }}
tlsConfig: {{- default $.Values.prometheus.serviceMonitor.tlsConfig .tlsConfig | toYaml | nindent 6 }}
{{- end }}
path: {{ .path }}
{{- if or $.Values.prometheus.serviceMonitor.metricRelabelings .metricRelabelings }}
metricRelabelings: {{- tpl (default $.Values.prometheus.serviceMonitor.metricRelabelings .metricRelabelings | toYaml | nindent 6) $ }}
{{- end }}
{{- if or $.Values.prometheus.serviceMonitor.relabelings .relabelings }}
relabelings: {{- default $.Values.prometheus.serviceMonitor.relabelings .relabelings | toYaml | nindent 6 }}
{{- end }}
{{- end }}
{{- end }}

View File

@ -35,5 +35,13 @@ items:
podTargetLabels:
{{ toYaml .podTargetLabels | indent 8 }}
{{- end }}
{{- if .metricRelabelings }}
metricRelabelings:
{{ toYaml .metricRelabelings | indent 8 }}
{{- end }}
{{- if .relabelings }}
relabelings:
{{ toYaml .relabelings | indent 8 }}
{{- end }}
{{- end }}
{{- end }}

View File

@ -71,17 +71,27 @@ spec:
alertmanagersUrl:
{{ toYaml .Values.thanosRuler.thanosRulerSpec.alertmanagersUrl | indent 4 }}
{{- end }}
{{- if .Values.thanosRuler.thanosRulerSpec.alertmanagersConfig }}
{{- if .Values.thanosRuler.thanosRulerSpec.alertmanagersConfig.existingSecret }}
alertmanagersConfig:
{{ toYaml .Values.thanosRuler.thanosRulerSpec.alertmanagersConfig | indent 4 }}
key: "{{.Values.thanosRuler.thanosRulerSpec.alertmanagersConfig.existingSecret.key }}"
name: "{{.Values.thanosRuler.thanosRulerSpec.alertmanagersConfig.existingSecret.name }}"
{{- else if .Values.thanosRuler.thanosRulerSpec.alertmanagersConfig.secret }}
alertmanagersConfig:
key: alertmanager-configs.yaml
name: {{ template "kube-prometheus-stack.thanosRuler.name" . }}
{{- end }}
{{- if .Values.thanosRuler.thanosRulerSpec.queryEndpoints }}
queryEndpoints:
{{ toYaml .Values.thanosRuler.thanosRulerSpec.queryEndpoints | indent 4 }}
{{- end }}
{{- if .Values.thanosRuler.thanosRulerSpec.queryConfig }}
{{- if .Values.thanosRuler.thanosRulerSpec.queryConfig.existingSecret }}
queryConfig:
{{ toYaml .Values.thanosRuler.thanosRulerSpec.queryConfig | indent 4 }}
key: "{{.Values.thanosRuler.thanosRulerSpec.queryConfig.existingSecret.key }}"
name: "{{.Values.thanosRuler.thanosRulerSpec.queryConfig.existingSecret.name }}"
{{- else if .Values.thanosRuler.thanosRulerSpec.queryConfig.secret }}
queryConfig:
key: query-configs.yaml
name: {{ template "kube-prometheus-stack.thanosRuler.name" . }}
{{- end }}
{{- if .Values.thanosRuler.thanosRulerSpec.resources }}
resources:
@ -98,17 +108,19 @@ spec:
storage:
{{ toYaml .Values.thanosRuler.thanosRulerSpec.storage | indent 4 }}
{{- end }}
{{- if .Values.thanosRuler.thanosRulerSpec.objectStorageConfig }}
{{- if .Values.thanosRuler.thanosRulerSpec.objectStorageConfig.existingSecret }}
objectStorageConfig:
{{ toYaml .Values.thanosRuler.thanosRulerSpec.objectStorageConfig | indent 4 }}
key: "{{.Values.thanosRuler.thanosRulerSpec.objectStorageConfig.existingSecret.key }}"
name: "{{.Values.thanosRuler.thanosRulerSpec.objectStorageConfig.existingSecret.name }}"
{{- else if .Values.thanosRuler.thanosRulerSpec.objectStorageConfig.secret }}
objectStorageConfig:
key: object-storage-configs.yaml
name: {{ template "kube-prometheus-stack.thanosRuler.name" . }}
{{- end }}
{{- if .Values.thanosRuler.thanosRulerSpec.labels }}
labels:
{{ toYaml .Values.thanosRuler.thanosRulerSpec.labels | indent 4 }}
{{- end }}
{{- if .Values.thanosRuler.thanosRulerSpec.objectStorageConfigFile }}
objectStorageConfigFile: {{ .Values.thanosRuler.thanosRulerSpec.objectStorageConfigFile }}
{{- end }}
{{- if .Values.thanosRuler.thanosRulerSpec.podMetadata }}
podMetadata:
{{ toYaml .Values.thanosRuler.thanosRulerSpec.podMetadata | indent 4 }}

View File

@ -0,0 +1,26 @@
{{- /*
Secret named by the "kube-prometheus-stack.thanosRuler.name" template,
rendered only when thanosRuler.enabled is set. It can carry up to three
chart-managed configuration documents, each under its own key:
  alertmanager-configs.yaml    from thanosRulerSpec.alertmanagersConfig.secret
  object-storage-configs.yaml  from thanosRulerSpec.objectStorageConfig.secret
  query-configs.yaml           from thanosRulerSpec.queryConfig.secret
A key is emitted only when the matching .secret is set and no .existingSecret
is configured for that section; the value is the YAML dump of .secret,
base64-encoded and quoted. When no section qualifies the Secret is still
rendered, with no data entries.
*/}}
{{- if .Values.thanosRuler.enabled }}
apiVersion: v1
kind: Secret
metadata:
name: {{ template "kube-prometheus-stack.thanosRuler.name" . }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ include "kube-prometheus-stack.thanosRuler.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
data:
{{- with .Values.thanosRuler.thanosRulerSpec.alertmanagersConfig }}
{{- if and .secret (not .existingSecret) }}
alertmanager-configs.yaml: {{ toYaml .secret | b64enc | quote }}
{{- end }}
{{- end }}
{{- with .Values.thanosRuler.thanosRulerSpec.objectStorageConfig }}
{{- if and .secret (not .existingSecret) }}
object-storage-configs.yaml: {{ toYaml .secret | b64enc | quote }}
{{- end }}
{{- end }}
{{- with .Values.thanosRuler.thanosRulerSpec.queryConfig }}
{{- if and .secret (not .existingSecret) }}
query-configs.yaml: {{ toYaml .secret | b64enc | quote }}
{{- end }}
{{- end }}
{{- end }}

View File

@ -35,15 +35,38 @@ spec:
bearerTokenFile: {{ .Values.thanosRuler.serviceMonitor.bearerTokenFile }}
{{- end }}
{{- if .Values.thanosRuler.serviceMonitor.tlsConfig }}
tlsConfig: {{ toYaml .Values.thanosRuler.serviceMonitor.tlsConfig | nindent 6 }}
tlsConfig: {{- toYaml .Values.thanosRuler.serviceMonitor.tlsConfig | nindent 6 }}
{{- end }}
path: "{{ trimSuffix "/" .Values.thanosRuler.thanosRulerSpec.routePrefix }}/metrics"
{{- if .Values.thanosRuler.serviceMonitor.metricRelabelings }}
metricRelabelings:
{{ tpl (toYaml .Values.thanosRuler.serviceMonitor.metricRelabelings | indent 6) . }}
{{- end }}
{{- if .Values.thanosRuler.serviceMonitor.relabelings }}
relabelings:
{{ toYaml .Values.thanosRuler.serviceMonitor.relabelings | indent 6 }}
{{- end }}
{{- if .Values.thanosRuler.serviceMonitor.metricRelabelings }}
metricRelabelings: {{- tpl (toYaml .Values.thanosRuler.serviceMonitor.metricRelabelings | nindent 6) . }}
{{- end }}
{{- if .Values.thanosRuler.serviceMonitor.relabelings }}
relabelings: {{- toYaml .Values.thanosRuler.serviceMonitor.relabelings | nindent 6 }}
{{- end }}
{{- /*
Extra scrape endpoints for the Thanos Ruler ServiceMonitor: one list entry per
item of thanosRuler.serviceMonitor.additionalEndpoints.

Inside the range "." is the current endpoint item and "$" is the chart root
context. "port" and "path" are always taken from the item. Every other field
is optional: the per-endpoint value is used when set, otherwise the
ServiceMonitor-wide value, and the field is omitted when neither is set.

metricRelabelings is passed through tpl with the root context ($), not the
range item, so templated relabelings can reference .Values/.Release and so
tpl receives a context that contains .Template.
*/}}
{{- range .Values.thanosRuler.serviceMonitor.additionalEndpoints }}
- port: {{ .port }}
{{- if or $.Values.thanosRuler.serviceMonitor.interval .interval }}
interval: {{ default $.Values.thanosRuler.serviceMonitor.interval .interval }}
{{- end }}
{{- if or $.Values.thanosRuler.serviceMonitor.proxyUrl .proxyUrl }}
proxyUrl: {{ default $.Values.thanosRuler.serviceMonitor.proxyUrl .proxyUrl }}
{{- end }}
{{- if or $.Values.thanosRuler.serviceMonitor.scheme .scheme }}
scheme: {{ default $.Values.thanosRuler.serviceMonitor.scheme .scheme }}
{{- end }}
{{- if or $.Values.thanosRuler.serviceMonitor.bearerTokenFile .bearerTokenFile }}
bearerTokenFile: {{ default $.Values.thanosRuler.serviceMonitor.bearerTokenFile .bearerTokenFile }}
{{- end }}
{{- if or $.Values.thanosRuler.serviceMonitor.tlsConfig .tlsConfig }}
tlsConfig: {{- default $.Values.thanosRuler.serviceMonitor.tlsConfig .tlsConfig | toYaml | nindent 6 }}
{{- end }}
path: {{ .path }}
{{- if or $.Values.thanosRuler.serviceMonitor.metricRelabelings .metricRelabelings }}
metricRelabelings: {{- tpl (default $.Values.thanosRuler.serviceMonitor.metricRelabelings .metricRelabelings | toYaml | nindent 6) $ }}
{{- end }}
{{- if or $.Values.thanosRuler.serviceMonitor.relabelings .relabelings }}
relabelings: {{- default $.Values.thanosRuler.serviceMonitor.relabelings .relabelings | toYaml | nindent 6 }}
{{- end }}
{{- end }}
{{- end }}

View File

@ -10,7 +10,7 @@ nameOverride: ""
##
namespaceOverride: ""
## Provide a k8s version to auto dashboard import script example: kubeTargetVersionOverride: 1.16.6
## Provide a k8s version to auto dashboard import script example: kubeTargetVersionOverride: 1.26.6
##
kubeTargetVersionOverride: ""
@ -42,7 +42,12 @@ defaultRules:
etcd: true
configReloaders: true
general: true
k8s: true
k8sContainerCpuUsageSecondsTotal: true
k8sContainerMemoryCache: true
k8sContainerMemoryRss: true
k8sContainerMemorySwap: true
k8sContainerResource: true
k8sPodOwner: true
kubeApiserverAvailability: true
kubeApiserverBurnrate: true
kubeApiserverHistogram: true
@ -70,6 +75,9 @@ defaultRules:
## Reduce app namespace alert scope
appNamespacesTarget: ".*"
## Set keep_firing_for for all alerts
keepFiringFor: ""
## Labels for default rules
labels: {}
## Annotations for default rules
@ -87,7 +95,12 @@ defaultRules:
etcd: {}
configReloaders: {}
general: {}
k8s: {}
k8sContainerCpuUsageSecondsTotal: {}
k8sContainerMemoryCache: {}
k8sContainerMemoryRss: {}
k8sContainerMemorySwap: {}
k8sContainerResource: {}
k8sPodOwner: {}
kubeApiserverAvailability: {}
kubeApiserverBurnrate: {}
kubeApiserverHistogram: {}
@ -117,7 +130,12 @@ defaultRules:
etcd: {}
configReloaders: {}
general: {}
k8s: {}
k8sContainerCpuUsageSecondsTotal: {}
k8sContainerMemoryCache: {}
k8sContainerMemoryRss: {}
k8sContainerMemorySwap: {}
k8sContainerResource: {}
k8sPodOwner: {}
kubeApiserverAvailability: {}
kubeApiserverBurnrate: {}
kubeApiserverHistogram: {}
@ -141,6 +159,8 @@ defaultRules:
prometheus: {}
prometheusOperator: {}
additionalAggregationLabels: []
## Prefix for runbook URLs. Use this to override the first part of the runbookURLs that is common to all rules.
runbookUrl: "https://runbooks.prometheus-operator.dev/runbooks"
@ -201,10 +221,30 @@ global:
# - "image-pull-secret"
windowsMonitoring:
## Deploys the windows-exporter and Windows-specific dashboards and rules
## Deploys the windows-exporter and Windows-specific dashboards and rules (job name must be 'windows-exporter')
enabled: false
## Job must match jobLabel in the PodMonitor/ServiceMonitor and is used for the rules
job: prometheus-windows-exporter
## Configuration for prometheus-windows-exporter
## ref: https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-windows-exporter
##
prometheus-windows-exporter:
## Enable ServiceMonitor and set Kubernetes label to use as a job label
##
prometheus:
monitor:
enabled: true
jobLabel: jobLabel
## Set job label to 'windows-exporter' as required by the default Prometheus rules and Grafana dashboards
##
podLabels:
jobLabel: windows-exporter
## Enable memory and container metrics as required by the default Prometheus rules and Grafana dashboards
##
config: |-
collectors:
enabled: '[defaults],memory,container'
## Configuration for alertmanager
## ref: https://prometheus.io/docs/alerting/alertmanager/
@ -440,11 +480,14 @@ alertmanager:
##
## Additional ports to open for Alertmanager service
##
additionalPorts: []
# additionalPorts:
# - name: authenticated
# - name: oauth-proxy
# port: 8081
# targetPort: 8081
# - name: oauth-metrics
# port: 8082
# targetPort: 8082
externalIPs: []
loadBalancerIP: ""
@ -561,6 +604,12 @@ alertmanager:
# replacement: $1
# action: replace
## Additional Endpoints
##
additionalEndpoints: []
# - port: oauth-metrics
# path: /metrics
## Settings affecting alertmanagerSpec
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#alertmanagerspec
##
@ -575,7 +624,7 @@ alertmanager:
image:
registry: quay.io
repository: prometheus/alertmanager
tag: v0.25.0
tag: v0.26.0
sha: ""
## If true then the user will be responsible to provide a secret with alertmanager configuration
@ -783,15 +832,19 @@ alertmanager:
containers: []
# containers:
# - name: oauth-proxy
# image: quay.io/oauth2-proxy/oauth2-proxy:v7.3.0
# image: quay.io/oauth2-proxy/oauth2-proxy:v7.5.1
# args:
# - --upstream=http://127.0.0.1:9093
# - --http-address=0.0.0.0:8081
# - --metrics-address=0.0.0.0:8082
# - ...
# ports:
# - containerPort: 8081
# name: oauth-proxy
# protocol: TCP
# - containerPort: 8082
# name: oauth-metrics
# protocol: TCP
# resources: {}
# Additional volumes on the output StatefulSet definition.
@ -840,6 +893,14 @@ alertmanager:
## be considered available. Defaults to 0 (pod will be considered available as soon as it is ready).
minReadySeconds: 0
## Additional configuration which is not covered by the properties above. (passed through tpl)
additionalConfig: {}
## Additional configuration which is not covered by the properties above.
## Useful, if you need advanced templating inside alertmanagerSpec.
## Otherwise, use alertmanager.alertmanagerSpec.additionalConfig (passed through tpl)
additionalConfigString: ""
## ExtraSecret can be used to store various data in an extra secret
## (use it for example to store hashed basic auth credentials)
extraSecret:
@ -1130,10 +1191,23 @@ kubelet:
namespace: kube-system
serviceMonitor:
## Attach metadata to discovered targets. Requires Prometheus v2.45 for endpoints created by the operator.
##
attachMetadata:
node: false
## Scrape interval. If not set, the Prometheus default scrape interval is used.
##
interval: ""
## If true, Prometheus uses (respects) labels provided by the exporter.
##
honorLabels: true
## If true, Prometheus ingests metrics with timestamp provided by exporter. If false, Prometheus ingests metrics with timestamp of scrape.
##
honorTimestamps: true
## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted.
##
sampleLimit: 0
@ -1881,6 +1955,11 @@ kube-state-metrics:
##
nodeExporter:
enabled: true
operatingSystems:
linux:
enabled: true
darwin:
enabled: true
## Configuration for prometheus-node-exporter subchart
##
@ -2099,7 +2178,7 @@ prometheusOperator:
## match labels used in selector
# matchLabels: {}
## Service account for Alertmanager to use.
## Service account for Prometheus Operator to use.
## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/
##
serviceAccount:
@ -2120,7 +2199,7 @@ prometheusOperator:
nodePortTls: 30443
## Additional ports to open for Prometheus service
## Additional ports to open for Prometheus operator service
## ref: https://kubernetes.io/docs/concepts/services-networking/service/#multi-port-services
##
additionalPorts: []
@ -2372,20 +2451,20 @@ prometheusOperator:
enableProbe: false
# resource config for prometheusConfigReloader
resources:
requests:
cpu: 200m
memory: 50Mi
limits:
cpu: 200m
memory: 50Mi
resources: {}
# requests:
# cpu: 200m
# memory: 50Mi
# limits:
# cpu: 200m
# memory: 50Mi
## Thanos side-car image when configured
##
thanosImage:
registry: quay.io
repository: thanos/thanos
tag: v0.31.0
tag: v0.32.5
sha: ""
## Set a Label Selector to filter watched prometheus and prometheusAgent
@ -2580,12 +2659,16 @@ prometheus:
##
type: ClusterIP
## Additional port to define in the Service
## Additional ports to open for Prometheus service
##
additionalPorts: []
# additionalPorts:
# - name: authenticated
# - name: oauth-proxy
# port: 8081
# targetPort: 8081
# - name: oauth-metrics
# port: 8082
# targetPort: 8082
## Consider that all endpoints are considered "ready" even if the Pods themselves are not
## Ref: https://kubernetes.io/docs/reference/kubernetes-api/service-resources/service-v1/#ServiceSpec
@ -2828,6 +2911,12 @@ prometheus:
# replacement: $1
# action: replace
## Additional Endpoints
##
additionalEndpoints: []
# - port: oauth-metrics
# path: /metrics
## Settings affecting prometheusSpec
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#prometheusspec
##
@ -2894,7 +2983,7 @@ prometheus:
image:
registry: quay.io
repository: prometheus/prometheus
tag: v2.45.0
tag: v2.48.0
sha: ""
## Tolerations for use with node taints
@ -3374,22 +3463,42 @@ prometheus:
# secrets: |
# - resourceName: "projects/$PROJECT_ID/secrets/testsecret/versions/latest"
# fileName: "objstore.yaml"
# objectStorageConfigFile: /var/secrets/object-store.yaml
## ObjectStorageConfig configures object storage in Thanos.
# objectStorageConfig:
# # use existing secret, if configured, objectStorageConfig.secret will not be used
# existingSecret: {}
# # name: ""
# # key: ""
# # will render objectStorageConfig secret data and configure it to be used by Thanos custom resource,
# # ignored when prometheusSpec.thanos.objectStorageConfig.existingSecret is set
# # https://thanos.io/tip/thanos/storage.md/#s3
# secret: {}
# # type: S3
# # config:
# # bucket: ""
# # endpoint: ""
# # region: ""
# # access_key: ""
# # secret_key: ""
## Containers allows injecting additional containers. This is meant to allow adding an authentication proxy to a Prometheus pod.
## if using proxy extraContainer update targetPort with proxy container port
containers: []
# containers:
# - name: oauth-proxy
# image: quay.io/oauth2-proxy/oauth2-proxy:v7.3.0
# image: quay.io/oauth2-proxy/oauth2-proxy:v7.5.1
# args:
# - --upstream=http://127.0.0.1:9093
# - --upstream=http://127.0.0.1:9090
# - --http-address=0.0.0.0:8081
# - --metrics-address=0.0.0.0:8082
# - ...
# ports:
# - containerPort: 8081
# name: oauth-proxy
# protocol: TCP
# - containerPort: 8082
# name: oauth-metrics
# protocol: TCP
# resources: {}
## InitContainers allows injecting additional initContainers. This is meant to allow doing some changes
@ -3490,6 +3599,14 @@ prometheus:
## See https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#prometheustracingconfig
tracingConfig: {}
## Additional configuration which is not covered by the properties above. (passed through tpl)
additionalConfig: {}
## Additional configuration which is not covered by the properties above.
## Useful if you need advanced templating inside prometheusSpec.
## Otherwise, use prometheus.prometheusSpec.additionalConfig (passed through tpl)
additionalConfigString: ""
additionalRulesForClusterRole: []
# - apiGroups: [ "" ]
# resources:
@ -3585,6 +3702,25 @@ prometheus:
##
# serverName: ""
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
# metricRelabelings: []
# - action: keep
# regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
# sourceLabels: [__name__]
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
# relabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
# separator: ;
# regex: ^(.*)$
# targetLabel: nodename
# replacement: $1
# action: replace
additionalPodMonitors: []
## Name of the PodMonitor to create
##
@ -3792,6 +3928,12 @@ thanosRuler:
# replacement: $1
# action: replace
## Additional Endpoints
##
additionalEndpoints: []
# - port: oauth-metrics
# path: /metrics
## Settings affecting thanosRulerSpec
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#thanosrulerspec
##
@ -3806,7 +3948,7 @@ thanosRuler:
image:
registry: quay.io
repository: thanos/thanos
tag: v0.31.0
tag: v0.32.5
sha: ""
## Namespaces to be selected for PrometheusRules discovery.
@ -3876,16 +4018,24 @@ thanosRuler:
## AlertmanagerConfig define configuration for connecting to alertmanager.
## Only available with Thanos v0.10.0 and higher. Maps to the alertmanagers.config Thanos Ruler arg.
alertmanagersConfig: {}
# - api_version: v2
# http_config:
# basic_auth:
# username: some_user
# password: some_pass
# static_configs:
# - alertmanager.thanos.io
# scheme: http
# timeout: 10s
alertmanagersConfig:
# use existing secret, if configured, alertmanagersConfig.secret will not be used
existingSecret: {}
# name: ""
# key: ""
# will render alertmanagersConfig secret data and configure it to be used by Thanos Ruler custom resource, ignored when alertmanagersConfig.existingSecret is set
# https://thanos.io/tip/components/rule.md/#alertmanager
secret: {}
# alertmanagers:
# - api_version: v2
# http_config:
# basic_auth:
# username: some_user
# password: some_pass
# static_configs:
# - alertmanager.thanos.io
# scheme: http
# timeout: 10s
## DEPRECATED. Define URLs to send alerts to Alertmanager. For Thanos v0.10.0 and higher, alertmanagersConfig should be used instead.
## Note: this field will be ignored if alertmanagersConfig is specified. Maps to the alertmanagers.url Thanos Ruler arg.
@ -3900,13 +4050,22 @@ thanosRuler:
##
routePrefix: /
## ObjectStorageConfig configures object storage in Thanos. Alternative to
## ObjectStorageConfigFile, and lower order priority.
objectStorageConfig: {}
## ObjectStorageConfigFile specifies the path of the object storage configuration file.
## When used alongside with ObjectStorageConfig, ObjectStorageConfigFile takes precedence.
objectStorageConfigFile: ""
## ObjectStorageConfig configures object storage in Thanos
objectStorageConfig:
# use existing secret, if configured, objectStorageConfig.secret will not be used
existingSecret: {}
# name: ""
# key: ""
# will render objectStorageConfig secret data and configure it to be used by Thanos Ruler custom resource, ignored when objectStorageConfig.existingSecret is set
# https://thanos.io/tip/thanos/storage.md/#s3
secret: {}
# type: S3
# config:
# bucket: ""
# endpoint: ""
# region: ""
# access_key: ""
# secret_key: ""
## QueryEndpoints defines Thanos querier endpoints from which to query metrics.
## Maps to the --query flag of thanos ruler.
@ -3914,7 +4073,22 @@ thanosRuler:
## Define configuration for connecting to thanos query instances. If this is defined, the queryEndpoints field will be ignored.
## Maps to the query.config CLI argument. Only available with thanos v0.11.0 and higher.
queryConfig: {}
queryConfig:
# use existing secret, if configured, queryConfig.secret will not be used
existingSecret: {}
# name: ""
# key: ""
# render queryConfig secret data and configure it to be used by Thanos Ruler custom resource, ignored when queryConfig.existingSecret is set
# https://thanos.io/tip/components/rule.md/#query-api
secret: {}
# - http_config:
# basic_auth:
# username: some_user
# password: some_pass
# static_configs:
# - URL
# scheme: http
# timeout: 10s
## Labels configure the external label pairs to ThanosRuler. A default replica
## label `thanos_ruler_replica` will be always added as a label with the value

View File

@ -528,7 +528,7 @@
"steppedLine": false,
"targets": [
{
"expr": "cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile{verb=\"read\", cluster=\"$cluster\"}",
"expr": "cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile{verb=\"read\", cluster=\"$cluster\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ resource }}",
@ -893,7 +893,7 @@
"steppedLine": false,
"targets": [
{
"expr": "cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile{verb=\"write\", cluster=\"$cluster\"}",
"expr": "cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile{verb=\"write\", cluster=\"$cluster\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ resource }}",
@ -1473,6 +1473,7 @@
"list": [
{
"current": {
"selected": true,
"text": "default",
"value": "default"
},

View File

@ -1615,6 +1615,7 @@
},
{
"current": {
"selected": true,
"text": "default",
"value": "default"
},

View File

@ -943,6 +943,7 @@
"list": [
{
"current": {
"selected": true,
"text": "default",
"value": "default"
},

View File

@ -52,7 +52,6 @@
"expr": "cluster:node_cpu:ratio_rate5m{cluster=\"$cluster\"}",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
"refId": "A"
}
],
@ -131,7 +130,6 @@
"expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
"refId": "A"
}
],
@ -210,7 +208,6 @@
"expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
"refId": "A"
}
],
@ -289,7 +286,6 @@
"expr": "1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(node_memory_MemTotal_bytes{job=\"node-exporter\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
"refId": "A"
}
],
@ -368,7 +364,6 @@
"expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
"refId": "A"
}
],
@ -447,7 +442,6 @@
"expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
"refId": "A"
}
],
@ -536,7 +530,6 @@
{
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
"legendLink": null
}
@ -766,7 +759,6 @@
"expr": "sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
},
@ -774,7 +766,6 @@
"expr": "count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B"
},
@ -782,7 +773,6 @@
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C"
},
@ -790,7 +780,6 @@
"expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D"
},
@ -798,7 +787,6 @@
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E"
},
@ -806,7 +794,6 @@
"expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "F"
},
@ -814,7 +801,6 @@
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "G"
}
@ -905,7 +891,6 @@
{
"expr": "sum(container_memory_rss{job=\"kubelet\", cluster=\"$cluster\", container!=\"\"}) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
"legendLink": null
}
@ -1135,7 +1120,6 @@
"expr": "sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
},
@ -1143,7 +1127,6 @@
"expr": "count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B"
},
@ -1151,7 +1134,6 @@
"expr": "sum(container_memory_rss{job=\"kubelet\", cluster=\"$cluster\", container!=\"\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C"
},
@ -1159,7 +1141,6 @@
"expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D"
},
@ -1167,7 +1148,6 @@
"expr": "sum(container_memory_rss{job=\"kubelet\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E"
},
@ -1175,7 +1155,6 @@
"expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "F"
},
@ -1183,7 +1162,6 @@
"expr": "sum(container_memory_rss{job=\"kubelet\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "G"
}
@ -1399,7 +1377,6 @@
"expr": "sum(irate(container_network_receive_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
},
@ -1407,7 +1384,6 @@
"expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B"
},
@ -1415,7 +1391,6 @@
"expr": "sum(irate(container_network_receive_packets_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C"
},
@ -1423,7 +1398,6 @@
"expr": "sum(irate(container_network_transmit_packets_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D"
},
@ -1431,7 +1405,6 @@
"expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E"
},
@ -1439,7 +1412,6 @@
"expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "F"
}
@ -1530,7 +1502,6 @@
{
"expr": "sum(irate(container_network_receive_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
"legendLink": null
}
@ -1608,7 +1579,6 @@
{
"expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
"legendLink": null
}
@ -1698,7 +1668,6 @@
{
"expr": "avg(irate(container_network_receive_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
"legendLink": null
}
@ -1776,7 +1745,6 @@
{
"expr": "avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
"legendLink": null
}
@ -1866,7 +1834,6 @@
{
"expr": "sum(irate(container_network_receive_packets_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
"legendLink": null
}
@ -1944,7 +1911,6 @@
{
"expr": "sum(irate(container_network_transmit_packets_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
"legendLink": null
}
@ -2034,7 +2000,6 @@
{
"expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
"legendLink": null
}
@ -2112,7 +2077,6 @@
{
"expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
"legendLink": null
}
@ -2203,7 +2167,6 @@
{
"expr": "ceil(sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", container!=\"\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
"legendLink": null
}
@ -2281,7 +2244,6 @@
{
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", container!=\"\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
"legendLink": null
}
@ -2383,7 +2345,7 @@
"colorMode": null,
"colors": [ ],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": -1,
"decimals": 3,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
@ -2391,14 +2353,14 @@
"pattern": "Value #A",
"thresholds": [ ],
"type": "number",
"unit": "short"
"unit": "iops"
},
{
"alias": "IOPS(Writes)",
"colorMode": null,
"colors": [ ],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": -1,
"decimals": 3,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
@ -2406,14 +2368,14 @@
"pattern": "Value #B",
"thresholds": [ ],
"type": "number",
"unit": "short"
"unit": "iops"
},
{
"alias": "IOPS(Reads + Writes)",
"colorMode": null,
"colors": [ ],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": -1,
"decimals": 3,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
@ -2421,7 +2383,7 @@
"pattern": "Value #C",
"thresholds": [ ],
"type": "number",
"unit": "short"
"unit": "iops"
},
{
"alias": "Throughput(Read)",
@ -2500,7 +2462,6 @@
"expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
},
@ -2508,7 +2469,6 @@
"expr": "sum by(namespace) (rate(container_fs_writes_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B"
},
@ -2516,7 +2476,6 @@
"expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C"
},
@ -2524,7 +2483,6 @@
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D"
},
@ -2532,7 +2490,6 @@
"expr": "sum by(namespace) (rate(container_fs_writes_bytes_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E"
},
@ -2540,7 +2497,6 @@
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "F"
}

View File

@ -52,7 +52,6 @@
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
"refId": "A"
}
],
@ -131,7 +130,6 @@
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
"refId": "A"
}
],
@ -210,7 +208,6 @@
"expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
"refId": "A"
}
],
@ -289,7 +286,6 @@
"expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
"refId": "A"
}
],
@ -401,21 +397,18 @@
{
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
},
{
"expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "quota - requests",
"legendLink": null
},
{
"expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "quota - limits",
"legendLink": null
}
@ -615,7 +608,6 @@
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
},
@ -623,7 +615,6 @@
"expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B"
},
@ -631,7 +622,6 @@
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C"
},
@ -639,7 +629,6 @@
"expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D"
},
@ -647,7 +636,6 @@
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E"
}
@ -761,21 +749,18 @@
{
"expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
},
{
"expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "quota - requests",
"legendLink": null
},
{
"expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "quota - limits",
"legendLink": null
}
@ -1020,7 +1005,6 @@
"expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
},
@ -1028,7 +1012,6 @@
"expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B"
},
@ -1036,7 +1019,6 @@
"expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C"
},
@ -1044,7 +1026,6 @@
"expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D"
},
@ -1052,7 +1033,6 @@
"expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E"
},
@ -1060,7 +1040,6 @@
"expr": "sum(container_memory_rss{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "F"
},
@ -1068,7 +1047,6 @@
"expr": "sum(container_memory_cache{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "G"
},
@ -1076,7 +1054,6 @@
"expr": "sum(container_memory_swap{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "H"
}
@ -1292,7 +1269,6 @@
"expr": "sum(irate(container_network_receive_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
},
@ -1300,7 +1276,6 @@
"expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B"
},
@ -1308,7 +1283,6 @@
"expr": "sum(irate(container_network_receive_packets_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C"
},
@ -1316,7 +1290,6 @@
"expr": "sum(irate(container_network_transmit_packets_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D"
},
@ -1324,7 +1297,6 @@
"expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E"
},
@ -1332,7 +1304,6 @@
"expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "F"
}
@ -1423,7 +1394,6 @@
{
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
@ -1501,7 +1471,6 @@
{
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
@ -1591,7 +1560,6 @@
{
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
@ -1669,7 +1637,6 @@
{
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
@ -1759,7 +1726,6 @@
{
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
@ -1837,7 +1803,6 @@
{
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
@ -1928,7 +1893,6 @@
{
"expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{container!=\"\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
@ -2006,7 +1970,6 @@
{
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{container!=\"\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
@ -2108,7 +2071,7 @@
"colorMode": null,
"colors": [ ],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": -1,
"decimals": 3,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
@ -2116,14 +2079,14 @@
"pattern": "Value #A",
"thresholds": [ ],
"type": "number",
"unit": "short"
"unit": "iops"
},
{
"alias": "IOPS(Writes)",
"colorMode": null,
"colors": [ ],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": -1,
"decimals": 3,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
@ -2131,14 +2094,14 @@
"pattern": "Value #B",
"thresholds": [ ],
"type": "number",
"unit": "short"
"unit": "iops"
},
{
"alias": "IOPS(Reads + Writes)",
"colorMode": null,
"colors": [ ],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": -1,
"decimals": 3,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
@ -2146,7 +2109,7 @@
"pattern": "Value #C",
"thresholds": [ ],
"type": "number",
"unit": "short"
"unit": "iops"
},
{
"alias": "Throughput(Read)",
@ -2225,7 +2188,6 @@
"expr": "sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
},
@ -2233,7 +2195,6 @@
"expr": "sum by(pod) (rate(container_fs_writes_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B"
},
@ -2241,7 +2202,6 @@
"expr": "sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C"
},
@ -2249,7 +2209,6 @@
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D"
},
@ -2257,7 +2216,6 @@
"expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E"
},
@ -2265,7 +2223,6 @@
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "F"
}

View File

@ -62,14 +62,12 @@
{
"expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "max capacity",
"legendLink": null
},
{
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
@ -269,7 +267,6 @@
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
},
@ -277,7 +274,6 @@
"expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B"
},
@ -285,7 +281,6 @@
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C"
},
@ -293,7 +288,6 @@
"expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D"
},
@ -301,7 +295,6 @@
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E"
}
@ -404,14 +397,12 @@
{
"expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "max capacity",
"legendLink": null
},
{
"expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
@ -656,7 +647,6 @@
"expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
},
@ -664,7 +654,6 @@
"expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B"
},
@ -672,7 +661,6 @@
"expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C"
},
@ -680,7 +668,6 @@
"expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D"
},
@ -688,7 +675,6 @@
"expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E"
},
@ -696,7 +682,6 @@
"expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "F"
},
@ -704,7 +689,6 @@
"expr": "sum(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "G"
},
@ -712,7 +696,6 @@
"expr": "sum(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "H"
}

View File

@ -69,21 +69,18 @@
{
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}) by (container)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{container}}",
"legendLink": null
},
{
"expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "requests",
"legendLink": null
},
{
"expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "limits",
"legendLink": null
}
@ -173,7 +170,6 @@
{
"expr": "sum(increase(container_cpu_cfs_throttled_periods_total{job=\"kubelet\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container) /sum(increase(container_cpu_cfs_periods_total{job=\"kubelet\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{container}}",
"legendLink": null
}
@ -382,7 +378,6 @@
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
},
@ -390,7 +385,6 @@
"expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B"
},
@ -398,7 +392,6 @@
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C"
},
@ -406,7 +399,6 @@
"expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D"
},
@ -414,7 +406,6 @@
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E"
}
@ -526,21 +517,18 @@
{
"expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{container}}",
"legendLink": null
},
{
"expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "requests",
"legendLink": null
},
{
"expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "limits",
"legendLink": null
}
@ -785,7 +773,6 @@
"expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
},
@ -793,7 +780,6 @@
"expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B"
},
@ -801,7 +787,6 @@
"expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C"
},
@ -809,7 +794,6 @@
"expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D"
},
@ -817,7 +801,6 @@
"expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E"
},
@ -825,7 +808,6 @@
"expr": "sum(container_memory_rss{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "F"
},
@ -833,7 +815,6 @@
"expr": "sum(container_memory_cache{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "G"
},
@ -841,7 +822,6 @@
"expr": "sum(container_memory_swap{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "H"
}
@ -932,7 +912,6 @@
{
"expr": "sum(irate(container_network_receive_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
@ -1010,7 +989,6 @@
{
"expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
@ -1100,7 +1078,6 @@
{
"expr": "sum(irate(container_network_receive_packets_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
@ -1178,7 +1155,6 @@
{
"expr": "sum(irate(container_network_transmit_packets_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
@ -1268,7 +1244,6 @@
{
"expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
@ -1346,7 +1321,6 @@
{
"expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
@ -1437,14 +1411,12 @@
{
"expr": "ceil(sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Reads",
"legendLink": null
},
{
"expr": "ceil(sum by(pod) (rate(container_fs_writes_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Writes",
"legendLink": null
}
@ -1522,14 +1494,12 @@
{
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Reads",
"legendLink": null
},
{
"expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Writes",
"legendLink": null
}
@ -1620,7 +1590,6 @@
{
"expr": "ceil(sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{container}}",
"legendLink": null
}
@ -1698,7 +1667,6 @@
{
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{container}}",
"legendLink": null
}
@ -1800,7 +1768,7 @@
"colorMode": null,
"colors": [ ],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": -1,
"decimals": 3,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
@ -1808,14 +1776,14 @@
"pattern": "Value #A",
"thresholds": [ ],
"type": "number",
"unit": "short"
"unit": "iops"
},
{
"alias": "IOPS(Writes)",
"colorMode": null,
"colors": [ ],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": -1,
"decimals": 3,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
@ -1823,14 +1791,14 @@
"pattern": "Value #B",
"thresholds": [ ],
"type": "number",
"unit": "short"
"unit": "iops"
},
{
"alias": "IOPS(Reads + Writes)",
"colorMode": null,
"colors": [ ],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": -1,
"decimals": 3,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
@ -1838,7 +1806,7 @@
"pattern": "Value #C",
"thresholds": [ ],
"type": "number",
"unit": "short"
"unit": "iops"
},
{
"alias": "Throughput(Read)",
@ -1917,7 +1885,6 @@
"expr": "sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
},
@ -1925,7 +1892,6 @@
"expr": "sum by(container) (rate(container_fs_writes_total{job=\"kubelet\",device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B"
},
@ -1933,7 +1899,6 @@
"expr": "sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C"
},
@ -1941,7 +1906,6 @@
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D"
},
@ -1949,7 +1913,6 @@
"expr": "sum by(container) (rate(container_fs_writes_bytes_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E"
},
@ -1957,7 +1920,6 @@
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "F"
}

View File

@ -50,7 +50,6 @@
{
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
@ -250,7 +249,6 @@
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
},
@ -258,7 +256,6 @@
"expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B"
},
@ -266,7 +263,6 @@
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C"
},
@ -274,7 +270,6 @@
"expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D"
},
@ -282,7 +277,6 @@
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E"
}
@ -373,7 +367,6 @@
{
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
@ -573,7 +566,6 @@
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
},
@ -581,7 +573,6 @@
"expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B"
},
@ -589,7 +580,6 @@
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C"
},
@ -597,7 +587,6 @@
"expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D"
},
@ -605,7 +594,6 @@
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E"
}
@ -821,7 +809,6 @@
"expr": "(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
},
@ -829,7 +816,6 @@
"expr": "(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B"
},
@ -837,7 +823,6 @@
"expr": "(sum(irate(container_network_receive_packets_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C"
},
@ -845,7 +830,6 @@
"expr": "(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D"
},
@ -853,7 +837,6 @@
"expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E"
},
@ -861,7 +844,6 @@
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "F"
}
@ -952,7 +934,6 @@
{
"expr": "(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
@ -1030,7 +1011,6 @@
{
"expr": "(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
@ -1120,7 +1100,6 @@
{
"expr": "(avg(irate(container_network_receive_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
@ -1198,7 +1177,6 @@
{
"expr": "(avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
@ -1288,7 +1266,6 @@
{
"expr": "(sum(irate(container_network_receive_packets_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
@ -1366,7 +1343,6 @@
{
"expr": "(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
@ -1456,7 +1432,6 @@
{
"expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}
@ -1534,7 +1509,6 @@
{
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"legendLink": null
}

View File

@ -73,21 +73,18 @@
{
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}} - {{workload_type}}",
"legendLink": null
},
{
"expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "quota - requests",
"legendLink": null
},
{
"expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "quota - limits",
"legendLink": null
}
@ -317,7 +314,6 @@
"expr": "count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload, workload_type)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
},
@ -325,7 +321,6 @@
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B"
},
@ -333,7 +328,6 @@
"expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C"
},
@ -341,7 +335,6 @@
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D"
},
@ -349,7 +342,6 @@
"expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E"
},
@ -357,7 +349,6 @@
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "F"
}
@ -471,21 +462,18 @@
{
"expr": "sum(\n container_memory_working_set_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}} - {{workload_type}}",
"legendLink": null
},
{
"expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "quota - requests",
"legendLink": null
},
{
"expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "quota - limits",
"legendLink": null
}
@ -715,7 +703,6 @@
"expr": "count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload, workload_type)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
},
@ -723,7 +710,6 @@
"expr": "sum(\n container_memory_working_set_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B"
},
@ -731,7 +717,6 @@
"expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C"
},
@ -739,7 +724,6 @@
"expr": "sum(\n container_memory_working_set_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D"
},
@ -747,7 +731,6 @@
"expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E"
},
@ -755,7 +738,6 @@
"expr": "sum(\n container_memory_working_set_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "F"
}
@ -986,7 +968,6 @@
"expr": "(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
},
@ -994,7 +975,6 @@
"expr": "(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B"
},
@ -1002,7 +982,6 @@
"expr": "(sum(irate(container_network_receive_packets_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C"
},
@ -1010,7 +989,6 @@
"expr": "(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D"
},
@ -1018,7 +996,6 @@
"expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E"
},
@ -1026,7 +1003,6 @@
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "F"
}
@ -1117,7 +1093,6 @@
{
"expr": "(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
"legendLink": null
}
@ -1195,7 +1170,6 @@
{
"expr": "(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
"legendLink": null
}
@ -1285,7 +1259,6 @@
{
"expr": "(avg(irate(container_network_receive_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
"legendLink": null
}
@ -1363,7 +1336,6 @@
{
"expr": "(avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
"legendLink": null
}
@ -1453,7 +1425,6 @@
{
"expr": "(sum(irate(container_network_receive_packets_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
"legendLink": null
}
@ -1531,7 +1502,6 @@
{
"expr": "(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
"legendLink": null
}
@ -1621,7 +1591,6 @@
{
"expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
"legendLink": null
}
@ -1699,7 +1668,6 @@
{
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
"legendLink": null
}

View File

@ -1889,6 +1889,7 @@
"list": [
{
"current": {
"selected": true,
"text": "default",
"value": "default"
},

View File

@ -1133,6 +1133,7 @@
"list": [
{
"current": {
"selected": true,
"text": "default",
"value": "default"
},

View File

@ -1345,6 +1345,7 @@
"list": [
{
"current": {
"selected": true,
"text": "default",
"value": "default"
},

View File

@ -394,6 +394,7 @@
"list": [
{
"current": {
"selected": true,
"text": "default",
"value": "default"
},

View File

@ -899,6 +899,7 @@
"list": [
{
"current": {
"selected": true,
"text": "default",
"value": "default"
},

View File

@ -1012,6 +1012,7 @@
"list": [
{
"current": {
"selected": true,
"text": "default",
"value": "default"
},

View File

@ -877,6 +877,7 @@
"list": [
{
"current": {
"selected": true,
"text": "default",
"value": "default"
},

View File

@ -1057,6 +1057,7 @@
"list": [
{
"current": {
"selected": true,
"text": "default",
"value": "default"
},

View File

@ -18,8 +18,8 @@
"subdir": "contrib/mixin"
}
},
"version": "5a54fe6dd1740d32ac90cb0623970464fb582214",
"sum": "GdePvMDfLQcVhwzk/Ephi/jC27ywGObLB5t0eC0lXd4="
"version": "62b772c3214c9ee75f75fa783646c5694c42f69f",
"sum": "xuUBd2vqF7asyVDe5CE08uPT/RxAdy8O75EjFJoMXXU="
},
{
"source": {
@ -51,6 +51,16 @@
"version": "a1d61cce1da59c71409b99b5c7568511fec661ea",
"sum": "gCtR9s/4D5fxU9aKXg0Bru+/njZhA0YjLjPiASc61FM="
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet.git",
"subdir": "gen/grafonnet-v10.0.0"
}
},
"version": "bb2afaffbcefeae1035cd691ab06a486e0022002",
"sum": "gj/20VIGucG2vDGjG7YdHLC4yUUfrpuaneUYaRmymOM="
},
{
"source": {
"git": {
@ -58,8 +68,28 @@
"subdir": "grafana-builder"
}
},
"version": "62aec8403a5c38d5dc97ba596703753289b1c33b",
"sum": "xEFMv4+ObwP5L1Wu0XK5agWci4AJzNApys6iKAQxLlQ="
"version": "32685d75e4ae753e06ab3bea13df9d59bb5da46a",
"sum": "VmOxvg9FuY9UYr3lN6ZJe2HhuIErJoWimPybQr3S3yQ="
},
{
"source": {
"git": {
"remote": "https://github.com/jsonnet-libs/docsonnet.git",
"subdir": "doc-util"
}
},
"version": "503e5c8fe96d6b55775037713ac10b184709ad93",
"sum": "BY4u0kLF3Qf/4IB4HnX9S5kEQIpHb4MUrppp6WLDtlU="
},
{
"source": {
"git": {
"remote": "https://github.com/jsonnet-libs/xtd.git",
"subdir": ""
}
},
"version": "c1a315a7dbead0335a5e0486acc5583395b22a24",
"sum": "UVdL+uuFI8BSQgLfMJEJk2WDKsQXNT3dRHcr2Ti9rLI="
},
{
"source": {
@ -68,8 +98,8 @@
"subdir": ""
}
},
"version": "a10227e04218679b5e0e1d50784a4251bf3bdaf3",
"sum": "bpHFTDHKyY6ESLCtRqpzEQTWDZw3AiYC8fyuZX+KDWE="
"version": "2dbe4f9625a811b8b89f0495e74509c74779da82",
"sum": "Fe7bN9E6qeKNUdENjQvYttgf4S1DDqXRVB80wdmQgHQ="
},
{
"source": {
@ -78,7 +108,7 @@
"subdir": "jsonnet/kube-state-metrics"
}
},
"version": "e8272ced472cb99ef1e2e2dc0e9ff481ae381fdb",
"version": "98b38ba9bbfdff27b359c58adecab30cc1311a78",
"sum": "+dOzAK+fwsFf97uZpjcjTcEJEC1H8hh/j8f5uIQK/5g="
},
{
@ -88,7 +118,7 @@
"subdir": "jsonnet/kube-state-metrics-mixin"
}
},
"version": "e8272ced472cb99ef1e2e2dc0e9ff481ae381fdb",
"version": "98b38ba9bbfdff27b359c58adecab30cc1311a78",
"sum": "qclI7LwucTjBef3PkGBkKxF0mfZPbHnn4rlNWKGtR4c="
},
{
@ -98,8 +128,8 @@
"subdir": "jsonnet/kube-prometheus"
}
},
"version": "4b5b94347dd71b3649fef612ab3b8cf237ac48b9",
"sum": "8AeC579AWxP6VzLTxQ/ccIrwOY0G782ZceLlWmOL5/o="
"version": "80ab54b66a88cd40fc935d17abbd7b50b12cc3f7",
"sum": "w35hpzjA5b+xr9dXnpudKRsdTheO9YO1SESoG4oyyL8="
},
{
"source": {
@ -108,7 +138,7 @@
"subdir": "jsonnet/mixin"
}
},
"version": "f7edae17a94c25be7ef70cee32d94d5a23f1c8b0",
"version": "1d0006317b0d4ac5753ee4c189a36918de7c872b",
"sum": "n3flMIzlADeyygb0uipZ4KPp2uNSjdtkrwgHjTC7Ca4=",
"name": "prometheus-operator-mixin"
},
@ -119,8 +149,8 @@
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "f7edae17a94c25be7ef70cee32d94d5a23f1c8b0",
"sum": "LLGbS2uangsA5enNpZKxwdCAPZnO1Bj+W+o8Esk0QLw="
"version": "1d0006317b0d4ac5753ee4c189a36918de7c872b",
"sum": "IGuHwz77oTKx9Vi1dnTng/RBV/QQ2YfAdB+WPqK/w5g="
},
{
"source": {
@ -129,8 +159,8 @@
"subdir": "doc/alertmanager-mixin"
}
},
"version": "6fe1a24df07eed6f6818abd500708040beee7d7b",
"sum": "1d7ZKYArJKacAWXLUz0bRC1uOkozee/PPw97/W5zGhc=",
"version": "4494abfce419d1bbd3cb1a2c0b6584da88ac9b64",
"sum": "IpF46ZXsm+0wJJAPtAre8+yxTNZA57mBqGpBP/r7/kw=",
"name": "alertmanager"
},
{
@ -140,8 +170,8 @@
"subdir": "docs/node-mixin"
}
},
"version": "381f32b1c5943afb35940b88c45c3fa4bf5fc1de",
"sum": "By6n6U10hYDogUsyhsaKZehbhzxBZZobJloiKyKadgM="
"version": "12f1744e799e04373c7a29b42bf8b8a332c82790",
"sum": "QZwFBpulndqo799gkR5rP2/WdcQKQkNnaBwhaOI8Jeg="
},
{
"source": {
@ -150,8 +180,8 @@
"subdir": "documentation/prometheus-mixin"
}
},
"version": "c579144f66b73de12f06981c99288bf03538d3ce",
"sum": "8OngT76gVXOUROOOeP9yTe6E/dn+2D2J34Dn690QCG0=",
"version": "965e603fa792bca0900ac76eb45ae84c81af1cdf",
"sum": "rNvddVTMNfaguOGzEGoeKjUsfhlXJBUImC+SIFNNCiM=",
"name": "prometheus"
},
{
@ -161,8 +191,9 @@
"subdir": "config/crd/bases"
}
},
"version": "40a9909aba5daff05e36f7c7230c5b588773dcdf",
"sum": "L3lljFFoFB+nhXnyo8Yl1hKqe60nhHXY0IZCO3H2iVk="
"version": "551856d42dff02ec38c5b0ea6a2d99c4cb127e82",
"sum": "bY/Pcrrbynguq8/HaI88cQ3B2hLv/xc+76QILY7IL+g=",
"name": "pyrra"
},
{
"source": {
@ -171,8 +202,8 @@
"subdir": "mixin"
}
},
"version": "8fcd30ffcedf9e2728518dc2970d070d4c301302",
"sum": "WhheqsiX0maUXByZFsb9xhCEsGXK2955bPmPPf1x+Cs=",
"version": "9d6f82e55d13c162c00620045f109dbff5cb9344",
"sum": "HhSSbGGCNHCMy1ee5jElYDm0yS9Vesa7QB2/SHKdjsY=",
"name": "thanos-mixin"
}
],

View File

@ -7,7 +7,7 @@
"app.kubernetes.io/instance": "main",
"app.kubernetes.io/name": "alertmanager",
"app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "0.25.0",
"app.kubernetes.io/version": "0.26.0",
"prometheus": "k8s",
"role": "alert-rules"
},
@ -52,7 +52,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerfailedtosendalerts",
"summary": "An Alertmanager instance failed to send notifications."
},
"expr": "(\n rate(alertmanager_notifications_failed_total{job=\"alertmanager-main\",namespace=\"monitoring\"}[5m])\n/\n rate(alertmanager_notifications_total{job=\"alertmanager-main\",namespace=\"monitoring\"}[5m])\n)\n> 0.01\n",
"expr": "(\n rate(alertmanager_notifications_failed_total{job=\"alertmanager-main\",namespace=\"monitoring\"}[5m])\n/\n ignoring (reason) group_left rate(alertmanager_notifications_total{job=\"alertmanager-main\",namespace=\"monitoring\"}[5m])\n)\n> 0.01\n",
"for": "5m",
"labels": {
"severity": "warning"
@ -65,7 +65,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts",
"summary": "All Alertmanager instances in a cluster failed to send notifications to a critical integration."
},
"expr": "min by (namespace,service, integration) (\n rate(alertmanager_notifications_failed_total{job=\"alertmanager-main\",namespace=\"monitoring\", integration=~`.*`}[5m])\n/\n rate(alertmanager_notifications_total{job=\"alertmanager-main\",namespace=\"monitoring\", integration=~`.*`}[5m])\n)\n> 0.01\n",
"expr": "min by (namespace,service, integration) (\n rate(alertmanager_notifications_failed_total{job=\"alertmanager-main\",namespace=\"monitoring\", integration=~`.*`}[5m])\n/\n ignoring (reason) group_left rate(alertmanager_notifications_total{job=\"alertmanager-main\",namespace=\"monitoring\", integration=~`.*`}[5m])\n)\n> 0.01\n",
"for": "5m",
"labels": {
"severity": "critical"
@ -78,7 +78,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts",
"summary": "All Alertmanager instances in a cluster failed to send notifications to a non-critical integration."
},
"expr": "min by (namespace,service, integration) (\n rate(alertmanager_notifications_failed_total{job=\"alertmanager-main\",namespace=\"monitoring\", integration!~`.*`}[5m])\n/\n rate(alertmanager_notifications_total{job=\"alertmanager-main\",namespace=\"monitoring\", integration!~`.*`}[5m])\n)\n> 0.01\n",
"expr": "min by (namespace,service, integration) (\n rate(alertmanager_notifications_failed_total{job=\"alertmanager-main\",namespace=\"monitoring\", integration!~`.*`}[5m])\n/\n ignoring (reason) group_left rate(alertmanager_notifications_total{job=\"alertmanager-main\",namespace=\"monitoring\", integration!~`.*`}[5m])\n)\n> 0.01\n",
"for": "5m",
"labels": {
"severity": "warning"

View File

@ -139,7 +139,7 @@
"description": "etcd cluster \"{{ $labels.job }}\": database size exceeds the defined quota on etcd instance {{ $labels.instance }}, please defrag or increase the quota as the writes to etcd will be disabled when it is full.",
"summary": "etcd cluster database is running full."
},
"expr": "(last_over_time(etcd_mvcc_db_total_size_in_bytes[5m]) / last_over_time(etcd_server_quota_backend_bytes[5m]))*100 > 95\n",
"expr": "(last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~\".*etcd.*\"}[5m]) / last_over_time(etcd_server_quota_backend_bytes{job=~\".*etcd.*\"}[5m]))*100 > 95\n",
"for": "10m",
"labels": {
"severity": "critical"
@ -151,7 +151,7 @@
"description": "etcd cluster \"{{ $labels.job }}\": Predicting running out of disk space in the next four hours, based on write observations within the past four hours on etcd instance {{ $labels.instance }}, please check as it might be disruptive.",
"summary": "etcd cluster database growing very fast."
},
"expr": "predict_linear(etcd_mvcc_db_total_size_in_bytes[4h], 4*60*60) > etcd_server_quota_backend_bytes\n",
"expr": "predict_linear(etcd_mvcc_db_total_size_in_bytes{job=~\".*etcd.*\"}[4h], 4*60*60) > etcd_server_quota_backend_bytes{job=~\".*etcd.*\"}\n",
"for": "10m",
"labels": {
"severity": "warning"
@ -164,7 +164,7 @@
"runbook_url": "https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation",
"summary": "etcd database size in use is less than 50% of the actual allocated storage."
},
"expr": "(last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes[5m]) / last_over_time(etcd_mvcc_db_total_size_in_bytes[5m])) < 0.5 and etcd_mvcc_db_total_size_in_use_in_bytes > 104857600\n",
"expr": "(last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes{job=~\".*etcd.*\"}[5m]) / last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~\".*etcd.*\"}[5m])) < 0.5 and etcd_mvcc_db_total_size_in_use_in_bytes{job=~\".*etcd.*\"} > 104857600\n",
"for": "10m",
"labels": {
"severity": "warning"

View File

@ -326,7 +326,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh",
"summary": "Processes experience elevated CPU throttling."
},
"expr": "sum(increase(container_cpu_cfs_throttled_periods_total{container!=\"\", }[5m])) by (container, pod, namespace)\n /\nsum(increase(container_cpu_cfs_periods_total{}[5m])) by (container, pod, namespace)\n > ( 25 / 100 )\n",
"expr": "sum(increase(container_cpu_cfs_throttled_periods_total{container!=\"\", }[5m])) by (cluster, container, pod, namespace)\n /\nsum(increase(container_cpu_cfs_periods_total{}[5m])) by (cluster, container, pod, namespace)\n > ( 25 / 100 )\n",
"for": "15m",
"labels": {
"severity": "info"
@ -792,98 +792,98 @@
"name": "kube-apiserver-burnrate.rules",
"rules": [
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate1d"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate1h"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate2h"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate30m"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate3d"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate5m"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate6h"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n",
"labels": {
"verb": "write"
},
"record": "apiserver_request:burnrate1d"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n",
"labels": {
"verb": "write"
},
"record": "apiserver_request:burnrate1h"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n",
"labels": {
"verb": "write"
},
"record": "apiserver_request:burnrate2h"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n",
"labels": {
"verb": "write"
},
"record": "apiserver_request:burnrate30m"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n",
"labels": {
"verb": "write"
},
"record": "apiserver_request:burnrate3d"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n",
"labels": {
"verb": "write"
},
"record": "apiserver_request:burnrate5m"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n",
"labels": {
"verb": "write"
},
@ -895,20 +895,20 @@
"name": "kube-apiserver-histogram.rules",
"rules": [
{
"expr": "histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))) > 0\n",
"expr": "histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))) > 0\n",
"labels": {
"quantile": "0.99",
"verb": "read"
},
"record": "cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile"
"record": "cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile"
},
{
"expr": "histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))) > 0\n",
"expr": "histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))) > 0\n",
"labels": {
"quantile": "0.99",
"verb": "write"
},
"record": "cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile"
"record": "cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile"
}
]
},
@ -935,37 +935,37 @@
"record": "code:apiserver_request_total:increase30d"
},
{
"expr": "sum by (cluster, verb, scope) (increase(apiserver_request_slo_duration_seconds_count{job=\"apiserver\"}[1h]))\n",
"record": "cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase1h"
"expr": "sum by (cluster, verb, scope) (increase(apiserver_request_sli_duration_seconds_count{job=\"apiserver\"}[1h]))\n",
"record": "cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h"
},
{
"expr": "sum by (cluster, verb, scope) (avg_over_time(cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase1h[30d]) * 24 * 30)\n",
"record": "cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d"
"expr": "sum by (cluster, verb, scope) (avg_over_time(cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h[30d]) * 24 * 30)\n",
"record": "cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d"
},
{
"expr": "sum by (cluster, verb, scope, le) (increase(apiserver_request_slo_duration_seconds_bucket[1h]))\n",
"record": "cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase1h"
"expr": "sum by (cluster, verb, scope, le) (increase(apiserver_request_sli_duration_seconds_bucket[1h]))\n",
"record": "cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h"
},
{
"expr": "sum by (cluster, verb, scope, le) (avg_over_time(cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase1h[30d]) * 24 * 30)\n",
"record": "cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d"
"expr": "sum by (cluster, verb, scope, le) (avg_over_time(cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h[30d]) * 24 * 30)\n",
"record": "cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d"
},
{
"expr": "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n",
"expr": "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n",
"labels": {
"verb": "all"
},
"record": "apiserver_request:availability30d"
},
{
"expr": "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n",
"expr": "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:availability30d"
},
{
"expr": "1 - (\n (\n # too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n",
"expr": "1 - (\n (\n # too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n",
"labels": {
"verb": "write"
},
@ -1004,28 +1004,53 @@
]
},
{
"name": "k8s.rules",
"name": "k8s.rules.container_cpu_usage_seconds_total",
"rules": [
{
"expr": "sum by (cluster, namespace, pod, container) (\n irate(container_cpu_usage_seconds_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", image!=\"\"}[5m])\n) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (\n 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"})\n)\n",
"record": "node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate"
},
}
]
},
{
"name": "k8s.rules.container_memory_working_set_bytes",
"rules": [
{
"expr": "container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", image!=\"\"}\n* on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1,\n max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"})\n)\n",
"record": "node_namespace_pod_container:container_memory_working_set_bytes"
},
}
]
},
{
"name": "k8s.rules.container_memory_rss",
"rules": [
{
"expr": "container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", image!=\"\"}\n* on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1,\n max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"})\n)\n",
"record": "node_namespace_pod_container:container_memory_rss"
},
}
]
},
{
"name": "k8s.rules.container_memory_cache",
"rules": [
{
"expr": "container_memory_cache{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", image!=\"\"}\n* on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1,\n max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"})\n)\n",
"record": "node_namespace_pod_container:container_memory_cache"
},
}
]
},
{
"name": "k8s.rules.container_memory_swap",
"rules": [
{
"expr": "container_memory_swap{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", image!=\"\"}\n* on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1,\n max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"})\n)\n",
"record": "node_namespace_pod_container:container_memory_swap"
},
}
]
},
{
"name": "k8s.rules.container_resource",
"rules": [
{
"expr": "kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n",
"record": "cluster:namespace:pod_memory:active:kube_pod_container_resource_requests"
@ -1057,7 +1082,12 @@
{
"expr": "sum by (namespace, cluster) (\n sum by (namespace, pod, cluster) (\n max by (namespace, pod, container, cluster) (\n kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"}\n ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n",
"record": "namespace_cpu:kube_pod_container_resource_limits:sum"
},
}
]
},
{
"name": "k8s.rules.pod_owner",
"rules": [
{
"expr": "max by (cluster, namespace, workload, pod) (\n label_replace(\n label_replace(\n kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"ReplicaSet\"},\n \"replicaset\", \"$1\", \"owner_name\", \"(.*)\"\n ) * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) (\n 1, max by (replicaset, namespace, owner_name) (\n kube_replicaset_owner{job=\"kube-state-metrics\"}\n )\n ),\n \"workload\", \"$1\", \"owner_name\", \"(.*)\"\n )\n)\n",
"labels": {

View File

@ -6,7 +6,7 @@
"app.kubernetes.io/component": "exporter",
"app.kubernetes.io/name": "node-exporter",
"app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "1.6.1",
"app.kubernetes.io/version": "1.7.0",
"prometheus": "k8s",
"role": "alert-rules"
},
@ -326,6 +326,19 @@
"labels": {
"severity": "warning"
}
},
{
"alert": "NodeBondingDegraded",
"annotations": {
"description": "Bonding interface {{ $labels.master }} on {{ $labels.instance }} is in degraded state due to one or more slave failures.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/node/nodebondingdegraded",
"summary": "Bonding interface is degraded"
},
"expr": "(node_bonding_slaves - node_bonding_active) != 0\n",
"for": "5m",
"labels": {
"severity": "warning"
}
}
]
},

View File

@ -6,7 +6,7 @@
"app.kubernetes.io/component": "controller",
"app.kubernetes.io/name": "prometheus-operator",
"app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "0.67.1",
"app.kubernetes.io/version": "0.69.1",
"prometheus": "k8s",
"role": "alert-rules"
},

View File

@ -7,7 +7,7 @@
"app.kubernetes.io/instance": "k8s",
"app.kubernetes.io/name": "prometheus",
"app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "2.46.0",
"app.kubernetes.io/version": "2.48.0",
"prometheus": "k8s",
"role": "alert-rules"
},

View File

@ -2,10 +2,10 @@ diff --git charts/kube-prometheus-stack/charts/crds/crds/crd-alertmanagerconfigs
index 78c65ad..1adb2cf 100644
--- charts/kube-prometheus-stack/charts/crds/crds/crd-alertmanagerconfigs.yaml
+++ charts/kube-prometheus-stack/charts/crds/crds/crd-alertmanagerconfigs.yaml
@@ -5,6 +5,7 @@ kind: CustomResourceDefinition
metadata:
@@ -6,6 +6,7 @@ kind: CustomResourceDefinition
annotations:
controller-gen.kubebuilder.io/version: v0.11.1
operator.prometheus.io/version: 0.69.1
+ argocd.argoproj.io/sync-options: ServerSideApply=true
creationTimestamp: null
name: alertmanagerconfigs.monitoring.coreos.com
@ -14,10 +14,10 @@ diff --git charts/kube-prometheus-stack/charts/crds/crds/crd-alertmanagers.yaml
index 8456cc4..b2d2e60 100644
--- charts/kube-prometheus-stack/charts/crds/crds/crd-alertmanagers.yaml
+++ charts/kube-prometheus-stack/charts/crds/crds/crd-alertmanagers.yaml
@@ -5,6 +5,7 @@ kind: CustomResourceDefinition
metadata:
@@ -6,6 +6,7 @@ kind: CustomResourceDefinition
annotations:
controller-gen.kubebuilder.io/version: v0.11.1
operator.prometheus.io/version: 0.69.1
+ argocd.argoproj.io/sync-options: ServerSideApply=true
creationTimestamp: null
name: alertmanagers.monitoring.coreos.com
@ -26,10 +26,10 @@ diff --git charts/kube-prometheus-stack/charts/crds/crds/crd-podmonitors.yaml ch
index 84816da..bcc33c8 100644
--- charts/kube-prometheus-stack/charts/crds/crds/crd-podmonitors.yaml
+++ charts/kube-prometheus-stack/charts/crds/crds/crd-podmonitors.yaml
@@ -5,6 +5,7 @@ kind: CustomResourceDefinition
metadata:
@@ -6,6 +6,7 @@ kind: CustomResourceDefinition
annotations:
controller-gen.kubebuilder.io/version: v0.11.1
operator.prometheus.io/version: 0.69.1
+ argocd.argoproj.io/sync-options: ServerSideApply=true
creationTimestamp: null
name: podmonitors.monitoring.coreos.com
@ -38,10 +38,10 @@ diff --git charts/kube-prometheus-stack/charts/crds/crds/crd-probes.yaml charts/
index ee137b3..fda3c21 100644
--- charts/kube-prometheus-stack/charts/crds/crds/crd-probes.yaml
+++ charts/kube-prometheus-stack/charts/crds/crds/crd-probes.yaml
@@ -5,6 +5,7 @@ kind: CustomResourceDefinition
metadata:
@@ -6,6 +6,7 @@ kind: CustomResourceDefinition
annotations:
controller-gen.kubebuilder.io/version: v0.11.1
operator.prometheus.io/version: 0.69.1
+ argocd.argoproj.io/sync-options: ServerSideApply=true
creationTimestamp: null
name: probes.monitoring.coreos.com
@ -50,10 +50,10 @@ diff --git charts/kube-prometheus-stack/charts/crds/crds/crd-prometheusagents.ya
index 6a63596..39c2d3f 100644
--- charts/kube-prometheus-stack/charts/crds/crds/crd-prometheusagents.yaml
+++ charts/kube-prometheus-stack/charts/crds/crds/crd-prometheusagents.yaml
@@ -5,6 +5,7 @@ kind: CustomResourceDefinition
metadata:
@@ -6,6 +6,7 @@ kind: CustomResourceDefinition
annotations:
controller-gen.kubebuilder.io/version: v0.11.1
operator.prometheus.io/version: 0.69.1
+ argocd.argoproj.io/sync-options: ServerSideApply=true
creationTimestamp: null
name: prometheusagents.monitoring.coreos.com
@ -62,10 +62,10 @@ diff --git charts/kube-prometheus-stack/charts/crds/crds/crd-prometheuses.yaml c
index 72f900d..093143b 100644
--- charts/kube-prometheus-stack/charts/crds/crds/crd-prometheuses.yaml
+++ charts/kube-prometheus-stack/charts/crds/crds/crd-prometheuses.yaml
@@ -5,6 +5,7 @@ kind: CustomResourceDefinition
metadata:
@@ -6,6 +6,7 @@ kind: CustomResourceDefinition
annotations:
controller-gen.kubebuilder.io/version: v0.11.1
operator.prometheus.io/version: 0.69.1
+ argocd.argoproj.io/sync-options: ServerSideApply=true
creationTimestamp: null
name: prometheuses.monitoring.coreos.com
@ -74,10 +74,10 @@ diff --git charts/kube-prometheus-stack/charts/crds/crds/crd-prometheusrules.yam
index 9d9f85d..20da04f 100644
--- charts/kube-prometheus-stack/charts/crds/crds/crd-prometheusrules.yaml
+++ charts/kube-prometheus-stack/charts/crds/crds/crd-prometheusrules.yaml
@@ -5,6 +5,7 @@ kind: CustomResourceDefinition
metadata:
@@ -6,6 +6,7 @@ kind: CustomResourceDefinition
annotations:
controller-gen.kubebuilder.io/version: v0.11.1
operator.prometheus.io/version: 0.69.1
+ argocd.argoproj.io/sync-options: ServerSideApply=true
creationTimestamp: null
name: prometheusrules.monitoring.coreos.com
@ -86,10 +86,10 @@ diff --git charts/kube-prometheus-stack/charts/crds/crds/crd-scrapeconfigs.yaml
index 95f0711..e0804f6 100644
--- charts/kube-prometheus-stack/charts/crds/crds/crd-scrapeconfigs.yaml
+++ charts/kube-prometheus-stack/charts/crds/crds/crd-scrapeconfigs.yaml
@@ -5,6 +5,7 @@ kind: CustomResourceDefinition
metadata:
@@ -6,6 +6,7 @@ kind: CustomResourceDefinition
annotations:
controller-gen.kubebuilder.io/version: v0.11.1
operator.prometheus.io/version: 0.69.1
+ argocd.argoproj.io/sync-options: ServerSideApply=true
creationTimestamp: null
name: scrapeconfigs.monitoring.coreos.com
@ -98,10 +98,10 @@ diff --git charts/kube-prometheus-stack/charts/crds/crds/crd-servicemonitors.yam
index 5f52dc6..daa1a62 100644
--- charts/kube-prometheus-stack/charts/crds/crds/crd-servicemonitors.yaml
+++ charts/kube-prometheus-stack/charts/crds/crds/crd-servicemonitors.yaml
@@ -5,6 +5,7 @@ kind: CustomResourceDefinition
metadata:
@@ -6,6 +6,7 @@ kind: CustomResourceDefinition
annotations:
controller-gen.kubebuilder.io/version: v0.11.1
operator.prometheus.io/version: 0.69.1
+ argocd.argoproj.io/sync-options: ServerSideApply=true
creationTimestamp: null
name: servicemonitors.monitoring.coreos.com
@ -110,10 +110,10 @@ diff --git charts/kube-prometheus-stack/charts/crds/crds/crd-thanosrulers.yaml c
index 949bba7..d48a63c 100644
--- charts/kube-prometheus-stack/charts/crds/crds/crd-thanosrulers.yaml
+++ charts/kube-prometheus-stack/charts/crds/crds/crd-thanosrulers.yaml
@@ -5,6 +5,7 @@ kind: CustomResourceDefinition
metadata:
@@ -6,6 +6,7 @@ kind: CustomResourceDefinition
annotations:
controller-gen.kubebuilder.io/version: v0.11.1
operator.prometheus.io/version: 0.69.1
+ argocd.argoproj.io/sync-options: ServerSideApply=true
creationTimestamp: null
name: thanosrulers.monitoring.coreos.com

File diff suppressed because one or more lines are too long

View File

@ -38,7 +38,7 @@ spec:
description: Alertmanager {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} failed to send {{`{{`}} $value | humanizePercentage {{`}}`}} of notifications to {{`{{`}} $labels.integration {{`}}`}}.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerfailedtosendalerts
summary: An Alertmanager instance failed to send notifications.
expr: "(\n rate(alertmanager_notifications_failed_total{job=\"alertmanager-main\",namespace=\"monitoring\"}[5m])\n/\n rate(alertmanager_notifications_total{job=\"alertmanager-main\",namespace=\"monitoring\"}[5m])\n)\n> 0.01\n"
expr: "(\n rate(alertmanager_notifications_failed_total{job=\"alertmanager-main\",namespace=\"monitoring\"}[5m])\n/\n ignoring (reason) group_left rate(alertmanager_notifications_total{job=\"alertmanager-main\",namespace=\"monitoring\"}[5m])\n)\n> 0.01\n"
for: 5m
labels:
severity: warning
@ -47,7 +47,7 @@ spec:
description: The minimum notification failure rate to {{`{{`}} $labels.integration {{`}}`}} sent from any instance in the {{`{{`}}$labels.job{{`}}`}} cluster is {{`{{`}} $value | humanizePercentage {{`}}`}}.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts
summary: All Alertmanager instances in a cluster failed to send notifications to a critical integration.
expr: "min by (namespace,service, integration) (\n rate(alertmanager_notifications_failed_total{job=\"alertmanager-main\",namespace=\"monitoring\", integration=~`.*`}[5m])\n/\n rate(alertmanager_notifications_total{job=\"alertmanager-main\",namespace=\"monitoring\", integration=~`.*`}[5m])\n)\n> 0.01\n"
expr: "min by (namespace,service, integration) (\n rate(alertmanager_notifications_failed_total{job=\"alertmanager-main\",namespace=\"monitoring\", integration=~`.*`}[5m])\n/\n ignoring (reason) group_left rate(alertmanager_notifications_total{job=\"alertmanager-main\",namespace=\"monitoring\", integration=~`.*`}[5m])\n)\n> 0.01\n"
for: 5m
labels:
severity: critical
@ -56,7 +56,7 @@ spec:
description: The minimum notification failure rate to {{`{{`}} $labels.integration {{`}}`}} sent from any instance in the {{`{{`}}$labels.job{{`}}`}} cluster is {{`{{`}} $value | humanizePercentage {{`}}`}}.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts
summary: All Alertmanager instances in a cluster failed to send notifications to a non-critical integration.
expr: "min by (namespace,service, integration) (\n rate(alertmanager_notifications_failed_total{job=\"alertmanager-main\",namespace=\"monitoring\", integration!~`.*`}[5m])\n/\n rate(alertmanager_notifications_total{job=\"alertmanager-main\",namespace=\"monitoring\", integration!~`.*`}[5m])\n)\n> 0.01\n"
expr: "min by (namespace,service, integration) (\n rate(alertmanager_notifications_failed_total{job=\"alertmanager-main\",namespace=\"monitoring\", integration!~`.*`}[5m])\n/\n ignoring (reason) group_left rate(alertmanager_notifications_total{job=\"alertmanager-main\",namespace=\"monitoring\", integration!~`.*`}[5m])\n)\n> 0.01\n"
for: 5m
labels:
severity: warning

View File

@ -121,7 +121,7 @@ spec:
annotations:
description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": database size exceeds the defined quota on etcd instance {{`{{`}} $labels.instance {{`}}`}}, please defrag or increase the quota as the writes to etcd will be disabled when it is full.'
summary: etcd cluster database is running full.
expr: '(last_over_time(etcd_mvcc_db_total_size_in_bytes[5m]) / last_over_time(etcd_server_quota_backend_bytes[5m]))*100 > 95
expr: '(last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) / last_over_time(etcd_server_quota_backend_bytes{job=~".*etcd.*"}[5m]))*100 > 95
'
for: 10m
@ -131,7 +131,7 @@ spec:
annotations:
description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": Predicting running out of disk space in the next four hours, based on write observations within the past four hours on etcd instance {{`{{`}} $labels.instance {{`}}`}}, please check as it might be disruptive.'
summary: etcd cluster database growing very fast.
expr: 'predict_linear(etcd_mvcc_db_total_size_in_bytes[4h], 4*60*60) > etcd_server_quota_backend_bytes
expr: 'predict_linear(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[4h], 4*60*60) > etcd_server_quota_backend_bytes{job=~".*etcd.*"}
'
for: 10m
@ -142,7 +142,7 @@ spec:
description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": database size in use on instance {{`{{`}} $labels.instance {{`}}`}} is {{`{{`}} $value | humanizePercentage {{`}}`}} of the actual allocated disk space, please run defragmentation (e.g. etcdctl defrag) to retrieve the unused fragmented disk space.'
runbook_url: https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation
summary: etcd database size in use is less than 50% of the actual allocated storage.
expr: '(last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes[5m]) / last_over_time(etcd_mvcc_db_total_size_in_bytes[5m])) < 0.5 and etcd_mvcc_db_total_size_in_use_in_bytes > 104857600
expr: '(last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"}[5m]) / last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m])) < 0.5 and etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"} > 104857600
'
for: 10m

View File

@ -246,7 +246,7 @@ spec:
description: '{{`{{`}} $value | humanizePercentage {{`}}`}} throttling of CPU in namespace {{`{{`}} $labels.namespace {{`}}`}} for container {{`{{`}} $labels.container {{`}}`}} in pod {{`{{`}} $labels.pod {{`}}`}}.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh
summary: Processes experience elevated CPU throttling.
expr: "sum(increase(container_cpu_cfs_throttled_periods_total{container!=\"\", }[5m])) by (container, pod, namespace)\n /\nsum(increase(container_cpu_cfs_periods_total{}[5m])) by (container, pod, namespace)\n > ( 25 / 100 )\n"
expr: "sum(increase(container_cpu_cfs_throttled_periods_total{container!=\"\", }[5m])) by (cluster, container, pod, namespace)\n /\nsum(increase(container_cpu_cfs_periods_total{}[5m])) by (cluster, container, pod, namespace)\n > ( 25 / 100 )\n"
for: 15m
labels:
severity: info
@ -625,85 +625,85 @@ spec:
severity: critical
- name: kube-apiserver-burnrate.rules
rules:
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n"
labels:
verb: read
record: apiserver_request:burnrate1d
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n"
labels:
verb: read
record: apiserver_request:burnrate1h
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n"
labels:
verb: read
record: apiserver_request:burnrate2h
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"\
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"\
,code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n"
labels:
verb: read
record: apiserver_request:burnrate30m
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n"
labels:
verb: read
record: apiserver_request:burnrate3d
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n"
labels:
verb: read
record: apiserver_request:burnrate5m
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n"
labels:
verb: read
record: apiserver_request:burnrate6h
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n"
labels:
verb: write
record: apiserver_request:burnrate1d
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n"
labels:
verb: write
record: apiserver_request:burnrate1h
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n"
labels:
verb: write
record: apiserver_request:burnrate2h
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n"
labels:
verb: write
record: apiserver_request:burnrate30m
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n"
labels:
verb: write
record: apiserver_request:burnrate3d
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n"
labels:
verb: write
record: apiserver_request:burnrate5m
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n"
labels:
verb: write
record: apiserver_request:burnrate6h
- name: kube-apiserver-histogram.rules
rules:
- expr: 'histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0
- expr: 'histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0
'
labels:
quantile: '0.99'
verb: read
record: cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile
- expr: 'histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0
record: cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile
- expr: 'histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0
'
labels:
quantile: '0.99'
verb: write
record: cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile
record: cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile
- interval: 3m
name: kube-apiserver-availability.rules
rules:
@ -723,32 +723,32 @@ spec:
labels:
verb: write
record: code:apiserver_request_total:increase30d
- expr: 'sum by (cluster, verb, scope) (increase(apiserver_request_slo_duration_seconds_count{job="apiserver"}[1h]))
- expr: 'sum by (cluster, verb, scope) (increase(apiserver_request_sli_duration_seconds_count{job="apiserver"}[1h]))
'
record: cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase1h
- expr: 'sum by (cluster, verb, scope) (avg_over_time(cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase1h[30d]) * 24 * 30)
record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h
- expr: 'sum by (cluster, verb, scope) (avg_over_time(cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h[30d]) * 24 * 30)
'
record: cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d
- expr: 'sum by (cluster, verb, scope, le) (increase(apiserver_request_slo_duration_seconds_bucket[1h]))
record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d
- expr: 'sum by (cluster, verb, scope, le) (increase(apiserver_request_sli_duration_seconds_bucket[1h]))
'
record: cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase1h
- expr: 'sum by (cluster, verb, scope, le) (avg_over_time(cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase1h[30d]) * 24 * 30)
record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h
- expr: 'sum by (cluster, verb, scope, le) (avg_over_time(cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h[30d]) * 24 * 30)
'
record: cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d
- expr: "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"\
record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d
- expr: "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"\
cluster\",le=\"30\"})\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n"
labels:
verb: all
record: apiserver_request:availability30d
- expr: "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n"
- expr: "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n"
labels:
verb: read
record: apiserver_request:availability30d
- expr: "1 - (\n (\n # too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n"
- expr: "1 - (\n (\n # too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n"
labels:
verb: write
record: apiserver_request:availability30d
@ -780,18 +780,28 @@ spec:
'
record: code_verb:apiserver_request_total:increase1h
- name: k8s.rules
# Per-container CPU usage: 5m irate of the cAdvisor counter, summed by
# (cluster, namespace, pod, container) and joined with kube_pod_info to attach
# the node label. topk(1, ...) keeps a single kube_pod_info series per pod so
# the group_left join stays many-to-one.
- name: k8s.rules.container_cpu_usage_seconds_total
  rules:
  - expr: |
      sum by (cluster, namespace, pod, container) (
       irate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}[5m])
      ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (
       1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
      )
    record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate
# Container working-set memory from cAdvisor, joined with kube_pod_info to
# attach the node label (one kube_pod_info series per pod via topk(1, ...)).
- name: k8s.rules.container_memory_working_set_bytes
  rules:
  - expr: |
      container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
      * on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1,
       max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
      )
    record: node_namespace_pod_container:container_memory_working_set_bytes
# Container RSS memory from cAdvisor, joined with kube_pod_info to attach the
# node label (one kube_pod_info series per pod via topk(1, ...)).
- name: k8s.rules.container_memory_rss
  rules:
  - expr: |
      container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
      * on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1,
       max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
      )
    record: node_namespace_pod_container:container_memory_rss
# Container page-cache memory from cAdvisor, joined with kube_pod_info to
# attach the node label (one kube_pod_info series per pod via topk(1, ...)).
- name: k8s.rules.container_memory_cache
  rules:
  - expr: |
      container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
      * on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1,
       max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
      )
    record: node_namespace_pod_container:container_memory_cache
# Container swap usage from cAdvisor, joined with kube_pod_info to attach the
# node label (one kube_pod_info series per pod via topk(1, ...)).
- name: k8s.rules.container_memory_swap
  rules:
  - expr: |
      container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
      * on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1,
       max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
      )
    record: node_namespace_pod_container:container_memory_swap
- name: k8s.rules.container_resource
rules:
- expr: "kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n"
record: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests
- expr: "sum by (namespace, cluster) (\n sum by (namespace, pod, cluster) (\n max by (namespace, pod, container, cluster) (\n kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"}\n ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n"
@ -808,6 +818,8 @@ spec:
record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits
- expr: "sum by (namespace, cluster) (\n sum by (namespace, pod, cluster) (\n max by (namespace, pod, container, cluster) (\n kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"}\n ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n"
record: namespace_cpu:kube_pod_container_resource_limits:sum
- name: k8s.rules.pod_owner
rules:
- expr: "max by (cluster, namespace, workload, pod) (\n label_replace(\n label_replace(\n kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"ReplicaSet\"},\n \"replicaset\", \"$1\", \"owner_name\", \"(.*)\"\n ) * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) (\n 1, max by (replicaset, namespace, owner_name) (\n kube_replicaset_owner{job=\"kube-state-metrics\"}\n )\n ),\n \"workload\", \"$1\", \"owner_name\", \"(.*)\"\n )\n)\n"
labels:
workload_type: deployment

View File

@ -270,6 +270,17 @@ spec:
for: 5m
labels:
severity: warning
# Warning-level alert for a degraded bonding interface; per its description it
# indicates one or more slave failures on the bond named by $labels.master.
# The {{`{{`}} ... {{`}}`}} sequences are Helm escapes that emit literal
# Prometheus template braces.
- alert: NodeBondingDegraded
annotations:
description: Bonding interface {{`{{`}} $labels.master {{`}}`}} on {{`{{`}} $labels.instance {{`}}`}} is in degraded state due to one or more slave failures.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodebondingdegraded
summary: Bonding interface is degraded
# Matches whenever node_bonding_slaves and node_bonding_active differ.
expr: '(node_bonding_slaves - node_bonding_active) != 0
'
# The condition must hold for 5 minutes before the alert fires.
for: 5m
labels:
severity: warning
- name: node-exporter.rules
rules:
- expr: "count without (cpu, mode) (\n node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}\n)\n"

View File

@ -20,3 +20,5 @@ cd -
# Delete unused upstream dashboards and rules
rm -rf charts/kube-prometheus-stack/templates/grafana/dashboards-1.14 charts/kube-prometheus-stack/templates/prometheus/rules-1.14
update_docs

View File

@ -95,7 +95,7 @@ operators:
metrics:
enabled: false
namespace: monitoring
targetRevision: 0.9.4
targetRevision: 0.9.5
istio:
grafana: {}
prometheus: {}