feat: add grafana and prometheus rules for openEBS lvm driver
This commit is contained in:
parent
0e5d6b6c31
commit
c584600630
@ -2,7 +2,7 @@ apiVersion: v2
|
||||
name: kubezero-ci
|
||||
description: KubeZero umbrella chart for all things CI
|
||||
type: application
|
||||
version: 0.4.25
|
||||
version: 0.4.26
|
||||
home: https://kubezero.com
|
||||
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
|
||||
keywords:
|
||||
|
@ -17,7 +17,7 @@ gitea:
|
||||
enabled: false
|
||||
|
||||
image:
|
||||
tag: 1.15.10
|
||||
tag: 1.16.1
|
||||
rootless: true
|
||||
|
||||
securityContext:
|
||||
@ -117,10 +117,10 @@ jenkins:
|
||||
- kubernetes:1.31.3
|
||||
- workflow-aggregator:2.6
|
||||
- git:4.10.3
|
||||
- configuration-as-code:1.55.1
|
||||
- configuration-as-code:1346.ve8cfa_3473c94
|
||||
- antisamy-markup-formatter:2.7
|
||||
- prometheus:2.0.10
|
||||
- htmlpublisher:1.28
|
||||
- htmlpublisher:1.29
|
||||
- build-discarder:60.v1747b0eb632a
|
||||
|
||||
serviceAccountAgent:
|
||||
|
2
charts/kubezero-storage/.gitignore
vendored
Normal file
2
charts/kubezero-storage/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
jsonnet/vendor/
|
||||
jsonnet/tmp/
|
1
charts/kubezero-storage/.helmignore
Normal file
1
charts/kubezero-storage/.helmignore
Normal file
@ -0,0 +1 @@
|
||||
jsonnet
|
@ -2,7 +2,7 @@ apiVersion: v2
|
||||
name: kubezero-storage
|
||||
description: KubeZero umbrella chart for all things storage incl. AWS EBS/EFS, openEBS-lvm, gemini
|
||||
type: application
|
||||
version: 0.5.4
|
||||
version: 0.5.7
|
||||
home: https://kubezero.com
|
||||
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
|
||||
keywords:
|
||||
@ -28,7 +28,7 @@ dependencies:
|
||||
condition: gemini.enabled
|
||||
# repository: https://charts.fairwinds.com/stable
|
||||
- name: aws-ebs-csi-driver
|
||||
version: 2.6.2
|
||||
version: 2.6.3
|
||||
condition: aws-ebs-csi-driver.enabled
|
||||
# repository: https://kubernetes-sigs.github.io/aws-ebs-csi-driver
|
||||
- name: aws-efs-csi-driver
|
||||
|
@ -1,6 +1,6 @@
|
||||
# kubezero-storage
|
||||
|
||||
![Version: 0.5.3](https://img.shields.io/badge/Version-0.5.3-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
|
||||
![Version: 0.5.7](https://img.shields.io/badge/Version-0.5.7-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
|
||||
|
||||
KubeZero umbrella chart for all things storage incl. AWS EBS/EFS, openEBS-lvm, gemini
|
||||
|
||||
@ -18,7 +18,7 @@ Kubernetes: `>= 1.20.0`
|
||||
|
||||
| Repository | Name | Version |
|
||||
|------------|------|---------|
|
||||
| | aws-ebs-csi-driver | 2.6.2 |
|
||||
| | aws-ebs-csi-driver | 2.6.3 |
|
||||
| | aws-efs-csi-driver | 2.2.3 |
|
||||
| | gemini | 0.0.8 |
|
||||
| | lvm-localpv | 0.8.6 |
|
||||
@ -73,7 +73,6 @@ Kubernetes: `>= 1.20.0`
|
||||
| aws-efs-csi-driver.node.tolerations[0].key | string | `"kubezero-workergroup"` | |
|
||||
| aws-efs-csi-driver.node.tolerations[0].operator | string | `"Exists"` | |
|
||||
| aws-efs-csi-driver.replicaCount | int | `1` | |
|
||||
| aws-efs-csi-driver.storageClasses[0].name | string | `"efs-sc"` | |
|
||||
| gemini.enabled | bool | `false` | |
|
||||
| gemini.resources.limits.cpu | string | `"400m"` | |
|
||||
| gemini.resources.limits.memory | string | `"128Mi"` | |
|
||||
@ -90,6 +89,7 @@ Kubernetes: `>= 1.20.0`
|
||||
| lvm-localpv.lvmNode.tolerations[0].effect | string | `"NoSchedule"` | |
|
||||
| lvm-localpv.lvmNode.tolerations[0].key | string | `"kubezero-workergroup"` | |
|
||||
| lvm-localpv.lvmNode.tolerations[0].operator | string | `"Exists"` | |
|
||||
| lvm-localpv.prometheus.enabled | bool | `false` | |
|
||||
| lvm-localpv.storageClass.default | bool | `false` | |
|
||||
| lvm-localpv.storageClass.vgpattern | string | `""` | |
|
||||
| snapshotController.enabled | bool | `true` | |
|
||||
@ -107,3 +107,4 @@ Kubernetes: `>= 1.20.0`
|
||||
- https://kubernetes-csi.github.io/docs/snapshot-controller.html#deployment
|
||||
|
||||
## Resources
|
||||
- https://github.com/openebs/monitoring/blob/develop/docs/openebs-mixin-user-guide.md#install-openebs-mixin-in-existing-prometheus-stack
|
||||
|
@ -19,3 +19,4 @@
|
||||
- https://kubernetes-csi.github.io/docs/snapshot-controller.html#deployment
|
||||
|
||||
## Resources
|
||||
- https://github.com/openebs/monitoring/blob/develop/docs/openebs-mixin-user-guide.md#install-openebs-mixin-in-existing-prometheus-stack
|
||||
|
@ -1,5 +1,9 @@
|
||||
# Helm chart
|
||||
|
||||
## v2.6.3
|
||||
|
||||
* Bump app/driver to version `v1.5.1`
|
||||
|
||||
## v2.6.2
|
||||
|
||||
* Update csi-resizer version to v1.1.0
|
||||
|
@ -3,7 +3,7 @@ annotations:
|
||||
- kind: added
|
||||
description: Custom controller.updateStrategy to set controller deployment strategy.
|
||||
apiVersion: v2
|
||||
appVersion: 1.5.0
|
||||
appVersion: 1.5.1
|
||||
description: A Helm chart for AWS EBS CSI Driver
|
||||
home: https://github.com/kubernetes-sigs/aws-ebs-csi-driver
|
||||
keywords:
|
||||
@ -19,4 +19,4 @@ maintainers:
|
||||
name: aws-ebs-csi-driver
|
||||
sources:
|
||||
- https://github.com/kubernetes-sigs/aws-ebs-csi-driver
|
||||
version: 2.6.2
|
||||
version: 2.6.3
|
||||
|
@ -1,9 +1,9 @@
|
||||
apiVersion: v1
|
||||
appVersion: 0.1.0
|
||||
appVersion: "1.0"
|
||||
description: Automated backup and restore of PersistentVolumes using the VolumeSnapshot
|
||||
API
|
||||
maintainers:
|
||||
- email: robertb@fairwinds.com
|
||||
name: rbren
|
||||
name: gemini
|
||||
version: 0.0.8
|
||||
version: 1.0.0
|
||||
|
@ -28,7 +28,7 @@ Your cluster must support the [VolumeSnapshot API](https://kubernetes.io/docs/co
|
||||
|-----|------|---------|-------------|
|
||||
| image.pullPolicy | string | `"Always"` | imagePullPolicy - Highly recommended to leave this as `Always` |
|
||||
| image.repository | string | `"quay.io/fairwinds/gemini"` | Repository for the gemini image |
|
||||
| image.tag | string | `"0.1"` | The gemini image tag to use |
|
||||
| image.tag | string | `nil` | The gemini image tag to use. Defaults to .Chart.appVersion |
|
||||
| rbac.create | bool | `true` | If true, create a new ServiceAccount and attach permissions |
|
||||
| rbac.serviceAccountName | string | `nil` | |
|
||||
| verbosity | int | `5` | How verbose the controller logs should be |
|
||||
|
@ -31,7 +31,7 @@ spec:
|
||||
- -v
|
||||
- {{ . | quote }}
|
||||
{{- end }}
|
||||
image: '{{.Values.image.repository}}:{{.Values.image.tag}}'
|
||||
image: "{{.Values.image.repository}}:{{ .Values.image.tag | default .Chart.AppVersion }}"
|
||||
imagePullPolicy: '{{.Values.image.pullPolicy}}'
|
||||
name: gemini-controller
|
||||
resources:
|
||||
|
@ -3,8 +3,8 @@ image:
|
||||
pullPolicy: Always
|
||||
# image.repository -- Repository for the gemini image
|
||||
repository: quay.io/fairwinds/gemini
|
||||
# image.tag -- The gemini image tag to use
|
||||
tag: "0.1"
|
||||
# image.tag -- The gemini image tag to use. Defaults to .Chart.appVersion
|
||||
tag:
|
||||
|
||||
rbac:
|
||||
# rbac.create -- If true, create a new ServiceAccount and attach permissions
|
||||
|
35
charts/kubezero-storage/jsonnet/Makefile
Normal file
35
charts/kubezero-storage/jsonnet/Makefile
Normal file
@ -0,0 +1,35 @@
|
||||
# Build helpers for the monitoring mixin of this chart: format the jsonnet
# sources, render dashboards and Prometheus rules, and sync the rendered
# artifacts into the chart templates.

# Formatter style flags. They are passed to whichever formatter binary is
# found, so `fmt` and `lint` give the same result on every machine.
JSONNET_ARGS := -n 2 --max-blank-lines 2 --string-style s --comment-style s

# Prefer the standalone `jsonnetfmt`; fall back to the `jsonnet fmt` subcommand.
ifneq (,$(shell which jsonnetfmt))
JSONNET_FMT_CMD := jsonnetfmt
JSONNET_FMT_ARGS := $(JSONNET_ARGS)
else
JSONNET_FMT_CMD := jsonnet
JSONNET_FMT_ARGS := fmt $(JSONNET_ARGS)
endif
JSONNET_FMT := $(JSONNET_FMT_CMD) $(JSONNET_FMT_ARGS)

# Output directories for the rendered artifacts (wiped and recreated by `clean`).
dashboardsDirPath=dashboards
rulesDirPath=rules

# None of the targets below produce a file of the same name.
.PHONY: all fmt generate render lint clean

all: fmt generate lint

# Reformat every jsonnet source in place, skipping the vendored dependencies.
fmt:
	find . -name 'vendor' -prune -o -name '*.libsonnet' -print -o -name '*.jsonnet' -print | \
		xargs -n 1 -- $(JSONNET_FMT) -i

# Render one file per top-level key of each libsonnet entry point (`jsonnet -m`).
generate: clean
	jsonnet -J vendor -m $(rulesDirPath) -e '(import "prometheusRules.libsonnet")'
	jsonnet -J vendor -m $(dashboardsDirPath) -e '(import "dashboards.libsonnet")'

# Convert the rendered artifacts into chart templates using the shared sync scripts.
render: generate
	../../kubezero-metrics/sync_grafana_dashboards.py dashboards.yaml ../templates/lvm/grafana-dashboards.yaml
	../../kubezero-metrics/sync_prometheus_rules.py rules.yaml ../templates/lvm/

# Show a diff for every source that is not formatted and fail if any file
# differs. The status is accumulated inside the braces because the loop runs
# in the pipeline's subshell and would otherwise only report the last file.
lint:
	find . -name 'vendor' -prune -o -name '*.libsonnet' -print -o -name '*.jsonnet' -print | { \
		status=0; \
		while read -r f; do \
			$(JSONNET_FMT) "$$f" | diff -u "$$f" - || status=1; \
		done; \
		exit $$status; \
	}

# Start from empty output directories so stale renders never get synced.
clean:
	rm -rf $(dashboardsDirPath) $(rulesDirPath)
	mkdir -p $(dashboardsDirPath) $(rulesDirPath)
|
24
charts/kubezero-storage/jsonnet/config.libsonnet
Normal file
24
charts/kubezero-storage/jsonnet/config.libsonnet
Normal file
@ -0,0 +1,24 @@
|
||||
// Switches layered on top of the mixin configuration: only the lvmLocalPV
// dashboards and alert rules are turned on, every other component is off.
local components = {
  cStor: false,
  jiva: false,
  localPV: false,
  lvmLocalPV: true,
  deviceLocalPV: false,
  zfsLocalPV: false,
  ndm: false,
  npd: false,
};

{
  _config+:: {
    // Dashboards and alert rules use the same set of switches.
    dashboards+: components,
    alertRules+: components,
  },
}
|
10
charts/kubezero-storage/jsonnet/dashboards.libsonnet
Normal file
10
charts/kubezero-storage/jsonnet/dashboards.libsonnet
Normal file
@ -0,0 +1,10 @@
|
||||
// Renders the Grafana dashboards of the openebs mixin, one output entry per
// dashboard, each key prefixed with 'openebs-'.
local addMixin = import 'kube-prometheus/lib/mixin.libsonnet';
local upstreamMixin = import 'github.com/openebs/monitoring/jsonnet/openebs-mixin/mixin.libsonnet';
local localConfig = import 'config.libsonnet';

// The local config is added on top of the upstream mixin before wrapping it.
local openebsMixin = addMixin({
  name: 'openebs',
  mixin: upstreamMixin + localConfig,
});

local dashboards = openebsMixin.grafanaDashboards;

{
  ['openebs-' + name]: dashboards[name]
  for name in std.objectFields(dashboards)
}
|
10
charts/kubezero-storage/jsonnet/dashboards.yaml
Normal file
10
charts/kubezero-storage/jsonnet/dashboards.yaml
Normal file
@ -0,0 +1,10 @@
|
||||
# Input for sync_grafana_dashboards.py (invoked by `make render`).
configmap: openebs-lvmlocalpv-grafana-dashboard
gzip: true
folder: KubeZero
# Helm expression guarding the generated template.
condition: 'index .Values "lvm-localpv" "prometheus" "enabled"'
dashboards:
  - name: openebs-localpv-pool
    # Rendered into ./dashboards by `make generate`.
    url: file://dashboards/openebs-lvmlocalpv-pool.json
    tags: [storage, openEBS]
|
File diff suppressed because it is too large
Load Diff
24
charts/kubezero-storage/jsonnet/jsonnetfile.json
Normal file
24
charts/kubezero-storage/jsonnet/jsonnetfile.json
Normal file
@ -0,0 +1,24 @@
|
||||
{
|
||||
"version": 1,
|
||||
"dependencies": [
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/prometheus-operator/kube-prometheus.git",
|
||||
"subdir": "jsonnet/kube-prometheus"
|
||||
}
|
||||
},
|
||||
"version": "release-0.9"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/openebs/monitoring.git",
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "develop"
|
||||
}
|
||||
],
|
||||
"legacyImports": true
|
||||
}
|
170
charts/kubezero-storage/jsonnet/jsonnetfile.lock.json
Normal file
170
charts/kubezero-storage/jsonnet/jsonnetfile.lock.json
Normal file
@ -0,0 +1,170 @@
|
||||
{
|
||||
"version": 1,
|
||||
"dependencies": [
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/brancz/kubernetes-grafana.git",
|
||||
"subdir": "grafana"
|
||||
}
|
||||
},
|
||||
"version": "90f38916f1f8a310a715d18e36f787f84df4ddf5",
|
||||
"sum": "0kZ1pnuIirDtbg6F9at5+NQOwKNONIGEPq0eECzvRkI="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/etcd-io/etcd.git",
|
||||
"subdir": "contrib/mixin"
|
||||
}
|
||||
},
|
||||
"version": "fa191c64bd3c3eaa6cc167e837d7a73e3b7d7c72",
|
||||
"sum": "W/Azptf1PoqjyMwJON96UY69MFugDA4IAYiKURscryc="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/grafana/grafonnet-lib.git",
|
||||
"subdir": "grafonnet"
|
||||
}
|
||||
},
|
||||
"version": "6db00c292d3a1c71661fc875f90e0ec7caa538c2",
|
||||
"sum": "gF8foHByYcB25jcUOBqP6jxk0OPifQMjPvKY0HaCk6w="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/grafana/jsonnet-libs.git",
|
||||
"subdir": "grafana-builder"
|
||||
}
|
||||
},
|
||||
"version": "03d32a72a2a0bf0ee00ffc853be5f07ad3bafcbe",
|
||||
"sum": "0KkygBQd/AFzUvVzezE4qF/uDYgrwUXVpZfINBti0oc="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin.git",
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "fb9d8ed4bc4a3d6efac525f72e8a0d2c583a0fe2",
|
||||
"sum": "xjKkdp+5fkekCNBUIgZCHTRmVdUEmQNFKslrL2Ho8gs="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin.git",
|
||||
"subdir": "lib/promgrafonnet"
|
||||
}
|
||||
},
|
||||
"version": "177bc8ec789fa049a9585713d232035b159f8c92",
|
||||
"sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/kubernetes/kube-state-metrics.git",
|
||||
"subdir": "jsonnet/kube-state-metrics"
|
||||
}
|
||||
},
|
||||
"version": "f170cc73f11c1580d7f38af746be0f2fa79c6a1e",
|
||||
"sum": "S5qI+PJUdNeYOv76jH5nxwYS9N6U7CRxvyuB1wI4cTE="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/kubernetes/kube-state-metrics.git",
|
||||
"subdir": "jsonnet/kube-state-metrics-mixin"
|
||||
}
|
||||
},
|
||||
"version": "f170cc73f11c1580d7f38af746be0f2fa79c6a1e",
|
||||
"sum": "u8gaydJoxEjzizQ8jY8xSjYgWooPmxw+wIWdDxifMAk="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/openebs/monitoring.git",
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "87b902c85429eedce78033049de9280f39e81372",
|
||||
"sum": "TyL/PPFwpRRzAOwYVlvgBsSHS6/+Z6TgRGFe8beJcpE="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/prometheus-operator/kube-prometheus.git",
|
||||
"subdir": "jsonnet/kube-prometheus"
|
||||
}
|
||||
},
|
||||
"version": "c8db3c85db6e13b94726b7c5d630a6ebe7038cf9",
|
||||
"sum": "7DfVEg2kDzIQPgxTThnjQ4S2ZksrF0hkNLysXP6Wv24="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/prometheus-operator/prometheus-operator.git",
|
||||
"subdir": "jsonnet/mixin"
|
||||
}
|
||||
},
|
||||
"version": "83fe36566f4e0894eb5ffcd2638a0f039a17bdeb",
|
||||
"sum": "6reUygVmQrLEWQzTKcH8ceDbvM+2ztK3z2VBR2K2l+U=",
|
||||
"name": "prometheus-operator-mixin"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/prometheus-operator/prometheus-operator.git",
|
||||
"subdir": "jsonnet/prometheus-operator"
|
||||
}
|
||||
},
|
||||
"version": "83fe36566f4e0894eb5ffcd2638a0f039a17bdeb",
|
||||
"sum": "J1G++A8hrtr3+OZQMmcNeb1w/C30bXqqwpwHL/Xhsd4="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/prometheus/alertmanager.git",
|
||||
"subdir": "doc/alertmanager-mixin"
|
||||
}
|
||||
},
|
||||
"version": "b408b522bc653d014e53035e59fa394cc1edd762",
|
||||
"sum": "pep+dHzfIjh2SU5pEkwilMCAT/NoL6YYflV4x8cr7vU=",
|
||||
"name": "alertmanager"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/prometheus/node_exporter.git",
|
||||
"subdir": "docs/node-mixin"
|
||||
}
|
||||
},
|
||||
"version": "832909dd257eb368cf83363ffcae3ab84cb4bcb1",
|
||||
"sum": "MmxGhE2PJ1a52mk2x7vDpMT2at4Jglbud/rK74CB5i0="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/prometheus/prometheus.git",
|
||||
"subdir": "documentation/prometheus-mixin"
|
||||
}
|
||||
},
|
||||
"version": "751ca03faddc9c64089c41d0da370a3a0b477742",
|
||||
"sum": "AS8WYFi/z10BZSF6DFkKBscjB32XDMM7iIso7CO/FyI=",
|
||||
"name": "prometheus"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/thanos-io/thanos.git",
|
||||
"subdir": "mixin"
|
||||
}
|
||||
},
|
||||
"version": "ff363498fc95cfe17de894d7237bcf38bdd0bc36",
|
||||
"sum": "cajthvLKDjYgYHCKQU2g/pTMRkxcbuJEvTnCyJOihl8=",
|
||||
"name": "thanos-mixin"
|
||||
}
|
||||
],
|
||||
"legacyImports": false
|
||||
}
|
@ -0,0 +1,9 @@
|
||||
// Renders the Prometheus rules of the openebs mixin as a single output entry.
local addMixin = import 'kube-prometheus/lib/mixin.libsonnet';
local upstreamMixin = import 'github.com/openebs/monitoring/jsonnet/openebs-mixin/mixin.libsonnet';
local localConfig = import 'config.libsonnet';

// The local config is added on top of the upstream mixin before wrapping it.
local openebsMixin = addMixin({
  name: 'openebs',
  mixin: upstreamMixin + localConfig,
});

{
  'openebs-mixin-prometheusRules': openebsMixin.prometheusRules,
}
|
4
charts/kubezero-storage/jsonnet/rules.yaml
Normal file
4
charts/kubezero-storage/jsonnet/rules.yaml
Normal file
@ -0,0 +1,4 @@
|
||||
# Input for sync_prometheus_rules.py (invoked by `make render`).
rules:
  - name: prometheus-rules
    # Rendered into ./rules by `make generate`.
    url: file://rules/openebs-mixin-prometheusRules
    # Helm expression guarding the generated template.
    condition: 'index .Values "lvm-localpv" "prometheus" "enabled"'
|
@ -0,0 +1,156 @@
|
||||
{
|
||||
"apiVersion": "monitoring.coreos.com/v1",
|
||||
"kind": "PrometheusRule",
|
||||
"metadata": {
|
||||
"labels": {
|
||||
"prometheus": "k8s"
|
||||
},
|
||||
"name": "openebs",
|
||||
"namespace": "monitoring"
|
||||
},
|
||||
"spec": {
|
||||
"groups": [
|
||||
{
|
||||
"name": "lvm-pool",
|
||||
"rules": [
|
||||
{
|
||||
"alert": "LVMVolumeGroupMissingPhysicalVolume",
|
||||
"annotations": {
|
||||
"componentType": "volume group",
|
||||
"description": "LVM volume group '{{ $labels.name }}' on node '{{ $labels.instance }}' is missing {{ $value }} underlying physical volume(s).",
|
||||
"summary": "LVM volume group '{{ $labels.name }}' is missing the underlying physical volume.",
|
||||
"vgName": "{{ $labels.name }}"
|
||||
},
|
||||
"expr": "lvm_vg_missing_pv_count > 0",
|
||||
"for": "5m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "LVMVolumeGroupCapacityLow",
|
||||
"annotations": {
|
||||
"componentType": "volume group",
|
||||
"description": "LVM volume group '{{ $labels.name }}' on node '{{ $labels.instance }}' has {{ with printf \"lvm_vg_free_size_bytes{instance='%s',name='%s'}\" $labels.instance $labels.name | query }} {{ . | first | value }} {{ end }}bytes of space remaining",
|
||||
"summary": "LVM volume group '{{ $labels.name }}' is running low on capacity. Already {{ $value }}% of total capacity is consumed.",
|
||||
"vgName": "{{ $labels.name }}"
|
||||
},
|
||||
"expr": "((lvm_vg_total_size_bytes - lvm_vg_free_size_bytes)/lvm_vg_total_size_bytes)*100 > 90",
|
||||
"for": "5m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "LVMThinPoolCapacityLow",
|
||||
"annotations": {
|
||||
"componentType": "logical volume",
|
||||
"description": "LVM thin pool '{{ $labels.name }}' on node '{{ $labels.instance }}' has {{ with printf \"lvm_lv_total_size_bytes{instance='%s',name='%s',segtype='%s'}-((lvm_lv_used_percent{instance='%s',name='%s',segtype='%s'}*lvm_lv_total_size_bytes{instance='%s',name='%s',segtype='%s'})/100)\" $labels.instance $labels.name $labels.segtype $labels.instance $labels.name $labels.segtype $labels.instance $labels.name $labels.segtype | query }} {{ . | first | value }} {{ end }}bytes of space remaining",
|
||||
"lvName": "{{ $labels.name }}",
|
||||
"summary": "LVM thin pool '{{ $labels.name }}' is running low on capacity. Already {{ $value }}% of total capacity is consumed."
|
||||
},
|
||||
"expr": "lvm_lv_used_percent{segtype=\"thin-pool\"} > 90",
|
||||
"for": "5m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "persistent-volume-claim",
|
||||
"rules": [
|
||||
{
|
||||
"alert": "StalePersistentVolumeClaim",
|
||||
"annotations": {
|
||||
"description": "Persistent Volume Claim '{{ $labels.persistentvolumeclaim }}' has no consumer",
|
||||
"summary": "Persistent Volume Claim '{{ $labels.persistentvolumeclaim }}' in namespace '{{ $labels.namespace }}' is not consumed by any pod in any namespace"
|
||||
},
|
||||
"expr": "kube_persistentvolumeclaim_info unless (kube_persistentvolumeclaim_info * on(persistentvolumeclaim) group_left kube_pod_spec_volumes_persistentvolumeclaims_info) == 1",
|
||||
"for": "5m",
|
||||
"labels": {
|
||||
"severity": "info"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "PendingPersistentVolumeClaim",
|
||||
"annotations": {
|
||||
"description": "Persistent Volume Claim '{{ $labels.persistentvolumeclaim }}' has been in pending state for more than 5 minutes",
|
||||
"summary": "Persistent Volume Claim '{{ $labels.persistentvolumeclaim }}' pending in namespace '{{ $labels.namespace }}'"
|
||||
},
|
||||
"expr": "kube_persistentvolumeclaim_status_phase{phase=\"Pending\"} == 1",
|
||||
"for": "5m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "LostPersistentVolumeClaim",
|
||||
"annotations": {
|
||||
"description": "Persistent Volume Claim '{{ $labels.persistentvolumeclaim }}' has been in lost state for more than 5 minutes",
|
||||
"summary": "Persistent Volume Claim '{{ $labels.persistentvolumeclaim }}' in namespace '{{ $labels.namespace }}' lost it's corresponding persistent volume"
|
||||
},
|
||||
"expr": "kube_persistentvolumeclaim_status_phase{phase=\"Lost\"} == 1",
|
||||
"for": "5m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "volume-node",
|
||||
"rules": [
|
||||
{
|
||||
"alert": "VolumeNodeFileSystemIsReadOnly",
|
||||
"annotations": {
|
||||
"description": "Persistent Volume's filesystem on node '{{ $labels.node }}' for persistent volume claim '{{ $labels.persistentvolumeclaim }}' has become read-only",
|
||||
"summary": "Volume mount failed for persistent volume claim '{{ $labels.persistentvolumeclaim }}' on node '{{ $labels.node }}' due to read-only file-system"
|
||||
},
|
||||
"expr": "kubelet_volume_stats_inodes * on(node) group_left(reason) problem_counter{reason=\"FilesystemIsReadOnly\"} > 0",
|
||||
"for": "5m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "VolumeNodeExt4Error",
|
||||
"annotations": {
|
||||
"description": "Persistent Volume's on node '{{ $labels.node }}' persistent volume claim '{{ $labels.persistentvolumeclaim }}' encountering ext4 filesystem error",
|
||||
"summary": "Node '{{ $labels.node }}' has encountered errors on ext4 file-system on volume having claim '{{ $labels.persistentvolumeclaim }}'"
|
||||
},
|
||||
"expr": "kubelet_volume_stats_inodes * on(node) group_left(reason) problem_counter{reason=\"Ext4Error\"} > 0",
|
||||
"for": "5m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "VolumeNodeIOError",
|
||||
"annotations": {
|
||||
"description": "Persistent Volume on node '{{ $labels.node }}' for persistent volume claim '{{ $labels.persistentvolumeclaim }}' encountering errors w.r.t buffer I/O ",
|
||||
"summary": "IO errors encountered on volume having persistent volume claim '{{ $labels.persistentvolumeclaim }}' on node '{{ $labels.node }}'"
|
||||
},
|
||||
"expr": "kubelet_volume_stats_inodes * on(node) group_left(reason) problem_counter{reason=\"IOError\"} > 0",
|
||||
"for": "5m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "VolumeNodeExt4Warning",
|
||||
"annotations": {
|
||||
"description": "Persistent Volume on node '{{ $labels.node }}' receiving ext4 filesystem warning for persistent volume claim '{{ $labels.persistentvolumeclaim }}'",
|
||||
"summary": "Node '{{ $labels.node }}' has encountered warning on ext4 file-system on volume having claim '{{ $labels.persistentvolumeclaim }}'"
|
||||
},
|
||||
"expr": "kubelet_volume_stats_inodes * on(node) group_left(reason) problem_counter{reason=\"Ext4Warning\"} > 0",
|
||||
"for": "5m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
File diff suppressed because one or more lines are too long
104
charts/kubezero-storage/templates/lvm/prometheus-rules.yaml
Normal file
104
charts/kubezero-storage/templates/lvm/prometheus-rules.yaml
Normal file
@ -0,0 +1,104 @@
|
||||
{{- if index .Values "lvm-localpv" "prometheus" "enabled" }}
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
metadata:
|
||||
name: {{ printf "%s-%s" (include "kubezero-lib.fullname" $) "prometheus-rules" | trunc 63 | trimSuffix "-" }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
{{- include "kubezero-lib.labels" . | nindent 4 }}
|
||||
spec:
|
||||
groups:
|
||||
- name: lvm-pool
|
||||
rules:
|
||||
- alert: LVMVolumeGroupMissingPhysicalVolume
|
||||
annotations:
|
||||
componentType: volume group
|
||||
description: LVM volume group '{{`{{`}} $labels.name {{`}}`}}' on node '{{`{{`}} $labels.instance {{`}}`}}' is missing {{`{{`}} $value {{`}}`}} underlying physical volume(s).
|
||||
summary: LVM volume group '{{`{{`}} $labels.name {{`}}`}}' is missing the underlying physical volume.
|
||||
vgName: '{{`{{`}} $labels.name {{`}}`}}'
|
||||
expr: lvm_vg_missing_pv_count > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: LVMVolumeGroupCapacityLow
|
||||
annotations:
|
||||
componentType: volume group
|
||||
description: LVM volume group '{{`{{`}} $labels.name {{`}}`}}' on node '{{`{{`}} $labels.instance {{`}}`}}' has {{`{{`}} with printf "lvm_vg_free_size_bytes{instance='%s',name='%s'}" $labels.instance $labels.name | query {{`}}`}} {{`{{`}} . | first | value {{`}}`}} {{`{{`}} end {{`}}`}}bytes of space remaining
|
||||
summary: LVM volume group '{{`{{`}} $labels.name {{`}}`}}' is running low on capacity. Already {{`{{`}} $value {{`}}`}}% of total capacity is consumed.
|
||||
vgName: '{{`{{`}} $labels.name {{`}}`}}'
|
||||
expr: ((lvm_vg_total_size_bytes - lvm_vg_free_size_bytes)/lvm_vg_total_size_bytes)*100 > 90
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: LVMThinPoolCapacityLow
|
||||
annotations:
|
||||
componentType: logical volume
|
||||
description: LVM thin pool '{{`{{`}} $labels.name {{`}}`}}' on node '{{`{{`}} $labels.instance {{`}}`}}' has {{`{{`}} with printf "lvm_lv_total_size_bytes{instance='%s',name='%s',segtype='%s'}-((lvm_lv_used_percent{instance='%s',name='%s',segtype='%s'}*lvm_lv_total_size_bytes{instance='%s',name='%s',segtype='%s'})/100)" $labels.instance $labels.name $labels.segtype $labels.instance $labels.name $labels.segtype $labels.instance $labels.name $labels.segtype | query {{`}}`}} {{`{{`}} . | first | value {{`}}`}} {{`{{`}} end {{`}}`}}bytes of space remaining
|
||||
lvName: '{{`{{`}} $labels.name {{`}}`}}'
|
||||
summary: LVM thin pool '{{`{{`}} $labels.name {{`}}`}}' is running low on capacity. Already {{`{{`}} $value {{`}}`}}% of total capacity is consumed.
|
||||
expr: lvm_lv_used_percent{segtype="thin-pool"} > 90
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- name: persistent-volume-claim
|
||||
rules:
|
||||
- alert: StalePersistentVolumeClaim
|
||||
annotations:
|
||||
description: Persistent Volume Claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}' has no consumer
|
||||
summary: Persistent Volume Claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}' in namespace '{{`{{`}} $labels.namespace {{`}}`}}' is not consumed by any pod in any namespace
|
||||
expr: kube_persistentvolumeclaim_info unless (kube_persistentvolumeclaim_info * on(persistentvolumeclaim) group_left kube_pod_spec_volumes_persistentvolumeclaims_info) == 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: info
|
||||
- alert: PendingPersistentVolumeClaim
|
||||
annotations:
|
||||
description: Persistent Volume Claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}' has been in pending state for more than 5 minutes
|
||||
summary: Persistent Volume Claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}' pending in namespace '{{`{{`}} $labels.namespace {{`}}`}}'
|
||||
expr: kube_persistentvolumeclaim_status_phase{phase="Pending"} == 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: LostPersistentVolumeClaim
|
||||
annotations:
|
||||
description: Persistent Volume Claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}' has been in lost state for more than 5 minutes
|
||||
summary: Persistent Volume Claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}' in namespace '{{`{{`}} $labels.namespace {{`}}`}}' lost it's corresponding persistent volume
|
||||
expr: kube_persistentvolumeclaim_status_phase{phase="Lost"} == 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- name: volume-node
|
||||
rules:
|
||||
- alert: VolumeNodeFileSystemIsReadOnly
|
||||
annotations:
|
||||
description: Persistent Volume's filesystem on node '{{`{{`}} $labels.node {{`}}`}}' for persistent volume claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}' has become read-only
|
||||
summary: Volume mount failed for persistent volume claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}' on node '{{`{{`}} $labels.node {{`}}`}}' due to read-only file-system
|
||||
expr: kubelet_volume_stats_inodes * on(node) group_left(reason) problem_counter{reason="FilesystemIsReadOnly"} > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: VolumeNodeExt4Error
|
||||
annotations:
|
||||
description: Persistent Volume's on node '{{`{{`}} $labels.node {{`}}`}}' persistent volume claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}' encountering ext4 filesystem error
|
||||
summary: Node '{{`{{`}} $labels.node {{`}}`}}' has encountered errors on ext4 file-system on volume having claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}'
|
||||
expr: kubelet_volume_stats_inodes * on(node) group_left(reason) problem_counter{reason="Ext4Error"} > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: VolumeNodeIOError
|
||||
annotations:
|
||||
description: 'Persistent Volume on node ''{{`{{`}} $labels.node {{`}}`}}'' for persistent volume claim ''{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}'' encountering errors w.r.t buffer I/O '
|
||||
summary: IO errors encountered on volume having persistent volume claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}' on node '{{`{{`}} $labels.node {{`}}`}}'
|
||||
expr: kubelet_volume_stats_inodes * on(node) group_left(reason) problem_counter{reason="IOError"} > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: VolumeNodeExt4Warning
|
||||
annotations:
|
||||
description: Persistent Volume on node '{{`{{`}} $labels.node {{`}}`}}' receiving ext4 filesystem warning for persistent volume claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}'
|
||||
summary: Node '{{`{{`}} $labels.node {{`}}`}}' has encountered warning on ext4 file-system on volume having claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}'
|
||||
expr: kubelet_volume_stats_inodes * on(node) group_left(reason) problem_counter{reason="Ext4Warning"} > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
|
||||
{{- end }}
|
18
charts/kubezero-storage/templates/lvm/servicemonitor.yaml
Normal file
18
charts/kubezero-storage/templates/lvm/servicemonitor.yaml
Normal file
@ -0,0 +1,18 @@
|
||||
{{- /*
ServiceMonitor for services labelled name=openebs-lvm-node.
Rendered only when lvm-localpv.prometheus.enabled is set.
*/ -}}
{{- if index .Values "lvm-localpv" "prometheus" "enabled" }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: {{ printf "%s-%s" (include "kubezero-lib.fullname" $) "openebs-monitoring-lvmlocalpv" | trunc 63 | trimSuffix "-" }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "kubezero-lib.labels" . | nindent 4 }}
spec:
  endpoints:
    - path: /metrics
      port: metrics
  # Matching services are looked up in every namespace.
  namespaceSelector:
    any: true
  selector:
    matchLabels:
      name: openebs-lvm-node
{{- end }}
|
@ -26,3 +26,7 @@ VERSION=$(yq eval '.dependencies[] | select(.name=="aws-efs-csi-driver") | .vers
|
||||
rm -rf charts/aws-efs-csi-driver
|
||||
curl -L -s -o - https://github.com/kubernetes-sigs/aws-efs-csi-driver/releases/download/helm-chart-aws-efs-csi-driver-${VERSION}/aws-efs-csi-driver-${VERSION}.tgz | tar xfz - -C charts
|
||||
patch -i efs.patch -p0 --no-backup-if-mismatch
|
||||
|
||||
# Metrics
|
||||
cd jsonnet
|
||||
make render
|
||||
|
@ -44,6 +44,8 @@ lvm-localpv:
|
||||
analytics:
|
||||
enabled: false
|
||||
|
||||
prometheus:
|
||||
enabled: false
|
||||
|
||||
gemini:
|
||||
enabled: false
|
||||
@ -170,8 +172,12 @@ aws-efs-csi-driver:
|
||||
effect: NoSchedule
|
||||
operator: Exists
|
||||
|
||||
storageClasses:
|
||||
- name: efs-sc
|
||||
#storageClasses:
|
||||
#- name: efs-sc
|
||||
# parameters:
|
||||
# provisioningMode: efs-ap
|
||||
# directoryPerms: "700"
|
||||
# fileSystemId: <efs-id-from-worker-cfn>
|
||||
|
||||
#PersistentVolumes:
|
||||
# - name: example-pv
|
||||
|
Loading…
Reference in New Issue
Block a user