{{- /*
PrometheusRule with alerting rules for LVM volume groups and thin pools,
PersistentVolumeClaims, and node-level volume problems.
Rendered only when the "lvm-localpv" "prometheus" "enabled" value is set.
*/ -}}
{{- if index .Values "lvm-localpv" "prometheus" "enabled" }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ printf "%s-%s" (include "kubezero-lib.fullname" $) "prometheus-rules" | trunc 63 | trimSuffix "-" }}
  # Quoted so that an all-digit or boolean-looking namespace stays a string.
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "kubezero-lib.labels" . | nindent 4 }}
spec:
  # Alert annotations below contain template actions meant for Prometheus,
  # not for Helm. They are wrapped in Helm raw-string escapes so Helm emits
  # the double braces verbatim instead of evaluating them at render time.
  groups:
    # Health and capacity of LVM volume groups and thin pools.
    - name: lvm-pool
      rules:
        # A volume group reports at least one missing physical volume.
        - alert: LVMVolumeGroupMissingPhysicalVolume
          annotations:
            componentType: volume group
            description: LVM volume group '{{`{{`}} $labels.name {{`}}`}}' on node '{{`{{`}} $labels.instance {{`}}`}}' is missing {{`{{`}} $value {{`}}`}} underlying physical volume(s).
            summary: LVM volume group '{{`{{`}} $labels.name {{`}}`}}' is missing the underlying physical volume.
            vgName: '{{`{{`}} $labels.name {{`}}`}}'
          expr: lvm_vg_missing_pv_count > 0
          for: 5m
          labels:
            severity: critical
        # More than 90% of a volume group's total size is in use.
        # The description runs a query for the remaining free bytes.
        - alert: LVMVolumeGroupCapacityLow
          annotations:
            componentType: volume group
            description: LVM volume group '{{`{{`}} $labels.name {{`}}`}}' on node '{{`{{`}} $labels.instance {{`}}`}}' has {{`{{`}} with printf "lvm_vg_free_size_bytes{instance='%s',name='%s'}" $labels.instance $labels.name | query {{`}}`}} {{`{{`}} . | first | value {{`}}`}} {{`{{`}} end {{`}}`}}bytes of space remaining
            summary: LVM volume group '{{`{{`}} $labels.name {{`}}`}}' is running low on capacity. Already {{`{{`}} $value {{`}}`}}% of total capacity is consumed.
            vgName: '{{`{{`}} $labels.name {{`}}`}}'
          expr: ((lvm_vg_total_size_bytes - lvm_vg_free_size_bytes)/lvm_vg_total_size_bytes)*100 > 90
          for: 5m
          labels:
            severity: critical
        # A logical volume with segtype "thin-pool" is more than 90% used.
        # The description derives the remaining bytes from total size and
        # used percentage.
        - alert: LVMThinPoolCapacityLow
          annotations:
            componentType: logical volume
            description: LVM thin pool '{{`{{`}} $labels.name {{`}}`}}' on node '{{`{{`}} $labels.instance {{`}}`}}' has {{`{{`}} with printf "lvm_lv_total_size_bytes{instance='%s',name='%s',segtype='%s'}-((lvm_lv_used_percent{instance='%s',name='%s',segtype='%s'}*lvm_lv_total_size_bytes{instance='%s',name='%s',segtype='%s'})/100)" $labels.instance $labels.name $labels.segtype $labels.instance $labels.name $labels.segtype $labels.instance $labels.name $labels.segtype | query {{`}}`}} {{`{{`}} . | first | value {{`}}`}} {{`{{`}} end {{`}}`}}bytes of space remaining
            lvName: '{{`{{`}} $labels.name {{`}}`}}'
            summary: LVM thin pool '{{`{{`}} $labels.name {{`}}`}}' is running low on capacity. Already {{`{{`}} $value {{`}}`}}% of total capacity is consumed.
          expr: lvm_lv_used_percent{segtype="thin-pool"} > 90
          for: 5m
          labels:
            severity: critical
    # Lifecycle problems of PersistentVolumeClaims.
    - name: persistent-volume-claim
      rules:
        # A claim exists but no pod volume references it.
        # The match includes the namespace because claim names are only
        # unique per namespace. "unless" is a set operation, so a claim that
        # is mounted by several pods does not break rule evaluation.
        - alert: StalePersistentVolumeClaim
          annotations:
            description: Persistent Volume Claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}' has no consumer
            summary: Persistent Volume Claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}' in namespace '{{`{{`}} $labels.namespace {{`}}`}}' is not consumed by any pod in any namespace
          expr: kube_persistentvolumeclaim_info unless on(namespace, persistentvolumeclaim) kube_pod_spec_volumes_persistentvolumeclaims_info
          for: 5m
          labels:
            severity: info
        # A claim has stayed in phase "Pending" for the whole "for" window.
        - alert: PendingPersistentVolumeClaim
          annotations:
            description: Persistent Volume Claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}' has been in pending state for more than 5 minutes
            summary: Persistent Volume Claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}' pending in namespace '{{`{{`}} $labels.namespace {{`}}`}}'
          expr: kube_persistentvolumeclaim_status_phase{phase="Pending"} == 1
          for: 5m
          labels:
            severity: warning
        # A claim has stayed in phase "Lost" for the whole "for" window.
        - alert: LostPersistentVolumeClaim
          annotations:
            description: Persistent Volume Claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}' has been in lost state for more than 5 minutes
            summary: Persistent Volume Claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}' in namespace '{{`{{`}} $labels.namespace {{`}}`}}' lost its corresponding persistent volume
          expr: kube_persistentvolumeclaim_status_phase{phase="Lost"} == 1
          for: 5m
          labels:
            severity: warning
    # Node-level problems, attached to every kubelet volume series on the
    # affected node by joining on the "node" label and copying "reason".
    - name: volume-node
      rules:
        # problem_counter with reason "FilesystemIsReadOnly" is above zero.
        - alert: VolumeNodeFileSystemIsReadOnly
          annotations:
            description: Persistent Volume's filesystem on node '{{`{{`}} $labels.node {{`}}`}}' for persistent volume claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}' has become read-only
            summary: Volume mount failed for persistent volume claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}' on node '{{`{{`}} $labels.node {{`}}`}}' due to read-only file-system
          expr: kubelet_volume_stats_inodes * on(node) group_left(reason) problem_counter{reason="FilesystemIsReadOnly"} > 0
          for: 5m
          labels:
            severity: critical
        # problem_counter with reason "Ext4Error" is above zero.
        - alert: VolumeNodeExt4Error
          annotations:
            description: Persistent Volume's on node '{{`{{`}} $labels.node {{`}}`}}' persistent volume claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}' encountering ext4 filesystem error
            summary: Node '{{`{{`}} $labels.node {{`}}`}}' has encountered errors on ext4 file-system on volume having claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}'
          expr: kubelet_volume_stats_inodes * on(node) group_left(reason) problem_counter{reason="Ext4Error"} > 0
          for: 5m
          labels:
            severity: critical
        # problem_counter with reason "IOError" is above zero.
        - alert: VolumeNodeIOError
          annotations:
            description: 'Persistent Volume on node ''{{`{{`}} $labels.node {{`}}`}}'' for persistent volume claim ''{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}'' encountering errors w.r.t buffer I/O '
            summary: IO errors encountered on volume having persistent volume claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}' on node '{{`{{`}} $labels.node {{`}}`}}'
          expr: kubelet_volume_stats_inodes * on(node) group_left(reason) problem_counter{reason="IOError"} > 0
          for: 5m
          labels:
            severity: critical
        # problem_counter with reason "Ext4Warning" is above zero.
        # NOTE(review): this warning-class condition is raised with the same
        # "critical" severity as the error rules above - confirm it is intended.
        - alert: VolumeNodeExt4Warning
          annotations:
            description: Persistent Volume on node '{{`{{`}} $labels.node {{`}}`}}' receiving ext4 filesystem warning for persistent volume claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}'
            summary: Node '{{`{{`}} $labels.node {{`}}`}}' has encountered warning on ext4 file-system on volume having claim '{{`{{`}} $labels.persistentvolumeclaim {{`}}`}}'
          expr: kubelet_volume_stats_inodes * on(node) group_left(reason) problem_counter{reason="Ext4Warning"} > 0
          for: 5m
          labels:
            severity: critical
{{- end }}