157 lines
8.3 KiB
Plaintext
157 lines
8.3 KiB
Plaintext
|
{
|
||
|
"apiVersion": "monitoring.coreos.com/v1",
|
||
|
"kind": "PrometheusRule",
|
||
|
"metadata": {
|
||
|
"labels": {
|
||
|
"prometheus": "k8s"
|
||
|
},
|
||
|
"name": "openebs",
|
||
|
"namespace": "monitoring"
|
||
|
},
|
||
|
"spec": {
|
||
|
"groups": [
|
||
|
{
|
||
|
"name": "lvm-pool",
|
||
|
"rules": [
|
||
|
{
|
||
|
"alert": "LVMVolumeGroupMissingPhysicalVolume",
|
||
|
"annotations": {
|
||
|
"componentType": "volume group",
|
||
|
"description": "LVM volume group '{{ $labels.name }}' on node '{{ $labels.instance }}' is missing {{ $value }} underlying physical volume(s).",
|
||
|
"summary": "LVM volume group '{{ $labels.name }}' is missing the underlying physical volume.",
|
||
|
"vgName": "{{ $labels.name }}"
|
||
|
},
|
||
|
"expr": "lvm_vg_missing_pv_count > 0",
|
||
|
"for": "5m",
|
||
|
"labels": {
|
||
|
"severity": "critical"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"alert": "LVMVolumeGroupCapacityLow",
|
||
|
"annotations": {
|
||
|
"componentType": "volume group",
|
||
|
"description": "LVM volume group '{{ $labels.name }}' on node '{{ $labels.instance }}' has {{ with printf \"lvm_vg_free_size_bytes{instance='%s',name='%s'}\" $labels.instance $labels.name | query }} {{ . | first | value }} {{ end }}bytes of space remaining",
|
||
|
"summary": "LVM volume group '{{ $labels.name }}' is running low on capacity. Already {{ $value }}% of total capacity is consumed.",
|
||
|
"vgName": "{{ $labels.name }}"
|
||
|
},
|
||
|
"expr": "((lvm_vg_total_size_bytes - lvm_vg_free_size_bytes)/lvm_vg_total_size_bytes)*100 > 90",
|
||
|
"for": "5m",
|
||
|
"labels": {
|
||
|
"severity": "critical"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"alert": "LVMThinPoolCapacityLow",
|
||
|
"annotations": {
|
||
|
"componentType": "logical volume",
|
||
|
"description": "LVM thin pool '{{ $labels.name }}' on node '{{ $labels.instance }}' has {{ with printf \"lvm_lv_total_size_bytes{instance='%s',name='%s',segtype='%s'}-((lvm_lv_used_percent{instance='%s',name='%s',segtype='%s'}*lvm_lv_total_size_bytes{instance='%s',name='%s',segtype='%s'})/100)\" $labels.instance $labels.name $labels.segtype $labels.instance $labels.name $labels.segtype $labels.instance $labels.name $labels.segtype | query }} {{ . | first | value }} {{ end }}bytes of space remaining",
|
||
|
"lvName": "{{ $labels.name }}",
|
||
|
"summary": "LVM thin pool '{{ $labels.name }}' is running low on capacity. Already {{ $value }}% of total capacity is consumed."
|
||
|
},
|
||
|
"expr": "lvm_lv_used_percent{segtype=\"thin-pool\"} > 90",
|
||
|
"for": "5m",
|
||
|
"labels": {
|
||
|
"severity": "critical"
|
||
|
}
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "persistent-volume-claim",
|
||
|
"rules": [
|
||
|
{
|
||
|
"alert": "StalePersistentVolumeClaim",
|
||
|
"annotations": {
|
||
|
"description": "Persistent Volume Claim '{{ $labels.persistentvolumeclaim }}' has no consumer",
|
||
|
"summary": "Persistent Volume Claim '{{ $labels.persistentvolumeclaim }}' in namespace '{{ $labels.namespace }}' is not consumed by any pod in any namespace"
|
||
|
},
|
||
|
"expr": "kube_persistentvolumeclaim_info unless (kube_persistentvolumeclaim_info * on(persistentvolumeclaim) group_left kube_pod_spec_volumes_persistentvolumeclaims_info) == 1",
|
||
|
"for": "5m",
|
||
|
"labels": {
|
||
|
"severity": "info"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"alert": "PendingPersistentVolumeClaim",
|
||
|
"annotations": {
|
||
|
"description": "Persistent Volume Claim '{{ $labels.persistentvolumeclaim }}' has been in pending state for more than 5 minutes",
|
||
|
"summary": "Persistent Volume Claim '{{ $labels.persistentvolumeclaim }}' pending in namespace '{{ $labels.namespace }}'"
|
||
|
},
|
||
|
"expr": "kube_persistentvolumeclaim_status_phase{phase=\"Pending\"} == 1",
|
||
|
"for": "5m",
|
||
|
"labels": {
|
||
|
"severity": "warning"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"alert": "LostPersistentVolumeClaim",
|
||
|
"annotations": {
|
||
|
"description": "Persistent Volume Claim '{{ $labels.persistentvolumeclaim }}' has been in lost state for more than 5 minutes",
|
||
|
"summary": "Persistent Volume Claim '{{ $labels.persistentvolumeclaim }}' in namespace '{{ $labels.namespace }}' lost it's corresponding persistent volume"
|
||
|
},
|
||
|
"expr": "kube_persistentvolumeclaim_status_phase{phase=\"Lost\"} == 1",
|
||
|
"for": "5m",
|
||
|
"labels": {
|
||
|
"severity": "warning"
|
||
|
}
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "volume-node",
|
||
|
"rules": [
|
||
|
{
|
||
|
"alert": "VolumeNodeFileSystemIsReadOnly",
|
||
|
"annotations": {
|
||
|
"description": "Persistent Volume's filesystem on node '{{ $labels.node }}' for persistent volume claim '{{ $labels.persistentvolumeclaim }}' has become read-only",
|
||
|
"summary": "Volume mount failed for persistent volume claim '{{ $labels.persistentvolumeclaim }}' on node '{{ $labels.node }}' due to read-only file-system"
|
||
|
},
|
||
|
"expr": "kubelet_volume_stats_inodes * on(node) group_left(reason) problem_counter{reason=\"FilesystemIsReadOnly\"} > 0",
|
||
|
"for": "5m",
|
||
|
"labels": {
|
||
|
"severity": "critical"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"alert": "VolumeNodeExt4Error",
|
||
|
"annotations": {
|
||
|
"description": "Persistent Volume's on node '{{ $labels.node }}' persistent volume claim '{{ $labels.persistentvolumeclaim }}' encountering ext4 filesystem error",
|
||
|
"summary": "Node '{{ $labels.node }}' has encountered errors on ext4 file-system on volume having claim '{{ $labels.persistentvolumeclaim }}'"
|
||
|
},
|
||
|
"expr": "kubelet_volume_stats_inodes * on(node) group_left(reason) problem_counter{reason=\"Ext4Error\"} > 0",
|
||
|
"for": "5m",
|
||
|
"labels": {
|
||
|
"severity": "critical"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"alert": "VolumeNodeIOError",
|
||
|
"annotations": {
|
||
|
"description": "Persistent Volume on node '{{ $labels.node }}' for persistent volume claim '{{ $labels.persistentvolumeclaim }}' encountering errors w.r.t buffer I/O ",
|
||
|
"summary": "IO errors encountered on volume having persistent volume claim '{{ $labels.persistentvolumeclaim }}' on node '{{ $labels.node }}'"
|
||
|
},
|
||
|
"expr": "kubelet_volume_stats_inodes * on(node) group_left(reason) problem_counter{reason=\"IOError\"} > 0",
|
||
|
"for": "5m",
|
||
|
"labels": {
|
||
|
"severity": "critical"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"alert": "VolumeNodeExt4Warning",
|
||
|
"annotations": {
|
||
|
"description": "Persistent Volume on node '{{ $labels.node }}' receiving ext4 filesystem warning for persistent volume claim '{{ $labels.persistentvolumeclaim }}'",
|
||
|
"summary": "Node '{{ $labels.node }}' has encountered warning on ext4 file-system on volume having claim '{{ $labels.persistentvolumeclaim }}'"
|
||
|
},
|
||
|
"expr": "kubelet_volume_stats_inodes * on(node) group_left(reason) problem_counter{reason=\"Ext4Warning\"} > 0",
|
||
|
"for": "5m",
|
||
|
"labels": {
|
||
|
"severity": "critical"
|
||
|
}
|
||
|
}
|
||
|
]
|
||
|
}
|
||
|
]
|
||
|
}
|
||
|
}
|