Browse Source

Merge pull request 'Release V2.19.3' (#41) from master into stable

Reviewed-on: #41
v2.19.3
Stefan Reimer 9 months ago
parent
commit
dfd7388469
  1. 1
      .gitignore
  2. 2
      charts/kubeadm/.helmignore
  3. 2
      charts/kubeadm/Chart.yaml
  4. 1
      charts/kubeadm/templates/ClusterConfiguration.yaml
  5. 18
      charts/kubeadm/templates/KubeletConfiguration.yaml
  6. 2
      charts/kubeadm/templates/README.md
  7. 13
      charts/kubeadm/templates/_helpers.tpl
  8. 8
      charts/kubeadm/templates/k8s-ecr-login-renew/README.md
  9. 40
      charts/kubeadm/templates/k8s-ecr-login-renew/cronjob.yaml
  10. 31
      charts/kubeadm/templates/k8s-ecr-login-renew/service-account.yml
  11. 2
      charts/kubeadm/templates/patches/etcd0.yaml
  12. 2
      charts/kubeadm/templates/patches/kube-apiserver0.yaml
  13. 2
      charts/kubeadm/templates/patches/kube-controller-manager0.yaml
  14. 0
      charts/kubeadm/templates/resources/00-aws-iam-authenticator-crds.yaml
  15. 0
      charts/kubeadm/templates/resources/01-aws-iam-authenticator-deployment.yaml
  16. 0
      charts/kubeadm/templates/resources/02-aws-iam-authenticator-mappings.yaml
  17. 8
      charts/kubeadm/templates/resources/10-runtimeClass.yaml
  18. 1
      charts/kubeadm/values.yaml
  19. 8
      charts/kubezero-istio-ingress/Chart.yaml
  20. 2
      charts/kubezero-istio-ingress/charts/istio-ingress/Chart.yaml
  21. 2
      charts/kubezero-istio-ingress/charts/istio-ingress/values.yaml
  22. 2
      charts/kubezero-istio-ingress/charts/istio-private-ingress/Chart.yaml
  23. 2
      charts/kubezero-istio-ingress/charts/istio-private-ingress/values.yaml
  24. 11
      charts/kubezero-istio-ingress/values.yaml
  25. 8
      charts/kubezero-istio/Chart.yaml
  26. 2
      charts/kubezero-istio/charts/base/Chart.yaml
  27. 2
      charts/kubezero-istio/charts/istio-discovery/Chart.yaml
  28. 2
      charts/kubezero-istio/charts/istio-discovery/values.yaml
  29. 2
      charts/kubezero-istio/update.sh
  30. 4
      charts/kubezero-istio/values.yaml
  31. 4
      charts/kubezero-metrics/Chart.yaml
  32. 66
      charts/kubezero-metrics/adjust_alarms.patch
  33. 6
      charts/kubezero-metrics/charts/kube-prometheus-stack/Chart.yaml
  34. 4
      charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/Chart.yaml
  35. 11
      charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/README.md
  36. 7
      charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/_pod.tpl
  37. 29
      charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/podsecuritypolicy.yaml
  38. 10
      charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/values.yaml
  39. 2
      charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/Chart.yaml
  40. 2
      charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/role.yaml
  41. 2
      charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/rolebinding.yaml
  42. 2
      charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/Chart.yaml
  43. 4
      charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/templates/daemonset.yaml
  44. 4
      charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/values.yaml
  45. 4
      charts/kubezero-metrics/charts/kube-prometheus-stack/templates/alertmanager/alertmanager.yaml
  46. 3
      charts/kubezero-metrics/charts/kube-prometheus-stack/templates/alertmanager/servicemonitor.yaml
  47. 3
      charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/core-dns/servicemonitor.yaml
  48. 3
      charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-api-server/servicemonitor.yaml
  49. 2
      charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-controller-manager/service.yaml
  50. 5
      charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-controller-manager/servicemonitor.yaml
  51. 3
      charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-dns/servicemonitor.yaml
  52. 2
      charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-etcd/service.yaml
  53. 5
      charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-etcd/servicemonitor.yaml
  54. 2
      charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-proxy/service.yaml
  55. 5
      charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-proxy/servicemonitor.yaml
  56. 2
      charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-scheduler/service.yaml
  57. 5
      charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-scheduler/servicemonitor.yaml
  58. 8
      charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-state-metrics/serviceMonitor.yaml
  59. 3
      charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kubelet/servicemonitor.yaml
  60. 3
      charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/node-exporter/servicemonitor.yaml
  61. 4
      charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/certmanager.yaml
  62. 14
      charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/prometheus.yaml
  63. 6
      charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.yaml
  64. 2
      charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/serviceThanosSidecar.yaml
  65. 70
      charts/kubezero-metrics/charts/kube-prometheus-stack/values.yaml
  66. 39
      charts/kubezero-metrics/remove_etcd_grpc_alerts.patch
  67. 6
      charts/kubezero-metrics/update.sh
  68. 2
      charts/kubezero-metrics/values.yaml
  69. 6
      charts/kubezero/templates/istio-ingress.yaml
  70. 10
      charts/kubezero/templates/istio.yaml
  71. 1
      kubezero-repo.GFV/repo
  72. 8
      scripts/exec_each_node.sh
  73. 9
      scripts/pod_mem_cgroup_limit.sh

1
.gitignore

@ -8,3 +8,4 @@
# Breaks Helm V3 dependencies in Argo
Chart.lock
kubezero-repo.???

2
charts/kubeadm/.helmignore

@ -0,0 +1,2 @@
*.sh
*.md

2
charts/kubeadm/Chart.yaml

@ -2,7 +2,7 @@ apiVersion: v2
name: kubeadm
description: KubeZero Kubeadm golden config
type: application
version: 1.19.9
version: 1.20.0
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:

1
charts/kubeadm/templates/ClusterConfiguration.yaml

@ -42,6 +42,7 @@ apiServer:
audit-log-maxage: "7"
audit-log-maxsize: "100"
audit-log-maxbackup: "3"
audit-log-compress: "true"
tls-cipher-suites: "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384"
admission-control-config-file: /etc/kubernetes/apiserver/admission-configuration.yaml
{{- if eq .Values.platform "aws" }}

18
charts/kubeadm/templates/KubeletConfiguration.yaml

@ -1,3 +1,4 @@
# https://kubernetes.io/docs/reference/config-api/kubelet-config.v1beta1/
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
metadata:
@ -22,14 +23,15 @@ featureGates: {{ include "kubeadm.featuregates" ( dict "return" "map" "platform"
podsPerCore: 20
# cpuCFSQuotaPeriod: 10ms
# Basic OS on Ubuntu 20.04 incl. crio
#systemReserved:
# memory: 256Mi
# This should be dynamic based on number of maxpods and available cores
# https://cloud.google.com/kubernetes-engine/docs/concepts/cluster-architecture#memory_cpu
# Below are default minimal for 2 cores and minimum kubelet
systemReserved:
memory: 256Mi
ephemeral-storage: "2Gi"
# kubelet memory should be static as runc,conmon are added to each pod's cgroup
kubeReserved:
cpu: 70m
# memory: 128Mi
memory: 128Mi
# Let's use the settings below to reserve memory for system processes, as kubeReserved/systemReserved doesn't seem to work well with systemd
evictionHard:
memory.available: "484Mi"
#evictionHard:
# memory.available: "484Mi"
imageGCLowThresholdPercent: 70
kernelMemcgNotification: true

2
charts/kubeadm/templates/README.md

@ -0,0 +1,2 @@
# aws-iam-authenticator
- https://github.com/kubernetes-sigs/aws-iam-authenticator

13
charts/kubeadm/templates/_helpers.tpl

@ -2,17 +2,14 @@
Feature gates for all control plane components
*/ -}}
{{- define "kubeadm.featuregates" -}}
{{- $gates := dict "DefaultPodTopologySpread" "true" "CustomCPUCFSQuotaPeriod" "true" "GenericEphemeralVolume" "true" }}
{{- if eq .platform "aws" }}
{{- $gates = merge $gates ( dict "CSIMigrationAWS" "true" "CSIMigrationAWSComplete" "true") }}
{{- end }}
{{- $gates := list "CustomCPUCFSQuotaPeriod" "GenericEphemeralVolume" "CSIMigrationAWSComplete" "CSIMigrationAzureDiskComplete" "CSIMigrationAzureFileComplete" "CSIMigrationGCEComplete" "CSIMigrationOpenStackComplete" "CSIMigrationvSphereComplete" }}
{{- if eq .return "csv" }}
{{- range $key, $val := $gates }}
{{- $key }}={{- $val }},
{{- range $key := $gates }}
{{- $key }}=true,
{{- end }}
{{- else }}
{{- range $key, $val := $gates }}
{{ $key }}: {{ $val }}
{{- range $key := $gates }}
{{ $key }}: true
{{- end }}
{{- end }}
{{- end -}}

8
charts/kubeadm/templates/k8s-ecr-login-renew/README.md

@ -0,0 +1,8 @@
# Create IAM role for ECR read-only access
- Attach managed policy: `AmazonEC2ContainerRegistryReadOnly`
# Create secret for IAM user for ecr-renew
`kubectl create secret -n kube-system generic ecr-renew-cred --from-literal=AWS_REGION=<AWS_REGION> --from-literal=AWS_ACCESS_KEY_ID=<AWS_ACCESS_KEY_ID> --from-literal=AWS_SECRET_ACCESS_KEY=<AWS_SECRET_KEY>`
# Resources
- https://github.com/nabsul/k8s-ecr-login-renew

40
charts/kubeadm/templates/k8s-ecr-login-renew/cronjob.yaml

@ -0,0 +1,40 @@
apiVersion: batch/v1beta1
kind: CronJob
metadata:
namespace: kube-system
name: ecr-renew
labels:
app: ecr-renew
spec:
schedule: "0 */6 * * *"
successfulJobsHistoryLimit: 3
failedJobsHistoryLimit: 5
jobTemplate:
spec:
template:
spec:
restartPolicy: OnFailure
serviceAccountName: ecr-renew
containers:
- name: ecr-renew
image: nabsul/k8s-ecr-login-renew:v1.4
env:
- name: DOCKER_SECRET_NAME
value: ecr-login
- name: TARGET_NAMESPACE
value: "*"
- name: AWS_REGION
valueFrom:
secretKeyRef:
name: ecr-renew-cred
key: AWS_REGION
- name: AWS_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: ecr-renew-cred
key: AWS_ACCESS_KEY_ID
- name: AWS_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: ecr-renew-cred
key: AWS_SECRET_ACCESS_KEY

31
charts/kubeadm/templates/k8s-ecr-login-renew/service-account.yml

@ -0,0 +1,31 @@
apiVersion: v1
kind: ServiceAccount
metadata:
namespace: kube-system
name: ecr-renew
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: ecr-renew
rules:
- apiGroups: [""]
resources: ["secrets"]
verbs: ["create", "update", "get", "delete"]
- apiGroups: [""]
resources: ["namespaces"]
verbs: ["get", "list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
namespace: kube-system
name: ecr-renew
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: ecr-renew
subjects:
- kind: ServiceAccount
name: ecr-renew
namespace: kube-system

2
charts/kubeadm/templates/patches/etcd0.yaml

@ -3,5 +3,5 @@ spec:
- name: etcd
resources:
requests:
cpu: 250m
cpu: 200m
memory: 192Mi

2
charts/kubeadm/templates/patches/kube-apiserver0.yaml

@ -3,5 +3,5 @@ spec:
- name: kube-apiserver
resources:
requests:
cpu: 250m
cpu: 200m
memory: 1Gi

2
charts/kubeadm/templates/patches/kube-controller-manager0.yaml

@ -3,5 +3,5 @@ spec:
- name: kube-controller-manager
resources:
requests:
cpu: 200m
cpu: 100m
memory: 128Mi

0
charts/kubeadm/templates/aws-iam-authenticator/crds.yaml → charts/kubeadm/templates/resources/00-aws-iam-authenticator-crds.yaml

0
charts/kubeadm/templates/aws-iam-authenticator/deployment.yaml → charts/kubeadm/templates/resources/01-aws-iam-authenticator-deployment.yaml

0
charts/kubeadm/templates/aws-iam-authenticator/mappings.yaml → charts/kubeadm/templates/resources/02-aws-iam-authenticator-mappings.yaml

8
charts/kubeadm/templates/resources/10-runtimeClass.yaml

@ -0,0 +1,8 @@
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
name: crio
handler: runc
overhead:
podFixed:
memory: 16Mi

1
charts/kubeadm/values.yaml

@ -13,5 +13,4 @@ systemd: true
protectKernelDefaults: true
WorkerNodeRole: "arn:aws:iam::000000000000:role/KubernetesNode"
WorkerIamRole: "arn:aws:iam::000000000000:role/KubernetesNode"
KubeAdminRole: "arn:aws:iam::000000000000:role/KubernetesNode"

8
charts/kubezero-istio-ingress/Chart.yaml

@ -2,8 +2,8 @@ apiVersion: v2
name: kubezero-istio-ingress
description: KubeZero Umbrella Chart for Istio based Ingress
type: application
version: 0.5.1
appVersion: 1.9.2
version: 0.5.2
appVersion: 1.9.3
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
@ -16,9 +16,9 @@ dependencies:
version: ">= 0.1.3"
repository: https://zero-down-time.github.io/kubezero/
- name: istio-ingress
version: 1.9.2
version: 1.9.3
condition: istio-ingress.enabled
- name: istio-private-ingress
version: 1.9.2
version: 1.9.3
condition: istio-private-ingress.enabled
kubeVersion: ">= 1.18.0"

2
charts/kubezero-istio-ingress/charts/istio-ingress/Chart.yaml

@ -1,6 +1,6 @@
apiVersion: v1
name: istio-ingress
version: 1.9.2
version: 1.9.3
tillerVersion: ">=2.7.2"
description: Helm chart for deploying Istio gateways
keywords:

2
charts/kubezero-istio-ingress/charts/istio-ingress/values.yaml

@ -174,7 +174,7 @@ global:
hub: docker.io/istio
# Default tag for Istio images.
tag: 1.9.2
tag: 1.9.3
# Specify image pull policy if default behavior isn't desired.
# Default behavior: latest images will be Always else IfNotPresent.

2
charts/kubezero-istio-ingress/charts/istio-private-ingress/Chart.yaml

@ -1,6 +1,6 @@
apiVersion: v1
name: istio-private-ingress
version: 1.9.2
version: 1.9.3
tillerVersion: ">=2.7.2"
description: Helm chart for deploying Istio gateways
keywords:

2
charts/kubezero-istio-ingress/charts/istio-private-ingress/values.yaml

@ -174,7 +174,7 @@ global:
hub: docker.io/istio
# Default tag for Istio images.
tag: 1.9.2
tag: 1.9.3
# Specify image pull policy if default behavior isn't desired.
# Default behavior: latest images will be Always else IfNotPresent.

11
charts/kubezero-istio-ingress/values.yaml

@ -1,7 +1,7 @@
# Make sure these values match kubezero-istio !!!
global:
#hub: docker.io/istio
#tag: 1.9.0
#tag: 1.9.3
logAsJson: true
jwtPolicy: first-party-jwt
@ -23,10 +23,11 @@ istio-ingress:
replicaCount: 1
resources:
requests:
cpu: 50m
memory: 64Mi
limits:
# cpu: 100m
memory: 256Mi
memory: 512Mi
externalTrafficPolicy: Local
podAntiAffinityLabelSelector:
- key: app
@ -37,7 +38,6 @@ istio-ingress:
env:
TERMINATION_DRAIN_DURATION_SECONDS: '"60"'
# ISTIO_META_HTTP10: '"1"'
# The node selector is normally the list of nodeports, see CloudBender
nodeSelector:
node.kubernetes.io/ingress.public: "30080_30443"
@ -87,11 +87,11 @@ istio-private-ingress:
replicaCount: 1
resources:
requests:
cpu: 100m
cpu: 50m
memory: 64Mi
limits:
# cpu: 100m
memory: 256Mi
memory: 512Mi
externalTrafficPolicy: Local
podAntiAffinityLabelSelector:
- key: app
@ -102,7 +102,6 @@ istio-private-ingress:
env:
TERMINATION_DRAIN_DURATION_SECONDS: '"60"'
# ISTIO_META_HTTP10: '"1"'
nodeSelector:
node.kubernetes.io/ingress.private: "31080_31443"
#nodeSelector: "31080_31443_31671_31672_31224"

8
charts/kubezero-istio/Chart.yaml

@ -2,8 +2,8 @@ apiVersion: v2
name: kubezero-istio
description: KubeZero Umbrella Chart for Istio
type: application
version: 0.5.1
appVersion: 1.9.2
version: 0.5.3
appVersion: 1.9.3
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
@ -16,7 +16,7 @@ dependencies:
version: ">= 0.1.3"
repository: https://zero-down-time.github.io/kubezero/
- name: base
version: 1.9.2
version: 1.9.3
- name: istio-discovery
version: 1.9.2
version: 1.9.3
kubeVersion: ">= 1.18.0"

2
charts/kubezero-istio/charts/base/Chart.yaml

@ -1,6 +1,6 @@
apiVersion: v1
name: base
version: 1.9.2
version: 1.9.3
tillerVersion: ">=2.7.2"
description: Helm chart for deploying Istio cluster resources and CRDs
keywords:

2
charts/kubezero-istio/charts/istio-discovery/Chart.yaml

@ -1,6 +1,6 @@
apiVersion: v1
name: istio-discovery
version: 1.9.2
version: 1.9.3
tillerVersion: ">=2.7.2"
description: Helm chart for istio control plane
keywords:

2
charts/kubezero-istio/charts/istio-discovery/values.yaml

@ -232,7 +232,7 @@ global:
# Dev builds from prow are on gcr.io
hub: docker.io/istio
# Default tag for Istio images.
tag: 1.9.2
tag: 1.9.3
# Specify image pull policy if default behavior isn't desired.
# Default behavior: latest images will be Always else IfNotPresent.

2
charts/kubezero-istio/update.sh

@ -1,7 +1,7 @@
#!/bin/bash
set -ex
export ISTIO_VERSION=1.9.2
export ISTIO_VERSION=1.9.3
if [ ! -d istio-$ISTIO_VERSION ]; then
NAME="istio-$ISTIO_VERSION"

4
charts/kubezero-istio/values.yaml

@ -35,6 +35,10 @@ istio-discovery:
enabled: false
meshConfig:
defaultConfig:
terminationDrainDuration: 60s
# proxyMetadata:
# ISTIO_META_HTTP10: '"1"'
accessLogFile: /dev/stdout
accessLogEncoding: 'JSON'
h2UpgradePolicy: 'DO_NOT_UPGRADE'

4
charts/kubezero-metrics/Chart.yaml

@ -2,7 +2,7 @@ apiVersion: v2
name: kubezero-metrics
description: KubeZero Umbrella Chart for prometheus-operator
type: application
version: 0.3.4
version: 0.3.5
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
@ -16,7 +16,7 @@ dependencies:
version: ">= 0.1.3"
repository: https://zero-down-time.github.io/kubezero/
- name: kube-prometheus-stack
version: 14.3.0
version: 14.9.0
# Switch back to upstream once all alerts are fixed, e.g. etcd gRPC
# repository: https://prometheus-community.github.io/helm-charts
- name: prometheus-adapter

66
charts/kubezero-metrics/adjust_alarms.patch

@ -0,0 +1,66 @@
diff -turN charts/kube-prometheus-stack/templates/prometheus/rules-1.14/etcd.yaml charts/kube-prometheus-stack.zdt/templates/prometheus/rules-1.14/etcd.yaml
--- charts/kube-prometheus-stack/templates/prometheus/rules-1.14/etcd.yaml 2021-04-14 22:13:29.000000000 +0200
+++ charts/kube-prometheus-stack.zdt/templates/prometheus/rules-1.14/etcd.yaml 2021-04-15 14:43:03.074281889 +0200
@@ -54,34 +54,6 @@
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- - alert: etcdHighNumberOfFailedGRPCRequests
- annotations:
- message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
- expr: |-
- 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
- /
- sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method)
- > 1
- for: 10m
- labels:
- severity: warning
-{{- if .Values.defaultRules.additionalRuleLabels }}
-{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
-{{- end }}
- - alert: etcdHighNumberOfFailedGRPCRequests
- annotations:
- message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
- expr: |-
- 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
- /
- sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method)
- > 5
- for: 5m
- labels:
- severity: critical
-{{- if .Values.defaultRules.additionalRuleLabels }}
-{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
-{{- end }}
- alert: etcdGRPCRequestsSlow
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": gRPC requests to {{`{{`}} $labels.grpc_method {{`}}`}} are taking {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
diff -turN charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.yaml charts/kube-prometheus-stack.zdt/templates/prometheus/rules-1.14/node-exporter.yaml
--- charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.yaml 2021-04-14 22:13:29.000000000 +0200
+++ charts/kube-prometheus-stack.zdt/templates/prometheus/rules-1.14/node-exporter.yaml 2021-04-15 14:49:41.614282790 +0200
@@ -30,7 +30,7 @@
summary: Filesystem is predicted to run out of space within the next 24 hours.
expr: |-
(
- node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 40
+ node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 25
and
predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0
and
@@ -48,7 +48,7 @@
summary: Filesystem is predicted to run out of space within the next 4 hours.
expr: |-
(
- node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 15
+ node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 10
and
predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0
and
@@ -259,4 +259,4 @@
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
-{{- end }}
\ No newline at end of file
+{{- end }}

6
charts/kubezero-metrics/charts/kube-prometheus-stack/Chart.yaml

@ -15,11 +15,11 @@ dependencies:
- condition: nodeExporter.enabled
name: prometheus-node-exporter
repository: https://prometheus-community.github.io/helm-charts
version: 1.16.*
version: 1.17.*
- condition: grafana.enabled
name: grafana
repository: https://grafana.github.io/helm-charts
version: 6.6.*
version: 6.7.*
description: kube-prometheus-stack collects Kubernetes manifests, Grafana dashboards, and Prometheus rules combined with documentation and scripts to provide easy to operate end-to-end Kubernetes cluster monitoring with Prometheus using the Prometheus Operator.
home: https://github.com/prometheus-operator/kube-prometheus
icon: https://raw.githubusercontent.com/prometheus/prometheus.github.io/master/assets/prometheus_logo-cb55bb5c346.png
@ -44,4 +44,4 @@ sources:
- https://github.com/prometheus-community/helm-charts
- https://github.com/prometheus-operator/kube-prometheus
type: application
version: 14.3.0
version: 14.9.0

4
charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/Chart.yaml

@ -1,5 +1,5 @@
apiVersion: v2
appVersion: 7.4.3
appVersion: 7.5.3
description: The leading tool for querying and visualizing time series and metrics.
home: https://grafana.net
icon: https://raw.githubusercontent.com/grafana/grafana/master/public/img/logo_transparent_400x.png
@ -19,4 +19,4 @@ name: grafana
sources:
- https://github.com/grafana/grafana
type: application
version: 6.6.3
version: 6.7.4

11
charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/README.md

@ -59,8 +59,8 @@ This version requires Helm >= 3.1.0.
| `securityContext` | Deployment securityContext | `{"runAsUser": 472, "runAsGroup": 472, "fsGroup": 472}` |
| `priorityClassName` | Name of Priority Class to assign pods | `nil` |
| `image.repository` | Image repository | `grafana/grafana` |
| `image.tag` | Image tag (`Must be >= 5.0.0`) | `7.4.3` |
| `image.sha` | Image sha (optional) | `16dc29783ec7d4a23fa19207507586344c6797023604347eb3e8ea5ae431e181` |
| `image.tag` | Image tag (`Must be >= 5.0.0`) | `7.4.5` |
| `image.sha` | Image sha (optional) | `2b56f6106ddc376bb46d974230d530754bf65a640dfbc5245191d72d3b49efc6` |
| `image.pullPolicy` | Image pull policy | `IfNotPresent` |
| `image.pullSecrets` | Image pull secrets | `{}` |
| `service.type` | Kubernetes service type | `ClusterIP` |
@ -242,6 +242,9 @@ ingress:
### Example of extraVolumeMounts
Volume can be type persistentVolumeClaim or hostPath but not both at same time.
If neither the existingClaim nor the hostPath argument is given, the volume type is emptyDir.
```yaml
- extraVolumeMounts:
- name: plugins
@ -249,6 +252,10 @@ ingress:
subPath: configs/grafana/plugins
existingClaim: existing-grafana-claim
readOnly: false
- name: dashboards
mountPath: /var/lib/grafana/dashboards
hostPath: /usr/shared/grafana/dashboards
readOnly: false
```
## Import dashboards

7
charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/_pod.tpl

@ -479,8 +479,15 @@ volumes:
{{- end }}
{{- range .Values.extraVolumeMounts }}
- name: {{ .name }}
{{- if .existingClaim }}
persistentVolumeClaim:
claimName: {{ .existingClaim }}
{{- else if .hostPath }}
hostPath:
path: {{ .hostPath }}
{{- else }}
emptyDir: {}
{{- end }}
{{- end }}
{{- range .Values.extraEmptyDirMounts }}
- name: {{ .name }}

29
charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/podsecuritypolicy.yaml

@ -17,19 +17,8 @@ spec:
privileged: false
allowPrivilegeEscalation: false
requiredDropCapabilities:
# Default set from Docker, without DAC_OVERRIDE or CHOWN
- FOWNER
- FSETID
- KILL
- SETGID
- SETUID
- SETPCAP
- NET_BIND_SERVICE
- NET_RAW
- SYS_CHROOT
- MKNOD
- AUDIT_WRITE
- SETFCAP
# Default set from Docker, with DAC_OVERRIDE and CHOWN
- ALL
volumes:
- 'configMap'
- 'emptyDir'
@ -42,12 +31,20 @@ spec:
hostIPC: false
hostPID: false
runAsUser:
rule: 'RunAsAny'
rule: 'MustRunAsNonRoot'
seLinux:
rule: 'RunAsAny'
supplementalGroups:
rule: 'RunAsAny'
rule: 'MustRunAs'
ranges:
# Forbid adding the root group.
- min: 1
max: 65535
fsGroup:
rule: 'RunAsAny'
rule: 'MustRunAs'
ranges:
# Forbid adding the root group.
- min: 1
max: 65535
readOnlyRootFilesystem: false
{{- end }}

10
charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/values.yaml

@ -53,7 +53,7 @@ livenessProbe:
image:
repository: grafana/grafana
tag: 7.4.3
tag: 7.5.3
sha: ""
pullPolicy: IfNotPresent
@ -401,10 +401,14 @@ extraSecretMounts: []
## Additional grafana server volume mounts
# Defines additional volume mounts.
extraVolumeMounts: []
# - name: extra-volume
# mountPath: /mnt/volume
# - name: extra-volume-0
# mountPath: /mnt/volume0
# readOnly: true
# existingClaim: volume-claim
# - name: extra-volume-1
# mountPath: /mnt/volume1
# readOnly: true
# hostPath: /usr/shared/
## Pass the plugins you want installed as a list.
##

2
charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/Chart.yaml

@ -15,4 +15,4 @@ maintainers:
name: kube-state-metrics
sources:
- https://github.com/kubernetes/kube-state-metrics/
version: 2.13.0
version: 2.13.2

2
charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/role.yaml

@ -1,4 +1,4 @@
{{- if and (eq $.Values.rbac.create true) (not .Values.rbac.useExistingRole) -}}
{{- if and (eq $.Values.rbac.create true) (not .Values.rbac.useExistingRole) -}}
{{- if eq .Values.rbac.useClusterRole false }}
{{- range (split "," $.Values.namespace) }}
{{- end }}

2
charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/rolebinding.yaml

@ -1,7 +1,7 @@
{{- if and (eq .Values.rbac.create true) (eq .Values.rbac.useClusterRole false) -}}
{{- range (split "," $.Values.namespace) }}
---
apiVersion: rbac.authorization.k8s.io/v1beta1
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
labels:

2
charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/Chart.yaml

@ -14,4 +14,4 @@ maintainers:
name: prometheus-node-exporter
sources:
- https://github.com/prometheus/node_exporter/
version: 1.16.2
version: 1.17.0

4
charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/templates/daemonset.yaml

@ -29,6 +29,10 @@ spec:
{{- if .Values.priorityClassName }}
priorityClassName: {{ .Values.priorityClassName }}
{{- end }}
{{- if .Values.extraInitContainers }}
initContainers:
{{ toYaml .Values.extraInitContainers | nindent 6 }}
{{- end }}
containers:
- name: node-exporter
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"

4
charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/values.yaml

@ -168,3 +168,7 @@ sidecarVolumeMount: []
## - name: collector-textfiles
## mountPath: /run/prometheus
## readOnly: false
## Additional InitContainers to initialize the pod
##
extraInitContainers: []

4
charts/kubezero-metrics/charts/kube-prometheus-stack/templates/alertmanager/alertmanager.yaml

@ -7,6 +7,10 @@ metadata:
labels:
app: {{ template "kube-prometheus-stack.name" . }}-alertmanager
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.alertmanager.annotations }}
annotations:
{{ toYaml .Values.alertmanager.annotations | indent 4 }}
{{- end }}
spec:
{{- if .Values.alertmanager.alertmanagerSpec.image }}
image: {{ .Values.alertmanager.alertmanagerSpec.image.repository }}:{{ .Values.alertmanager.alertmanagerSpec.image.tag }}

3
charts/kubezero-metrics/charts/kube-prometheus-stack/templates/alertmanager/servicemonitor.yaml

@ -21,6 +21,9 @@ spec:
{{- if .Values.alertmanager.serviceMonitor.interval }}
interval: {{ .Values.alertmanager.serviceMonitor.interval }}
{{- end }}
{{- if .Values.alertmanager.serviceMonitor.proxyUrl }}
proxyUrl: {{ .Values.alertmanager.serviceMonitor.proxyUrl}}
{{- end }}
{{- if .Values.alertmanager.serviceMonitor.scheme }}
scheme: {{ .Values.alertmanager.serviceMonitor.scheme }}
{{- end }}

3
charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/core-dns/servicemonitor.yaml

@ -21,6 +21,9 @@ spec:
{{- if .Values.coreDns.serviceMonitor.interval}}
interval: {{ .Values.coreDns.serviceMonitor.interval }}
{{- end }}
{{- if .Values.coreDns.serviceMonitor.proxyUrl }}
proxyUrl: {{ .Values.coreDns.serviceMonitor.proxyUrl}}
{{- end }}
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
{{- if .Values.coreDns.serviceMonitor.metricRelabelings }}
metricRelabelings:

3
charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-api-server/servicemonitor.yaml

@ -13,6 +13,9 @@ spec:
{{- if .Values.kubeApiServer.serviceMonitor.interval }}
interval: {{ .Values.kubeApiServer.serviceMonitor.interval }}
{{- end }}
{{- if .Values.kubeApiServer.serviceMonitor.proxyUrl }}
proxyUrl: {{ .Values.kubeApiServer.serviceMonitor.proxyUrl}}
{{- end }}
port: https
scheme: https
{{- if .Values.kubeApiServer.serviceMonitor.metricRelabelings }}

2
charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-controller-manager/service.yaml

@ -1,4 +1,4 @@
{{- if .Values.kubeControllerManager.enabled }}
{{- if and .Values.kubeControllerManager.enabled .Values.kubeControllerManager.service.enabled }}
apiVersion: v1
kind: Service
metadata:

5
charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-controller-manager/servicemonitor.yaml

@ -1,4 +1,4 @@
{{- if .Values.kubeControllerManager.enabled }}
{{- if and .Values.kubeControllerManager.enabled .Values.kubeControllerManager.serviceMonitor.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
@ -22,6 +22,9 @@ spec:
interval: {{ .Values.kubeControllerManager.serviceMonitor.interval }}
{{- end }}
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
{{- if .Values.kubeControllerManager.serviceMonitor.proxyUrl }}
proxyUrl: {{ .Values.kubeControllerManager.serviceMonitor.proxyUrl}}
{{- end }}
{{- if .Values.kubeControllerManager.serviceMonitor.https }}
scheme: https
tlsConfig:

3
charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-dns/servicemonitor.yaml

@ -22,6 +22,9 @@ spec:
interval: {{ .Values.kubeDns.serviceMonitor.interval }}
{{- end }}
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
{{- if .Values.kubeDns.serviceMonitor.proxyUrl }}
proxyUrl: {{ .Values.kubeDns.serviceMonitor.proxyUrl}}
{{- end }}
{{- if .Values.kubeDns.serviceMonitor.dnsmasqMetricRelabelings }}
metricRelabelings:
{{ tpl (toYaml .Values.kubeDns.serviceMonitor.dnsmasqMetricRelabelings | indent 4) . }}

2
charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-etcd/service.yaml

@ -1,4 +1,4 @@
{{- if .Values.kubeEtcd.enabled }}
{{- if and .Values.kubeEtcd.enabled .Values.kubeEtcd.service.enabled }}
apiVersion: v1
kind: Service
metadata:

5
charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-etcd/servicemonitor.yaml

@ -1,4 +1,4 @@
{{- if .Values.kubeEtcd.enabled }}
{{- if and .Values.kubeEtcd.enabled .Values.kubeEtcd.serviceMonitor.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
@ -22,6 +22,9 @@ spec:
interval: {{ .Values.kubeEtcd.serviceMonitor.interval }}
{{- end }}
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
{{- if .Values.kubeEtcd.serviceMonitor.proxyUrl }}
proxyUrl: {{ .Values.kubeEtcd.serviceMonitor.proxyUrl}}
{{- end }}
{{- if eq .Values.kubeEtcd.serviceMonitor.scheme "https" }}
scheme: https
tlsConfig:

2
charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-proxy/service.yaml

@ -1,4 +1,4 @@
{{- if .Values.kubeProxy.enabled }}
{{- if and .Values.kubeProxy.enabled .Values.kubeProxy.service.enabled }}
apiVersion: v1
kind: Service
metadata:

5
charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-proxy/servicemonitor.yaml

@ -1,4 +1,4 @@
{{- if .Values.kubeProxy.enabled }}
{{- if and .Values.kubeProxy.enabled .Values.kubeProxy.serviceMonitor.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
@ -22,6 +22,9 @@ spec:
interval: {{ .Values.kubeProxy.serviceMonitor.interval }}
{{- end }}
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
{{- if .Values.kubeProxy.serviceMonitor.proxyUrl }}
proxyUrl: {{ .Values.kubeProxy.serviceMonitor.proxyUrl}}
{{- end }}
{{- if .Values.kubeProxy.serviceMonitor.https }}
scheme: https
tlsConfig:

2
charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-scheduler/service.yaml

@ -1,4 +1,4 @@
{{- if .Values.kubeScheduler.enabled }}
{{- if and .Values.kubeScheduler.enabled .Values.kubeScheduler.service.enabled }}
apiVersion: v1
kind: Service
metadata:

5
charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-scheduler/servicemonitor.yaml

@ -1,4 +1,4 @@
{{- if .Values.kubeScheduler.enabled }}
{{- if and .Values.kubeScheduler.enabled .Values.kubeScheduler.serviceMonitor.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
@ -22,6 +22,9 @@ spec:
interval: {{ .Values.kubeScheduler.serviceMonitor.interval }}
{{- end }}
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
{{- if .Values.kubeScheduler.serviceMonitor.proxyUrl }}
proxyUrl: {{ .Values.kubeScheduler.serviceMonitor.proxyUrl}}
{{- end }}
{{- if .Values.kubeScheduler.serviceMonitor.https }}
scheme: https
tlsConfig:

8
charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-state-metrics/serviceMonitor.yaml

@ -14,6 +14,9 @@ spec:
{{- if .Values.kubeStateMetrics.serviceMonitor.interval }}
interval: {{ .Values.kubeStateMetrics.serviceMonitor.interval }}
{{- end }}
{{- if .Values.kubeStateMetrics.serviceMonitor.proxyUrl }}
proxyUrl: {{ .Values.kubeStateMetrics.serviceMonitor.proxyUrl}}
{{- end }}
honorLabels: true
{{- if .Values.kubeStateMetrics.serviceMonitor.metricRelabelings }}
metricRelabelings:
@ -22,6 +25,11 @@ spec:
{{- if .Values.kubeStateMetrics.serviceMonitor.relabelings }}
relabelings:
{{ toYaml .Values.kubeStateMetrics.serviceMonitor.relabelings | indent 4 }}
{{- end }}
{{- if .Values.kubeStateMetrics.serviceMonitor.namespaceOverride }}
namespaceSelector:
matchNames:
- {{ .Values.kubeStateMetrics.serviceMonitor.namespaceOverride }}
{{- end }}
selector:
matchLabels:

3
charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kubelet/servicemonitor.yaml

@ -15,6 +15,9 @@ spec:
{{- if .Values.kubelet.serviceMonitor.interval }}
interval: {{ .Values.kubelet.serviceMonitor.interval }}
{{- end }}
{{- if .Values.kubelet.serviceMonitor.proxyUrl }}
proxyUrl: {{ .Values.kubelet.serviceMonitor.proxyUrl}}
{{- end }}
tlsConfig:
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecureSkipVerify: true

3
charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/node-exporter/servicemonitor.yaml

@ -18,6 +18,9 @@ spec:
{{- if .Values.nodeExporter.serviceMonitor.interval }}
interval: {{ .Values.nodeExporter.serviceMonitor.interval }}
{{- end }}
{{- /* Use node-exporter's own proxyUrl setting (nodeExporter.serviceMonitor.proxyUrl), not the kube-apiserver one. */}}
{{- if .Values.nodeExporter.serviceMonitor.proxyUrl }}
proxyUrl: {{ .Values.nodeExporter.serviceMonitor.proxyUrl }}
{{- end }}
{{- if .Values.nodeExporter.serviceMonitor.scrapeTimeout }}
scrapeTimeout: {{ .Values.nodeExporter.serviceMonitor.scrapeTimeout }}
{{- end }}

4
charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/certmanager.yaml

@ -18,7 +18,7 @@ metadata:
namespace: {{ template "kube-prometheus-stack.namespace" . }}
spec:
secretName: {{ template "kube-prometheus-stack.fullname" . }}-root-cert
duration: 43800h # 5y
duration: 43800h0m0s # 5y
issuerRef:
name: {{ template "kube-prometheus-stack.fullname" . }}-self-signed-issuer
commonName: "ca.webhook.kube-prometheus-stack"
@ -43,7 +43,7 @@ metadata:
namespace: {{ template "kube-prometheus-stack.namespace" . }}
spec:
secretName: {{ template "kube-prometheus-stack.fullname" . }}-admission
duration: 8760h # 1y
duration: 8760h0m0s # 1y
issuerRef:
{{- if .Values.prometheusOperator.admissionWebhooks.certManager.issuerRef }}
{{- toYaml .Values.prometheusOperator.admissionWebhooks.certManager.issuerRef | nindent 4 }}

14
charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/prometheus.yaml

@ -150,14 +150,24 @@ spec:
{{ else }}
probeNamespaceSelector: {}
{{- end }}
{{- if .Values.prometheus.prometheusSpec.remoteRead }}
{{- if (or .Values.prometheus.prometheusSpec.remoteRead .Values.prometheus.prometheusSpec.additionalRemoteRead) }}
remoteRead:
{{- if .Values.prometheus.prometheusSpec.remoteRead }}
{{ toYaml .Values.prometheus.prometheusSpec.remoteRead | indent 4 }}
{{- end }}
{{- if .Values.prometheus.prometheusSpec.remoteWrite }}
{{- if .Values.prometheus.prometheusSpec.additionalRemoteRead }}
{{ toYaml .Values.prometheus.prometheusSpec.additionalRemoteRead | indent 4 }}
{{- end }}
{{- end }}
{{- if (or .Values.prometheus.prometheusSpec.remoteWrite .Values.prometheus.prometheusSpec.additionalRemoteWrite) }}
remoteWrite:
{{- if .Values.prometheus.prometheusSpec.remoteWrite }}
{{ toYaml .Values.prometheus.prometheusSpec.remoteWrite | indent 4 }}
{{- end }}
{{- if .Values.prometheus.prometheusSpec.additionalRemoteWrite }}
{{ toYaml .Values.prometheus.prometheusSpec.additionalRemoteWrite | indent 4 }}
{{- end }}
{{- end }}
{{- if .Values.prometheus.prometheusSpec.securityContext }}
securityContext:
{{ toYaml .Values.prometheus.prometheusSpec.securityContext | indent 4 }}

6
charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.yaml

@ -30,7 +30,7 @@ spec:
summary: Filesystem is predicted to run out of space within the next 24 hours.
expr: |-
(
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 40
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 25
and
predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0
and
@ -48,7 +48,7 @@ spec:
summary: Filesystem is predicted to run out of space within the next 4 hours.
expr: |-
(
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 15
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 10
and
predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0
and
@ -259,4 +259,4 @@ spec:
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- end }}

2
charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/serviceThanosSIdecar.yaml → charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/serviceThanosSidecar.yaml

@ -16,7 +16,7 @@ metadata:
{{- end }}
spec:
type: {{ .Values.prometheus.thanosService.type }}
clusterIP: None
clusterIP: {{ .Values.prometheus.thanosService.clusterIP }}
ports:
- name: {{ .Values.prometheus.thanosService.portName }}
port: {{ .Values.prometheus.thanosService.port }}

70
charts/kubezero-metrics/charts/kube-prometheus-stack/values.yaml

@ -117,6 +117,10 @@ alertmanager:
##
enabled: true
## Annotations for Alertmanager
##
annotations: {}
## API version that Prometheus will use to communicate with Alertmanager. Possible values are v1, v2
##
apiVersion: v2
@ -194,7 +198,7 @@ alertmanager:
# *Graph:* <{{ .GeneratorURL }}|:chart_with_upwards_trend:>
# *Runbook:* <{{ .Annotations.runbook }}|:spiral_note_pad:>
# *Details:*
# {{ range .Labels.SortedPairs }} *{{ .Name }}:* `{{ .Value }}`
# {{ range .Labels.SortedPairs }} - *{{ .Name }}:* `{{ .Value }}`
# {{ end }}
# {{ end }}
# {{ end }}
@ -345,6 +349,10 @@ alertmanager:
interval: ""
selfMonitor: true
## proxyUrl: URL of a proxy that should be used for scraping.
##
proxyUrl: ""
## scheme: HTTP scheme to use for scraping. Can be used with `tlsConfig` for example if using istio mTLS.
scheme: ""
@ -375,7 +383,7 @@ alertmanager:
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#alertmanagerspec
##
alertmanagerSpec:
## Standard objects metadata. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#metadata
## Standard object's metadata. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#metadata
## Metadata Labels and Annotations get propagated to the Alertmanager pods.
##
podMetadata: {}
@ -747,6 +755,10 @@ kubeApiServer:
## Scrape interval. If not set, the Prometheus default scrape interval is used.
##
interval: ""
## proxyUrl: URL of a proxy that should be used for scraping.
##
proxyUrl: ""
jobLabel: component
selector:
matchLabels:
@ -771,6 +783,10 @@ kubelet:
##
interval: ""
## proxyUrl: URL of a proxy that should be used for scraping.
##
proxyUrl: ""
## Enable scraping the kubelet over https. For requirements to enable this see
## https://github.com/prometheus-operator/prometheus-operator/issues/926
##
@ -891,16 +907,22 @@ kubeControllerManager:
## If using kubeControllerManager.endpoints only the port and targetPort are used
##
service:
enabled: true
port: 10252
targetPort: 10252
# selector:
# component: kube-controller-manager
serviceMonitor:
enabled: true
## Scrape interval. If not set, the Prometheus default scrape interval is used.
##
interval: ""
## proxyUrl: URL of a proxy that should be used for scraping.
##
proxyUrl: ""
## Enable scraping kube-controller-manager over https.
## Requires proper certs (not self-signed) and delegated authentication/authorization checks
##
@ -943,6 +965,10 @@ coreDns:
##
interval: ""
## proxyUrl: URL of a proxy that should be used for scraping.
##
proxyUrl: ""
## metric relabel configs to apply to samples before ingestion.
##
metricRelabelings: []
@ -978,6 +1004,10 @@ kubeDns:
##
interval: ""
## proxyUrl: URL of a proxy that should be used for scraping.
##
proxyUrl: ""
## metric relabel configs to apply to samples before ingestion.
##
metricRelabelings: []
@ -1024,6 +1054,7 @@ kubeEtcd:
## Etcd service. If using kubeEtcd.endpoints only the port and targetPort are used
##
service:
enabled: true
port: 2379
targetPort: 2379
# selector:
@ -1041,9 +1072,13 @@ kubeEtcd:
## keyFile: /etc/prometheus/secrets/etcd-client-cert/etcd-client-key
##
serviceMonitor:
enabled: true
## Scrape interval. If not set, the Prometheus default scrape interval is used.
##
interval: ""
## proxyUrl: URL of a proxy that should be used for scraping.
##
proxyUrl: ""
scheme: http
insecureSkipVerify: false
serverName: ""
@ -1084,15 +1119,20 @@ kubeScheduler:
## If using kubeScheduler.endpoints only the port and targetPort are used
##
service:
enabled: true
port: 10251
targetPort: 10251
# selector:
# component: kube-scheduler
serviceMonitor:
enabled: true
## Scrape interval. If not set, the Prometheus default scrape interval is used.
##
interval: ""
## proxyUrl: URL of a proxy that should be used for scraping.
##
proxyUrl: ""
## Enable scraping kube-scheduler over https.
## Requires proper certs (not self-signed) and delegated authentication/authorization checks
##
@ -1135,16 +1175,22 @@ kubeProxy:
# - 10.141.4.24
service:
enabled: true
port: 10249
targetPort: 10249
# selector:
# k8s-app: kube-proxy
serviceMonitor:
enabled: true
## Scrape interval. If not set, the Prometheus default scrape interval is used.
##
interval: ""
## proxyUrl: URL of a proxy that should be used for scraping.
##
proxyUrl: ""
## Enable scraping kube-proxy over https.
## Requires proper certs (not self-signed) and delegated authentication/authorization checks
##
@ -1173,9 +1219,15 @@ kubeStateMetrics:
## Scrape interval. If not set, the Prometheus default scrape interval is used.
##
interval: ""
## proxyUrl: URL of a proxy that should be used for scraping.
##
proxyUrl: ""
## Override serviceMonitor selector
##
selectorOverride: {}
## Override namespace selector
##
namespaceOverride: ""
## metric relabel configs to apply to samples before ingestion.
##
@ -1217,6 +1269,10 @@ nodeExporter:
##
interval: ""
## proxyUrl: URL of a proxy that should be used for scraping.
##
proxyUrl: ""
## How long until a scrape request times out. If not set, the Prometheus default scrape timeout is used.
##
scrapeTimeout: ""
@ -1527,6 +1583,7 @@ prometheus:
serviceAccount:
create: true
name: ""
annotations: {}
# Service for thanos service discovery on sidecar
# Enabling this allows Thanos Query to use
@ -1540,6 +1597,7 @@ prometheus:
portName: grpc
port: 10901
targetPort: "grpc"
clusterIP: "None"
## Service type
##
@ -1814,7 +1872,7 @@ prometheus:
##
image:
repository: quay.io/prometheus/prometheus
tag: v2.24.0
tag: v2.26.0
sha: ""
## Tolerations for use with node taints
@ -2035,7 +2093,7 @@ prometheus:
##
routePrefix: /
## Standard objects metadata. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#metadata
## Standard object's metadata. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#metadata
## Metadata Labels and Annotations get propagated to the Prometheus pods.
##
podMetadata: {}
@ -2072,11 +2130,15 @@ prometheus:
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#remotereadspec
remoteRead: []
# - url: http://remote1/read
## additionalRemoteRead is appended to remoteRead
additionalRemoteRead: []
## The remote_write spec configuration for Prometheus.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#remotewritespec
remoteWrite: []
# - url: http://remote1/push
## additionalRemoteWrite is appended to remoteWrite
additionalRemoteWrite: []
## Enable/Disable Grafana dashboards provisioning for prometheus remote write feature
remoteWriteDashboards: false

39
charts/kubezero-metrics/remove_etcd_grpc_alerts.patch

@ -1,39 +0,0 @@
diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/etcd.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/etcd.yaml
index b430951..8358704 100644
--- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/etcd.yaml
+++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/etcd.yaml
@@ -71,34 +71,6 @@ spec:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
-{{- end }}
- - alert: etcdHighNumberOfFailedGRPCRequests
- annotations:
- message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
- expr: |-
- 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
- /
- sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method)
- > 1
- for: 10m
- labels:
- severity: warning
-{{- if .Values.defaultRules.additionalRuleLabels }}
-{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
-{{- end }}
- - alert: etcdHighNumberOfFailedGRPCRequests
- annotations:
- message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
- expr: |-
- 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
- /
- sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method)
- > 5
- for: 5m
- labels:
- severity: critical
-{{- if .Values.defaultRules.additionalRuleLabels }}
-{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdGRPCRequestsSlow
annotations:

6
charts/kubezero-metrics/update.sh

@ -1,8 +1,10 @@
#!/bin/bash
VERSION=14.3.0
VERSION=14.9.0
rm -rf charts/kube-prometheus-stack
curl -L -s -o - https://github.com/prometheus-community/helm-charts/releases/download/kube-prometheus-stack-${VERSION}/kube-prometheus-stack-${VERSION}.tgz | tar xfz - -C charts
patch -p3 -i remove_etcd_grpc_alerts.patch --no-backup-if-mismatch
# The grpc alerts could be re-enabled with etcd 3.5
# https://github.com/etcd-io/etcd/pull/12196