diff --git a/.gitignore b/.gitignore index 3bb249e..f590abb 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ # Breaks Helm V3 dependencies in Argo Chart.lock +kubezero-repo.??? diff --git a/charts/kubeadm/.helmignore b/charts/kubeadm/.helmignore new file mode 100644 index 0000000..0b1f83c --- /dev/null +++ b/charts/kubeadm/.helmignore @@ -0,0 +1,2 @@ +*.sh +*.md diff --git a/charts/kubeadm/Chart.yaml b/charts/kubeadm/Chart.yaml index 3562382..b46b7d0 100644 --- a/charts/kubeadm/Chart.yaml +++ b/charts/kubeadm/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: kubeadm description: KubeZero Kubeadm golden config type: application -version: 1.19.9 +version: 1.20.0 home: https://kubezero.com icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png keywords: diff --git a/charts/kubeadm/templates/ClusterConfiguration.yaml b/charts/kubeadm/templates/ClusterConfiguration.yaml index 27595ad..e7da166 100644 --- a/charts/kubeadm/templates/ClusterConfiguration.yaml +++ b/charts/kubeadm/templates/ClusterConfiguration.yaml @@ -42,6 +42,7 @@ apiServer: audit-log-maxage: "7" audit-log-maxsize: "100" audit-log-maxbackup: "3" + audit-log-compress: "true" tls-cipher-suites: "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384" admission-control-config-file: /etc/kubernetes/apiserver/admission-configuration.yaml {{- if eq .Values.platform "aws" }} diff --git a/charts/kubeadm/templates/KubeletConfiguration.yaml b/charts/kubeadm/templates/KubeletConfiguration.yaml index d26567c..c708845 100644 --- a/charts/kubeadm/templates/KubeletConfiguration.yaml +++ b/charts/kubeadm/templates/KubeletConfiguration.yaml @@ -1,3 +1,4 @@ +# https://kubernetes.io/docs/reference/config-api/kubelet-config.v1beta1/ apiVersion: kubelet.config.k8s.io/v1beta1 kind: KubeletConfiguration metadata: @@ -22,14 +23,15 @@ featureGates: {{ include "kubeadm.featuregates" ( dict "return" "map" "platform" podsPerCore: 20 # cpuCFSQuotaPeriod: 10ms # Basic OS on Ubuntu 20.04 incl. 
crio -#systemReserved: -# memory: 256Mi -# This should be dynamic based on number of maxpods and available cores -# https://cloud.google.com/kubernetes-engine/docs/concepts/cluster-architecture#memory_cpu -# Below are default minimal for 2 cores and minimum kubelet +systemReserved: + memory: 256Mi + ephemeral-storage: "2Gi" +# kubelet memory should be static as runc, conmon are added to each pod's cgroup kubeReserved: cpu: 70m -# memory: 128Mi + memory: 128Mi # Let's use below to reserve memory for system processes, as kubeReserved/systemReserved doesn't seem to play well with systemd -evictionHard: - memory.available: "484Mi" +#evictionHard: +# memory.available: "484Mi" +imageGCLowThresholdPercent: 70 +kernelMemcgNotification: true diff --git a/charts/kubeadm/templates/README.md b/charts/kubeadm/templates/README.md new file mode 100644 index 0000000..afb2413 --- /dev/null +++ b/charts/kubeadm/templates/README.md @@ -0,0 +1,2 @@ +# aws-iam-authenticator +- https://github.com/kubernetes-sigs/aws-iam-authenticator diff --git a/charts/kubeadm/templates/_helpers.tpl b/charts/kubeadm/templates/_helpers.tpl index 1f0823d..708c279 100644 --- a/charts/kubeadm/templates/_helpers.tpl +++ b/charts/kubeadm/templates/_helpers.tpl @@ -2,17 +2,14 @@ Feature gates for all control plane components */ -}} {{- define "kubeadm.featuregates" -}} -{{- $gates := dict "DefaultPodTopologySpread" "true" "CustomCPUCFSQuotaPeriod" "true" "GenericEphemeralVolume" "true" }} -{{- if eq .platform "aws" }} -{{- $gates = merge $gates ( dict "CSIMigrationAWS" "true" "CSIMigrationAWSComplete" "true") }} -{{- end }} +{{- $gates := list "CustomCPUCFSQuotaPeriod" "GenericEphemeralVolume" "CSIMigrationAWSComplete" "CSIMigrationAzureDiskComplete" "CSIMigrationAzureFileComplete" "CSIMigrationGCEComplete" "CSIMigrationOpenStackComplete" "CSIMigrationvSphereComplete" }} {{- if eq .return "csv" }} -{{- range $key, $val := $gates }} -{{- $key }}={{- $val }}, +{{- range $key := $gates }} +{{- $key }}=true, {{- end }} {{- else }} -{{- range $key, $val := $gates }} - {{ $key }}: {{ $val }} +{{- range $key := $gates }} + {{ $key }}: true {{- end }} {{- end }} {{- end -}} diff --git a/charts/kubeadm/templates/k8s-ecr-login-renew/README.md b/charts/kubeadm/templates/k8s-ecr-login-renew/README.md new file mode 100644 index 0000000..41b8bfc --- /dev/null +++ b/charts/kubeadm/templates/k8s-ecr-login-renew/README.md @@ -0,0 +1,8 @@ +# Create IAM role for ECR read-only access +- Attach managed policy: `AmazonEC2ContainerRegistryReadOnly` + +# Create secret for IAM user for ecr-renew +`kubectl create secret -n kube-system generic ecr-renew-cred --from-literal=AWS_REGION= --from-literal=AWS_ACCESS_KEY_ID= --from-literal=AWS_SECRET_ACCESS_KEY=` + +# Resources +- https://github.com/nabsul/k8s-ecr-login-renew diff --git a/charts/kubeadm/templates/k8s-ecr-login-renew/cronjob.yaml b/charts/kubeadm/templates/k8s-ecr-login-renew/cronjob.yaml new file mode 100644 index 0000000..5d4d041 --- /dev/null +++ b/charts/kubeadm/templates/k8s-ecr-login-renew/cronjob.yaml @@ -0,0 +1,40 @@ +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + namespace: kube-system + name: ecr-renew + labels: + app: ecr-renew +spec: + schedule: "0 */6 * * *" + successfulJobsHistoryLimit: 3 + failedJobsHistoryLimit: 5 + jobTemplate: + spec: + template: + spec: + restartPolicy: OnFailure + serviceAccountName: ecr-renew + containers: + - name: ecr-renew + image: nabsul/k8s-ecr-login-renew:v1.4 + env: + - name: DOCKER_SECRET_NAME + value: ecr-login + - name: TARGET_NAMESPACE + value: "*" + -
name: AWS_REGION + valueFrom: + secretKeyRef: + name: ecr-renew-cred + key: AWS_REGION + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: ecr-renew-cred + key: AWS_ACCESS_KEY_ID + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: ecr-renew-cred + key: AWS_SECRET_ACCESS_KEY diff --git a/charts/kubeadm/templates/k8s-ecr-login-renew/service-account.yml b/charts/kubeadm/templates/k8s-ecr-login-renew/service-account.yml new file mode 100644 index 0000000..0591ebc --- /dev/null +++ b/charts/kubeadm/templates/k8s-ecr-login-renew/service-account.yml @@ -0,0 +1,31 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + namespace: kube-system + name: ecr-renew +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: ecr-renew +rules: + - apiGroups: [""] + resources: ["secrets"] + verbs: ["create", "update", "get", "delete"] + - apiGroups: [""] + resources: ["namespaces"] + verbs: ["get", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + namespace: kube-system + name: ecr-renew +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: ecr-renew +subjects: + - kind: ServiceAccount + name: ecr-renew + namespace: kube-system diff --git a/charts/kubeadm/templates/patches/etcd0.yaml b/charts/kubeadm/templates/patches/etcd0.yaml index b17a0e5..1ef4e10 100644 --- a/charts/kubeadm/templates/patches/etcd0.yaml +++ b/charts/kubeadm/templates/patches/etcd0.yaml @@ -3,5 +3,5 @@ spec: - name: etcd resources: requests: - cpu: 250m + cpu: 200m memory: 192Mi diff --git a/charts/kubeadm/templates/patches/kube-apiserver0.yaml b/charts/kubeadm/templates/patches/kube-apiserver0.yaml index 05c2a18..a5390c4 100644 --- a/charts/kubeadm/templates/patches/kube-apiserver0.yaml +++ b/charts/kubeadm/templates/patches/kube-apiserver0.yaml @@ -3,5 +3,5 @@ spec: - name: kube-apiserver resources: requests: - cpu: 250m + cpu: 200m memory: 1Gi diff --git a/charts/kubeadm/templates/patches/kube-controller-manager0.yaml b/charts/kubeadm/templates/patches/kube-controller-manager0.yaml index cb08409..5771a75 100644 --- a/charts/kubeadm/templates/patches/kube-controller-manager0.yaml +++ b/charts/kubeadm/templates/patches/kube-controller-manager0.yaml @@ -3,5 +3,5 @@ spec: - name: kube-controller-manager resources: requests: - cpu: 200m + cpu: 100m memory: 128Mi diff --git a/charts/kubeadm/templates/aws-iam-authenticator/crds.yaml b/charts/kubeadm/templates/resources/00-aws-iam-authenticator-crds.yaml similarity index 100% rename from charts/kubeadm/templates/aws-iam-authenticator/crds.yaml rename to charts/kubeadm/templates/resources/00-aws-iam-authenticator-crds.yaml diff --git a/charts/kubeadm/templates/aws-iam-authenticator/deployment.yaml b/charts/kubeadm/templates/resources/01-aws-iam-authenticator-deployment.yaml similarity index 100% rename from charts/kubeadm/templates/aws-iam-authenticator/deployment.yaml rename to charts/kubeadm/templates/resources/01-aws-iam-authenticator-deployment.yaml diff --git a/charts/kubeadm/templates/aws-iam-authenticator/mappings.yaml b/charts/kubeadm/templates/resources/02-aws-iam-authenticator-mappings.yaml similarity index 100% rename from charts/kubeadm/templates/aws-iam-authenticator/mappings.yaml rename to charts/kubeadm/templates/resources/02-aws-iam-authenticator-mappings.yaml diff --git a/charts/kubeadm/templates/resources/10-runtimeClass.yaml b/charts/kubeadm/templates/resources/10-runtimeClass.yaml new file mode 100644 index 0000000..ed979d2 --- /dev/null +++ 
b/charts/kubeadm/templates/resources/10-runtimeClass.yaml @@ -0,0 +1,8 @@ +apiVersion: node.k8s.io/v1 +kind: RuntimeClass +metadata: + name: crio +handler: runc +overhead: + podFixed: + memory: 16Mi diff --git a/charts/kubeadm/values.yaml b/charts/kubeadm/values.yaml index 6798ff8..6260a55 100644 --- a/charts/kubeadm/values.yaml +++ b/charts/kubeadm/values.yaml @@ -13,5 +13,4 @@ systemd: true protectKernelDefaults: true WorkerNodeRole: "arn:aws:iam::000000000000:role/KubernetesNode" -WorkerIamRole: "arn:aws:iam::000000000000:role/KubernetesNode" KubeAdminRole: "arn:aws:iam::000000000000:role/KubernetesNode" diff --git a/charts/kubezero-istio-ingress/Chart.yaml b/charts/kubezero-istio-ingress/Chart.yaml index 4e4e5bc..a2d0f39 100644 --- a/charts/kubezero-istio-ingress/Chart.yaml +++ b/charts/kubezero-istio-ingress/Chart.yaml @@ -2,8 +2,8 @@ apiVersion: v2 name: kubezero-istio-ingress description: KubeZero Umbrella Chart for Istio based Ingress type: application -version: 0.5.1 -appVersion: 1.9.2 +version: 0.5.2 +appVersion: 1.9.3 home: https://kubezero.com icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png keywords: @@ -16,9 +16,9 @@ dependencies: version: ">= 0.1.3" repository: https://zero-down-time.github.io/kubezero/ - name: istio-ingress - version: 1.9.2 + version: 1.9.3 condition: istio-ingress.enabled - name: istio-private-ingress - version: 1.9.2 + version: 1.9.3 condition: istio-private-ingress.enabled kubeVersion: ">= 1.18.0" diff --git a/charts/kubezero-istio-ingress/charts/istio-ingress/Chart.yaml b/charts/kubezero-istio-ingress/charts/istio-ingress/Chart.yaml index 4973d36..75fb402 100644 --- a/charts/kubezero-istio-ingress/charts/istio-ingress/Chart.yaml +++ b/charts/kubezero-istio-ingress/charts/istio-ingress/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v1 name: istio-ingress -version: 1.9.2 +version: 1.9.3 tillerVersion: ">=2.7.2" description: Helm chart for deploying Istio gateways keywords: diff --git a/charts/kubezero-istio-ingress/charts/istio-ingress/values.yaml b/charts/kubezero-istio-ingress/charts/istio-ingress/values.yaml index 001568e..4aa40af 100644 --- a/charts/kubezero-istio-ingress/charts/istio-ingress/values.yaml +++ b/charts/kubezero-istio-ingress/charts/istio-ingress/values.yaml @@ -174,7 +174,7 @@ global: hub: docker.io/istio # Default tag for Istio images. - tag: 1.9.2 + tag: 1.9.3 # Specify image pull policy if default behavior isn't desired. # Default behavior: latest images will be Always else IfNotPresent. diff --git a/charts/kubezero-istio-ingress/charts/istio-private-ingress/Chart.yaml b/charts/kubezero-istio-ingress/charts/istio-private-ingress/Chart.yaml index 19ccd4b..39cecad 100644 --- a/charts/kubezero-istio-ingress/charts/istio-private-ingress/Chart.yaml +++ b/charts/kubezero-istio-ingress/charts/istio-private-ingress/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v1 name: istio-private-ingress -version: 1.9.2 +version: 1.9.3 tillerVersion: ">=2.7.2" description: Helm chart for deploying Istio gateways keywords: diff --git a/charts/kubezero-istio-ingress/charts/istio-private-ingress/values.yaml b/charts/kubezero-istio-ingress/charts/istio-private-ingress/values.yaml index 001568e..4aa40af 100644 --- a/charts/kubezero-istio-ingress/charts/istio-private-ingress/values.yaml +++ b/charts/kubezero-istio-ingress/charts/istio-private-ingress/values.yaml @@ -174,7 +174,7 @@ global: hub: docker.io/istio # Default tag for Istio images. - tag: 1.9.2 + tag: 1.9.3 # Specify image pull policy if default behavior isn't desired. 
# Default behavior: latest images will be Always else IfNotPresent. diff --git a/charts/kubezero-istio-ingress/values.yaml b/charts/kubezero-istio-ingress/values.yaml index 50a9aee..c26f8a6 100644 --- a/charts/kubezero-istio-ingress/values.yaml +++ b/charts/kubezero-istio-ingress/values.yaml @@ -1,7 +1,7 @@ # Make sure these values match kubezero-istio !!! global: #hub: docker.io/istio - #tag: 1.9.0 + #tag: 1.9.3 logAsJson: true jwtPolicy: first-party-jwt @@ -23,10 +23,11 @@ istio-ingress: replicaCount: 1 resources: requests: + cpu: 50m memory: 64Mi limits: # cpu: 100m - memory: 256Mi + memory: 512Mi externalTrafficPolicy: Local podAntiAffinityLabelSelector: - key: app @@ -37,7 +38,6 @@ istio-ingress: env: TERMINATION_DRAIN_DURATION_SECONDS: '"60"' # ISTIO_META_HTTP10: '"1"' - # The node selector is normally the list of nodeports, see CloudBender nodeSelector: node.kubernetes.io/ingress.public: "30080_30443" @@ -87,11 +87,11 @@ istio-private-ingress: replicaCount: 1 resources: requests: - cpu: 100m + cpu: 50m memory: 64Mi limits: # cpu: 100m - memory: 256Mi + memory: 512Mi externalTrafficPolicy: Local podAntiAffinityLabelSelector: - key: app @@ -102,7 +102,6 @@ istio-private-ingress: env: TERMINATION_DRAIN_DURATION_SECONDS: '"60"' # ISTIO_META_HTTP10: '"1"' - nodeSelector: node.kubernetes.io/ingress.private: "31080_31443" #nodeSelector: "31080_31443_31671_31672_31224" diff --git a/charts/kubezero-istio/Chart.yaml b/charts/kubezero-istio/Chart.yaml index d895187..f65d058 100644 --- a/charts/kubezero-istio/Chart.yaml +++ b/charts/kubezero-istio/Chart.yaml @@ -2,8 +2,8 @@ apiVersion: v2 name: kubezero-istio description: KubeZero Umbrella Chart for Istio type: application -version: 0.5.1 -appVersion: 1.9.2 +version: 0.5.3 +appVersion: 1.9.3 home: https://kubezero.com icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png keywords: @@ -16,7 +16,7 @@ dependencies: version: ">= 0.1.3" repository: https://zero-down-time.github.io/kubezero/ - name: base - version: 1.9.2 + version: 1.9.3 - name: istio-discovery - version: 1.9.2 + version: 1.9.3 kubeVersion: ">= 1.18.0" diff --git a/charts/kubezero-istio/charts/base/Chart.yaml b/charts/kubezero-istio/charts/base/Chart.yaml index 6b1b165..1ed5b5c 100644 --- a/charts/kubezero-istio/charts/base/Chart.yaml +++ b/charts/kubezero-istio/charts/base/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v1 name: base -version: 1.9.2 +version: 1.9.3 tillerVersion: ">=2.7.2" description: Helm chart for deploying Istio cluster resources and CRDs keywords: diff --git a/charts/kubezero-istio/charts/istio-discovery/Chart.yaml b/charts/kubezero-istio/charts/istio-discovery/Chart.yaml index fcf5f35..06bd7e2 100644 --- a/charts/kubezero-istio/charts/istio-discovery/Chart.yaml +++ b/charts/kubezero-istio/charts/istio-discovery/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v1 name: istio-discovery -version: 1.9.2 +version: 1.9.3 tillerVersion: ">=2.7.2" description: Helm chart for istio control plane keywords: diff --git a/charts/kubezero-istio/charts/istio-discovery/values.yaml b/charts/kubezero-istio/charts/istio-discovery/values.yaml index c42d284..1290bee 100644 --- a/charts/kubezero-istio/charts/istio-discovery/values.yaml +++ b/charts/kubezero-istio/charts/istio-discovery/values.yaml @@ -232,7 +232,7 @@ global: # Dev builds from prow are on gcr.io hub: docker.io/istio # Default tag for Istio images. - tag: 1.9.2 + tag: 1.9.3 # Specify image pull policy if default behavior isn't desired. # Default behavior: latest images will be Always else IfNotPresent.
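This Istio bump pins 1.9.3 in many places at once: chart versions, appVersion, image tags, and ISTIO_VERSION in update.sh below. A rough pre-release consistency check could look like the following sketch; it is not part of this changeset, and the paths simply mirror the chart layout visible in this diff.

```bash
#!/bin/bash
# Sketch: flag any Istio 1.9.x pin in the istio charts that does not match
# ISTIO_VERSION. Chart paths are taken from this repo's layout; the script
# itself is an illustration, not part of the PR.
set -eu
ISTIO_VERSION=1.9.3

stale=$(grep -rnE '1\.9\.[0-9]+' charts/kubezero-istio charts/kubezero-istio-ingress \
          | grep -v "$ISTIO_VERSION" || true)

if [ -n "$stale" ]; then
  printf 'Stale Istio version pins:\n%s\n' "$stale" >&2
  exit 1
fi
```

A guard like this would have caught the stale `#tag: 1.9.0` comment that this diff fixes in kubezero-istio-ingress/values.yaml.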
diff --git a/charts/kubezero-istio/update.sh b/charts/kubezero-istio/update.sh index 34247fb..16df9f1 100755 --- a/charts/kubezero-istio/update.sh +++ b/charts/kubezero-istio/update.sh @@ -1,7 +1,7 @@ #!/bin/bash set -ex -export ISTIO_VERSION=1.9.2 +export ISTIO_VERSION=1.9.3 if [ ! -d istio-$ISTIO_VERSION ]; then NAME="istio-$ISTIO_VERSION" diff --git a/charts/kubezero-istio/values.yaml b/charts/kubezero-istio/values.yaml index 398dd39..7b5c7b6 100644 --- a/charts/kubezero-istio/values.yaml +++ b/charts/kubezero-istio/values.yaml @@ -35,6 +35,10 @@ istio-discovery: enabled: false meshConfig: + defaultConfig: + terminationDrainDuration: 60s +# proxyMetadata: +# ISTIO_META_HTTP10: '"1"' accessLogFile: /dev/stdout accessLogEncoding: 'JSON' h2UpgradePolicy: 'DO_NOT_UPGRADE' diff --git a/charts/kubezero-metrics/Chart.yaml b/charts/kubezero-metrics/Chart.yaml index 0428ef3..316bea9 100644 --- a/charts/kubezero-metrics/Chart.yaml +++ b/charts/kubezero-metrics/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: kubezero-metrics description: KubeZero Umbrella Chart for prometheus-operator type: application -version: 0.3.4 +version: 0.3.5 home: https://kubezero.com icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png keywords: @@ -16,7 +16,7 @@ dependencies: version: ">= 0.1.3" repository: https://zero-down-time.github.io/kubezero/ - name: kube-prometheus-stack - version: 14.3.0 + version: 14.9.0 # Switch back to upstream once all alerts are fixed, e.g. etcd grpc # repository: https://prometheus-community.github.io/helm-charts - name: prometheus-adapter diff --git a/charts/kubezero-metrics/adjust_alarms.patch b/charts/kubezero-metrics/adjust_alarms.patch new file mode 100644 index 0000000..aee2b07 --- /dev/null +++ b/charts/kubezero-metrics/adjust_alarms.patch @@ -0,0 +1,66 @@ +diff -turN charts/kube-prometheus-stack/templates/prometheus/rules-1.14/etcd.yaml charts/kube-prometheus-stack.zdt/templates/prometheus/rules-1.14/etcd.yaml +--- charts/kube-prometheus-stack/templates/prometheus/rules-1.14/etcd.yaml 2021-04-14 22:13:29.000000000 +0200 ++++ charts/kube-prometheus-stack.zdt/templates/prometheus/rules-1.14/etcd.yaml 2021-04-15 14:43:03.074281889 +0200 +@@ -54,34 +54,6 @@ + {{- if .Values.defaultRules.additionalRuleLabels }} + {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} + {{- end }} +- - alert: etcdHighNumberOfFailedGRPCRequests +- annotations: +- message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' +- expr: |- +- 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method) +- / +- sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method) +- > 1 +- for: 10m +- labels: +- severity: warning +-{{- if .Values.defaultRules.additionalRuleLabels }} +-{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +-{{- end }} +- - alert: etcdHighNumberOfFailedGRPCRequests +- annotations: +- message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+- expr: |- +- 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method) +- / +- sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method) +- > 5 +- for: 5m +- labels: +- severity: critical +-{{- if .Values.defaultRules.additionalRuleLabels }} +-{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +-{{- end }} + - alert: etcdGRPCRequestsSlow + annotations: + message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": gRPC requests to {{`{{`}} $labels.grpc_method {{`}}`}} are taking {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' +diff -turN charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.yaml charts/kube-prometheus-stack.zdt/templates/prometheus/rules-1.14/node-exporter.yaml +--- charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.yaml 2021-04-14 22:13:29.000000000 +0200 ++++ charts/kube-prometheus-stack.zdt/templates/prometheus/rules-1.14/node-exporter.yaml 2021-04-15 14:49:41.614282790 +0200 +@@ -30,7 +30,7 @@ + summary: Filesystem is predicted to run out of space within the next 24 hours. + expr: |- + ( +- node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 40 ++ node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 25 + and + predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0 + and +@@ -48,7 +48,7 @@ + summary: Filesystem is predicted to run out of space within the next 4 hours. + expr: |- + ( +- node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 15 ++ node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 10 + and + predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0 + and +@@ -259,4 +259,4 @@ + {{- if .Values.defaultRules.additionalRuleLabels }} + {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} + {{- end }} +-{{- end }} +\ No newline at end of file ++{{- end }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/Chart.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/Chart.yaml index 332b01e..3523fba 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/Chart.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/Chart.yaml @@ -15,11 +15,11 @@ dependencies: - condition: nodeExporter.enabled name: prometheus-node-exporter repository: https://prometheus-community.github.io/helm-charts - version: 1.16.* + version: 1.17.* - condition: grafana.enabled name: grafana repository: https://grafana.github.io/helm-charts - version: 6.6.* + version: 6.7.* description: kube-prometheus-stack collects Kubernetes manifests, Grafana dashboards, and Prometheus rules combined with documentation and scripts to provide easy to operate end-to-end Kubernetes cluster monitoring with Prometheus using the Prometheus Operator. 
home: https://github.com/prometheus-operator/kube-prometheus icon: https://raw.githubusercontent.com/prometheus/prometheus.github.io/master/assets/prometheus_logo-cb55bb5c346.png @@ -44,4 +44,4 @@ sources: - https://github.com/prometheus-community/helm-charts - https://github.com/prometheus-operator/kube-prometheus type: application -version: 14.3.0 +version: 14.9.0 diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/Chart.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/Chart.yaml index bce2e76..e9466a3 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/Chart.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/Chart.yaml @@ -1,5 +1,5 @@ apiVersion: v2 -appVersion: 7.4.3 +appVersion: 7.5.3 description: The leading tool for querying and visualizing time series and metrics. home: https://grafana.net icon: https://raw.githubusercontent.com/grafana/grafana/master/public/img/logo_transparent_400x.png @@ -19,4 +19,4 @@ name: grafana sources: - https://github.com/grafana/grafana type: application -version: 6.6.3 +version: 6.7.4 diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/README.md b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/README.md index c40b099..76a4acb 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/README.md +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/README.md @@ -59,8 +59,8 @@ This version requires Helm >= 3.1.0. | `securityContext` | Deployment securityContext | `{"runAsUser": 472, "runAsGroup": 472, "fsGroup": 472}` | | `priorityClassName` | Name of Priority Class to assign pods | `nil` | | `image.repository` | Image repository | `grafana/grafana` | -| `image.tag` | Image tag (`Must be >= 5.0.0`) | `7.4.3` | -| `image.sha` | Image sha (optional) | `16dc29783ec7d4a23fa19207507586344c6797023604347eb3e8ea5ae431e181` | +| `image.tag` | Image tag (`Must be >= 5.0.0`) | `7.4.5` | +| `image.sha` | Image sha (optional) | `2b56f6106ddc376bb46d974230d530754bf65a640dfbc5245191d72d3b49efc6` | | `image.pullPolicy` | Image pull policy | `IfNotPresent` | | `image.pullSecrets` | Image pull secrets | `{}` | | `service.type` | Kubernetes service type | `ClusterIP` | @@ -242,6 +242,9 @@ ingress: ### Example of extraVolumeMounts +A volume can be of type persistentVolumeClaim or hostPath, but not both at the same time. +If neither an existingClaim nor a hostPath argument is given, the type is emptyDir.
+ ```yaml - extraVolumeMounts: - name: plugins @@ -249,6 +252,10 @@ ingress: subPath: configs/grafana/plugins existingClaim: existing-grafana-claim readOnly: false + - name: dashboards + mountPath: /var/lib/grafana/dashboards + hostPath: /usr/shared/grafana/dashboards + readOnly: false ``` ## Import dashboards diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/_pod.tpl b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/_pod.tpl index ece72db..9af7717 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/_pod.tpl +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/_pod.tpl @@ -479,8 +479,15 @@ volumes: {{- end }} {{- range .Values.extraVolumeMounts }} - name: {{ .name }} + {{- if .existingClaim }} persistentVolumeClaim: claimName: {{ .existingClaim }} + {{- else if .hostPath }} + hostPath: + path: {{ .hostPath }} + {{- else }} + emptyDir: {} + {{- end }} {{- end }} {{- range .Values.extraEmptyDirMounts }} - name: {{ .name }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/podsecuritypolicy.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/podsecuritypolicy.yaml index 88bf64c..2c40a58 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/podsecuritypolicy.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/podsecuritypolicy.yaml @@ -17,19 +17,8 @@ spec: privileged: false allowPrivilegeEscalation: false requiredDropCapabilities: - # Default set from Docker, without DAC_OVERRIDE or CHOWN - - FOWNER - - FSETID - - KILL - - SETGID - - SETUID - - SETPCAP - - NET_BIND_SERVICE - - NET_RAW - - SYS_CHROOT - - MKNOD - - AUDIT_WRITE - - SETFCAP + # Default set from Docker, with DAC_OVERRIDE and CHOWN + - ALL volumes: - 'configMap' - 'emptyDir' @@ -42,12 +31,20 @@ spec: hostIPC: false hostPID: false runAsUser: - rule: 'RunAsAny' + rule: 'MustRunAsNonRoot' seLinux: rule: 'RunAsAny' supplementalGroups: - rule: 'RunAsAny' + rule: 'MustRunAs' + ranges: + # Forbid adding the root group. + - min: 1 + max: 65535 fsGroup: - rule: 'RunAsAny' + rule: 'MustRunAs' + ranges: + # Forbid adding the root group. + - min: 1 + max: 65535 readOnlyRootFilesystem: false {{- end }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/values.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/values.yaml index 3478fa2..d826343 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/values.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/values.yaml @@ -53,7 +53,7 @@ livenessProbe: image: repository: grafana/grafana - tag: 7.4.3 + tag: 7.5.3 sha: "" pullPolicy: IfNotPresent @@ -401,10 +401,14 @@ extraSecretMounts: [] ## Additional grafana server volume mounts # Defines additional volume mounts. extraVolumeMounts: [] - # - name: extra-volume - # mountPath: /mnt/volume + # - name: extra-volume-0 + # mountPath: /mnt/volume0 # readOnly: true # existingClaim: volume-claim + # - name: extra-volume-1 + # mountPath: /mnt/volume1 + # readOnly: true + # hostPath: /usr/shared/ ## Pass the plugins you want installed as a list. 
## diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/Chart.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/Chart.yaml index 45a1a71..b7f6ddf 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/Chart.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/Chart.yaml @@ -15,4 +15,4 @@ maintainers: name: kube-state-metrics sources: - https://github.com/kubernetes/kube-state-metrics/ -version: 2.13.0 +version: 2.13.2 diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/role.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/role.yaml index 6259d2f..c493f16 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/role.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/role.yaml @@ -1,4 +1,4 @@ -{{- if and (eq $.Values.rbac.create true) (not .Values.rbac.useExistingRole) -}} +{{- if and (eq $.Values.rbac.create true) (not .Values.rbac.useExistingRole) -}} {{- if eq .Values.rbac.useClusterRole false }} {{- range (split "," $.Values.namespace) }} {{- end }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/rolebinding.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/rolebinding.yaml index 89bb41b..732174a 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/rolebinding.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/rolebinding.yaml @@ -1,7 +1,7 @@ {{- if and (eq .Values.rbac.create true) (eq .Values.rbac.useClusterRole false) -}} {{- range (split "," $.Values.namespace) }} --- -apiVersion: rbac.authorization.k8s.io/v1beta1 +apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: labels: diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/Chart.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/Chart.yaml index b4c5e16..b045551 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/Chart.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/Chart.yaml @@ -14,4 +14,4 @@ maintainers: name: prometheus-node-exporter sources: - https://github.com/prometheus/node_exporter/ -version: 1.16.2 +version: 1.17.0 diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/templates/daemonset.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/templates/daemonset.yaml index 53fe326..7a87aaf 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/templates/daemonset.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/templates/daemonset.yaml @@ -29,6 +29,10 @@ spec: {{- if .Values.priorityClassName }} priorityClassName: {{ .Values.priorityClassName }} {{- end }} + {{- if .Values.extraInitContainers }} + initContainers: + {{ toYaml .Values.extraInitContainers | nindent 6 }} + {{- end }} containers: - name: node-exporter image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" diff --git 
a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/values.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/values.yaml index 93f8ef8..5cb6981 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/values.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/values.yaml @@ -168,3 +168,7 @@ sidecarVolumeMount: [] ## - name: collector-textfiles ## mountPath: /run/prometheus ## readOnly: false + +## Additional InitContainers to initialize the pod +## +extraInitContainers: [] diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/alertmanager/alertmanager.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/alertmanager/alertmanager.yaml index bbdbc56..cd3b10b 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/alertmanager/alertmanager.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/alertmanager/alertmanager.yaml @@ -7,6 +7,10 @@ metadata: labels: app: {{ template "kube-prometheus-stack.name" . }}-alertmanager {{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.alertmanager.annotations }} + annotations: +{{ toYaml .Values.alertmanager.annotations | indent 4 }} +{{- end }} spec: {{- if .Values.alertmanager.alertmanagerSpec.image }} image: {{ .Values.alertmanager.alertmanagerSpec.image.repository }}:{{ .Values.alertmanager.alertmanagerSpec.image.tag }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/alertmanager/servicemonitor.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/alertmanager/servicemonitor.yaml index a699acc..2dc9b86 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/alertmanager/servicemonitor.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/alertmanager/servicemonitor.yaml @@ -21,6 +21,9 @@ spec: {{- if .Values.alertmanager.serviceMonitor.interval }} interval: {{ .Values.alertmanager.serviceMonitor.interval }} {{- end }} + {{- if .Values.alertmanager.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.alertmanager.serviceMonitor.proxyUrl}} + {{- end }} {{- if .Values.alertmanager.serviceMonitor.scheme }} scheme: {{ .Values.alertmanager.serviceMonitor.scheme }} {{- end }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/core-dns/servicemonitor.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/core-dns/servicemonitor.yaml index c398db6..6acbb5e 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/core-dns/servicemonitor.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/core-dns/servicemonitor.yaml @@ -21,6 +21,9 @@ spec: {{- if .Values.coreDns.serviceMonitor.interval}} interval: {{ .Values.coreDns.serviceMonitor.interval }} {{- end }} + {{- if .Values.coreDns.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.coreDns.serviceMonitor.proxyUrl}} + {{- end }} bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token {{- if .Values.coreDns.serviceMonitor.metricRelabelings }} metricRelabelings: diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-api-server/servicemonitor.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-api-server/servicemonitor.yaml index 4c0290e..f34c187 100644 --- 
a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-api-server/servicemonitor.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-api-server/servicemonitor.yaml @@ -13,6 +13,9 @@ spec: {{- if .Values.kubeApiServer.serviceMonitor.interval }} interval: {{ .Values.kubeApiServer.serviceMonitor.interval }} {{- end }} + {{- if .Values.kubeApiServer.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubeApiServer.serviceMonitor.proxyUrl}} + {{- end }} port: https scheme: https {{- if .Values.kubeApiServer.serviceMonitor.metricRelabelings }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-controller-manager/service.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-controller-manager/service.yaml index 8b5556a..d55ca2a 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-controller-manager/service.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-controller-manager/service.yaml @@ -1,4 +1,4 @@ -{{- if .Values.kubeControllerManager.enabled }} +{{- if and .Values.kubeControllerManager.enabled .Values.kubeControllerManager.service.enabled }} apiVersion: v1 kind: Service metadata: diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-controller-manager/servicemonitor.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-controller-manager/servicemonitor.yaml index 2f30a0c..c0846ea 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-controller-manager/servicemonitor.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-controller-manager/servicemonitor.yaml @@ -1,4 +1,4 @@ -{{- if .Values.kubeControllerManager.enabled }} +{{- if and .Values.kubeControllerManager.enabled .Values.kubeControllerManager.serviceMonitor.enabled }} apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: @@ -22,6 +22,9 @@ spec: interval: {{ .Values.kubeControllerManager.serviceMonitor.interval }} {{- end }} bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + {{- if .Values.kubeControllerManager.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubeControllerManager.serviceMonitor.proxyUrl}} + {{- end }} {{- if .Values.kubeControllerManager.serviceMonitor.https }} scheme: https tlsConfig: diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-dns/servicemonitor.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-dns/servicemonitor.yaml index 3c1a061..1f1b0de 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-dns/servicemonitor.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-dns/servicemonitor.yaml @@ -22,6 +22,9 @@ spec: interval: {{ .Values.kubeDns.serviceMonitor.interval }} {{- end }} bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + {{- if .Values.kubeDns.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubeDns.serviceMonitor.proxyUrl}} + {{- end }} {{- if .Values.kubeDns.serviceMonitor.dnsmasqMetricRelabelings }} metricRelabelings: {{ tpl (toYaml .Values.kubeDns.serviceMonitor.dnsmasqMetricRelabelings | indent 4) . 
}} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-etcd/service.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-etcd/service.yaml index 957e8bb..b2677e2 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-etcd/service.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-etcd/service.yaml @@ -1,4 +1,4 @@ -{{- if .Values.kubeEtcd.enabled }} +{{- if and .Values.kubeEtcd.enabled .Values.kubeEtcd.service.enabled }} apiVersion: v1 kind: Service metadata: diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-etcd/servicemonitor.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-etcd/servicemonitor.yaml index 17a447a..689e1fd 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-etcd/servicemonitor.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-etcd/servicemonitor.yaml @@ -1,4 +1,4 @@ -{{- if .Values.kubeEtcd.enabled }} +{{- if and .Values.kubeEtcd.enabled .Values.kubeEtcd.serviceMonitor.enabled }} apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: @@ -22,6 +22,9 @@ spec: interval: {{ .Values.kubeEtcd.serviceMonitor.interval }} {{- end }} bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + {{- if .Values.kubeEtcd.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubeEtcd.serviceMonitor.proxyUrl}} + {{- end }} {{- if eq .Values.kubeEtcd.serviceMonitor.scheme "https" }} scheme: https tlsConfig: diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-proxy/service.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-proxy/service.yaml index abe4206..6a93319 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-proxy/service.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-proxy/service.yaml @@ -1,4 +1,4 @@ -{{- if .Values.kubeProxy.enabled }} +{{- if and .Values.kubeProxy.enabled .Values.kubeProxy.service.enabled }} apiVersion: v1 kind: Service metadata: diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-proxy/servicemonitor.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-proxy/servicemonitor.yaml index 0ac4418..bc3b7be 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-proxy/servicemonitor.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-proxy/servicemonitor.yaml @@ -1,4 +1,4 @@ -{{- if .Values.kubeProxy.enabled }} +{{- if and .Values.kubeProxy.enabled .Values.kubeProxy.serviceMonitor.enabled }} apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: @@ -22,6 +22,9 @@ spec: interval: {{ .Values.kubeProxy.serviceMonitor.interval }} {{- end }} bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + {{- if .Values.kubeProxy.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubeProxy.serviceMonitor.proxyUrl}} + {{- end }} {{- if .Values.kubeProxy.serviceMonitor.https }} scheme: https tlsConfig: diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-scheduler/service.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-scheduler/service.yaml index 15559d4..7a9c53d 100644 --- 
a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-scheduler/service.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-scheduler/service.yaml @@ -1,4 +1,4 @@ -{{- if .Values.kubeScheduler.enabled }} +{{- if and .Values.kubeScheduler.enabled .Values.kubeScheduler.service.enabled }} apiVersion: v1 kind: Service metadata: diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-scheduler/servicemonitor.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-scheduler/servicemonitor.yaml index b565ec2..a9a454b 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-scheduler/servicemonitor.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-scheduler/servicemonitor.yaml @@ -1,4 +1,4 @@ -{{- if .Values.kubeScheduler.enabled }} +{{- if and .Values.kubeScheduler.enabled .Values.kubeScheduler.serviceMonitor.enabled }} apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: @@ -22,6 +22,9 @@ spec: interval: {{ .Values.kubeScheduler.serviceMonitor.interval }} {{- end }} bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + {{- if .Values.kubeScheduler.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubeScheduler.serviceMonitor.proxyUrl}} + {{- end }} {{- if .Values.kubeScheduler.serviceMonitor.https }} scheme: https tlsConfig: diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-state-metrics/serviceMonitor.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-state-metrics/serviceMonitor.yaml index 5b723b2..caeaa1e 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-state-metrics/serviceMonitor.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-state-metrics/serviceMonitor.yaml @@ -14,6 +14,9 @@ spec: {{- if .Values.kubeStateMetrics.serviceMonitor.interval }} interval: {{ .Values.kubeStateMetrics.serviceMonitor.interval }} {{- end }} + {{- if .Values.kubeStateMetrics.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubeStateMetrics.serviceMonitor.proxyUrl}} + {{- end }} honorLabels: true {{- if .Values.kubeStateMetrics.serviceMonitor.metricRelabelings }} metricRelabelings: @@ -22,6 +25,11 @@ spec: {{- if .Values.kubeStateMetrics.serviceMonitor.relabelings }} relabelings: {{ toYaml .Values.kubeStateMetrics.serviceMonitor.relabelings | indent 4 }} +{{- end }} +{{- if .Values.kubeStateMetrics.serviceMonitor.namespaceOverride }} + namespaceSelector: + matchNames: + - {{ .Values.kubeStateMetrics.serviceMonitor.namespaceOverride }} {{- end }} selector: matchLabels: diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kubelet/servicemonitor.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kubelet/servicemonitor.yaml index 3645932..b24a395 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kubelet/servicemonitor.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kubelet/servicemonitor.yaml @@ -15,6 +15,9 @@ spec: {{- if .Values.kubelet.serviceMonitor.interval }} interval: {{ .Values.kubelet.serviceMonitor.interval }} {{- end }} + {{- if .Values.kubelet.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubelet.serviceMonitor.proxyUrl}} + {{- end }} tlsConfig: caFile: 
/var/run/secrets/kubernetes.io/serviceaccount/ca.crt insecureSkipVerify: true diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/node-exporter/servicemonitor.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/node-exporter/servicemonitor.yaml index 5ca5f1b..adcc419 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/node-exporter/servicemonitor.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/node-exporter/servicemonitor.yaml @@ -18,6 +18,9 @@ spec: {{- if .Values.nodeExporter.serviceMonitor.interval }} interval: {{ .Values.nodeExporter.serviceMonitor.interval }} {{- end }} + {{- if .Values.nodeExporter.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.nodeExporter.serviceMonitor.proxyUrl}} + {{- end }} {{- if .Values.nodeExporter.serviceMonitor.scrapeTimeout }} scrapeTimeout: {{ .Values.nodeExporter.serviceMonitor.scrapeTimeout }} {{- end }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/certmanager.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/certmanager.yaml index 090e6a5..cfd5165 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/certmanager.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/certmanager.yaml @@ -18,7 +18,7 @@ metadata: namespace: {{ template "kube-prometheus-stack.namespace" . }} spec: secretName: {{ template "kube-prometheus-stack.fullname" . }}-root-cert - duration: 43800h # 5y + duration: 43800h0m0s # 5y issuerRef: name: {{ template "kube-prometheus-stack.fullname" . }}-self-signed-issuer commonName: "ca.webhook.kube-prometheus-stack" @@ -43,7 +43,7 @@ metadata: namespace: {{ template "kube-prometheus-stack.namespace" . }} spec: secretName: {{ template "kube-prometheus-stack.fullname" .
}}-admission - duration: 8760h # 1y + duration: 8760h0m0s # 1y issuerRef: {{- if .Values.prometheusOperator.admissionWebhooks.certManager.issuerRef }} {{- toYaml .Values.prometheusOperator.admissionWebhooks.certManager.issuerRef | nindent 4 }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/prometheus.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/prometheus.yaml index f069a85..fdab5f5 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/prometheus.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/prometheus.yaml @@ -150,14 +150,24 @@ spec: {{ else }} probeNamespaceSelector: {} {{- end }} -{{- if .Values.prometheus.prometheusSpec.remoteRead }} +{{- if (or .Values.prometheus.prometheusSpec.remoteRead .Values.prometheus.prometheusSpec.additionalRemoteRead) }} remoteRead: +{{- if .Values.prometheus.prometheusSpec.remoteRead }} {{ toYaml .Values.prometheus.prometheusSpec.remoteRead | indent 4 }} {{- end }} -{{- if .Values.prometheus.prometheusSpec.remoteWrite }} +{{- if .Values.prometheus.prometheusSpec.additionalRemoteRead }} +{{ toYaml .Values.prometheus.prometheusSpec.additionalRemoteRead | indent 4 }} +{{- end }} +{{- end }} +{{- if (or .Values.prometheus.prometheusSpec.remoteWrite .Values.prometheus.prometheusSpec.additionalRemoteWrite) }} remoteWrite: +{{- if .Values.prometheus.prometheusSpec.remoteWrite }} {{ toYaml .Values.prometheus.prometheusSpec.remoteWrite | indent 4 }} {{- end }} +{{- if .Values.prometheus.prometheusSpec.additionalRemoteWrite }} +{{ toYaml .Values.prometheus.prometheusSpec.additionalRemoteWrite | indent 4 }} +{{- end }} +{{- end }} {{- if .Values.prometheus.prometheusSpec.securityContext }} securityContext: {{ toYaml .Values.prometheus.prometheusSpec.securityContext | indent 4 }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.yaml index 3be497c..763c4a2 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.yaml @@ -30,7 +30,7 @@ spec: summary: Filesystem is predicted to run out of space within the next 24 hours. expr: |- ( - node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 40 + node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 25 and predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0 and @@ -48,7 +48,7 @@ spec: summary: Filesystem is predicted to run out of space within the next 4 hours. 
expr: |- ( - node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 15 + node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 10 and predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0 and @@ -259,4 +259,4 @@ spec: {{- if .Values.defaultRules.additionalRuleLabels }} {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} {{- end }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/serviceThanosSIdecar.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/serviceThanosSidecar.yaml similarity index 95% rename from charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/serviceThanosSIdecar.yaml rename to charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/serviceThanosSidecar.yaml index ee97d49..7c33379 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/serviceThanosSIdecar.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/serviceThanosSidecar.yaml @@ -16,7 +16,7 @@ metadata: {{- end }} spec: type: {{ .Values.prometheus.thanosService.type }} - clusterIP: None + clusterIP: {{ .Values.prometheus.thanosService.clusterIP }} ports: - name: {{ .Values.prometheus.thanosService.portName }} port: {{ .Values.prometheus.thanosService.port }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/values.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/values.yaml index 5848603..c7abeef 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/values.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/values.yaml @@ -117,6 +117,10 @@ alertmanager: ## enabled: true + ## Annotations for Alertmanager + ## + annotations: {} + ## Api that prometheus will use to communicate with alertmanager. Possible values are v1, v2 ## apiVersion: v2 @@ -194,7 +198,7 @@ alertmanager: # *Graph:* <{{ .GeneratorURL }}|:chart_with_upwards_trend:> # *Runbook:* <{{ .Annotations.runbook }}|:spiral_note_pad:> # *Details:* - # {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}` + # {{ range .Labels.SortedPairs }} - *{{ .Name }}:* `{{ .Value }}` # {{ end }} # {{ end }} # {{ end }} @@ -345,6 +349,10 @@ alertmanager: interval: "" selfMonitor: true + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + ## scheme: HTTP scheme to use for scraping. Can be used with `tlsConfig` for example if using istio mTLS. scheme: "" @@ -375,7 +383,7 @@ alertmanager: ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#alertmanagerspec ## alertmanagerSpec: - ## Standard object’s metadata. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#metadata + ## Standard object's metadata. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#metadata ## Metadata Labels and Annotations gets propagated to the Alertmanager pods. ## podMetadata: {} @@ -747,6 +755,10 @@ kubeApiServer: ## Scrape interval. If not set, the Prometheus default scrape interval is used. ## interval: "" + ## proxyUrl: URL of a proxy that should be used for scraping. 
+ ## + proxyUrl: "" + jobLabel: component selector: matchLabels: @@ -771,6 +783,10 @@ kubelet: ## interval: "" + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + ## Enable scraping the kubelet over https. For requirements to enable this see ## https://github.com/prometheus-operator/prometheus-operator/issues/926 ## @@ -891,16 +907,22 @@ kubeControllerManager: ## If using kubeControllerManager.endpoints only the port and targetPort are used ## service: + enabled: true port: 10252 targetPort: 10252 # selector: # component: kube-controller-manager serviceMonitor: + enabled: true ## Scrape interval. If not set, the Prometheus default scrape interval is used. ## interval: "" + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + ## Enable scraping kube-controller-manager over https. ## Requires proper certs (not self-signed) and delegated authentication/authorization checks ## @@ -943,6 +965,10 @@ coreDns: ## interval: "" + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + ## metric relabel configs to apply to samples before ingestion. ## metricRelabelings: [] @@ -978,6 +1004,10 @@ kubeDns: ## interval: "" + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + ## metric relabel configs to apply to samples before ingestion. ## metricRelabelings: [] @@ -1024,6 +1054,7 @@ kubeEtcd: ## Etcd service. If using kubeEtcd.endpoints only the port and targetPort are used ## service: + enabled: true port: 2379 targetPort: 2379 # selector: @@ -1041,9 +1072,13 @@ kubeEtcd: ## keyFile: /etc/prometheus/secrets/etcd-client-cert/etcd-client-key ## serviceMonitor: + enabled: true ## Scrape interval. If not set, the Prometheus default scrape interval is used. ## interval: "" + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" scheme: http insecureSkipVerify: false serverName: "" @@ -1084,15 +1119,20 @@ kubeScheduler: ## If using kubeScheduler.endpoints only the port and targetPort are used ## service: + enabled: true port: 10251 targetPort: 10251 # selector: # component: kube-scheduler serviceMonitor: + enabled: true ## Scrape interval. If not set, the Prometheus default scrape interval is used. ## interval: "" + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" ## Enable scraping kube-scheduler over https. ## Requires proper certs (not self-signed) and delegated authentication/authorization checks ## @@ -1135,16 +1175,22 @@ kubeProxy: # - 10.141.4.24 service: + enabled: true port: 10249 targetPort: 10249 # selector: # k8s-app: kube-proxy serviceMonitor: + enabled: true ## Scrape interval. If not set, the Prometheus default scrape interval is used. ## interval: "" + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + ## Enable scraping kube-proxy over https. ## Requires proper certs (not self-signed) and delegated authentication/authorization checks ## @@ -1173,9 +1219,15 @@ kubeStateMetrics: ## Scrape interval. If not set, the Prometheus default scrape interval is used. ## interval: "" + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" ## Override serviceMonitor selector ## selectorOverride: {} + ## Override namespace selector + ## + namespaceOverride: "" ## metric relabel configs to apply to samples before ingestion. ## @@ -1217,6 +1269,10 @@ nodeExporter: ## interval: "" + ## proxyUrl: URL of a proxy that should be used for scraping. 
@@ -1217,6 +1269,10 @@ nodeExporter:
     ##
     interval: ""
 
+    ## proxyUrl: URL of a proxy that should be used for scraping.
+    ##
+    proxyUrl: ""
+
     ## How long until a scrape request times out. If not set, the Prometheus default scape timeout is used.
     ##
     scrapeTimeout: ""
@@ -1527,6 +1583,7 @@ prometheus:
   serviceAccount:
     create: true
     name: ""
+    annotations: {}
 
   # Service for thanos service discovery on sidecar
   # Enable this can make Thanos Query can use
@@ -1540,6 +1597,7 @@ prometheus:
     portName: grpc
     port: 10901
     targetPort: "grpc"
+    clusterIP: "None"
 
     ## Service type
     ##
@@ -1814,7 +1872,7 @@ prometheus:
     ##
     image:
       repository: quay.io/prometheus/prometheus
-      tag: v2.24.0
+      tag: v2.26.0
       sha: ""
 
     ## Tolerations for use with node taints
@@ -2035,7 +2093,7 @@ prometheus:
     ##
     routePrefix: /
 
-    ## Standard object’s metadata. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#metadata
+    ## Standard object's metadata. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#metadata
     ## Metadata Labels and Annotations gets propagated to the prometheus pods.
     ##
     podMetadata: {}
@@ -2072,11 +2130,15 @@ prometheus:
     ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#remotereadspec
     remoteRead: []
     # - url: http://remote1/read
+    ## additionalRemoteRead is appended to remoteRead
+    additionalRemoteRead: []
 
     ## The remote_write spec configuration for Prometheus.
     ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#remotewritespec
     remoteWrite: []
     # - url: http://remote1/push
+    ## additionalRemoteWrite is appended to remoteWrite
+    additionalRemoteWrite: []
 
     ## Enable/Disable Grafana dashboards provisioning for prometheus remote write feature
     remoteWriteDashboards: false
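The additionalRemoteRead/additionalRemoteWrite lists above let an overlay append endpoints without clobbering whatever remoteRead/remoteWrite the chart itself renders. A sketch of how that might be used, with the values path inferred from the hunk context (prometheus.prometheusSpec) and a made-up receiver URL:

cat > remote-write-values.yaml <<'EOF'
kube-prometheus-stack:
  prometheus:
    prometheusSpec:
      additionalRemoteWrite:
        - url: http://thanos-receive.example.com:19291/api/v1/receive
EOF
helm template charts/kubezero-metrics -f remote-write-values.yaml | grep -A2 'remoteWrite:'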
diff --git a/charts/kubezero-metrics/remove_etcd_grpc_alerts.patch b/charts/kubezero-metrics/remove_etcd_grpc_alerts.patch
deleted file mode 100644
index 60a0a0a..0000000
--- a/charts/kubezero-metrics/remove_etcd_grpc_alerts.patch
+++ /dev/null
@@ -1,39 +0,0 @@
-diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/etcd.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/etcd.yaml
-index b430951..8358704 100644
---- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/etcd.yaml
-+++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/etcd.yaml
-@@ -71,34 +71,6 @@ spec:
-           severity: warning
- {{- if .Values.defaultRules.additionalRuleLabels }}
- {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
--{{- end }}
--      - alert: etcdHighNumberOfFailedGRPCRequests
--        annotations:
--          message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
--        expr: |-
--          100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
--            /
--          sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method)
--            > 1
--        for: 10m
--        labels:
--          severity: warning
--{{- if .Values.defaultRules.additionalRuleLabels }}
--{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
--{{- end }}
--      - alert: etcdHighNumberOfFailedGRPCRequests
--        annotations:
--          message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
--        expr: |-
--          100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
--            /
--          sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method)
--            > 5
--        for: 5m
--        labels:
--          severity: critical
--{{- if .Values.defaultRules.additionalRuleLabels }}
--{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
- {{- end }}
-      - alert: etcdGRPCRequestsSlow
-        annotations:
diff --git a/charts/kubezero-metrics/update.sh b/charts/kubezero-metrics/update.sh
index 55549b8..4d36445 100755
--- a/charts/kubezero-metrics/update.sh
+++ b/charts/kubezero-metrics/update.sh
@@ -1,8 +1,10 @@
 #!/bin/bash
 
-VERSION=14.3.0
+VERSION=14.9.0
 
 rm -rf charts/kube-prometheus-stack
 
 curl -L -s -o - https://github.com/prometheus-community/helm-charts/releases/download/kube-prometheus-stack-${VERSION}/kube-prometheus-stack-${VERSION}.tgz | tar xfz - -C charts
 
-patch -p3 -i remove_etcd_grpc_alerts.patch --no-backup-if-mismatch
+# The grpc alerts could be re-enabled with etcd 3.5
+# https://github.com/etcd-io/etcd/pull/12196
+patch -p0 -i adjust_alarms.patch --no-backup-if-mismatch
diff --git a/charts/kubezero-metrics/values.yaml b/charts/kubezero-metrics/values.yaml
index f04415a..5a533d4 100644
--- a/charts/kubezero-metrics/values.yaml
+++ b/charts/kubezero-metrics/values.yaml
@@ -126,6 +126,8 @@ kube-prometheus-stack:
       enabled: true
       size: 4Gi
       storageClassName: ebs-sc-gp2-xfs
+    deploymentStrategy:
+      type: Recreate
     plugins:
     - grafana-piechart-panel
     service:
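Since update.sh re-vendors the upstream chart before patching, context drift in the renamed patch only surfaces at upgrade time. A dry run catches that early; this assumes adjust_alarms.patch sits next to update.sh as the script expects (its contents are not part of this diff):

cd charts/kubezero-metrics
patch -p0 --dry-run -i adjust_alarms.patch && ./update.sh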
diff --git a/charts/kubezero/templates/istio-ingress.yaml b/charts/kubezero/templates/istio-ingress.yaml
index 39fa737..4e8d5c8 100644
--- a/charts/kubezero/templates/istio-ingress.yaml
+++ b/charts/kubezero/templates/istio-ingress.yaml
@@ -1,4 +1,10 @@
 {{- define "istio-ingress-values" }}
+
+{{- with index .Values "istio-ingress" "global" }}
+global:
+  {{- toYaml . | nindent 2 }}
+{{- end }}
+
 {{- if index .Values "istio-ingress" "public" }}
 istio-ingress:
   enabled: {{ index .Values "istio-ingress" "public" "enabled" }}
diff --git a/charts/kubezero/templates/istio.yaml b/charts/kubezero/templates/istio.yaml
index eae91c7..ac750b8 100644
--- a/charts/kubezero/templates/istio.yaml
+++ b/charts/kubezero/templates/istio.yaml
@@ -1,7 +1,15 @@
 {{- define "istio-values" }}
+
+{{- if .Values.HighAvailableControlplane }}
+global:
+  defaultPodDisruptionBudget:
+    enabled: true
+
 istio-discovery:
   pilot:
-    replicaCount: {{ ternary 2 1 .Values.HighAvailableControlplane }}
+    replicaCount: 2
+{{- end }}
+
 {{- end }}
diff --git a/kubezero-repo.GFV/repo b/kubezero-repo.GFV/repo
deleted file mode 160000
index 25b8ebe..0000000
--- a/kubezero-repo.GFV/repo
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 25b8ebe3cc2af88643a4674a63e651e9c31245cb
diff --git a/scripts/exec_each_node.sh b/scripts/exec_each_node.sh
new file mode 100755
index 0000000..61d9e7b
--- /dev/null
+++ b/scripts/exec_each_node.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+NODES=$(kubectl get nodes -o json | jq -rc .items[].status.addresses[0].address)
+
+for n in $NODES; do
+  >&2 echo "Node: $n"
+  ssh -q "$n" "$@"
+done
diff --git a/scripts/pod_mem_cgroup_limit.sh b/scripts/pod_mem_cgroup_limit.sh
new file mode 100755
index 0000000..713c6e5
--- /dev/null
+++ b/scripts/pod_mem_cgroup_limit.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+NAME=$1
+
+POD_ID="$(crictl pods --name "$NAME" -q)"
+CGROUP_PATH=$(crictl inspectp -o=json "$POD_ID" | jq -rc .info.runtimeSpec.linux.cgroupsPath)
+
+echo -n "cgroup memory limit in bytes for $NAME: "
+cat "/sys/fs/cgroup/memory/$(dirname "$CGROUP_PATH")/memory.limit_in_bytes"
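Typical usage of the two helper scripts above, e.g. to spot-check the new kubeReserved sizing across the cluster. exec_each_node.sh assumes the nodes' first reported address is ssh-reachable; pod_mem_cgroup_limit.sh must run on the node hosting the pod, since it shells out to crictl. The pod name is illustrative:

# run a command on every node
./scripts/exec_each_node.sh uptime

# on a control-plane node: print the effective memory cgroup limit of a pod
./scripts/pod_mem_cgroup_limit.sh kube-apiserver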