diff --git a/charts/kubezero-metrics/Chart.yaml b/charts/kubezero-metrics/Chart.yaml index 2dff6e1..d4a3d16 100644 --- a/charts/kubezero-metrics/Chart.yaml +++ b/charts/kubezero-metrics/Chart.yaml @@ -2,12 +2,12 @@ apiVersion: v2 name: kubezero-metrics description: KubeZero Umbrella Chart for prometheus-operator type: application -version: 0.0.1 +version: 0.0.2 home: https://kubezero.com icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png keywords: - kubezero - - prometheus-operator + - prometheus - grafana maintainers: - name: Quarky9 @@ -18,4 +18,7 @@ dependencies: - name: prometheus-operator version: 9.3.0 repository: https://kubernetes-charts.storage.googleapis.com/ + - name: prometheus-adapter + version: 2.5.0 + repository: https://kubernetes-charts.storage.googleapis.com/ kubeVersion: ">= 1.16.0" diff --git a/charts/kubezero-metrics/values.yaml b/charts/kubezero-metrics/values.yaml index 5527892..ebdff4a 100644 --- a/charts/kubezero-metrics/values.yaml +++ b/charts/kubezero-metrics/values.yaml @@ -32,6 +32,8 @@ prometheus-operator: enabled: true image: tag: v0.41.0 + prometheusConfigReloaderImage: + tag: v0.41.0 # Run on controller nodes tolerations: @@ -56,3 +58,38 @@ prometheus-operator: # releaseNamespace: true # additional: # - kube-system + +# Metrics adapter +prometheus-adapter: + tolerations: + - key: node-role.kubernetes.io/master + effect: NoSchedule + nodeSelector: + node-role.kubernetes.io/master: "" + + resource: + cpu: + containerQuery: sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}[5m])) by (<<.GroupBy>>) + nodeQuery: sum(1 - irate(node_cpu_seconds_total{mode="idle"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>) + resources: + overrides: + node: + resource: node + namespace: + resource: namespace + pod: + resource: pod + containerLabel: container + memory: + containerQuery: 
sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}) by (<<.GroupBy>>) + nodeQuery: sum(node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>}) by (<<.GroupBy>>) + resources: + overrides: + instance: + resource: node + namespace: + resource: namespace + pod: + resource: pod + containerLabel: container + window: 5m diff --git a/deploy/deploy.sh b/deploy/deploy.sh index aee17a8..d2b458a 100755 --- a/deploy/deploy.sh +++ b/deploy/deploy.sh @@ -40,30 +40,31 @@ else EOF fi - helm template $DEPLOY_DIR -f values.yaml -f cloudbender.yaml --set bootstrap=true > generated-values.yaml - - # Deploy initial argo-cd + # Deploy initial argo-cd + helm template $DEPLOY_DIR -f values.yaml -f cloudbender.yaml --set kiam.not_ready=true --set cert-manager.not_ready=true --set istio.enabled=false --set prometheus.enabled=false > generated-values.yaml helm install -n argocd kubezero kubezero/kubezero-argo-cd --create-namespace -f generated-values.yaml - # Wait for argocd-server to be running kubectl rollout status deployment -n argocd kubezero-argocd-server - # Now wait for cert-manager to be bootstrapped + # Now wait for cert-manager and the local CA to be bootstrapped echo "Waiting for cert-manager to be deployed..." 
wait_for kubectl get deployment -n cert-manager cert-manager-webhook 2>/dev/null 1>&2 kubectl rollout status deployment -n cert-manager cert-manager-webhook + wait_for kubectl get Issuer -n kube-system kubezero-local-ca-issuer 2>/dev/null 1>&2 + kubectl wait --for=condition=Ready -n kube-system Issuer/kubezero-local-ca-issuer - # Now lets get kiam and cert-manager to work as they depend on each other, keep advanced options still disabled though - # - istio, prometheus - helm template $DEPLOY_DIR -f values.yaml -f cloudbender.yaml --set istio.enabled=false --set prometheus.enabled=false > generated-values.yaml - helm upgrade -n argocd kubezero kubezero/kubezero-argo-cd -f generated-values.yaml + # Now let's make sure kiam is working + helm template $DEPLOY_DIR -f values.yaml -f cloudbender.yaml --set kiam.not_ready=true --set istio.enabled=false --set prometheus.enabled=false > generated-values.yaml + helm upgrade -n argocd kubezero kubezero/kubezero-argo-cd --create-namespace -f generated-values.yaml + wait_for kubectl get daemonset -n kube-system kiam-agent 2>/dev/null 1>&2 + kubectl rollout status daemonset -n kube-system kiam-agent # Install Istio if enabled, but keep ArgoCD istio support disabled for now in case helm template $DEPLOY_DIR -f values.yaml -f cloudbender.yaml --set argo-cd.istio.enabled=false > generated-values.yaml helm upgrade -n argocd kubezero kubezero/kubezero-argo-cd -f generated-values.yaml + wait_for kubectl get deployment -n istio-operator istio-operator 2>/dev/null 1>&2 + kubectl rollout status deployment -n istio-operator istio-operator - - echo "Install kube-prometheus and logging manually for now, before proceeding! " - read # Todo: Now we need to wait till all is synced and healthy ... argocd cli or kubectl ? # Wait for aws-ebs or kiam to be all ready, or all pods running ? 
diff --git a/deploy/templates/values.yaml b/deploy/templates/values.yaml index b462196..673a1d6 100644 --- a/deploy/templates/values.yaml +++ b/deploy/templates/values.yaml @@ -12,13 +12,14 @@ kubezero: prometheus: {{ .Values.prometheus.enabled }} cert-manager: enabled: {{ index .Values "cert-manager" "enabled" }} -{{- if not .Values.bootstrap }} values: {{- if eq .Values.platform "aws" }} cert-manager: podAnnotations: iam.amazonaws.com/role: "{{ index .Values "cert-manager" "IamArn" }}" {{- end }} + + {{- if not .Values.kiam.not_ready }} clusterIssuer: name: letsencrypt-dns-prod server: https://acme-v02.api.letsencrypt.org/directory @@ -38,19 +39,11 @@ kubezero: {{- . | toYaml | nindent 14 }} {{- end }} {{- end }} + {{- end }} + - # AWS only components {{- if eq .Values.platform "aws" }} - aws-ebs-csi-driver: - enabled: {{ index .Values "aws-ebs-csi-driver" "enabled" }} - values: - aws-ebs-csi-driver: - replicaCount: {{ ternary 2 1 .Values.HighAvailableControlplane }} - podAnnotations: - iam.amazonaws.com/role: "{{ index .Values "aws-ebs-csi-driver" "IamArn" }}" - extraVolumeTags: - Name: {{ .Values.ClusterName }} - + {{- if not ( index .Values "cert-manager" "not_ready" ) }} kiam: enabled: {{ .Values.kiam.enabled }} values: @@ -62,6 +55,20 @@ kubezero: prometheus: servicemonitor: enabled: {{ .Values.prometheus.enabled }} + {{- end }} + + {{- if not .Values.kiam.not_ready }} + # AWS only components + aws-ebs-csi-driver: + enabled: {{ index .Values "aws-ebs-csi-driver" "enabled" }} + values: + aws-ebs-csi-driver: + replicaCount: {{ ternary 2 1 .Values.HighAvailableControlplane }} + podAnnotations: + iam.amazonaws.com/role: "{{ index .Values "aws-ebs-csi-driver" "IamArn" }}" + extraVolumeTags: + Name: {{ .Values.ClusterName }} + {{- end }} aws-efs-csi-driver: enabled: {{ index .Values "aws-efs-csi-driver" "enabled" }} @@ -87,7 +94,6 @@ kubezero: metrics: enabled: {{ .Values.metrics.enabled }} -{{- end }} argo-cd: {{- with index .Values "argo-cd" "server" }}