kube-prometheus-stack:
  defaultRules:
    create: false

  coreDns:
    enabled: true
  kubeDns:
    enabled: false
  kubeApiServer:
    enabled: true
  kubeStateMetrics:
    enabled: true
  kubeProxy:
    enabled: true

  kubeEtcd:
    enabled: true
    service:
      port: 2381
      targetPort: 2381

  kubeControllerManager:
    enabled: true
    service:
      port: 10257
      targetPort: 10257
    serviceMonitor:
      https: true
      insecureSkipVerify: true

  kubeScheduler:
    enabled: true
    service:
      port: 10259
      targetPort: 10259
    serviceMonitor:
      https: true
      insecureSkipVerify: true

  kubelet:
    enabled: true
    serviceMonitor:
      # removed with 1.18, but still required for all container metrics ??
      cAdvisor: true

  prometheusOperator:
    enabled: true
    logFormat: json

    # Run on controller nodes
    tolerations:
      - key: node-role.kubernetes.io/control-plane
        effect: NoSchedule
    nodeSelector:
      node-role.kubernetes.io/control-plane: ""
    resources:
      requests:
        cpu: 10m
        memory: 64Mi
      limits:
        memory: 128Mi

    admissionWebhooks:
      patch:
        tolerations:
          - key: node-role.kubernetes.io/control-plane
            effect: NoSchedule
        nodeSelector:
          node-role.kubernetes.io/control-plane: ""

  nodeExporter:
    enabled: true

  prometheus-node-exporter:
    hostRootFsMount:
      enabled: false
    prometheus:
      monitor:
        relabelings:
          - sourceLabels: [__meta_kubernetes_pod_node_name]
            separator: ;
            regex: ^(.*)$
            targetLabel: instance
            replacement: $1
            action: replace
    resources:
      requests:
        cpu: 20m
        memory: 16Mi

  prometheus:
    enabled: true

    prometheusSpec:
      retention: 8d
      portName: http-prometheus
      logFormat: json
      # externalUrl:

      # gather ALL monitors
      podMonitorSelectorNilUsesHelmValues: false
      serviceMonitorSelectorNilUsesHelmValues: false
      ruleSelectorNilUsesHelmValues: false
      probeSelectorNilUsesHelmValues: false
      scrapeConfigSelectorNilUsesHelmValues: false

      resources:
        requests:
          memory: 2Gi
          cpu: 500m
        limits:
          memory: 4Gi
          # cpu: "1000m"

      storageSpec:
        volumeClaimTemplate:
          spec:
            # storageClassName: ebs-sc-gp3-xfs
            accessModes: ["ReadWriteOnce"]
            resources:
              requests:
                storage: 16Gi

      #volumes:
      #  - name: aws-token
      #    projected:
      #      sources:
      #        - serviceAccountToken:
      #            path: token
      #            expirationSeconds: 86400
      #            audience: "sts.amazonaws.com"
      #volumeMounts:
      #  - name: aws-token
      #    mountPath: "/var/run/secrets/sts.amazonaws.com/serviceaccount/"
      #    readOnly: true
      #containers:
      #  - name: prometheus
      #    env:
      #      - name: AWS_ROLE_ARN
      #        value: ""
      #      - name: AWS_WEB_IDENTITY_TOKEN_FILE
      #        value: "/var/run/secrets/sts.amazonaws.com/serviceaccount/token"
      #      - name: AWS_STS_REGIONAL_ENDPOINTS
      #        value: regional

  # Custom Grafana tweaks
  grafana:
    enabled: true
    # Disabled as we use the upstream kube-mixin dashboards directly
    defaultDashboardsEnabled: false

    grafana.ini:
      server:
        enable_gzip: true
      analytics:
        check_for_updates: false
        check_for_plugin_updates: false
      security:
        disable_gravatar: true
        cookie_secure: true
        strict_transport_security: true
      #auth:
      #  disable_login_form: true
      #  disable_signout_menu: true
      auth.anonymous:
        enabled: true
      dashboards:
        min_refresh_interval: "30s"
        default_home_dashboard_path: '/tmp/dashboards/KubeZero/home.json'
      alerting:
        enabled: false
      date_formats:
        default_timezone: UTC
      log.console:
        format: json

    sidecar:
      dashboards:
        searchNamespace: ALL
        provider:
          foldersFromFilesStructure: true
        script: "/opt/script.sh" # For the gunzip script
        extraMounts:
          - name: script-volume
            mountPath: /opt/script.sh
            subPath: script.sh

    extraContainerVolumes:
      - name: script-volume
        configMap:
          name: script-configmap
          defaultMode: 0777

    #persistence:
    #  enabled: true
    #  size: 4Gi
    #  storageClassName: ebs-sc-gp2-xfs

    #deploymentStrategy:
    #  type: Recreate

    plugins:
      - grafana-piechart-panel

    service:
      portName: http-grafana

    initChownData:
      enabled: false
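    # The dashboards sidecar 'script' above runs script.sh from the script-configmap
    # mounted via extraMounts/extraContainerVolumes, presumably to gunzip dashboards
    # that are shipped compressed. The ConfigMap itself lives outside these values;
    # a minimal, hedged sketch of what it could contain (the shell body and the
    # /tmp/dashboards path are illustrative assumptions based on
    # default_home_dashboard_path above, not taken from this chart):
    #
    # apiVersion: v1
    # kind: ConfigMap
    # metadata:
    #   name: script-configmap
    # data:
    #   script.sh: |
    #     #!/bin/sh
    #     # decompress any gzipped dashboards the sidecar has synced, in place
    #     find /tmp/dashboards -name '*.gz' -exec gunzip -f {} +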
    testFramework:
      enabled: false

  # Assign state metrics to control plane
  kube-state-metrics:
    tolerations:
      - key: node-role.kubernetes.io/control-plane
        effect: NoSchedule
    nodeSelector:
      node-role.kubernetes.io/control-plane: ""

  # Todo
  alertmanager:
    enabled: false
    config:
      global:
        resolve_timeout: 5m
      route:
        group_by: ['alertname', 'severity', 'status']
        group_wait: 30s
        group_interval: 5m
        repeat_interval: 4h
        routes:
          - matchers:
              - severity = none
            receiver: 'null'
      inhibit_rules:
        - equal:
            - namespace
            - alertname
          source_matchers:
            - severity = critical
          target_matchers:
            - severity =~ warning|info
        - equal:
            - namespace
            - alertname
          source_matchers:
            - severity = warning
          target_matchers:
            - severity = info
        - equal:
            - namespace
          source_matchers:
            - alertname = InfoInhibitor
          target_matchers:
            - severity = info
        # Disable cluster overcommit alerts if we have cluster autoscaler available
        - source_matchers:
            - alertname = ClusterAutoscalerNodeGroupsEnabled
          target_matchers:
            - alertname =~ "KubeCPUOvercommit|KubeMemoryOvercommit"

    alertmanagerSpec:
      # externalUrl:
      logFormat: json

      # For non-AWS clusters, or if the SNS AlertHub should NOT be used, remove the
      # sns-forwarder by overwriting containers, e.g.:
      # containers: []

      # Add sns-forwarder to the AlertManager pod, see: https://github.com/DataReply/alertmanager-sns-forwarder
      # It uses the alertmanager serviceaccount to assume an IAM role and requires the pod
      # annotation kubezero.com/sns_forwarder_ARN_PREFIX to point to the SNSAlertHub,
      # e.g. "arn:aws:sns:eu-central-1:123456789012:"
      containers:
        - name: alertmanager-sns-forwarder
          image: datareply/alertmanager-sns-forwarder:latest
          imagePullPolicy: Always
          env:
            - name: SNS_FORWARDER_ARN_PREFIX
              valueFrom:
                fieldRef:
                  fieldPath: metadata.annotations['kubezero.com/sns_forwarder_ARN_PREFIX']
            - name: AWS_ROLE_ARN
              valueFrom:
                fieldRef:
                  fieldPath: metadata.annotations['kubezero.com/sns_forwarder_AWS_ROLE_ARN']
            - name: AWS_WEB_IDENTITY_TOKEN_FILE
              value: "/var/run/secrets/sts.amazonaws.com/serviceaccount/token"
            - name: AWS_STS_REGIONAL_ENDPOINTS
              value: regional
          volumeMounts:
            - name: aws-token
              mountPath: "/var/run/secrets/sts.amazonaws.com/serviceaccount/"
              readOnly: true
          resources:
            limits:
              memory: 64Mi
              cpu: 100m
            requests:
              cpu: 25m
              memory: 32Mi
          ports:
            - containerPort: 9087
              name: webhook-port
          livenessProbe:
            httpGet:
              path: /health
              port: webhook-port
            initialDelaySeconds: 30
            timeoutSeconds: 10
          readinessProbe:
            httpGet:
              path: /health
              port: webhook-port
            initialDelaySeconds: 10
            timeoutSeconds: 10
      volumes:
        - name: aws-token
          projected:
            sources:
              - serviceAccountToken:
                  path: token
                  expirationSeconds: 86400
                  audience: "sts.amazonaws.com"
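      # The fieldRef env entries above read pod annotations; a minimal, hedged sketch of
      # how they could be supplied via the Alertmanager CRD's podMetadata (the ARN values
      # below are placeholders, not defaults of this chart):
      # podMetadata:
      #   annotations:
      #     kubezero.com/sns_forwarder_ARN_PREFIX: "arn:aws:sns:eu-central-1:123456789012:"
      #     kubezero.com/sns_forwarder_AWS_ROLE_ARN: "arn:aws:iam::123456789012:role/sns-alerthub-forwarder"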

# Metrics adapter
prometheus-adapter:
  enabled: true
  logLevel: 1
  metricsRelistInterval: 3m
  prometheus:
    url: http://metrics-kube-prometheus-st-prometheus
  tolerations:
    - key: node-role.kubernetes.io/control-plane
      effect: NoSchedule
  nodeSelector:
    node-role.kubernetes.io/control-plane: ""
  # Basic rules for the HPA to work, replacing heapster; taken from the kube-prometheus project
  # https://github.com/coreos/kube-prometheus/blob/master/manifests/prometheus-adapter-configMap.yaml
  rules:
    default: false
    resource:
      cpu:
        containerQuery: |
          sum by (<<.GroupBy>>) (
            irate (
              container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="",pod!=""}[120s]
            )
          )
        nodeQuery: |
          sum(1 - irate(node_cpu_seconds_total{<<.LabelMatchers>>, mode="idle"}[120s])) by (<<.GroupBy>>)
        resources:
          overrides:
            instance:
              resource: node
            namespace:
              resource: namespace
            pod:
              resource: pod
        containerLabel: container
      memory:
        containerQuery: |
          sum by (<<.GroupBy>>) (
            container_memory_working_set_bytes{<<.LabelMatchers>>,container!="",pod!="",container!="POD"}
          )
        nodeQuery: |
          sum(node_memory_MemTotal_bytes{<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{<<.LabelMatchers>>}) by (<<.GroupBy>>)
        resources:
          overrides:
            instance:
              resource: node
            namespace:
              resource: namespace
            pod:
              resource: pod
        containerLabel: container
      window: 2m

# Pushgateway
prometheus-pushgateway:
  enabled: false
  serviceMonitor:
    enabled: true

istio:
  grafana:
    enabled: false
    ipBlocks: []
    url: ""
    gateway: istio-ingress/ingressgateway
    destination: metrics-grafana
    port: 80

  prometheus:
    enabled: false
    ipBlocks: []
    url: ""
    gateway: istio-ingress/ingressgateway
    destination: metrics-kube-prometheus-st-prometheus
    port: 9090

  alertmanager:
    enabled: false
    ipBlocks: []
    url: ""
    gateway: istio-ingress/ingressgateway
    destination: metrics-kube-prometheus-st-alertmanager
    port: 9093
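
# The prometheus-adapter resource rules above back the metrics.k8s.io API consumed by the
# HPA and `kubectl top`. A minimal, illustrative consumer for reference only (all names and
# numbers are examples, nothing below is installed by this chart):
#
# apiVersion: autoscaling/v2
# kind: HorizontalPodAutoscaler
# metadata:
#   name: example-app
# spec:
#   scaleTargetRef:
#     apiVersion: apps/v1
#     kind: Deployment
#     name: example-app
#   minReplicas: 2
#   maxReplicas: 10
#   metrics:
#     - type: Resource
#       resource:
#         name: cpu
#         target:
#           type: Utilization
#           averageUtilization: 80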