kube-prometheus-stack:
  global:
    rbac:
      pspEnabled: false

  defaultRules:
    create: false

  coreDns:
    enabled: true
  kubeDns:
    enabled: false
  kubeApiServer:
    enabled: true
  kubeStateMetrics:
    enabled: true
  kubeProxy:
    enabled: true

  kubeEtcd:
    enabled: true
    service:
      port: 2381
      targetPort: 2381

  kubeControllerManager:
    enabled: true
    service:
      port: 10257
      targetPort: 10257
    serviceMonitor:
      https: true
      insecureSkipVerify: true

  kubeScheduler:
    enabled: true
    service:
      port: 10259
      targetPort: 10259
    serviceMonitor:
      https: true
      insecureSkipVerify: true

  kubelet:
    enabled: true
    serviceMonitor:
      # removed with 1.18, but still required for all container metrics ??
      cAdvisor: true

  prometheusOperator:
    enabled: true
    logFormat: json

    # Run on controller nodes
    tolerations:
      - key: node-role.kubernetes.io/master
        effect: NoSchedule
    nodeSelector:
      node-role.kubernetes.io/control-plane: ""

    resources:
      requests:
        cpu: 20m
        memory: 32Mi
      limits:
        memory: 64Mi

    admissionWebhooks:
      patch:
        tolerations:
          - key: node-role.kubernetes.io/master
            effect: NoSchedule
        nodeSelector:
          node-role.kubernetes.io/control-plane: ""

  nodeExporter:
    enabled: true
    serviceMonitor:
      relabelings:
        - sourceLabels: [__meta_kubernetes_pod_node_name]
          separator: ;
          regex: ^(.*)$
          targetLabel: node
          replacement: $1
          action: replace

  prometheus-node-exporter:
    rbac:
      pspEnabled: false
    resources:
      requests:
        cpu: 20m
        memory: 16Mi

  prometheus:
    enabled: true
    prometheusSpec:
      retention: 8d
      portName: http-prometheus
      logFormat: json
      # externalUrl:

      # gather ALL monitors
      podMonitorSelectorNilUsesHelmValues: false
      serviceMonitorSelectorNilUsesHelmValues: false
      ruleSelectorNilUsesHelmValues: false

      resources:
        requests:
          memory: 512Mi
          cpu: 500m
        limits:
          memory: 3Gi
          # cpu: "1000m"

      walCompression: true

      storageSpec:
        volumeClaimTemplate:
          spec:
            # storageClassName: ebs-sc-gp3-xfs
            accessModes: ["ReadWriteOnce"]
            resources:
              requests:
                storage: 16Gi

  # Custom Grafana tweaks
  grafana:
    enabled: true
    rbac:
      pspEnabled: false

    # Disabled as we use the upstream kube-mixin dashboards directly
    defaultDashboardsEnabled: false

    grafana.ini:
      server:
        enable_gzip: true
      analytics:
        check_for_updates: false
      security:
        disable_gravatar: true
        cookie_secure: true
        strict_transport_security: true
      #auth:
      #  disable_login_form: true
      #  disable_signout_menu: true
      auth.anonymous:
        enabled: true
      dashboards:
        min_refresh_interval: "30s"
        default_home_dashboard_path: '/tmp/dashboards/KubeZero/home.json'
      alerting:
        enabled: false
      date_formats:
        default_timezone: UTC

    sidecar:
      dashboards:
        searchNamespace: ALL
        provider:
          foldersFromFilesStructure: true

    # For the gunzip script
    extraContainerVolumes:
      - name: script-volume
        configMap:
          name: script-configmap
          defaultMode: 0777

    #persistence:
    #  enabled: true
    #  size: 4Gi
    #  storageClassName: ebs-sc-gp2-xfs

    #deploymentStrategy:
    #  type: Recreate

    plugins:
      - grafana-piechart-panel

    service:
      portName: http-grafana

    initChownData:
      enabled: false

    testFramework:
      enabled: false

  # Assign state metrics to control plane
  kube-state-metrics:
    podSecurityPolicy:
      enabled: false
    tolerations:
      - key: node-role.kubernetes.io/master
        effect: NoSchedule
    nodeSelector:
      node-role.kubernetes.io/control-plane: ""

  # Todo
  alertmanager:
    enabled: false
    config:
      global:
        resolve_timeout: 5m
      route:
        group_by: ['severity', 'clusterName']
        group_wait: 30s
        group_interval: 5m
        repeat_interval: 6h
    alertmanagerSpec:
      # externalUrl:
      logFormat: json

      # For non-AWS clusters, or if the SNS AlertHub should NOT be used, remove the
      # sns-forwarder by overriding containers, e.g.:
      # containers: []

      # Add sns-forwarder to the AlertManager pod, see:
      # https://github.com/DataReply/alertmanager-sns-forwarder
      # Uses the alertmanager serviceaccount to assume the IAM role; requires the annotation
      # kubezero.com/sns_forwarder_ARN_PREFIX to point to the SNSAlertHub,
      # e.g. "arn:aws:sns:eu-central-1:123456789012:"
      containers:
        - name: alertmanager-sns-forwarder
          image: datareply/alertmanager-sns-forwarder:latest
          imagePullPolicy: Always
          env:
            - name: SNS_FORWARDER_ARN_PREFIX
              valueFrom:
                fieldRef:
                  fieldPath: metadata.annotations['kubezero.com/sns_forwarder_ARN_PREFIX']
            - name: AWS_ROLE_ARN
              valueFrom:
                fieldRef:
                  fieldPath: metadata.annotations['kubezero.com/sns_forwarder_AWS_ROLE_ARN']
            - name: AWS_WEB_IDENTITY_TOKEN_FILE
              value: "/var/run/secrets/sts.amazonaws.com/serviceaccount/token"
            - name: AWS_STS_REGIONAL_ENDPOINTS
              value: regional
          volumeMounts:
            - name: aws-token
              mountPath: "/var/run/secrets/sts.amazonaws.com/serviceaccount/"
              readOnly: true
          resources:
            limits:
              memory: 64Mi
              cpu: 100m
            requests:
              cpu: 25m
              memory: 32Mi
          ports:
            - containerPort: 9087
              name: webhook-port
          livenessProbe:
            httpGet:
              path: /health
              port: webhook-port
            initialDelaySeconds: 30
            timeoutSeconds: 10
          readinessProbe:
            httpGet:
              path: /health
              port: webhook-port
            initialDelaySeconds: 10
            timeoutSeconds: 10
      volumes:
        - name: aws-token
          projected:
            sources:
              - serviceAccountToken:
                  path: token
                  expirationSeconds: 86400
                  audience: "sts.amazonaws.com"

# Metrics adapter
prometheus-adapter:
  enabled: true
  logLevel: 1
  prometheus:
    url: http://metrics-kube-prometheus-st-prometheus
  tolerations:
    - key: node-role.kubernetes.io/master
      effect: NoSchedule
  nodeSelector:
    node-role.kubernetes.io/control-plane: ""
  # Basic rules for HPA to work, replacing heapster; taken from the kube-prometheus project
  # https://github.com/coreos/kube-prometheus/blob/master/manifests/prometheus-adapter-configMap.yaml
  rules:
    default: false
    resource:
      cpu:
        containerQuery: sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}[5m])) by (<<.GroupBy>>)
        nodeQuery: sum(1 - irate(node_cpu_seconds_total{mode="idle"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)
        resources:
          overrides:
            node:
              resource: node
            namespace:
              resource: namespace
            pod:
              resource: pod
        containerLabel: container
      memory:
        containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}) by (<<.GroupBy>>)
        nodeQuery: sum(node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>}) by (<<.GroupBy>>)
        resources:
          overrides:
            node:
              resource: node
            namespace:
              resource: namespace
            pod:
              resource: pod
        containerLabel: container
      window: 5m

# Pushgateway
prometheus-pushgateway:
  enabled: false
  serviceMonitor:
    enabled: true

istio:
  grafana:
    enabled: false
    ipBlocks: []
    url: ""
    gateway: istio-ingress/ingressgateway
    destination: metrics-grafana
  prometheus:
    enabled: false
    ipBlocks: []
    url: ""
    gateway: istio-ingress/ingressgateway
    destination: metrics-kube-prometheus-st-prometheus
  alertmanager:
    enabled: false
    ipBlocks: []
    url: ""
    gateway: istio-ingress/ingressgateway
    destination: metrics-kube-prometheus-st-alertmanager
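
# With the resource rules above, the adapter serves the CPU/memory metrics HPA
# relies on, so a plain Resource-type HorizontalPodAutoscaler works without
# heapster or metrics-server. A minimal sketch for reference only (the Deployment
# name "myapp" and the 80% target are hypothetical, not part of this chart;
# autoscaling/v2 requires Kubernetes >= 1.23, use autoscaling/v2beta2 on older
# clusters):
#
# apiVersion: autoscaling/v2
# kind: HorizontalPodAutoscaler
# metadata:
#   name: myapp
# spec:
#   scaleTargetRef:
#     apiVersion: apps/v1
#     kind: Deployment
#     name: myapp
#   minReplicas: 1
#   maxReplicas: 4
#   metrics:
#     - type: Resource
#       resource:
#         name: cpu
#         target:
#           type: Utilization
#           averageUtilization: 80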
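
# Per-cluster override sketch (a hypothetical values-mycluster.yaml passed to helm
# with an extra -f flag): on a non-AWS cluster drop the sns-forwarder sidecar and
# its projected token volume, as noted under alertmanagerSpec above, and expose
# Grafana through the Istio ingress gateway. The hostname is an assumption;
# replace it with your cluster's domain.
#
# kube-prometheus-stack:
#   alertmanager:
#     alertmanagerSpec:
#       containers: []
#       volumes: []
# istio:
#   grafana:
#     enabled: true
#     url: grafana.example.com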