2020-10-10 00:58:44 +00:00
# Values under the `kube-prometheus-stack` key (presumably consumed by the
# upstream kube-prometheus-stack chart as a dependency - confirm in Chart.yaml).
kube-prometheus-stack:
  # Bundled default alerting/recording rules are not created.
  defaultRules:
    create: false

  # Control-plane scrape targets: CoreDNS is scraped, kube-dns is not.
  coreDns:
    enabled: true
  kubeDns:
    enabled: false

  kubeApiServer:
    enabled: true
  kubeStateMetrics:
    enabled: true
  kubeProxy:
    enabled: true

  kubeEtcd:
    enabled: true
    service:
      port: 2381
      targetPort: 2381

  # Scraped over HTTPS on 10257; certificate verification is skipped.
  kubeControllerManager:
    enabled: true
    service:
      port: 10257
      targetPort: 10257
    serviceMonitor:
      https: true
      insecureSkipVerify: true

  # Scraped over HTTPS on 10259; certificate verification is skipped.
  kubeScheduler:
    enabled: true
    service:
      port: 10259
      targetPort: 10259
    serviceMonitor:
      https: true
      insecureSkipVerify: true

  kubelet:
    enabled: true
    serviceMonitor:
      # NOTE(review): originally annotated as "removed with 1.18", yet it still
      # appears to be required for all container metrics - confirm.
      cAdvisor: true

  prometheusOperator:
    enabled: true
    logFormat: json

    # Run on controller nodes
    tolerations:
      - key: node-role.kubernetes.io/control-plane
        effect: NoSchedule
    nodeSelector:
      node-role.kubernetes.io/control-plane: ""
    resources:
      requests:
        cpu: 20m
        memory: 32Mi
      limits:
        memory: 64Mi

    # The admission webhook patch job gets the same control-plane placement.
    admissionWebhooks:
      patch:
        tolerations:
          - key: node-role.kubernetes.io/control-plane
            effect: NoSchedule
        nodeSelector:
          node-role.kubernetes.io/control-plane: ""

  nodeExporter:
    enabled: true

  prometheus-node-exporter:
    hostRootFsMount:
      enabled: false
    prometheus:
      monitor:
        # Copy the node name of the scraped pod into the `instance` label.
        relabelings:
          - sourceLabels:
              - __meta_kubernetes_pod_node_name
            separator: ';'
            regex: '^(.*)$'
            targetLabel: instance
            replacement: '$1'
            action: replace
    resources:
      requests:
        cpu: 20m
        memory: 16Mi

  prometheus:
    enabled: true
    prometheusSpec:
      retention: 8d
      portName: http-prometheus
      logFormat: json
      # externalUrl:

      # Gather ALL monitors
      podMonitorSelectorNilUsesHelmValues: false
      serviceMonitorSelectorNilUsesHelmValues: false
      ruleSelectorNilUsesHelmValues: false

      resources:
        requests:
          memory: 512Mi
          cpu: 500m
        limits:
          memory: 4Gi
          # cpu: "1000m"

      storageSpec:
        volumeClaimTemplate:
          spec:
            # storageClassName: ebs-sc-gp3-xfs
            accessModes:
              - "ReadWriteOnce"
            resources:
              requests:
                storage: 16Gi

      # Example (disabled): project a service account token for AWS STS and
      # point the prometheus container at an IAM role.
      # volumes:
      #   - name: aws-token
      #     projected:
      #       sources:
      #         - serviceAccountToken:
      #             path: token
      #             expirationSeconds: 86400
      #             audience: "sts.amazonaws.com"
      # volumeMounts:
      #   - name: aws-token
      #     mountPath: "/var/run/secrets/sts.amazonaws.com/serviceaccount/"
      #     readOnly: true
      # containers:
      #   - name: prometheus
      #     env:
      #       - name: AWS_ROLE_ARN
      #         value: "<prometheus IAM ROLE ARN>"
      #       - name: AWS_WEB_IDENTITY_TOKEN_FILE
      #         value: "/var/run/secrets/sts.amazonaws.com/serviceaccount/token"
      #       - name: AWS_STS_REGIONAL_ENDPOINTS
      #         value: regional

  # Custom Grafana tweaks
  grafana:
    enabled: true

    # Disabled as we use the upstream kube-mixin dashboards directly
    defaultDashboardsEnabled: false

    grafana.ini:
      server:
        enable_gzip: true
      analytics:
        check_for_updates: false
      security:
        disable_gravatar: true
        cookie_secure: true
        strict_transport_security: true
      # auth:
      #   disable_login_form: true
      #   disable_signout_menu: true
      auth.anonymous:
        enabled: true
      dashboards:
        min_refresh_interval: "30s"
        default_home_dashboard_path: '/tmp/dashboards/KubeZero/home.json'
      alerting:
        enabled: false
      date_formats:
        default_timezone: UTC

    sidecar:
      dashboards:
        searchNamespace: ALL
        provider:
          foldersFromFilesStructure: true
        script: "/opt/script.sh"
        # For the gunzip script
        extraMounts:
          - name: script-volume
            mountPath: /opt/script.sh
            subPath: script.sh
    extraContainerVolumes:
      - name: script-volume
        configMap:
          name: script-configmap
          # Decimal 511 == octal 0777. Written in decimal because a bare 0777
          # is read as 777 by strict YAML 1.2 parsers.
          defaultMode: 511
    # persistence:
    #   enabled: true
    #   size: 4Gi
    #   storageClassName: ebs-sc-gp2-xfs
    # deploymentStrategy:
    #   type: Recreate
    plugins:
      - grafana-piechart-panel
    service:
      portName: http-grafana
    initChownData:
      enabled: false
    testFramework:
      enabled: false

  # Assign state metrics to control plane
  kube-state-metrics:
    tolerations:
      - key: node-role.kubernetes.io/control-plane
        effect: NoSchedule
    nodeSelector:
      node-role.kubernetes.io/control-plane: ""

  # TODO
  alertmanager:
    enabled: false
    config:
      global:
        resolve_timeout: 5m
      route:
        group_by:
          - 'severity'
          - 'clusterName'
        group_wait: 10s
        group_interval: 5m
        repeat_interval: 4h
        routes:
          # Alerts with severity "none" go to the 'null' receiver.
          - matchers:
              - 'severity = none'
            receiver: 'null'
      inhibit_rules:
        # critical inhibits warning/info for the same namespace + alertname
        - equal:
            - namespace
            - alertname
          source_matchers:
            - 'severity = critical'
          target_matchers:
            - 'severity =~ warning|info'
        # warning inhibits info for the same namespace + alertname
        - equal:
            - namespace
            - alertname
          source_matchers:
            - 'severity = warning'
          target_matchers:
            - 'severity = info'
        # InfoInhibitor inhibits info alerts in the same namespace
        - equal:
            - namespace
          source_matchers:
            - 'alertname = InfoInhibitor'
          target_matchers:
            - 'severity = info'
        # Disable cluster overcommit alerts if we have cluster autoscaler available
        - source_matchers:
            - 'alertname = ClusterAutoscalerNodeGroupsEnabled'
          target_matchers:
            - 'alertname =~ "KubeCPUOvercommit|KubeMemoryOvercommit"'
    alertmanagerSpec:
      # externalUrl:
      logFormat: json

      # For non-AWS clusters, or if the SNS AlertHub should NOT be used, remove
      # the sns-forwarder by overriding containers, e.g.:
      # containers: []
      #
      # Adds sns-forwarder to the AlertManager pod, see:
      # https://github.com/DataReply/alertmanager-sns-forwarder
      # It uses the alertmanager service account to assume an IAM role and reads
      # two pod annotations through the downward API (see `env` below):
      #   kubezero.com/sns_forwarder_ARN_PREFIX   -> SNS_FORWARDER_ARN_PREFIX,
      #     must point to the SNSAlertHub, e.g. "arn:aws:sns:eu-central-1:123456789012:"
      #   kubezero.com/sns_forwarder_AWS_ROLE_ARN -> AWS_ROLE_ARN
      containers:
        - name: alertmanager-sns-forwarder
          # NOTE(review): floating `latest` tag with `Always` pull policy means
          # the running version can change on any pod restart - consider pinning.
          image: datareply/alertmanager-sns-forwarder:latest
          imagePullPolicy: Always
          env:
            - name: SNS_FORWARDER_ARN_PREFIX
              valueFrom:
                fieldRef:
                  fieldPath: "metadata.annotations['kubezero.com/sns_forwarder_ARN_PREFIX']"
            - name: AWS_ROLE_ARN
              valueFrom:
                fieldRef:
                  fieldPath: "metadata.annotations['kubezero.com/sns_forwarder_AWS_ROLE_ARN']"
            - name: AWS_WEB_IDENTITY_TOKEN_FILE
              value: "/var/run/secrets/sts.amazonaws.com/serviceaccount/token"
            - name: AWS_STS_REGIONAL_ENDPOINTS
              value: regional
          volumeMounts:
            - name: aws-token
              mountPath: "/var/run/secrets/sts.amazonaws.com/serviceaccount/"
              readOnly: true
          resources:
            limits:
              memory: 64Mi
              cpu: 100m
            requests:
              cpu: 25m
              memory: 32Mi
          ports:
            - containerPort: 9087
              name: webhook-port
          livenessProbe:
            httpGet:
              path: /health
              port: webhook-port
            initialDelaySeconds: 30
            timeoutSeconds: 10
          readinessProbe:
            httpGet:
              path: /health
              port: webhook-port
            initialDelaySeconds: 10
            timeoutSeconds: 10
      # Projected service account token mounted by the sns-forwarder above.
      volumes:
        - name: aws-token
          projected:
            sources:
              - serviceAccountToken:
                  path: token
                  expirationSeconds: 86400
                  audience: "sts.amazonaws.com"
2021-08-05 11:52:22 +00:00
2020-08-03 12:06:07 +00:00
# Metrics adapter
prometheus-adapter:
  enabled: true
  logLevel: 1
  prometheus:
    url: http://metrics-kube-prometheus-st-prometheus

  # Placement: tolerate and select control-plane nodes.
  tolerations:
    - key: node-role.kubernetes.io/control-plane
      effect: NoSchedule
  nodeSelector:
    node-role.kubernetes.io/control-plane: ""

  # Basic rules for HPA to work, replacing heapster; taken from the kube-prometheus project:
  # https://github.com/coreos/kube-prometheus/blob/master/manifests/prometheus-adapter-configMap.yaml
  rules:
    default: false
    resource:
      cpu:
        containerQuery: 'sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}[5m])) by (<<.GroupBy>>)'
        nodeQuery: 'sum(1 - irate(node_cpu_seconds_total{mode="idle"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)'
        resources:
          overrides:
            node:
              resource: node
            namespace:
              resource: namespace
            pod:
              resource: pod
        containerLabel: container
      memory:
        containerQuery: 'sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}) by (<<.GroupBy>>)'
        nodeQuery: 'sum(node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>}) by (<<.GroupBy>>)'
        resources:
          overrides:
            node:
              resource: node
            namespace:
              resource: namespace
            pod:
              resource: pod
        containerLabel: container
      window: 5m
2020-12-02 11:53:19 +00:00
2021-07-21 13:06:40 +00:00
# Pushgateway
prometheus-pushgateway:
  # Off by default; the serviceMonitor flag below is already set to true.
  enabled: false
  serviceMonitor:
    enabled: true
2020-12-02 11:53:19 +00:00
# Istio exposure settings, one entry per component. All are disabled by
# default, with an empty url and no ipBlocks.
istio:
  grafana:
    enabled: false
    ipBlocks: []
    url: ""
    gateway: istio-ingress/ingressgateway
    destination: metrics-grafana

  prometheus:
    enabled: false
    ipBlocks: []
    url: ""
    gateway: istio-ingress/ingressgateway
    destination: metrics-kube-prometheus-st-prometheus

  alertmanager:
    enabled: false
    ipBlocks: []
    url: ""
    gateway: istio-ingress/ingressgateway
    destination: metrics-kube-prometheus-st-alertmanager