# Values for the kube-prometheus-stack umbrella chart.
kube-prometheus-stack:
  # Default PrometheusRules from the upstream chart are disabled;
  # presumably rules are supplied elsewhere — TODO confirm
  defaultRules:
    create: false

  # Control-plane scrape targets
  coreDns:
    enabled: true
  kubeDns:
    enabled: false
  kubeApiServer:
    enabled: true
  kubeStateMetrics:
    enabled: true
  kubeProxy:
    enabled: true
  kubeEtcd:
    enabled: true
    service:
      port: 2381
      targetPort: 2381
  kubeControllerManager:
    enabled: true
    service:
      port: 10257
      targetPort: 10257
    serviceMonitor:
      https: true
      # controller-manager serves a self-signed cert on the secure port
      insecureSkipVerify: true
  kubeScheduler:
    enabled: true
    service:
      port: 10259
      targetPort: 10259
    serviceMonitor:
      https: true
      insecureSkipVerify: true

  kubelet:
    enabled: true
    serviceMonitor:
      # removed with 1.18, but still required for all container metrics ??
      cAdvisor: true
prometheusOperator :
enabled : true
2020-12-02 06:24:07 -08:00
logFormat : json
2020-07-30 18:56:46 +01:00
2020-07-31 01:18:07 +01:00
# Run on controller nodes
tolerations :
- key : node-role.kubernetes.io/master
effect : NoSchedule
2022-10-27 14:27:42 +02:00
- key : node-role.kubernetes.io/control-plane
effect : NoSchedule
2020-07-31 01:18:07 +01:00
nodeSelector :
2021-12-08 17:29:53 +01:00
node-role.kubernetes.io/control-plane : ""
2021-03-22 11:41:26 +01:00
resources :
requests :
cpu : 20m
memory : 32Mi
limits :
memory : 64Mi
2020-07-31 01:18:07 +01:00
2020-11-21 04:24:57 -08:00
admissionWebhooks :
patch :
tolerations :
- key : node-role.kubernetes.io/master
effect : NoSchedule
2022-10-27 14:27:42 +02:00
- key : node-role.kubernetes.io/control-plane
effect : NoSchedule
2020-11-21 04:24:57 -08:00
nodeSelector :
2021-12-08 17:29:53 +01:00
node-role.kubernetes.io/control-plane : ""
2020-11-21 04:24:57 -08:00
2020-08-03 13:26:00 +01:00
nodeExporter :
enabled : true
2021-03-22 12:05:02 +01:00
prometheus-node-exporter :
2022-04-08 17:11:34 +02:00
hostRootFsMount :
enabled : false
2022-01-28 17:19:41 +01:00
prometheus :
monitor :
relabelings :
- sourceLabels : [ __meta_kubernetes_pod_node_name]
separator : ;
regex : ^(.*)$
targetLabel : node
replacement : $1
action : replace
2021-03-22 11:41:26 +01:00
resources :
requests :
cpu : 20m
memory : 16Mi
2020-08-03 13:26:00 +01:00
prometheus :
enabled : true
2020-08-03 15:51:44 +01:00
prometheusSpec :
2020-08-03 17:44:58 +01:00
retention : 8d
portName : http-prometheus
2020-12-02 06:24:07 -08:00
logFormat : json
2020-12-17 15:36:23 -08:00
# externalUrl:
2020-08-03 17:44:58 +01:00
2021-09-29 16:33:33 +02:00
# gather ALL monitors
2021-04-25 11:59:54 +02:00
podMonitorSelectorNilUsesHelmValues : false
serviceMonitorSelectorNilUsesHelmValues : false
2021-06-01 18:40:34 +02:00
ruleSelectorNilUsesHelmValues : false
2021-04-25 11:59:54 +02:00
2020-08-03 15:51:44 +01:00
resources :
requests :
2021-03-22 11:41:26 +01:00
memory : 512Mi
cpu : 500m
2020-09-10 14:01:28 +01:00
limits :
2022-05-16 10:15:41 +02:00
memory : 4Gi
2020-10-27 14:13:52 +00:00
# cpu: "1000m"
2020-08-03 13:26:00 +01:00
2020-08-03 15:51:44 +01:00
storageSpec :
volumeClaimTemplate :
spec :
2021-08-02 09:52:07 +00:00
# storageClassName: ebs-sc-gp3-xfs
2020-08-03 15:51:44 +01:00
accessModes : [ "ReadWriteOnce" ]
resources :
requests :
2020-09-10 14:01:28 +01:00
storage : 16Gi
2022-10-27 14:27:42 +02:00
#volumes:
# - name: aws-token
# projected:
# sources:
# - serviceAccountToken:
# path: token
# expirationSeconds: 86400
# audience: "sts.amazonaws.com"
#volumeMounts:
# - name: aws-token
# mountPath: "/var/run/secrets/sts.amazonaws.com/serviceaccount/"
# readOnly: true
#containers:
# - name: prometheus
# env:
# - name: AWS_ROLE_ARN
# value: "<prometheus IAM ROLE ARN>"
# - name: AWS_WEB_IDENTITY_TOKEN_FILE
# value: "/var/run/secrets/sts.amazonaws.com/serviceaccount/token"
# - name: AWS_STS_REGIONAL_ENDPOINTS
# value: regional
2020-08-03 13:26:00 +01:00
2020-08-03 16:08:16 +01:00
# Custom Grafana tweaks
grafana :
enabled : true
2021-04-25 11:59:54 +02:00
2021-08-25 15:59:17 +02:00
# Disabled as we use the upstream kube-mixin dashboards directly
2021-04-26 16:27:19 +02:00
defaultDashboardsEnabled : false
2021-04-25 11:59:54 +02:00
grafana.ini :
server :
enable_gzip : true
analytics :
check_for_updates : false
security :
disable_gravatar : true
cookie_secure : true
strict_transport_security : true
#auth:
# disable_login_form: true
# disable_signout_menu: true
auth.anonymous :
enabled : true
dashboards :
min_refresh_interval : "30s"
2021-12-03 22:16:22 +01:00
default_home_dashboard_path : '/tmp/dashboards/KubeZero/home.json'
2021-04-25 11:59:54 +02:00
alerting :
enabled : false
date_formats :
default_timezone : UTC
sidecar :
dashboards :
searchNamespace : ALL
provider :
foldersFromFilesStructure : true
2021-12-09 19:03:55 +01:00
script : "/opt/script.sh"
2021-04-25 11:59:54 +02:00
# For the gunzip script
2021-12-09 19:03:55 +01:00
extraMounts :
- name : script-volume
mountPath : /opt/script.sh
subPath : script.sh
2021-04-25 11:59:54 +02:00
extraContainerVolumes :
- name : script-volume
configMap :
name : script-configmap
defaultMode : 0777
#persistence:
# enabled: true
# size: 4Gi
# storageClassName: ebs-sc-gp2-xfs
#deploymentStrategy:
# type: Recreate
2020-08-03 16:08:16 +01:00
plugins :
- grafana-piechart-panel
service :
portName : http-grafana
initChownData :
enabled : false
testFramework :
enabled : false
2020-11-21 04:24:57 -08:00
# Assign state metrics to control plane
kube-state-metrics :
tolerations :
- key : node-role.kubernetes.io/master
effect : NoSchedule
2022-10-27 14:27:42 +02:00
- key : node-role.kubernetes.io/control-plane
effect : NoSchedule
2020-11-21 04:24:57 -08:00
nodeSelector :
2021-12-08 17:29:53 +01:00
node-role.kubernetes.io/control-plane : ""
2020-11-21 04:24:57 -08:00
2020-08-03 17:15:12 +01:00
# Todo
alertmanager :
enabled : false
2021-12-03 22:16:22 +01:00
config :
global :
resolve_timeout : 5m
route :
group_by : [ 'severity' , 'clusterName' ]
group_wait : 30s
group_interval : 5m
repeat_interval : 6h
2022-01-28 17:19:41 +01:00
routes :
- matchers :
- alertname = Watchdog
receiver : 'null'
- matchers :
- alertname = InfoInhibitor
receiver : 'null'
inhibit_rules :
- equal :
- namespace
- alertname
source_matchers :
- severity = critical
target_matchers :
- severity =~ warning|info
- equal :
- namespace
- alertname
source_matchers :
- severity = warning
target_matchers :
- severity = info
- equal :
- namespace
source_matchers :
- alertname = InfoInhibitor
target_matchers :
- severity = info
2020-12-02 03:53:19 -08:00
alertmanagerSpec :
2020-12-16 03:40:14 -08:00
# externalUrl:
2020-12-02 03:53:19 -08:00
logFormat : json
2020-08-03 17:15:12 +01:00
2021-08-05 13:52:22 +02:00
# for none AWS cluster or if SNS AlertHub should NOT be used, remove sns-forwarder by overwriting containers eg.:
# containers: []
# Add sns-forwarder to AlertManager pod, see: https://github.com/DataReply/alertmanager-sns-forwarder
# uses the alertmanager serviceaccount to assume IAM role, requires annotation: kubezero.com/sns_forwarder_arn_prefix to point to SNSAlertHub
# eg: "arn:aws:sns:eu-central-1:123456789012:"
2021-08-25 15:59:17 +02:00
containers :
- name : alertmanager-sns-forwarder
image : datareply/alertmanager-sns-forwarder:latest
imagePullPolicy : Always
env :
- name : SNS_FORWARDER_ARN_PREFIX
valueFrom :
fieldRef :
fieldPath : metadata.annotations['kubezero.com/sns_forwarder_ARN_PREFIX']
- name : AWS_ROLE_ARN
valueFrom :
fieldRef :
fieldPath : metadata.annotations['kubezero.com/sns_forwarder_AWS_ROLE_ARN']
- name : AWS_WEB_IDENTITY_TOKEN_FILE
value : "/var/run/secrets/sts.amazonaws.com/serviceaccount/token"
- name : AWS_STS_REGIONAL_ENDPOINTS
value : regional
volumeMounts :
- name : aws-token
mountPath : "/var/run/secrets/sts.amazonaws.com/serviceaccount/"
readOnly : true
resources :
limits :
memory : 64Mi
cpu : 100m
requests :
cpu : 25m
memory : 32Mi
ports :
- containerPort : 9087
name : webhook-port
livenessProbe :
httpGet :
path : /health
port : webhook-port
initialDelaySeconds : 30
timeoutSeconds : 10
readinessProbe :
httpGet :
path : /health
port : webhook-port
initialDelaySeconds : 10
timeoutSeconds : 10
volumes :
- name : aws-token
projected :
sources :
- serviceAccountToken :
path : token
expirationSeconds : 86400
audience : "sts.amazonaws.com"
2021-08-05 13:52:22 +02:00

# Metrics adapter
prometheus-adapter:
  enabled: true
  logLevel: 1
  prometheus:
    url: http://metrics-kube-prometheus-st-prometheus
  tolerations:
    - key: node-role.kubernetes.io/master
      effect: NoSchedule
    - key: node-role.kubernetes.io/control-plane
      effect: NoSchedule
  nodeSelector:
    node-role.kubernetes.io/control-plane: ""
  # Basic rules for HPA to work replacing heapster, taken from kube-prometheus project
  # https://github.com/coreos/kube-prometheus/blob/master/manifests/prometheus-adapter-configMap.yaml
  rules:
    default: false
    resource:
      cpu:
        containerQuery: sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}[5m])) by (<<.GroupBy>>)
        nodeQuery: sum(1 - irate(node_cpu_seconds_total{mode="idle"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)
        resources:
          overrides:
            node:
              resource: node
            namespace:
              resource: namespace
            pod:
              resource: pod
        containerLabel: container
      memory:
        containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}) by (<<.GroupBy>>)
        nodeQuery: sum(node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>}) by (<<.GroupBy>>)
        resources:
          overrides:
            node:
              resource: node
            namespace:
              resource: namespace
            pod:
              resource: pod
        containerLabel: container
      window: 5m

# Pushgateway
prometheus-pushgateway:
  enabled: false
  serviceMonitor:
    enabled: true

# Istio ingress exposure for the metrics UIs (all disabled by default)
istio:
  grafana:
    enabled: false
    ipBlocks: []
    url: ""
    gateway: istio-ingress/ingressgateway
    destination: metrics-grafana
  prometheus:
    enabled: false
    ipBlocks: []
    url: ""
    gateway: istio-ingress/ingressgateway
    destination: metrics-kube-prometheus-st-prometheus
  alertmanager:
    enabled: false
    ipBlocks: []
    url: ""
    gateway: istio-ingress/ingressgateway
    destination: metrics-kube-prometheus-st-alertmanager