# KubeZero/charts/kubezero-metrics/values.yaml (390 lines, 10 KiB, YAML)

# Values for the kube-prometheus-stack chart. Assumed to be consumed as a
# subchart (Helm scopes a top-level key to the dependency of the same name).
kube-prometheus-stack:
  defaultRules:
    create: false

  # Per-component scrape targets
  coreDns:
    enabled: true
  kubeDns:
    enabled: false

  kubeApiServer:
    enabled: true
  kubeStateMetrics:
    enabled: true
  kubeProxy:
    enabled: true

  kubeEtcd:
    enabled: true
    service:
      port: 2381
      targetPort: 2381

  # Controller manager and scheduler are scraped over HTTPS with certificate
  # verification skipped (insecureSkipVerify).
  kubeControllerManager:
    enabled: true
    service:
      port: 10257
      targetPort: 10257
    serviceMonitor:
      https: true
      insecureSkipVerify: true

  kubeScheduler:
    enabled: true
    service:
      port: 10259
      targetPort: 10259
    serviceMonitor:
      https: true
      insecureSkipVerify: true

  kubelet:
    enabled: true
    serviceMonitor:
      # removed with 1.18, but still required for all container metrics ??
      cAdvisor: true

  prometheusOperator:
    enabled: true
    logFormat: json

    # Run on controller nodes
    tolerations:
      - key: node-role.kubernetes.io/control-plane
        effect: NoSchedule
    nodeSelector:
      node-role.kubernetes.io/control-plane: ""
    resources:
      requests:
        cpu: 20m
        memory: 32Mi
      limits:
        memory: 64Mi

    admissionWebhooks:
      patch:
        # Same control-plane placement as the operator itself
        tolerations:
          - key: node-role.kubernetes.io/control-plane
            effect: NoSchedule
        nodeSelector:
          node-role.kubernetes.io/control-plane: ""

  nodeExporter:
    enabled: true

  prometheus-node-exporter:
    hostRootFsMount:
      enabled: false
    prometheus:
      monitor:
        # Copy the pod's node name into the `instance` label
        relabelings:
          - sourceLabels: [__meta_kubernetes_pod_node_name]
            separator: ";"
            regex: '^(.*)$'
            targetLabel: instance
            replacement: '$1'
            action: replace
    resources:
      requests:
        cpu: 20m
        memory: 16Mi

  prometheus:
    enabled: true

    prometheusSpec:
      retention: 8d
      portName: http-prometheus
      logFormat: json
      # externalUrl:

      # gather ALL monitors
      podMonitorSelectorNilUsesHelmValues: false
      serviceMonitorSelectorNilUsesHelmValues: false
      ruleSelectorNilUsesHelmValues: false

      resources:
        requests:
          memory: 2Gi
          cpu: 500m
        limits:
          memory: 4Gi
          # cpu: "1000m"

      storageSpec:
        volumeClaimTemplate:
          spec:
            # storageClassName: ebs-sc-gp3-xfs
            accessModes: ["ReadWriteOnce"]
            resources:
              requests:
                storage: 16Gi

      # Commented-out example: projected service account token plus AWS
      # environment variables for the prometheus container.
      # volumes:
      #   - name: aws-token
      #     projected:
      #       sources:
      #         - serviceAccountToken:
      #             path: token
      #             expirationSeconds: 86400
      #             audience: "sts.amazonaws.com"
      # volumeMounts:
      #   - name: aws-token
      #     mountPath: "/var/run/secrets/sts.amazonaws.com/serviceaccount/"
      #     readOnly: true
      # containers:
      #   - name: prometheus
      #     env:
      #       - name: AWS_ROLE_ARN
      #         value: "<prometheus IAM ROLE ARN>"
      #       - name: AWS_WEB_IDENTITY_TOKEN_FILE
      #         value: "/var/run/secrets/sts.amazonaws.com/serviceaccount/token"
      #       - name: AWS_STS_REGIONAL_ENDPOINTS
      #         value: regional

  # Custom Grafana tweaks
  grafana:
    enabled: true

    # Disabled as we use the upstream kube-mixin dashboards directly
    defaultDashboardsEnabled: false

    grafana.ini:
      server:
        enable_gzip: true
      analytics:
        check_for_updates: false
      security:
        disable_gravatar: true
        cookie_secure: true
        strict_transport_security: true
      # auth:
      #   disable_login_form: true
      #   disable_signout_menu: true
      auth.anonymous:
        enabled: true
      dashboards:
        min_refresh_interval: "30s"
        default_home_dashboard_path: '/tmp/dashboards/KubeZero/home.json'
      alerting:
        enabled: false
      date_formats:
        default_timezone: UTC

    sidecar:
      dashboards:
        searchNamespace: ALL
        provider:
          foldersFromFilesStructure: true
        script: "/opt/script.sh"
        # For the gunzip script
        extraMounts:
          - name: script-volume
            mountPath: /opt/script.sh
            subPath: script.sh

    # Volume backing the script mount above (same `script-volume` name)
    extraContainerVolumes:
      - name: script-volume
        configMap:
          name: script-configmap
          # 511 decimal == 0777 octal. Written in decimal because a
          # leading-zero literal is only read as octal under YAML 1.1 rules.
          defaultMode: 511

    # persistence:
    #   enabled: true
    #   size: 4Gi
    #   storageClassName: ebs-sc-gp2-xfs
    # deploymentStrategy:
    #   type: Recreate

    plugins:
      - grafana-piechart-panel

    service:
      portName: http-grafana

    initChownData:
      enabled: false

    testFramework:
      enabled: false

  # Assign state metrics to control plane
  kube-state-metrics:
    tolerations:
      - key: node-role.kubernetes.io/control-plane
        effect: NoSchedule
    nodeSelector:
      node-role.kubernetes.io/control-plane: ""

  # TODO: alertmanager is disabled by default
  alertmanager:
    enabled: false
    config:
      global:
        resolve_timeout: 5m
      route:
        group_by: ['severity', 'clusterName']
        group_wait: 10s
        group_interval: 5m
        repeat_interval: 4h
        routes:
          # Alerts with severity "none" go to the 'null' receiver
          - matchers:
              - severity = none
            receiver: 'null'
      inhibit_rules:
        # critical inhibits warning/info for the same namespace + alertname
        - equal:
            - namespace
            - alertname
          source_matchers:
            - severity = critical
          target_matchers:
            - severity =~ warning|info
        # warning inhibits info for the same namespace + alertname
        - equal:
            - namespace
            - alertname
          source_matchers:
            - severity = warning
          target_matchers:
            - severity = info
        # InfoInhibitor inhibits info alerts in the same namespace
        - equal:
            - namespace
          source_matchers:
            - alertname = InfoInhibitor
          target_matchers:
            - severity = info
        # Disable cluster overcommit alerts if we have cluster autoscaler available
        - source_matchers:
            - alertname = ClusterAutoscalerNodeGroupsEnabled
          target_matchers:
            - 'alertname =~ "KubeCPUOvercommit|KubeMemoryOvercommit"'

    alertmanagerSpec:
      # externalUrl:
      logFormat: json

      # For non-AWS clusters, or if the SNS AlertHub should NOT be used,
      # remove the sns-forwarder by overwriting containers, e.g.:
      # containers: []

      # Add sns-forwarder to the AlertManager pod, see:
      # https://github.com/DataReply/alertmanager-sns-forwarder
      # The container reads two pod annotations (keys are case-sensitive):
      #   kubezero.com/sns_forwarder_ARN_PREFIX   -> SNS_FORWARDER_ARN_PREFIX,
      #     pointing to the SNSAlertHub, e.g. "arn:aws:sns:eu-central-1:123456789012:"
      #   kubezero.com/sns_forwarder_AWS_ROLE_ARN -> AWS_ROLE_ARN
      # AWS_WEB_IDENTITY_TOKEN_FILE points into the projected `aws-token`
      # volume defined under `volumes` below.
      containers:
        - name: alertmanager-sns-forwarder
          # NOTE(review): unpinned `latest` tag with pull policy Always means
          # the running version can change on any pod restart - consider
          # pinning a release tag or digest.
          image: datareply/alertmanager-sns-forwarder:latest
          imagePullPolicy: Always
          env:
            - name: SNS_FORWARDER_ARN_PREFIX
              valueFrom:
                fieldRef:
                  fieldPath: metadata.annotations['kubezero.com/sns_forwarder_ARN_PREFIX']
            - name: AWS_ROLE_ARN
              valueFrom:
                fieldRef:
                  fieldPath: metadata.annotations['kubezero.com/sns_forwarder_AWS_ROLE_ARN']
            - name: AWS_WEB_IDENTITY_TOKEN_FILE
              value: "/var/run/secrets/sts.amazonaws.com/serviceaccount/token"
            - name: AWS_STS_REGIONAL_ENDPOINTS
              value: regional
          volumeMounts:
            - name: aws-token
              mountPath: "/var/run/secrets/sts.amazonaws.com/serviceaccount/"
              readOnly: true
          resources:
            limits:
              memory: 64Mi
              cpu: 100m
            requests:
              cpu: 25m
              memory: 32Mi
          ports:
            - containerPort: 9087
              name: webhook-port
          livenessProbe:
            httpGet:
              path: /health
              port: webhook-port
            initialDelaySeconds: 30
            timeoutSeconds: 10
          readinessProbe:
            httpGet:
              path: /health
              port: webhook-port
            initialDelaySeconds: 10
            timeoutSeconds: 10
      volumes:
        - name: aws-token
          projected:
            sources:
              - serviceAccountToken:
                  path: token
                  expirationSeconds: 86400
                  audience: "sts.amazonaws.com"
# Metrics adapter
prometheus-adapter:
  enabled: true
  logLevel: 1

  # Prometheus endpoint the adapter queries
  prometheus:
    url: 'http://metrics-kube-prometheus-st-prometheus'

  # Schedule onto control-plane nodes
  tolerations:
    - key: node-role.kubernetes.io/control-plane
      effect: NoSchedule
  nodeSelector:
    node-role.kubernetes.io/control-plane: ""

  # Basic rules for HPA to work replacing heapster, taken from kube-prometheus project
  # https://github.com/coreos/kube-prometheus/blob/master/manifests/prometheus-adapter-configMap.yaml
  rules:
    default: false
    resource:
      cpu:
        containerQuery: 'sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}[5m])) by (<<.GroupBy>>)'
        nodeQuery: 'sum(1 - irate(node_cpu_seconds_total{mode="idle"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)'
        resources:
          overrides:
            node:
              resource: node
            namespace:
              resource: namespace
            pod:
              resource: pod
        containerLabel: container
      memory:
        containerQuery: 'sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}) by (<<.GroupBy>>)'
        nodeQuery: 'sum(node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>}) by (<<.GroupBy>>)'
        resources:
          overrides:
            node:
              resource: node
            namespace:
              resource: namespace
            pod:
              resource: pod
        containerLabel: container
      # Same 5m window as the irate() ranges in the cpu queries above
      window: 5m
# Pushgateway - disabled by default
prometheus-pushgateway:
  enabled: false

  # ServiceMonitor flag is pre-set to true
  serviceMonitor:
    enabled: true
# Istio settings per component; every entry is disabled by default.
# NOTE(review): presumably rendered into Istio routing/policy objects by this
# chart's templates - confirm against the templates directory.
istio:
  grafana:
    enabled: false
    ipBlocks: []
    url: ""
    gateway: 'istio-ingress/ingressgateway'
    destination: 'metrics-grafana'
    port: 80

  prometheus:
    enabled: false
    ipBlocks: []
    url: ""
    gateway: 'istio-ingress/ingressgateway'
    destination: 'metrics-kube-prometheus-st-prometheus'
    port: 9090

  alertmanager:
    enabled: false
    ipBlocks: []
    url: ""
    gateway: 'istio-ingress/ingressgateway'
    destination: 'metrics-kube-prometheus-st-alertmanager'
    port: 9093