---
# Settings grouped under the kube-prometheus-stack key.
# NOTE(review): the nesting below was reconstructed from a copy that had lost
# its indentation - confirm against the chart's values schema.
kube-prometheus-stack:
  defaultRules:
    create: false

  # Scrape targets for cluster components
  coreDns:
    enabled: true
  kubeDns:
    enabled: false

  kubeApiServer:
    enabled: true
  kubeStateMetrics:
    enabled: true
  kubeProxy:
    enabled: true

  kubeEtcd:
    enabled: true
    service:
      port: 2381
      targetPort: 2381

  kubeControllerManager:
    enabled: true
    service:
      port: 10257
      targetPort: 10257
    serviceMonitor:
      https: true
      insecureSkipVerify: true

  kubeScheduler:
    enabled: true
    service:
      port: 10259
      targetPort: 10259
    serviceMonitor:
      https: true
      insecureSkipVerify: true

  kubelet:
    enabled: true
    serviceMonitor:
      # removed with 1.18, but still required for all container metrics ??
      cAdvisor: true

  prometheusOperator:
    enabled: true
    logFormat: json

    # Run on controller nodes
    tolerations:
      - key: node-role.kubernetes.io/control-plane
        effect: NoSchedule
    nodeSelector:
      node-role.kubernetes.io/control-plane: ""
    resources:
      requests:
        cpu: 10m
        memory: 64Mi
      limits:
        memory: 128Mi

    # Same control-plane placement for the admission webhook patch settings
    admissionWebhooks:
      patch:
        tolerations:
          - key: node-role.kubernetes.io/control-plane
            effect: NoSchedule
        nodeSelector:
          node-role.kubernetes.io/control-plane: ""

  nodeExporter:
    enabled: true

  prometheus-node-exporter:
    hostRootFsMount:
      enabled: false
    prometheus:
      monitor:
        # Copy the pod's node name into the "instance" label
        relabelings:
          - sourceLabels: [__meta_kubernetes_pod_node_name]
            separator: ;
            regex: ^(.*)$
            targetLabel: instance
            replacement: $1
            action: replace
    resources:
      requests:
        cpu: 20m
        memory: 16Mi

  prometheus:
    enabled: true

    prometheusSpec:
      retention: 8d
      portName: http-prometheus
      logFormat: json
      # externalUrl:

      # gather ALL monitors
      podMonitorSelectorNilUsesHelmValues: false
      serviceMonitorSelectorNilUsesHelmValues: false
      ruleSelectorNilUsesHelmValues: false
      probeSelectorNilUsesHelmValues: false
      scrapeConfigSelectorNilUsesHelmValues: false

      resources:
        requests:
          memory: 2Gi
          cpu: 500m
        limits:
          memory: 4Gi
          # cpu: "1000m"

      storageSpec:
        volumeClaimTemplate:
          spec:
            # storageClassName: ebs-sc-gp3-xfs
            accessModes: ["ReadWriteOnce"]
            resources:
              requests:
                storage: 16Gi
      #volumes:
      #  - name: aws-token
      #    projected:
      #      sources:
      #        - serviceAccountToken:
      #            path: token
      #            expirationSeconds: 86400
      #            audience: "sts.amazonaws.com"
      #volumeMounts:
      #  - name: aws-token
      #    mountPath: "/var/run/secrets/sts.amazonaws.com/serviceaccount/"
      #    readOnly: true
      #containers:
      #  - name: prometheus
      #    env:
      #      - name: AWS_ROLE_ARN
      #        value: "<prometheus IAM ROLE ARN>"
      #      - name: AWS_WEB_IDENTITY_TOKEN_FILE
      #        value: "/var/run/secrets/sts.amazonaws.com/serviceaccount/token"
      #      - name: AWS_STS_REGIONAL_ENDPOINTS
      #        value: regional

  # Custom Grafana tweaks
  grafana:
    enabled: true

    # Disabled as we use the upstream kube-mixin dashboards directly
    defaultDashboardsEnabled: false

    grafana.ini:
      server:
        enable_gzip: true
      analytics:
        check_for_updates: false
        check_for_plugin_updates: false
      security:
        disable_gravatar: true
        cookie_secure: true
        strict_transport_security: true
      #auth:
      #  disable_login_form: true
      #  disable_signout_menu: true
      auth.anonymous:
        enabled: true
      dashboards:
        min_refresh_interval: "30s"
        default_home_dashboard_path: '/tmp/dashboards/KubeZero/home.json'
      alerting:
        enabled: false
      date_formats:
        default_timezone: UTC
      log.console:
        format: json

    sidecar:
      dashboards:
        searchNamespace: ALL
        provider:
          foldersFromFilesStructure: true
        script: "/opt/script.sh"
        # For the gunzip script
        extraMounts:
          - name: script-volume
            mountPath: /opt/script.sh
            subPath: script.sh
    extraContainerVolumes:
      - name: script-volume
        configMap:
          name: script-configmap
          # Decimal 511 == octal 0777. Written in decimal because a bare
          # leading-zero literal is only octal for YAML 1.1 parsers; a YAML 1.2
          # parser reads 0777 as 777, which is outside the valid 0-511 range.
          defaultMode: 511

    #persistence:
    #  enabled: true
    #  size: 4Gi
    #  storageClassName: ebs-sc-gp2-xfs
    #deploymentStrategy:
    #  type: Recreate

    plugins:
      - grafana-piechart-panel
    service:
      portName: http-grafana
    initChownData:
      enabled: false
    testFramework:
      enabled: false

  # Assign state metrics to control plane
  kube-state-metrics:
    tolerations:
      - key: node-role.kubernetes.io/control-plane
        effect: NoSchedule
    nodeSelector:
      node-role.kubernetes.io/control-plane: ""

  # Todo
  alertmanager:
    enabled: false
    config:
      global:
        resolve_timeout: 5m
      route:
        group_by: ['alertname', 'severity', 'status']
        group_wait: 30s
        group_interval: 5m
        repeat_interval: 4h
        routes:
          # Alerts with severity "none" go to the receiver named 'null'.
          # NOTE(review): no receivers are declared in this file - confirm
          # that 'null' is supplied elsewhere (e.g. chart defaults).
          - matchers:
              - severity = none
            receiver: 'null'
      inhibit_rules:
        # critical mutes warning/info for the same namespace + alertname
        - equal:
            - namespace
            - alertname
          source_matchers:
            - severity = critical
          target_matchers:
            - severity =~ warning|info
        # warning mutes info for the same namespace + alertname
        - equal:
            - namespace
            - alertname
          source_matchers:
            - severity = warning
          target_matchers:
            - severity = info
        # InfoInhibitor mutes info alerts in the same namespace
        - equal:
            - namespace
          source_matchers:
            - alertname = InfoInhibitor
          target_matchers:
            - severity = info
        # Disable cluster overcommit alerts if we have cluster autoscaler available
        - source_matchers:
            - alertname = ClusterAutoscalerNodeGroupsEnabled
          target_matchers:
            - alertname =~ "KubeCPUOvercommit|KubeMemoryOvercommit"
    alertmanagerSpec:
      # externalUrl:
      logFormat: json

      # For non-AWS clusters, or if the SNS AlertHub should NOT be used, remove
      # the sns-forwarder by overwriting containers, e.g.:
      # containers: []

      # Add sns-forwarder to AlertManager pod, see: https://github.com/DataReply/alertmanager-sns-forwarder
      # Uses the alertmanager serviceaccount to assume the IAM role. The
      # container env below is read from these pod annotations (keys are
      # case-sensitive):
      #   kubezero.com/sns_forwarder_ARN_PREFIX    - points to the SNSAlertHub,
      #                                              eg: "arn:aws:sns:eu-central-1:123456789012:"
      #   kubezero.com/sns_forwarder_AWS_ROLE_ARN  - value for AWS_ROLE_ARN
      containers:
        - name: alertmanager-sns-forwarder
          # NOTE(review): ":latest" combined with "Always" means the running
          # image can change on any pod restart - consider pinning a tag.
          image: datareply/alertmanager-sns-forwarder:latest
          imagePullPolicy: Always
          env:
            - name: SNS_FORWARDER_ARN_PREFIX
              valueFrom:
                fieldRef:
                  fieldPath: metadata.annotations['kubezero.com/sns_forwarder_ARN_PREFIX']
            - name: AWS_ROLE_ARN
              valueFrom:
                fieldRef:
                  fieldPath: metadata.annotations['kubezero.com/sns_forwarder_AWS_ROLE_ARN']
            - name: AWS_WEB_IDENTITY_TOKEN_FILE
              value: "/var/run/secrets/sts.amazonaws.com/serviceaccount/token"
            - name: AWS_STS_REGIONAL_ENDPOINTS
              value: regional
          volumeMounts:
            - name: aws-token
              mountPath: "/var/run/secrets/sts.amazonaws.com/serviceaccount/"
              readOnly: true
          resources:
            limits:
              memory: 64Mi
              cpu: 100m
            requests:
              cpu: 25m
              memory: 32Mi
          ports:
            - containerPort: 9087
              name: webhook-port
          livenessProbe:
            httpGet:
              path: /health
              port: webhook-port
            initialDelaySeconds: 30
            timeoutSeconds: 10
          readinessProbe:
            httpGet:
              path: /health
              port: webhook-port
            initialDelaySeconds: 10
            timeoutSeconds: 10
      # Projected service account token mounted by the sns-forwarder above
      volumes:
        - name: aws-token
          projected:
            sources:
              - serviceAccountToken:
                  path: token
                  expirationSeconds: 86400
                  audience: "sts.amazonaws.com"

# Metrics adapter
# NOTE(review): the nesting below was reconstructed from a copy that had lost
# its indentation - confirm against the chart's values schema.
prometheus-adapter:
  enabled: true
  logLevel: 1
  metricsRelistInterval: 3m
  prometheus:
    url: http://metrics-kube-prometheus-st-prometheus
  tolerations:
    - key: node-role.kubernetes.io/control-plane
      effect: NoSchedule
  nodeSelector:
    node-role.kubernetes.io/control-plane: ""
  # Basic rules for HPA to work, replacing Heapster, taken from kube-prometheus project
  # https://github.com/coreos/kube-prometheus/blob/master/manifests/prometheus-adapter-configMap.yaml
  rules:
    default: false
    resource:
      cpu:
        # <<.GroupBy>> and <<.LabelMatchers>> are placeholders left verbatim
        # in the query text.
        containerQuery: |
          sum by (<<.GroupBy>>) (
            irate (
              container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="",pod!=""}[120s]
            )
          )
        nodeQuery: |
          sum(1 - irate(node_cpu_seconds_total{<<.LabelMatchers>>, mode="idle"}[120s])) by (<<.GroupBy>>)
        resources:
          overrides:
            instance:
              resource: node
            namespace:
              resource: namespace
            pod:
              resource: pod
        containerLabel: container
      memory:
        containerQuery: |
          sum by (<<.GroupBy>>) (
            container_memory_working_set_bytes{<<.LabelMatchers>>,container!="",pod!="",container!="POD"}
          )
        nodeQuery: |
          sum(node_memory_MemTotal_bytes{<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{<<.LabelMatchers>>}) by (<<.GroupBy>>)
        resources:
          overrides:
            instance:
              resource: node
            namespace:
              resource: namespace
            pod:
              resource: pod
        containerLabel: container
      window: 2m

# Pushgateway
# Disabled here; the serviceMonitor flag below is set to true regardless.
prometheus-pushgateway:
  enabled: false

  serviceMonitor:
    enabled: true

# Istio exposure settings, one entry per UI. All three are disabled here and
# share the same gateway; each names its own destination and port.
istio:
  grafana:
    enabled: false
    ipBlocks: []
    url: ""
    gateway: istio-ingress/ingressgateway
    destination: metrics-grafana
    port: 80

  prometheus:
    enabled: false
    ipBlocks: []
    url: ""
    gateway: istio-ingress/ingressgateway
    destination: metrics-kube-prometheus-st-prometheus
    port: 9090

  alertmanager:
    enabled: false
    ipBlocks: []
    url: ""
    gateway: istio-ingress/ingressgateway
    destination: metrics-kube-prometheus-st-alertmanager
    port: 9093