# kubezero-metrics Helm chart values (kube-prometheus-stack and companion charts)
# Values passed through to the kube-prometheus-stack subchart
kube-prometheus-stack:
  global:
    rbac:
      pspEnabled: false

  defaultRules:
    create: false

  coreDns:
    enabled: true
  kubeDns:
    enabled: false

  kubeApiServer:
    enabled: true
  kubeStateMetrics:
    enabled: true
  kubeProxy:
    enabled: true

  kubeEtcd:
    enabled: true
    service:
      port: 2381
      targetPort: 2381

  kubeControllerManager:
    enabled: true
    service:
      port: 10257
      targetPort: 10257
    serviceMonitor:
      https: true
      insecureSkipVerify: true

  kubeScheduler:
    enabled: true
    service:
      port: 10259
      targetPort: 10259
    serviceMonitor:
      https: true
      insecureSkipVerify: true

  kubelet:
    enabled: true
    serviceMonitor:
      # removed with 1.18, but still required for all container metrics ??
      cAdvisor: true
prometheusOperator: |
|
enabled: true |
|
logFormat: json |
|
|
|
# Run on controller nodes |
|
tolerations: |
|
- key: node-role.kubernetes.io/master |
|
effect: NoSchedule |
|
nodeSelector: |
|
node-role.kubernetes.io/control-plane: "" |
|
resources: |
|
requests: |
|
cpu: 20m |
|
memory: 32Mi |
|
limits: |
|
memory: 64Mi |
|
|
|
admissionWebhooks: |
|
patch: |
|
tolerations: |
|
- key: node-role.kubernetes.io/master |
|
effect: NoSchedule |
|
nodeSelector: |
|
node-role.kubernetes.io/control-plane: "" |
|
|
|
nodeExporter: |
|
enabled: true |
|
serviceMonitor: |
|
relabelings: |
|
- sourceLabels: [__meta_kubernetes_pod_node_name] |
|
separator: ; |
|
regex: ^(.*)$ |
|
targetLabel: node |
|
replacement: $1 |
|
action: replace |
|
|
|
prometheus-node-exporter: |
|
rbac: |
|
pspEnabled: false |
|
resources: |
|
requests: |
|
cpu: 20m |
|
memory: 16Mi |
|
|
|
prometheus: |
|
enabled: true |
|
|
|
prometheusSpec: |
|
retention: 8d |
|
portName: http-prometheus |
|
logFormat: json |
|
# externalUrl: |
|
|
|
# gather ALL monitors |
|
podMonitorSelectorNilUsesHelmValues: false |
|
serviceMonitorSelectorNilUsesHelmValues: false |
|
ruleSelectorNilUsesHelmValues: false |
|
|
|
resources: |
|
requests: |
|
memory: 512Mi |
|
cpu: 500m |
|
limits: |
|
memory: 3Gi |
|
# cpu: "1000m" |
|
|
|
walCompression: true |
|
storageSpec: |
|
volumeClaimTemplate: |
|
spec: |
|
# storageClassName: ebs-sc-gp3-xfs |
|
accessModes: ["ReadWriteOnce"] |
|
resources: |
|
requests: |
|
storage: 16Gi |
|
|
|
# Custom Grafana tweaks |
|
grafana: |
|
enabled: true |
|
rbac: |
|
pspEnabled: false |
|
|
|
# Disabled as we use the upstream kube-mixin dashboards directly |
|
defaultDashboardsEnabled: false |
|
|
|
grafana.ini: |
|
server: |
|
enable_gzip: true |
|
analytics: |
|
check_for_updates: false |
|
security: |
|
disable_gravatar: true |
|
cookie_secure: true |
|
strict_transport_security: true |
|
#auth: |
|
# disable_login_form: true |
|
# disable_signout_menu: true |
|
auth.anonymous: |
|
enabled: true |
|
dashboards: |
|
min_refresh_interval: "30s" |
|
default_home_dashboard_path: '/tmp/dashboards/KubeZero/home.json' |
|
alerting: |
|
enabled: false |
|
date_formats: |
|
default_timezone: UTC |
|
|
|
sidecar: |
|
dashboards: |
|
searchNamespace: ALL |
|
provider: |
|
foldersFromFilesStructure: true |
|
# For the gunzip script |
|
extraContainerVolumes: |
|
- name: script-volume |
|
configMap: |
|
name: script-configmap |
|
defaultMode: 0777 |
|
|
|
#persistence: |
|
# enabled: true |
|
# size: 4Gi |
|
# storageClassName: ebs-sc-gp2-xfs |
|
#deploymentStrategy: |
|
# type: Recreate |
|
|
|
plugins: |
|
- grafana-piechart-panel |
|
service: |
|
portName: http-grafana |
|
initChownData: |
|
enabled: false |
|
testFramework: |
|
enabled: false |
|
|
|
# Assign state metrics to control plane |
|
kube-state-metrics: |
|
podSecurityPolicy: |
|
enabled: false |
|
tolerations: |
|
- key: node-role.kubernetes.io/master |
|
effect: NoSchedule |
|
nodeSelector: |
|
node-role.kubernetes.io/control-plane: "" |
|
|
|
# Todo |
|
alertmanager: |
|
enabled: false |
|
config: |
|
global: |
|
resolve_timeout: 5m |
|
route: |
|
group_by: ['severity', 'clusterName'] |
|
group_wait: 30s |
|
group_interval: 5m |
|
repeat_interval: 6h |
|
alertmanagerSpec: |
|
# externalUrl: |
|
logFormat: json |
|
|
|
# for none AWS cluster or if SNS AlertHub should NOT be used, remove sns-forwarder by overwriting containers eg.: |
|
# containers: [] |
|
|
|
# Add sns-forwarder to AlertManager pod, see: https://github.com/DataReply/alertmanager-sns-forwarder |
|
# uses the alertmanager serviceaccount to assume IAM role, requires annotation: kubezero.com/sns_forwarder_arn_prefix to point to SNSAlertHub |
|
# eg: "arn:aws:sns:eu-central-1:123456789012:" |
|
containers: |
|
- name: alertmanager-sns-forwarder |
|
image: datareply/alertmanager-sns-forwarder:latest |
|
imagePullPolicy: Always |
|
env: |
|
- name: SNS_FORWARDER_ARN_PREFIX |
|
valueFrom: |
|
fieldRef: |
|
fieldPath: metadata.annotations['kubezero.com/sns_forwarder_ARN_PREFIX'] |
|
- name: AWS_ROLE_ARN |
|
valueFrom: |
|
fieldRef: |
|
fieldPath: metadata.annotations['kubezero.com/sns_forwarder_AWS_ROLE_ARN'] |
|
- name: AWS_WEB_IDENTITY_TOKEN_FILE |
|
value: "/var/run/secrets/sts.amazonaws.com/serviceaccount/token" |
|
- name: AWS_STS_REGIONAL_ENDPOINTS |
|
value: regional |
|
volumeMounts: |
|
- name: aws-token |
|
mountPath: "/var/run/secrets/sts.amazonaws.com/serviceaccount/" |
|
readOnly: true |
|
resources: |
|
limits: |
|
memory: 64Mi |
|
cpu: 100m |
|
requests: |
|
cpu: 25m |
|
memory: 32Mi |
|
ports: |
|
- containerPort: 9087 |
|
name: webhook-port |
|
livenessProbe: |
|
httpGet: |
|
path: /health |
|
port: webhook-port |
|
initialDelaySeconds: 30 |
|
timeoutSeconds: 10 |
|
readinessProbe: |
|
httpGet: |
|
path: /health |
|
port: webhook-port |
|
initialDelaySeconds: 10 |
|
timeoutSeconds: 10 |
|
volumes: |
|
- name: aws-token |
|
projected: |
|
sources: |
|
- serviceAccountToken: |
|
path: token |
|
expirationSeconds: 86400 |
|
audience: "sts.amazonaws.com" |
|
|
|
# Metrics adapter
prometheus-adapter:
  enabled: true
  logLevel: 1
  prometheus:
    url: http://metrics-kube-prometheus-st-prometheus
  tolerations:
    - key: node-role.kubernetes.io/master
      effect: NoSchedule
  nodeSelector:
    node-role.kubernetes.io/control-plane: ""
  # Basic rules for HPA to work replacing heapster, taken from the kube-prometheus project
  # https://github.com/coreos/kube-prometheus/blob/master/manifests/prometheus-adapter-configMap.yaml
  rules:
    default: false
    resource:
      cpu:
        containerQuery: sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}[5m])) by (<<.GroupBy>>)
        nodeQuery: sum(1 - irate(node_cpu_seconds_total{mode="idle"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)
        resources:
          overrides:
            node:
              resource: node
            namespace:
              resource: namespace
            pod:
              resource: pod
        containerLabel: container
      memory:
        containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}) by (<<.GroupBy>>)
        nodeQuery: sum(node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>}) by (<<.GroupBy>>)
        resources:
          overrides:
            node:
              resource: node
            namespace:
              resource: namespace
            pod:
              resource: pod
        containerLabel: container
      window: 5m
# Pushgateway
prometheus-pushgateway:
  enabled: false

  serviceMonitor:
    enabled: true
# Optional Istio ingress exposure for the monitoring UIs
istio:
  grafana:
    enabled: false
    ipBlocks: []
    url: ""
    gateway: istio-ingress/ingressgateway
    destination: metrics-grafana

  prometheus:
    enabled: false
    ipBlocks: []
    url: ""
    gateway: istio-ingress/ingressgateway
    destination: metrics-kube-prometheus-st-prometheus

  alertmanager:
    enabled: false
    ipBlocks: []
    url: ""
    gateway: istio-ingress/ingressgateway
    destination: metrics-kube-prometheus-st-alertmanager