KubeZero/charts/kubezero-telemetry/values.yaml

462 lines
10 KiB
YAML

opentelemetry-collector:
enabled: false
mode: deployment
jaeger:
enabled: false
agent:
enabled: false
collector:
service:
otlp:
grpc:
name: otlp-grpc
port: 4317
http:
name: otlp-http
port: 4318
extraEnv:
- name: ES_TAGS_AS_FIELDS_ALL
value: "true"
serviceMonitor:
enabled: false
# https://www.jaegertracing.io/docs/1.53/deployment/#collector
storage:
type: elasticsearch
elasticsearch:
scheme: https
host: telemetry
user: admin
password: admin
cmdlineParams:
es.tls.enabled: ""
es.tls.skip-host-verify: ""
provisionDataStore:
cassandra: false
elasticsearch: false
query:
agentSidecar:
enabled: false
serviceMonitor:
enabled: false
istio:
enabled: false
gateway: istio-ingress/private-ingressgateway
url: jaeger.example.com
opensearch:
version: 2.14.0
prometheus: false
# custom cluster settings
#settings:
# index.number_of_shards: 1
nodeSets: []
#- name: default-nodes
# replicas: 2
# storage:
# size: 16Gi
# class: my-fancy-SSDs
# zone: us-west-2a
# resources:
# limits:
# #cpu: 1
# memory: 2Gi
# requests:
# cpu: 500m
# memory: 2Gi
dashboard:
enabled: false
istio:
enabled: false
gateway: istio-ingress/private-ingressgateway
url: telemetry-dashboard.example.com
# New logging pipeline
fluentd:
enabled: false
image:
repository: public.ecr.aws/zero-downtime/fluentd-concenter
tag: v1.16.5-1-g09dc31c
istio:
enabled: false
kind: StatefulSet
replicaCount: 1
#plugins:
#- fluent-plugin-s3
source:
sharedKey: secretref+k8s://v1/Secret/kube-system/kubezero-secrets/telemetry.fluentd.source.sharedKey # "cloudbender"
output:
# Defaults to OpenSearch in same namespace
host: telemetry
user: admin
password: admin
service:
ports:
- name: tcp-forward
protocol: TCP
containerPort: 24224
- name: http-fluentd
protocol: TCP
containerPort: 9880
metrics:
serviceMonitor:
enabled: false
dashboards:
enabled: false
# No need for docker nor /var/log
mountVarLogDirectory: false
mountDockerContainersDirectory: false
# no rbac required until we need WebAuth identity for eg. s3
rbac:
create: false
resources:
requests:
cpu: 200m
memory: 256Mi
limits:
memory: 512Mi
persistence:
enabled: true
storageClass: ""
size: 1Gi
volumes:
- name: trust-store
secret:
secretName: telemetry-nodes-http-tls
items:
- key: tls.crt
path: ca.crt
volumeMounts:
- name: trust-store
mountPath: "/run/pki"
readOnly: true
securityContext:
capabilities:
drop:
- ALL
#readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 100
configMapConfigs:
- fluentd-prometheus-conf
fileConfigs:
00_system.conf: |-
<system>
root_dir /fluentd/log
log_level info
ignore_repeated_log_interval 60s
ignore_same_log_interval 60s
workers 1
</system>
01_sources.conf: |-
<source>
@type http
@label @KUBERNETES
port 9880
bind 0.0.0.0
keepalive_timeout 30
</source>
<source>
@type forward
@label @KUBERNETES
port 24224
bind 0.0.0.0
# skip_invalid_event true
send_keepalive_packet true
<security>
self_hostname "telemetry-fluentd"
shared_key {{ .Values.source.sharedKey }}
</security>
</source>
02_filters.conf: |-
<label @KUBERNETES>
# prevent log feedback loops, discard logs from our own pods
<match kube.logging.fluentd>
@type relabel
@label @FLUENT_LOG
</match>
# Exclude current fluent-bit multiline noise
# Still relevant ??
<filter kube.logging.fluent-bit>
@type grep
<exclude>
key log
pattern /could not append content to multiline context/
</exclude>
</filter>
# Generate Hash ID to break endless loop for already ingested events during retries
<filter **>
@type opensearch_genid
use_entire_record true
</filter>
# Route through DISPATCH for Prometheus metrics
<match **>
@type relabel
@label @DISPATCH
</match>
</label>
04_outputs.conf: |-
<label @OUTPUT>
<match **>
@id out_os
@type opensearch
# @log_level debug
include_tag_key true
id_key _hash
remove_keys _hash
write_operation create
# we have oj in the fluentd-concenter image
prefer_oj_serializer true
# KubeZero pipeline incl. GeoIP etc.
#pipeline fluentd
http_backend typhoeus
ca_file /run/pki/ca.crt
port 9200
scheme https
hosts {{ .Values.output.host }}
user {{ .Values.output.user }}
password {{ .Values.output.password }}
log_es_400_reason
logstash_format true
reconnect_on_error true
reload_on_failure true
request_timeout 300s
#sniffer_class_name Fluent::Plugin::OpenSearchSimpleSniffer
#with_transporter_log true
verify_es_version_at_startup false
default_opensearch_version 2
#suppress_type_name true
# Retry failed bulk requests
# https://github.com/uken/fluent-plugin-elasticsearch#unrecoverable-error-types
unrecoverable_error_types ["out_of_memory_error"]
bulk_message_request_threshold 1048576
<buffer>
@type file
flush_mode interval
flush_thread_count 2
flush_interval 10s
chunk_limit_size 2MB
total_limit_size 1GB
flush_at_shutdown true
retry_type exponential_backoff
retry_timeout 6h
overflow_action drop_oldest_chunk
disable_chunk_backup true
</buffer>
</match>
</label>
fluent-bit:
enabled: false
#image:
#repository: public.ecr.aws/zero-downtime/fluent-bit
#tag: 2.0.10
testFramework:
enabled: false
serviceMonitor:
enabled: false
#rbac:
# nodeAccess: true
#hostNetwork: true
#dnsPolicy: ClusterFirstWithHostNet
tolerations:
- effect: NoSchedule
operator: Exists
resources:
requests:
cpu: 20m
memory: 48Mi
limits:
memory: 128Mi
config:
output:
host: telemetry-fluentd
sharedKey: secretref+k8s://v1/Secret/kube-system/kubezero-secrets/telemetry.fluentd.source.sharedKey
tls: false
input:
memBufLimit: 16MB
refreshInterval: 5
logLevel: info
flushInterval: 5
#extraRecords:
# source.clustername: MyKubeCluster
service: |
[SERVICE]
Flush {{ .Values.config.flushInterval }}
Daemon Off
Log_Level {{ .Values.config.logLevel }}
Parsers_File parsers.conf
Parsers_File custom_parsers.conf
HTTP_Server On
HTTP_Listen 0.0.0.0
HTTP_Port {{ .Values.service.port }}
Health_Check On
inputs: |
[INPUT]
Name tail
Path /var/log/containers/*.log
# Exclude ourselves to current error spam, https://github.com/fluent/fluent-bit/issues/5769
Exclude_Path *logging-fluent-bit*
multiline.parser cri
Tag cri.*
Skip_Long_Lines On
Skip_Empty_Lines On
DB /var/log/flb_kube.db
DB.Sync Normal
DB.locking true
# Buffer_Max_Size 1M
{{- with .Values.config.input }}
Mem_Buf_Limit {{ default "16MB" .memBufLimit }}
Refresh_Interval {{ default 5 .refreshInterval }}
{{- end }}
filters: |
[FILTER]
Name parser
Match cri.*
Parser cri-log
Key_Name log
[FILTER]
Name kubernetes
Match cri.*
Merge_Log On
Merge_Log_Key kube
Kube_Tag_Prefix cri.var.log.containers.
Keep_Log Off
K8S-Logging.Parser Off
K8S-Logging.Exclude Off
Kube_Meta_Cache_TTL 3600s
Buffer_Size 0
#Use_Kubelet true
{{- if index .Values "config" "extraRecords" }}
[FILTER]
Name record_modifier
Match cri.*
{{- range $k,$v := index .Values "config" "extraRecords" }}
Record {{ $k }} {{ $v }}
{{- end }}
{{- end }}
[FILTER]
Name rewrite_tag
Match cri.*
Emitter_Name kube_tag_rewriter
Rule $kubernetes['pod_id'] .* kube.$kubernetes['namespace_name'].$kubernetes['container_name'] false
[FILTER]
Name lua
Match kube.*
script /fluent-bit/scripts/kubezero.lua
call nest_k8s_ns
outputs: |
[OUTPUT]
Match *
Name forward
Host {{ .Values.config.output.host }}
Port 24224
Shared_Key {{ .Values.config.output.sharedKey }}
tls {{ ternary "on" "off" .Values.config.output.tls }}
Send_options true
Require_ack_response true
customParsers: |
[PARSER]
Name cri-log
Format regex
Regex ^(?<time>.+) (?<stream>stdout|stderr) (?<logtag>F|P) (?<log>.*)$
Time_Key time
Time_Format %Y-%m-%dT%H:%M:%S.%L%z
luaScripts:
kubezero.lua: |
function nest_k8s_ns(tag, timestamp, record)
if not record['kubernetes']['namespace_name'] then
return 0, 0, 0
end
new_record = {}
for key, val in pairs(record) do
if key == 'kube' then
new_record[key] = {}
new_record[key][record['kubernetes']['namespace_name']] = record[key]
else
new_record[key] = record[key]
end
end
return 1, timestamp, new_record
end
daemonSetVolumes:
- name: varlog
hostPath:
path: /var/log
- name: newlog
hostPath:
path: /var/lib/containers/logs
daemonSetVolumeMounts:
- name: varlog
mountPath: /var/log
- name: newlog
mountPath: /var/lib/containers/logs