fix[logging]: Use custom fluentd image, further tune fluentd ES connection

This commit is contained in:
Stefan Reimer 2021-09-03 19:21:14 +02:00
parent ab1e2c5922
commit f803531dc8
3 changed files with 28 additions and 21 deletions

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: kubezero-logging
description: KubeZero Umbrella Chart for complete EFK stack
type: application
version: 0.7.10
version: 0.7.12
appVersion: 1.6.0
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png

View File

@ -1,6 +1,6 @@
# kubezero-logging
![Version: 0.7.6](https://img.shields.io/badge/Version-0.7.6-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.6.0](https://img.shields.io/badge/AppVersion-1.6.0-informational?style=flat-square)
![Version: 0.7.11](https://img.shields.io/badge/Version-0.7.11-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.6.0](https://img.shields.io/badge/AppVersion-1.6.0-informational?style=flat-square)
KubeZero Umbrella Chart for complete EFK stack
@ -19,8 +19,8 @@ Kubernetes: `>= 1.18.0`
| Repository | Name | Version |
|------------|------|---------|
| | eck-operator | 1.6.0 |
| | fluent-bit | 0.15.15 |
| | fluentd | 0.2.6 |
| | fluent-bit | 0.16.3 |
| | fluentd | 0.2.10 |
| https://zero-down-time.github.io/kubezero/ | kubezero-lib | >= 0.1.3 |
## Changes from upstream
@ -78,7 +78,7 @@ Kubernetes: `>= 1.18.0`
| fluent-bit.config.output.sharedKey | string | `"cloudbender"` | |
| fluent-bit.config.output.tls | bool | `false` | |
| fluent-bit.config.outputs | string | `"[OUTPUT]\n Match *\n Name forward\n Host {{ .Values.config.output.host }}\n Port 24224\n Shared_Key {{ .Values.config.output.sharedKey }}\n tls {{ ternary \"on\" \"off\" .Values.config.output.tls }}\n Send_options true\n Require_ack_response true\n"` | |
| fluent-bit.config.service | string | `"[SERVICE]\n Flush {{ .Values.config.flushInterval }}\n Daemon Off\n Log_Level {{ .Values.config.logLevel }}\n Parsers_File parsers.conf\n Parsers_File custom_parsers.conf\n HTTP_Server On\n HTTP_Listen 0.0.0.0\n HTTP_Port {{ .Values.service.port }}\n"` | |
| fluent-bit.config.service | string | `"[SERVICE]\n Flush {{ .Values.config.flushInterval }}\n Daemon Off\n Log_Level {{ .Values.config.logLevel }}\n Parsers_File parsers.conf\n Parsers_File custom_parsers.conf\n HTTP_Server On\n HTTP_Listen 0.0.0.0\n HTTP_Port {{ .Values.service.port }}\n Health_Check On\n"` | |
| fluent-bit.daemonSetVolumeMounts[0].mountPath | string | `"/var/log"` | |
| fluent-bit.daemonSetVolumeMounts[0].name | string | `"varlog"` | |
| fluent-bit.daemonSetVolumeMounts[1].mountPath | string | `"/etc/machine-id"` | |
@ -90,7 +90,7 @@ Kubernetes: `>= 1.18.0`
| fluent-bit.daemonSetVolumes[1].hostPath.type | string | `"File"` | |
| fluent-bit.daemonSetVolumes[1].name | string | `"etcmachineid"` | |
| fluent-bit.enabled | bool | `false` | |
| fluent-bit.image.tag | string | `"1.8.3"` | |
| fluent-bit.image.tag | string | `"1.8.5"` | |
| fluent-bit.luaScripts."kubezero.lua" | string | `"function nest_k8s_ns(tag, timestamp, record)\n if not record['kubernetes']['namespace_name'] then\n return 0, 0, 0\n end\n new_record = {}\n for key, val in pairs(record) do\n if key == 'kube' then\n new_record[key] = {}\n new_record[key][record['kubernetes']['namespace_name']] = record[key]\n else\n new_record[key] = record[key]\n end\n end\n return 1, timestamp, new_record\nend\n"` | |
| fluent-bit.resources.limits.memory | string | `"64Mi"` | |
| fluent-bit.resources.requests.cpu | string | `"20m"` | |
@ -109,17 +109,18 @@ Kubernetes: `>= 1.18.0`
| fluentd.env[1].name | string | `"OUTPUT_PASSWORD"` | |
| fluentd.env[1].valueFrom.secretKeyRef.key | string | `"elastic"` | |
| fluentd.env[1].valueFrom.secretKeyRef.name | string | `"logging-es-elastic-user"` | |
| fluentd.fileConfigs."00_system.conf" | string | `"<system>\n root_dir /var/log/fluentd\n log_level info\n ignore_repeated_log_interval 60s\n ignore_same_log_interval 60s\n workers 2\n</system>"` | |
| fluentd.fileConfigs."00_system.conf" | string | `"<system>\n root_dir /var/log/fluentd\n log_level info\n ignore_repeated_log_interval 60s\n ignore_same_log_interval 60s\n workers 1\n</system>"` | |
| fluentd.fileConfigs."01_sources.conf" | string | `"<source>\n @type http\n @label @KUBERNETES\n port 9880\n bind 0.0.0.0\n keepalive_timeout 30\n</source>\n\n<source>\n @type forward\n @label @KUBERNETES\n port 24224\n bind 0.0.0.0\n # skip_invalid_event true\n send_keepalive_packet true\n <security>\n self_hostname \"#{ENV['HOSTNAME']}\"\n shared_key {{ .Values.shared_key }}\n </security>\n</source>"` | |
| fluentd.fileConfigs."02_filters.conf" | string | `"<label @KUBERNETES>\n # prevent log feedback loops eg. ES has issues etc.\n # discard logs from our own pods\n <match kube.logging.fluentd>\n @type relabel\n @label @FLUENT_LOG\n </match>\n\n # Exclude current fluent-bit multiline noise\n <filter kube.logging.fluent-bit>\n @type grep\n <exclude>\n key log\n pattern /could not append content to multiline context/\n </exclude>\n </filter>\n\n <match **>\n @type relabel\n @label @DISPATCH\n </match>\n</label>"` | |
| fluentd.fileConfigs."04_outputs.conf" | string | `"<label @OUTPUT>\n <match **>\n @id out_es\n @type elasticsearch\n # @log_level debug\n include_tag_key true\n id_key id\n remove_keys id\n\n # KubeZero pipeline incl. GeoIP etc.\n pipeline fluentd\n\n hosts \"{{ .Values.output.host }}\"\n port 9200\n scheme http\n user elastic\n password \"#{ENV['OUTPUT_PASSWORD']}\"\n\n log_es_400_reason\n logstash_format true\n reconnect_on_error true\n reload_on_failure true\n request_timeout 60s\n suppress_type_name true\n slow_flush_log_threshold 55.0\n\n # Retry failed bulk requests\n # https://github.com/uken/fluent-plugin-elasticsearch#unrecoverable-error-types\n unrecoverable_error_types [\"out_of_memory_error\"]\n bulk_message_request_threshold 2097152\n\n <buffer>\n @type file\n\n flush_mode interval\n flush_thread_count 1\n flush_interval 30s\n\n chunk_limit_size 8MB\n total_limit_size 2GB\n\n flush_at_shutdown true\n retry_type exponential_backoff\n retry_timeout 6h\n overflow_action drop_oldest_chunk\n disable_chunk_backup true\n </buffer>\n </match>\n</label>"` | |
| fluentd.image.repository | string | `"fluent/fluentd-kubernetes-daemonset"` | |
| fluentd.image.tag | string | `"v1-debian-elasticsearch"` | |
| fluentd.fileConfigs."02_filters.conf" | string | `"<label @KUBERNETES>\n # prevent log feedback loops eg. ES has issues etc.\n # discard logs from our own pods\n <match kube.logging.fluentd>\n @type relabel\n @label @FLUENT_LOG\n </match>\n\n # Exclude current fluent-bit multiline noise\n <filter kube.logging.fluent-bit>\n @type grep\n <exclude>\n key log\n pattern /could not append content to multiline context/\n </exclude>\n </filter>\n\n # Generate Hash ID to break endless loop for already ingested events during retries\n <filter **>\n @type elasticsearch_genid\n use_entire_record true\n </filter>\n\n # Route through DISPATCH for Prometheus metrics\n <match **>\n @type relabel\n @label @DISPATCH\n </match>\n</label>"` | |
| fluentd.fileConfigs."04_outputs.conf" | string | `"<label @OUTPUT>\n <match **>\n @id out_es\n @type elasticsearch\n # @log_level debug\n include_tag_key true\n\n id_key _hash\n remove_keys _hash\n write_operation create\n\n # KubeZero pipeline incl. GeoIP etc.\n pipeline fluentd\n\n hosts \"{{ .Values.output.host }}\"\n port 9200\n scheme http\n user elastic\n password \"#{ENV['OUTPUT_PASSWORD']}\"\n\n log_es_400_reason\n logstash_format true\n reconnect_on_error true\n reload_on_failure true\n request_timeout 60s\n slow_flush_log_threshold 55.0\n\n with_transporter_log true\n\n verify_es_version_at_startup false\n default_elasticsearch_version 7\n suppress_type_name true\n\n # Retry failed bulk requests\n # https://github.com/uken/fluent-plugin-elasticsearch#unrecoverable-error-types\n unrecoverable_error_types [\"out_of_memory_error\"]\n bulk_message_request_threshold 2097152\n\n <buffer>\n @type file\n\n flush_mode interval\n flush_thread_count 2\n flush_interval 30s\n\n chunk_limit_size 4MB\n total_limit_size 2GB\n\n flush_at_shutdown true\n retry_type exponential_backoff\n retry_timeout 6h\n overflow_action drop_oldest_chunk\n disable_chunk_backup true\n </buffer>\n </match>\n</label>"` | |
| fluentd.image.repository | string | `"public.ecr.aws/zero-downtime/fluentd-concenter"` | |
| fluentd.image.tag | string | `"v1.14.1"` | |
| fluentd.istio.enabled | bool | `false` | |
| fluentd.kind | string | `"Deployment"` | |
| fluentd.metrics.serviceMonitor.additionalLabels.release | string | `"metrics"` | |
| fluentd.metrics.serviceMonitor.enabled | bool | `false` | |
| fluentd.output.host | string | `"logging-es-http"` | |
| fluentd.podSecurityPolicy.enabled | bool | `false` | |
| fluentd.replicaCount | int | `1` | |
| fluentd.resources.limits.memory | string | `"512Mi"` | |
| fluentd.resources.requests.cpu | string | `"200m"` | |

View File

@ -45,9 +45,8 @@ kibana:
fluentd:
enabled: false
image:
repository: fluent/fluentd-kubernetes-daemonset
# tag: v1-debian-elasticsearch
tag: v1.11.1-debian-elasticsearch7-1.3
repository: public.ecr.aws/zero-downtime/fluentd-concenter
tag: v1.14.1
istio:
enabled: false
@ -123,7 +122,7 @@ fluentd:
fileConfigs:
00_system.conf: |-
<system>
root_dir /var/log/fluentd
root_dir /fluentd/log
log_level info
ignore_repeated_log_interval 60s
ignore_same_log_interval 60s
@ -189,8 +188,10 @@ fluentd:
@type elasticsearch
# @log_level debug
include_tag_key true
id_key _hash
remove_keys _hash
write_operation create
# KubeZero pipeline incl. GeoIP etc.
pipeline fluentd
@ -205,24 +206,29 @@ fluentd:
logstash_format true
reconnect_on_error true
reload_on_failure true
request_timeout 60s
suppress_type_name true
request_timeout 300s
slow_flush_log_threshold 55.0
#with_transporter_log true
verify_es_version_at_startup false
default_elasticsearch_version 7
suppress_type_name true
# Retry failed bulk requests
# https://github.com/uken/fluent-plugin-elasticsearch#unrecoverable-error-types
unrecoverable_error_types ["out_of_memory_error"]
bulk_message_request_threshold 2097152
bulk_message_request_threshold 1048576
<buffer>
@type file
flush_mode interval
flush_thread_count 2
flush_interval 30s
flush_interval 10s
chunk_limit_size 4MB
total_limit_size 2GB
chunk_limit_size 2MB
total_limit_size 1GB
flush_at_shutdown true
retry_type exponential_backoff