metrics:
  enabled: false

data-prepper:
  enabled: false
  image:
    tag: 2.10.1

  securityContext:
    capabilities:
      drop:
      - ALL
    #readOnlyRootFilesystem: true
    #runAsNonRoot: true
    #runAsUser: 1000

  pipelineConfig:
    config:
      simple-sample-pipeline: null

      otel-trace-pipeline:
        workers: 1
        delay: "100"
        source:
          otel_trace_source:
            ssl: false
        buffer:
          bounded_blocking:
            # default value is 12800
            #buffer_size: 25600
            # Default is 200.
            # Make sure buffer_size >= workers * batch_size
            #batch_size: 400
        sink:
          - pipeline:
              name: "raw-traces-pipeline"
          - pipeline:
              name: "otel-service-map-pipeline"

      raw-traces-pipeline:
        workers: 1
        delay: 3000
        source:
          pipeline:
            name: "otel-trace-pipeline"
        buffer:
          bounded_blocking:
            #buffer_size: 25600
            #batch_size: 400
        processor:
          - otel_traces:
          - otel_trace_group:
              hosts: [ "https://telemetry:9200" ]
              insecure: true
              username: "admin"
              password: "admin"
        sink:
          - opensearch:
              hosts: ["https://telemetry:9200"]
              username: "admin"
              password: "admin"
              insecure: true
              index_type: trace-analytics-raw

      otel-service-map-pipeline:
        workers: 1
        delay: 3000
        source:
          pipeline:
            name: "otel-trace-pipeline"
        processor:
          - service_map:
              window_duration: 180
        buffer:
          bounded_blocking:
            #buffer_size: 25600
            #batch_size: 400
        sink:
          - opensearch:
              hosts: ["https://telemetry:9200"]
              username: "admin"
              password: "admin"
              insecure: true
              index_type: trace-analytics-service-map
              #index: otel-v1-apm-span-%{yyyy.MM.dd}
              #max_retries: 20
              bulk_size: 4

  config:
    data-prepper-config.yaml: |
      ssl: false
      peer_forwarder:
        ssl: false
    log4j2-rolling.properties: |
      status = error
      dest = err
      name = PropertiesConfig
      appender.console.type = Console
      appender.console.name = STDOUT
      appender.console.layout.type = PatternLayout
      appender.console.layout.pattern = %d{ISO8601} [%t] %-5p %40C - %m%n
      rootLogger.level = warn
      rootLogger.appenderRef.stdout.ref = STDOUT
      logger.pipeline.name = org.opensearch.dataprepper.pipeline
      logger.pipeline.level = info
      logger.parser.name = org.opensearch.dataprepper.parser
      logger.parser.level = info
      logger.plugins.name = org.opensearch.dataprepper.plugins
      logger.plugins.level = info

opentelemetry-collector:
  enabled: false
  mode: deployment

  image:
    repository: "otel/opentelemetry-collector-contrib"

  config:
    extensions:
      health_check:
        endpoint: ${env:MY_POD_IP}:13133
    exporters:
      otlp/jaeger:
        endpoint: telemetry-jaeger-collector:4317
        tls:
          insecure: true
      otlp/data-prepper:
        endpoint: telemetry-data-prepper:21890
        tls:
          insecure: true
    receivers:
      otlp:
        protocols:
          grpc:
            endpoint: ${env:MY_POD_IP}:4317
          http:
            endpoint: ${env:MY_POD_IP}:4318
    service:
      telemetry:
        metrics:
          address: ${env:MY_POD_IP}:8888
      extensions:
        - health_check
      pipelines:
        traces:
          exporters:
            - otlp/jaeger
            - otlp/data-prepper
          processors:
            - memory_limiter
            - batch
          receivers:
            - otlp

  ports:
    jaeger-compact:
      enabled: false
    jaeger-thrift:
      enabled: false
    jaeger-grpc:
      enabled: false
    zipkin:
      enabled: false
    metrics:
      enabled: true

  serviceMonitor:
    enabled: false

  podDisruptionBudget:
    enabled: false
    # minAvailable: 2
    # maxUnavailable: 1

jaeger:
  enabled: false

  agent:
    enabled: false

  collector:
    service:
      otlp:
        grpc:
          name: otlp-grpc
          port: 4317
        http:
          name: otlp-http
          port: 4318
    serviceMonitor:
      enabled: false

  # https://www.jaegertracing.io/docs/1.53/deployment/#collector
  storage:
    type: elasticsearch
    elasticsearch:
      scheme: https
      host: telemetry
      user: admin
      password: admin
      cmdlineParams:
        es.tls.enabled: ""
        es.tls.skip-host-verify: ""
        es.num-replicas: 1
        es.num-shards: 2
        #es.tags-as-fields.all: ""

  provisionDataStore:
    cassandra: false
    elasticsearch: false

  query:
    agentSidecar:
      enabled: false
    serviceMonitor:
      enabled: false

  istio:
    enabled: false
    gateway: istio-ingress/private-ingressgateway
    url: jaeger.example.com

opensearch:
  version: 2.17.0
  prometheus: false

  # custom cluster settings
  #settings:
  #  index.number_of_shards: 1

  nodeSets: []
  #- name: default-nodes
  #  replicas: 2
  #  storage:
  #    size: 16Gi
  #    class: my-fancy-SSDs
  #    zone: us-west-2a
  #  resources:
  #    limits:
  #      #cpu: 1
  #      memory: 2Gi
  #    requests:
  #      cpu: 500m
  #      memory: 2Gi

  dashboard:
    enabled: false

    istio:
      enabled: false
      gateway: istio-ingress/private-ingressgateway
      url: telemetry-dashboard.example.com

# New logging pipeline
fluentd:
  enabled: false
  image:
    repository: public.ecr.aws/zero-downtime/fluentd-concenter
    tag: v1.16.5-1-g09dc31c
  istio:
    enabled: false
  kind: StatefulSet
  replicaCount: 1

  #plugins:
  #- fluent-plugin-s3

  source:
    sharedKey: secretref+k8s://v1/Secret/kubezero/kubezero-secrets/telemetry.fluentd.source.sharedKey # "cloudbender"

  output:
    # Defaults to OpenSearch in same namespace
    host: telemetry
    user: admin
    password: admin

  service:
    ports:
      - name: tcp-forward
        protocol: TCP
        containerPort: 24224
      - name: http-fluentd
        protocol: TCP
        containerPort: 9880

  metrics:
    serviceMonitor:
      enabled: false

  dashboards:
    enabled: false

  # No need for docker nor /var/log
  mountVarLogDirectory: false
  mountDockerContainersDirectory: false

  # no rbac required until we need WebAuth identity for e.g. s3
  rbac:
    create: false

  resources:
    requests:
      cpu: 200m
      memory: 256Mi
    limits:
      memory: 512Mi

  persistence:
    enabled: true
    storageClass: ""
    size: 1Gi

  volumes:
    - name: trust-store
      secret:
        secretName: telemetry-nodes-http-tls
        items:
          - key: tls.crt
            path: ca.crt

  volumeMounts:
    - name: trust-store
      mountPath: "/run/pki"
      readOnly: true

  securityContext:
    capabilities:
      drop:
      - ALL
    #readOnlyRootFilesystem: true
    runAsNonRoot: true
    runAsUser: 100

  configMapConfigs:
    - fluentd-prometheus-conf

  fileConfigs:
    00_system.conf: |-
      <system>
        root_dir /fluentd/log
        log_level info
        ignore_repeated_log_interval 60s
        ignore_same_log_interval 60s
        workers 1
      </system>
    01_sources.conf: |-
      <source>
        @type http
        @label @KUBERNETES
        port 9880
        bind 0.0.0.0
        keepalive_timeout 30
      </source>

      <source>
        @type forward
        @label @KUBERNETES
        port 24224
        bind 0.0.0.0
        # skip_invalid_event true
        send_keepalive_packet true
        <security>
          self_hostname "telemetry-fluentd"
          shared_key {{ .Values.source.sharedKey }}
        </security>
      </source>
    02_filters.conf: |-
      <label @KUBERNETES>
        # prevent log feedback loops, discard logs from our own pods
        <match kube.logging.fluentd>
          @type relabel
          @label @FLUENT_LOG
        </match>

        # Exclude current fluent-bit multiline noise
        # Still relevant ??
        <filter kube.logging.fluent-bit>
          @type grep
          <exclude>
            key log
            pattern /could not append content to multiline context/
          </exclude>
        </filter>

        # Generate Hash ID to break endless loop for already ingested events during retries
        <filter **>
          @type opensearch_genid
          use_entire_record true
        </filter>

        # Route through DISPATCH for Prometheus metrics
        <match **>
          @type relabel
          @label @DISPATCH
        </match>
      </label>
    04_outputs.conf: |-
      <label @OUTPUT>
        <match **>
          @id out_os
          @type opensearch
          # @log_level debug
          include_tag_key true
          id_key _hash
          remove_keys _hash
          write_operation create

          # we have oj in the fluentd-concenter image
          prefer_oj_serializer true

          # KubeZero pipeline incl. GeoIP etc.
          #pipeline fluentd

          http_backend typhoeus
          ca_file /run/pki/ca.crt
          port 9200
          scheme https
          hosts {{ .Values.output.host }}
          user {{ .Values.output.user }}
          password {{ .Values.output.password }}

          log_es_400_reason
          logstash_format true
          reconnect_on_error true
          reload_on_failure true
          request_timeout 300s
          #sniffer_class_name Fluent::Plugin::OpenSearchSimpleSniffer
          #with_transporter_log true
          verify_es_version_at_startup false
          default_opensearch_version 2
          #suppress_type_name true

          # Retry failed bulk requests
          # https://github.com/uken/fluent-plugin-elasticsearch#unrecoverable-error-types
          unrecoverable_error_types ["out_of_memory_error"]
          bulk_message_request_threshold 1048576

          <buffer>
            @type file
            flush_mode interval
            flush_thread_count 2
            flush_interval 10s
            chunk_limit_size 2MB
            total_limit_size 1GB
            flush_at_shutdown true
            retry_type exponential_backoff
            retry_timeout 6h
            overflow_action drop_oldest_chunk
            disable_chunk_backup true
          </buffer>
        </match>
      </label>

fluent-bit:
  enabled: false

  #image:
  #  repository: public.ecr.aws/zero-downtime/fluent-bit
  #  tag: 2.0.10

  testFramework:
    enabled: false

  service:
    internalTrafficPolicy: Local

  extraPorts:
    - name: otel
      port: 4318
      containerPort: 4318
      protocol: TCP

  serviceMonitor:
    enabled: false

  #rbac:
  #  nodeAccess: true

  #hostNetwork: true
  #dnsPolicy: ClusterFirstWithHostNet

  tolerations:
    - effect: NoSchedule
      operator: Exists

  resources:
    requests:
      cpu: 20m
      memory: 48Mi
    limits:
      memory: 128Mi

  config:
    output:
      host: telemetry-fluentd
      sharedKey: secretref+k8s://v1/Secret/kubezero/kubezero-secrets/telemetry.fluentd.source.sharedKey
      tls: false

    output_otel:
      host: telemetry-opentelemetry-collector

    input:
      memBufLimit: 16MB
      refreshInterval: 5

    logLevel: info
    flushInterval: 5

    #extraRecords:
    #  source.clustername: MyKubeCluster

    service: |
      [SERVICE]
          Flush {{ .Values.config.flushInterval }}
          Daemon Off
          Log_Level {{ .Values.config.logLevel }}
          Parsers_File parsers.conf
          Parsers_File custom_parsers.conf
          HTTP_Server On
          HTTP_Listen 0.0.0.0
          HTTP_Port {{ .Values.service.port }}
          Health_Check On

    inputs: |
      [INPUT]
          Name tail
          Path /var/log/containers/*.log
          # Exclude ourselves due to current error spam, https://github.com/fluent/fluent-bit/issues/5769
          # Todo: rather limit / filter the spam message than exclude altogether -> ideally locally, next dataprepper
          Exclude_Path *logging-fluent-bit*
          multiline.parser cri
          Tag cri.*
          Skip_Long_Lines On
          Skip_Empty_Lines On
          DB /var/log/flb_kube.db
          DB.Sync Normal
          DB.locking true
          # Buffer_Max_Size 1M
          {{- with .Values.config.input }}
          Mem_Buf_Limit {{ .memBufLimit }}
          Refresh_Interval {{ .refreshInterval }}
          {{- end }}

      [INPUT]
          Name opentelemetry
          Tag otel

    filters: |
      [FILTER]
          Name parser
          Match cri.*
          Parser cri-log
          Key_Name log

      [FILTER]
          Name kubernetes
          Match cri.*
          Merge_Log On
          Merge_Log_Key kube
          Kube_Tag_Prefix cri.var.log.containers.
          Keep_Log Off
          Annotations Off
          K8S-Logging.Parser Off
          K8S-Logging.Exclude Off
          Kube_Meta_Cache_TTL 3600s
          Buffer_Size 0
          #Use_Kubelet true

      {{- if index .Values "config" "extraRecords" }}
      [FILTER]
          Name record_modifier
          Match cri.*
          {{- range $k,$v := index .Values "config" "extraRecords" }}
          Record {{ $k }} {{ $v }}
          {{- end }}
      {{- end }}

      [FILTER]
          Name rewrite_tag
          Match cri.*
          Emitter_Name kube_tag_rewriter
          Rule $kubernetes['pod_id'] .* kube.$kubernetes['namespace_name'].$kubernetes['container_name'] false

      [FILTER]
          Name lua
          Match kube.*
          script /fluent-bit/scripts/kubezero.lua
          call nest_k8s_ns

    outputs: |
      [OUTPUT]
          Match kube.*
          Name forward
          Host {{ .Values.config.output.host }}
          Port 24224
          Shared_Key {{ .Values.config.output.sharedKey }}
          tls {{ ternary "on" "off" .Values.config.output.tls }}
          Send_options true
          Require_ack_response true

      [OUTPUT]
          Name opentelemetry
          Match otel
          Host {{ .Values.config.output_otel.host }}
          Port 4318
          #Metrics_uri /v1/metrics
          Traces_uri /v1/traces
          #Logs_uri /v1/logs

    customParsers: |
      [PARSER]
          Name cri-log
          Format regex
          Regex ^(?<time>.+) (?<stream>stdout|stderr) (?<logtag>F|P) (?<log>.*)$
          Time_Key time
          Time_Format %Y-%m-%dT%H:%M:%S.%L%z

  luaScripts:
    kubezero.lua: |
      function nest_k8s_ns(tag, timestamp, record)
        if not record['kubernetes']['namespace_name'] then
          return 0, 0, 0
        end

        new_record = {}

        for key, val in pairs(record) do
          if key == 'kube' then
            new_record[key] = {}
            new_record[key][record['kubernetes']['namespace_name']] = record[key]
          else
            new_record[key] = record[key]
          end
        end

        return 1, timestamp, new_record
      end

  daemonSetVolumes:
    - name: varlog
      hostPath:
        path: /var/log
    - name: newlog
      hostPath:
        path: /var/lib/containers/logs

  daemonSetVolumeMounts:
    - name: varlog
      mountPath: /var/log
    - name: newlog
      mountPath: /var/lib/containers/logs
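
# Minimal usage sketch (an assumption, not part of the defaults above): every component
# ships disabled, so a cluster-specific override file is expected to flip the toggles it
# needs. The keys below are illustrative only and reuse values already defined in this file.
#
#opensearch:
#  nodeSets:
#    - name: default-nodes
#      replicas: 2
#      storage:
#        size: 16Gi
#  dashboard:
#    enabled: true
#
#fluentd:
#  enabled: true
#
#fluent-bit:
#  enabled: true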