From 4b349aac76d20c8265df1648ae0595be331e695d Mon Sep 17 00:00:00 2001 From: Stefan Reimer Date: Tue, 16 Jul 2024 13:32:36 +0000 Subject: [PATCH] feat: point release for telemetry module beta roll out --- admin/libhelm.sh | 7 +- charts/kubezero-telemetry/Chart.yaml | 7 +- charts/kubezero-telemetry/README.md | 27 +++-- charts/kubezero-telemetry/values.yaml | 123 +++++++++++++++++++++-- charts/kubezero/Chart.yaml | 2 +- charts/kubezero/templates/telemetry.yaml | 17 ++++ charts/kubezero/values.yaml | 4 +- 7 files changed, 160 insertions(+), 27 deletions(-) diff --git a/admin/libhelm.sh b/admin/libhelm.sh index ac8e7b65..0826421c 100644 --- a/admin/libhelm.sh +++ b/admin/libhelm.sh @@ -158,9 +158,10 @@ function _helm() { local namespace="$(yq eval '.spec.destination.namespace' $WORKDIR/kubezero/templates/${module}.yaml)" targetRevision="" - _version="$(yq eval '.spec.source.targetRevision' $WORKDIR/kubezero/templates/${module}.yaml)" - - [ -n "$_version" ] && targetRevision="--version $_version" + if [ -z "$LOCAL_DEV" ]; then + _version="$(yq eval '.spec.source.targetRevision' $WORKDIR/kubezero/templates/${module}.yaml)" + [ -n "$_version" ] && targetRevision="--version $_version" + fi yq eval '.spec.source.helm.values' $WORKDIR/kubezero/templates/${module}.yaml > $WORKDIR/values.yaml diff --git a/charts/kubezero-telemetry/Chart.yaml b/charts/kubezero-telemetry/Chart.yaml index 4653e6b9..e692ac51 100644 --- a/charts/kubezero-telemetry/Chart.yaml +++ b/charts/kubezero-telemetry/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: kubezero-telemetry description: KubeZero Umbrella Chart for OpenTelemetry, Jaeger etc. 
type: application -version: 0.3.3 +version: 0.4.0 home: https://kubezero.com icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png keywords: @@ -10,6 +10,7 @@ keywords: - otel - OpenTelemetry - jaeger + - dataprepper maintainers: - name: Stefan Reimer email: stefan@zero-downtime.net @@ -21,6 +22,10 @@ dependencies: version: 0.97.1 repository: https://open-telemetry.github.io/opentelemetry-helm-charts condition: opentelemetry-collector.enabled + - name: data-prepper + version: 0.1.0 + repository: https://opensearch-project.github.io/helm-charts/ + condition: data-prepper.enabled - name: jaeger version: 3.1.1 repository: https://jaegertracing.github.io/helm-charts diff --git a/charts/kubezero-telemetry/README.md b/charts/kubezero-telemetry/README.md index 28553899..ded18109 100644 --- a/charts/kubezero-telemetry/README.md +++ b/charts/kubezero-telemetry/README.md @@ -1,6 +1,6 @@ # kubezero-telemetry -![Version: 0.3.3](https://img.shields.io/badge/Version-0.3.3-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) +![Version: 0.4.0](https://img.shields.io/badge/Version-0.4.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) KubeZero Umbrella Chart for OpenTelemetry, Jaeger etc. 
@@ -23,6 +23,7 @@ Kubernetes: `>= 1.26.0` | https://fluent.github.io/helm-charts | fluentd | 0.5.2 | | https://jaegertracing.github.io/helm-charts | jaeger | 3.1.1 | | https://open-telemetry.github.io/opentelemetry-helm-charts | opentelemetry-collector | 0.97.1 | +| https://opensearch-project.github.io/helm-charts/ | data-prepper | 0.1.0 | ## Values @@ -33,12 +34,13 @@ Kubernetes: `>= 1.26.0` | fluent-bit.config.flushInterval | int | `5` | | | fluent-bit.config.input.memBufLimit | string | `"16MB"` | | | fluent-bit.config.input.refreshInterval | int | `5` | | -| fluent-bit.config.inputs | string | `"[INPUT]\n Name tail\n Path /var/log/containers/*.log\n # Exclude ourselves to current error spam, https://github.com/fluent/fluent-bit/issues/5769\n Exclude_Path *logging-fluent-bit*\n multiline.parser cri\n Tag cri.*\n Skip_Long_Lines On\n Skip_Empty_Lines On\n DB /var/log/flb_kube.db\n DB.Sync Normal\n DB.locking true\n # Buffer_Max_Size 1M\n {{- with .Values.config.input }}\n Mem_Buf_Limit {{ default \"16MB\" .memBufLimit }}\n Refresh_Interval {{ default 5 .refreshInterval }}\n {{- end }}\n"` | | +| fluent-bit.config.inputs | string | `"[INPUT]\n Name tail\n Path /var/log/containers/*.log\n # Exclude ourselves to current error spam, https://github.com/fluent/fluent-bit/issues/5769\n # Todo: Rather limit / filter spam message than exclude all together -> ideally locally, next dataprepper\n Exclude_Path *logging-fluent-bit*\n multiline.parser cri\n Tag cri.*\n Skip_Long_Lines On\n Skip_Empty_Lines On\n DB /var/log/flb_kube.db\n DB.Sync Normal\n DB.locking true\n # Buffer_Max_Size 1M\n {{- with .Values.config.input }}\n Mem_Buf_Limit {{ .memBufLimit }}\n Refresh_Interval {{ .refreshInterval }}\n {{- end }}\n\n[INPUT]\n Name opentelemetry\n Tag otel\n"` | | | fluent-bit.config.logLevel | string | `"info"` | | | fluent-bit.config.output.host | string | `"telemetry-fluentd"` | | | fluent-bit.config.output.sharedKey | string | 
`"secretref+k8s://v1/Secret/kube-system/kubezero-secrets/telemetry.fluentd.source.sharedKey"` | | | fluent-bit.config.output.tls | bool | `false` | | -| fluent-bit.config.outputs | string | `"[OUTPUT]\n Match *\n Name forward\n Host {{ .Values.config.output.host }}\n Port 24224\n Shared_Key {{ .Values.config.output.sharedKey }}\n tls {{ ternary \"on\" \"off\" .Values.config.output.tls }}\n Send_options true\n Require_ack_response true\n"` | | +| fluent-bit.config.output_otel.host | string | `"telemetry-opentelemetry-collector"` | | +| fluent-bit.config.outputs | string | `"[OUTPUT]\n Match kube.*\n Name forward\n Host {{ .Values.config.output.host }}\n Port 24224\n Shared_Key {{ .Values.config.output.sharedKey }}\n tls {{ ternary \"on\" \"off\" .Values.config.output.tls }}\n Send_options true\n Require_ack_response true\n\n[OUTPUT]\n Name opentelemetry\n Match otel\n Host {{ .Values.config.output_otel.host }}\n Port 4318\n #Metrics_uri /v1/metrics\n Traces_uri /v1/traces\n #Logs_uri /v1/logs\n"` | | | fluent-bit.config.service | string | `"[SERVICE]\n Flush {{ .Values.config.flushInterval }}\n Daemon Off\n Log_Level {{ .Values.config.logLevel }}\n Parsers_File parsers.conf\n Parsers_File custom_parsers.conf\n HTTP_Server On\n HTTP_Listen 0.0.0.0\n HTTP_Port {{ .Values.service.port }}\n Health_Check On\n"` | | | fluent-bit.daemonSetVolumeMounts[0].mountPath | string | `"/var/log"` | | | fluent-bit.daemonSetVolumeMounts[0].name | string | `"varlog"` | | @@ -49,10 +51,15 @@ Kubernetes: `>= 1.26.0` | fluent-bit.daemonSetVolumes[1].hostPath.path | string | `"/var/lib/containers/logs"` | | | fluent-bit.daemonSetVolumes[1].name | string | `"newlog"` | | | fluent-bit.enabled | bool | `false` | | +| fluent-bit.extraPorts[0].containerPort | int | `4318` | | +| fluent-bit.extraPorts[0].name | string | `"otel"` | | +| fluent-bit.extraPorts[0].port | int | `4318` | | +| fluent-bit.extraPorts[0].protocol | string | `"TCP"` | | | fluent-bit.luaScripts."kubezero.lua" | string | 
`"function nest_k8s_ns(tag, timestamp, record)\n if not record['kubernetes']['namespace_name'] then\n return 0, 0, 0\n end\n new_record = {}\n for key, val in pairs(record) do\n if key == 'kube' then\n new_record[key] = {}\n new_record[key][record['kubernetes']['namespace_name']] = record[key]\n else\n new_record[key] = record[key]\n end\n end\n return 1, timestamp, new_record\nend\n"` | | | fluent-bit.resources.limits.memory | string | `"128Mi"` | | | fluent-bit.resources.requests.cpu | string | `"20m"` | | | fluent-bit.resources.requests.memory | string | `"48Mi"` | | +| fluent-bit.service.internalTrafficPolicy | string | `"Local"` | | | fluent-bit.serviceMonitor.enabled | bool | `false` | | | fluent-bit.testFramework.enabled | bool | `false` | | | fluent-bit.tolerations[0].effect | string | `"NoSchedule"` | | @@ -100,9 +107,6 @@ Kubernetes: `>= 1.26.0` | fluentd.volumes[0].secret.items[0].path | string | `"ca.crt"` | | | fluentd.volumes[0].secret.secretName | string | `"telemetry-nodes-http-tls"` | | | jaeger.agent.enabled | bool | `false` | | -| jaeger.collector.enabled | bool | `false` | | -| jaeger.collector.extraEnv[0].name | string | `"ES_TAGS_AS_FIELDS_ALL"` | | -| jaeger.collector.extraEnv[0].value | string | `"true"` | | | jaeger.collector.service.otlp.grpc.name | string | `"otlp-grpc"` | | | jaeger.collector.service.otlp.grpc.port | int | `4317` | | | jaeger.collector.service.otlp.http.name | string | `"otlp-http"` | | @@ -130,12 +134,8 @@ Kubernetes: `>= 1.26.0` | opensearch.nodeSets | list | `[]` | | | opensearch.prometheus | bool | `false` | | | opensearch.version | string | `"2.15.0"` | | -| opentelemetry-collector.config.exporters.opensearch/trace.http.auth.authenticator | string | `"basicauth/client"` | | -| opentelemetry-collector.config.exporters.opensearch/trace.http.endpoint | string | `"https://telemetry:9200"` | | -| opentelemetry-collector.config.exporters.opensearch/trace.http.tls.insecure | bool | `false` | | -| 
opentelemetry-collector.config.exporters.opensearch/trace.http.tls.insecure_skip_verify | bool | `true` | | -| opentelemetry-collector.config.extensions.basicauth/client.client_auth.password | string | `"admin"` | | -| opentelemetry-collector.config.extensions.basicauth/client.client_auth.username | string | `"admin"` | | +| opentelemetry-collector.config.exporters.otlp/jaeger.endpoint | string | `"telemetry-jaeger-collector:4317"` | | +| opentelemetry-collector.config.exporters.otlp/jaeger.tls.insecure | bool | `true` | | | opentelemetry-collector.config.extensions.health_check.endpoint | string | `"${env:MY_POD_IP}:13133"` | | | opentelemetry-collector.config.extensions.memory_ballast | object | `{}` | | | opentelemetry-collector.config.processors.batch | object | `{}` | | @@ -146,10 +146,9 @@ Kubernetes: `>= 1.26.0` | opentelemetry-collector.config.receivers.zipkin | string | `nil` | | | opentelemetry-collector.config.service.extensions[0] | string | `"health_check"` | | | opentelemetry-collector.config.service.extensions[1] | string | `"memory_ballast"` | | -| opentelemetry-collector.config.service.extensions[2] | string | `"basicauth/client"` | | | opentelemetry-collector.config.service.pipelines.logs | string | `nil` | | | opentelemetry-collector.config.service.pipelines.metrics | string | `nil` | | -| opentelemetry-collector.config.service.pipelines.traces.exporters[0] | string | `"opensearch/trace"` | | +| opentelemetry-collector.config.service.pipelines.traces.exporters[0] | string | `"otlp/jaeger"` | | | opentelemetry-collector.config.service.pipelines.traces.processors[0] | string | `"memory_limiter"` | | | opentelemetry-collector.config.service.pipelines.traces.processors[1] | string | `"batch"` | | | opentelemetry-collector.config.service.pipelines.traces.receivers[0] | string | `"otlp"` | | diff --git a/charts/kubezero-telemetry/values.yaml b/charts/kubezero-telemetry/values.yaml index 10a8367d..21daba55 100644 --- 
a/charts/kubezero-telemetry/values.yaml +++ b/charts/kubezero-telemetry/values.yaml @@ -1,3 +1,109 @@ +data-prepper: + enabled: false + + securityContext: + capabilities: + drop: + - ALL + #readOnlyRootFilesystem: true + #runAsNonRoot: true + #runAsUser: 1000 + + pipelineConfig: + config: + simple-sample-pipeline: null + otel-trace-pipeline: + workers: 1 + delay: "100" + source: + otel_trace_source: + ssl: false + buffer: + bounded_blocking: + # default value is 12800 + #buffer_size: 25600 + # Default is 200. + # Make sure buffer_size >= workers * batch_size + #batch_size: 400 + sink: + - pipeline: + name: "raw-traces-pipeline" + - pipeline: + name: "otel-service-map-pipeline" + raw-traces-pipeline: + workers: 1 + delay: 3000 + source: + pipeline: + name: "otel-trace-pipeline" + buffer: + bounded_blocking: + #buffer_size: 25600 + #batch_size: 400 + processor: + - otel_traces: + - otel_trace_group: + hosts: [ "https://telemetry:9200" ] + insecure: true + username: "admin" + password: "admin" + sink: + - opensearch: + hosts: ["https://telemetry:9200"] + username: "admin" + password: "admin" + insecure: true + index_type: trace-analytics-raw + otel-service-map-pipeline: + workers: 1 + delay: 3000 + source: + pipeline: + name: "otel-trace-pipeline" + processor: + - service_map: + # The window duration is the maximum length of time Data Prepper stores the most recent trace data to evaluate service-map relationships. + # The default is 3 minutes; this means we can detect relationships between services from spans reported in the last 3 minutes. + # Set a higher value if your applications have higher latency. 
+ window_duration: 180 + buffer: + bounded_blocking: + #buffer_size: 25600 + #batch_size: 400 + sink: + - opensearch: + hosts: ["https://telemetry:9200"] + username: "admin" + password: "admin" + insecure: true + index_type: trace-analytics-service-map + #index: otel-v1-apm-span-%{yyyy.MM.dd} + #max_retries: 20 + bulk_size: 4 + + config: + log4j2-rolling.properties: | + status = error + dest = err + name = PropertiesConfig + + appender.console.type = Console + appender.console.name = STDOUT + appender.console.layout.type = PatternLayout + appender.console.layout.pattern = %d{ISO8601} [%t] %-5p %40C - %m%n + + rootLogger.level = warn + rootLogger.appenderRef.stdout.ref = STDOUT + + logger.pipeline.name = org.opensearch.dataprepper.pipeline + logger.pipeline.level = info + + logger.parser.name = org.opensearch.dataprepper.parser + logger.parser.level = info + + logger.plugins.name = org.opensearch.dataprepper.plugins + logger.plugins.level = info + opentelemetry-collector: enabled: false @@ -17,6 +123,10 @@ opentelemetry-collector: endpoint: telemetry-jaeger-collector:4317 tls: insecure: true + otlp/data-prepper: + endpoint: telemetry-data-prepper:21890 + tls: + insecure: true processors: batch: {} @@ -45,6 +155,7 @@ opentelemetry-collector: traces: exporters: - otlp/jaeger + - otlp/data-prepper processors: - memory_limiter - batch @@ -83,12 +194,9 @@ jaeger: grpc: name: otlp-grpc port: 4317 - # http: - # name: otlp-http - # port: 4318 - #extraEnv: - # - name: ES_TAGS_AS_FIELDS_ALL - # value: "true" + http: + name: otlp-http + port: 4318 serviceMonitor: enabled: false @@ -103,6 +211,9 @@ jaeger: cmdlineParams: es.tls.enabled: "" es.tls.skip-host-verify: "" + es.num-replicas: 1 + es.num-shards: 2 + #es.tags-as-fields.all: "" provisionDataStore: cassandra: false diff --git a/charts/kubezero/Chart.yaml b/charts/kubezero/Chart.yaml index f6982b53..f7fbf3cd 100644 --- a/charts/kubezero/Chart.yaml +++ b/charts/kubezero/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: 
kubezero description: KubeZero - Root App of Apps chart type: application -version: 1.28.9-1 +version: 1.28.9-2 home: https://kubezero.com icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png keywords: diff --git a/charts/kubezero/templates/telemetry.yaml b/charts/kubezero/templates/telemetry.yaml index fa93fc60..d2ae8259 100644 --- a/charts/kubezero/templates/telemetry.yaml +++ b/charts/kubezero/templates/telemetry.yaml @@ -21,6 +21,23 @@ fluentd: enabled: {{ .Values.metrics.enabled }} {{- end }} +{{- if index .Values "telemetry" "data-prepper" }} +data-prepper: + {{- with index .Values "telemetry" "data-prepper" }} + {{- toYaml . | nindent 2 }} + {{- end }} +{{- end }} + +{{- if index .Values "telemetry" "opentelemetry-collector" }} +opentelemetry-collector: + {{- with index .Values "telemetry" "opentelemetry-collector" }} + {{- toYaml . | nindent 2 }} + {{- end }} + + serviceMonitor: + enabled: {{ .Values.metrics.enabled }} +{{- end }} + {{- if .Values.telemetry.jaeger }} jaeger: {{- with .Values.telemetry.jaeger }} diff --git a/charts/kubezero/values.yaml b/charts/kubezero/values.yaml index 8fb693f0..33641b9b 100644 --- a/charts/kubezero/values.yaml +++ b/charts/kubezero/values.yaml @@ -85,12 +85,12 @@ falco: telemetry: enabled: false namespace: telemetry - targetRevision: 0.3.0 + targetRevision: 0.4.0 operators: enabled: false namespace: operators - targetRevision: 0.1.3 + targetRevision: 0.1.4 metrics: enabled: false