feat: point release for telemetry module beta rollout

This commit is contained in:
Stefan Reimer 2024-07-16 13:32:36 +00:00
parent f1f196c860
commit 4b349aac76
7 changed files with 160 additions and 27 deletions

View File

@ -158,9 +158,10 @@ function _helm() {
local namespace="$(yq eval '.spec.destination.namespace' $WORKDIR/kubezero/templates/${module}.yaml)" local namespace="$(yq eval '.spec.destination.namespace' $WORKDIR/kubezero/templates/${module}.yaml)"
targetRevision="" targetRevision=""
if [ -z "$LOCAL_DEV" ]; then
_version="$(yq eval '.spec.source.targetRevision' $WORKDIR/kubezero/templates/${module}.yaml)" _version="$(yq eval '.spec.source.targetRevision' $WORKDIR/kubezero/templates/${module}.yaml)"
[ -n "$_version" ] && targetRevision="--version $_version" [ -n "$_version" ] && targetRevision="--version $_version"
fi
yq eval '.spec.source.helm.values' $WORKDIR/kubezero/templates/${module}.yaml > $WORKDIR/values.yaml yq eval '.spec.source.helm.values' $WORKDIR/kubezero/templates/${module}.yaml > $WORKDIR/values.yaml

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: kubezero-telemetry name: kubezero-telemetry
description: KubeZero Umbrella Chart for OpenTelemetry, Jaeger etc. description: KubeZero Umbrella Chart for OpenTelemetry, Jaeger etc.
type: application type: application
version: 0.3.3 version: 0.4.0
home: https://kubezero.com home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords: keywords:
@ -10,6 +10,7 @@ keywords:
- otel - otel
- OpenTelemetry - OpenTelemetry
- jaeger - jaeger
- dataprepper
maintainers: maintainers:
- name: Stefan Reimer - name: Stefan Reimer
email: stefan@zero-downtime.net email: stefan@zero-downtime.net
@ -21,6 +22,10 @@ dependencies:
version: 0.97.1 version: 0.97.1
repository: https://open-telemetry.github.io/opentelemetry-helm-charts repository: https://open-telemetry.github.io/opentelemetry-helm-charts
condition: opentelemetry-collector.enabled condition: opentelemetry-collector.enabled
- name: data-prepper
version: 0.1.0
repository: https://opensearch-project.github.io/helm-charts/
condition: data-prepper.enabled
- name: jaeger - name: jaeger
version: 3.1.1 version: 3.1.1
repository: https://jaegertracing.github.io/helm-charts repository: https://jaegertracing.github.io/helm-charts

View File

@ -1,6 +1,6 @@
# kubezero-telemetry # kubezero-telemetry
![Version: 0.3.3](https://img.shields.io/badge/Version-0.3.3-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![Version: 0.4.0](https://img.shields.io/badge/Version-0.4.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
KubeZero Umbrella Chart for OpenTelemetry, Jaeger etc. KubeZero Umbrella Chart for OpenTelemetry, Jaeger etc.
@ -23,6 +23,7 @@ Kubernetes: `>= 1.26.0`
| https://fluent.github.io/helm-charts | fluentd | 0.5.2 | | https://fluent.github.io/helm-charts | fluentd | 0.5.2 |
| https://jaegertracing.github.io/helm-charts | jaeger | 3.1.1 | | https://jaegertracing.github.io/helm-charts | jaeger | 3.1.1 |
| https://open-telemetry.github.io/opentelemetry-helm-charts | opentelemetry-collector | 0.97.1 | | https://open-telemetry.github.io/opentelemetry-helm-charts | opentelemetry-collector | 0.97.1 |
| https://opensearch-project.github.io/helm-charts/ | data-prepper | 0.1.0 |
## Values ## Values
@ -33,12 +34,13 @@ Kubernetes: `>= 1.26.0`
| fluent-bit.config.flushInterval | int | `5` | | | fluent-bit.config.flushInterval | int | `5` | |
| fluent-bit.config.input.memBufLimit | string | `"16MB"` | | | fluent-bit.config.input.memBufLimit | string | `"16MB"` | |
| fluent-bit.config.input.refreshInterval | int | `5` | | | fluent-bit.config.input.refreshInterval | int | `5` | |
| fluent-bit.config.inputs | string | `"[INPUT]\n Name tail\n Path /var/log/containers/*.log\n # Exclude ourselves to current error spam, https://github.com/fluent/fluent-bit/issues/5769\n Exclude_Path *logging-fluent-bit*\n multiline.parser cri\n Tag cri.*\n Skip_Long_Lines On\n Skip_Empty_Lines On\n DB /var/log/flb_kube.db\n DB.Sync Normal\n DB.locking true\n # Buffer_Max_Size 1M\n {{- with .Values.config.input }}\n Mem_Buf_Limit {{ default \"16MB\" .memBufLimit }}\n Refresh_Interval {{ default 5 .refreshInterval }}\n {{- end }}\n"` | | | fluent-bit.config.inputs | string | `"[INPUT]\n Name tail\n Path /var/log/containers/*.log\n # Exclude ourselves to current error spam, https://github.com/fluent/fluent-bit/issues/5769\n # Todo: Rather limit / filter spam message than exclude all together -> ideally locally, next dataprepper\n Exclude_Path *logging-fluent-bit*\n multiline.parser cri\n Tag cri.*\n Skip_Long_Lines On\n Skip_Empty_Lines On\n DB /var/log/flb_kube.db\n DB.Sync Normal\n DB.locking true\n # Buffer_Max_Size 1M\n {{- with .Values.config.input }}\n Mem_Buf_Limit {{ .memBufLimit }}\n Refresh_Interval {{ .refreshInterval }}\n {{- end }}\n\n[INPUT]\n Name opentelemetry\n Tag otel\n"` | |
| fluent-bit.config.logLevel | string | `"info"` | | | fluent-bit.config.logLevel | string | `"info"` | |
| fluent-bit.config.output.host | string | `"telemetry-fluentd"` | | | fluent-bit.config.output.host | string | `"telemetry-fluentd"` | |
| fluent-bit.config.output.sharedKey | string | `"secretref+k8s://v1/Secret/kube-system/kubezero-secrets/telemetry.fluentd.source.sharedKey"` | | | fluent-bit.config.output.sharedKey | string | `"secretref+k8s://v1/Secret/kube-system/kubezero-secrets/telemetry.fluentd.source.sharedKey"` | |
| fluent-bit.config.output.tls | bool | `false` | | | fluent-bit.config.output.tls | bool | `false` | |
| fluent-bit.config.outputs | string | `"[OUTPUT]\n Match *\n Name forward\n Host {{ .Values.config.output.host }}\n Port 24224\n Shared_Key {{ .Values.config.output.sharedKey }}\n tls {{ ternary \"on\" \"off\" .Values.config.output.tls }}\n Send_options true\n Require_ack_response true\n"` | | | fluent-bit.config.output_otel.host | string | `"telemetry-opentelemetry-collector"` | |
| fluent-bit.config.outputs | string | `"[OUTPUT]\n Match kube.*\n Name forward\n Host {{ .Values.config.output.host }}\n Port 24224\n Shared_Key {{ .Values.config.output.sharedKey }}\n tls {{ ternary \"on\" \"off\" .Values.config.output.tls }}\n Send_options true\n Require_ack_response true\n\n[OUTPUT]\n Name opentelemetry\n Match otel\n Host {{ .Values.config.output_otel.host }}\n Port 4318\n #Metrics_uri /v1/metrics\n Traces_uri /v1/traces\n #Logs_uri /v1/logs\n"` | |
| fluent-bit.config.service | string | `"[SERVICE]\n Flush {{ .Values.config.flushInterval }}\n Daemon Off\n Log_Level {{ .Values.config.logLevel }}\n Parsers_File parsers.conf\n Parsers_File custom_parsers.conf\n HTTP_Server On\n HTTP_Listen 0.0.0.0\n HTTP_Port {{ .Values.service.port }}\n Health_Check On\n"` | | | fluent-bit.config.service | string | `"[SERVICE]\n Flush {{ .Values.config.flushInterval }}\n Daemon Off\n Log_Level {{ .Values.config.logLevel }}\n Parsers_File parsers.conf\n Parsers_File custom_parsers.conf\n HTTP_Server On\n HTTP_Listen 0.0.0.0\n HTTP_Port {{ .Values.service.port }}\n Health_Check On\n"` | |
| fluent-bit.daemonSetVolumeMounts[0].mountPath | string | `"/var/log"` | | | fluent-bit.daemonSetVolumeMounts[0].mountPath | string | `"/var/log"` | |
| fluent-bit.daemonSetVolumeMounts[0].name | string | `"varlog"` | | | fluent-bit.daemonSetVolumeMounts[0].name | string | `"varlog"` | |
@ -49,10 +51,15 @@ Kubernetes: `>= 1.26.0`
| fluent-bit.daemonSetVolumes[1].hostPath.path | string | `"/var/lib/containers/logs"` | | | fluent-bit.daemonSetVolumes[1].hostPath.path | string | `"/var/lib/containers/logs"` | |
| fluent-bit.daemonSetVolumes[1].name | string | `"newlog"` | | | fluent-bit.daemonSetVolumes[1].name | string | `"newlog"` | |
| fluent-bit.enabled | bool | `false` | | | fluent-bit.enabled | bool | `false` | |
| fluent-bit.extraPorts[0].containerPort | int | `4318` | |
| fluent-bit.extraPorts[0].name | string | `"otel"` | |
| fluent-bit.extraPorts[0].port | int | `4318` | |
| fluent-bit.extraPorts[0].protocol | string | `"TCP"` | |
| fluent-bit.luaScripts."kubezero.lua" | string | `"function nest_k8s_ns(tag, timestamp, record)\n if not record['kubernetes']['namespace_name'] then\n return 0, 0, 0\n end\n new_record = {}\n for key, val in pairs(record) do\n if key == 'kube' then\n new_record[key] = {}\n new_record[key][record['kubernetes']['namespace_name']] = record[key]\n else\n new_record[key] = record[key]\n end\n end\n return 1, timestamp, new_record\nend\n"` | | | fluent-bit.luaScripts."kubezero.lua" | string | `"function nest_k8s_ns(tag, timestamp, record)\n if not record['kubernetes']['namespace_name'] then\n return 0, 0, 0\n end\n new_record = {}\n for key, val in pairs(record) do\n if key == 'kube' then\n new_record[key] = {}\n new_record[key][record['kubernetes']['namespace_name']] = record[key]\n else\n new_record[key] = record[key]\n end\n end\n return 1, timestamp, new_record\nend\n"` | |
| fluent-bit.resources.limits.memory | string | `"128Mi"` | | | fluent-bit.resources.limits.memory | string | `"128Mi"` | |
| fluent-bit.resources.requests.cpu | string | `"20m"` | | | fluent-bit.resources.requests.cpu | string | `"20m"` | |
| fluent-bit.resources.requests.memory | string | `"48Mi"` | | | fluent-bit.resources.requests.memory | string | `"48Mi"` | |
| fluent-bit.service.internalTrafficPolicy | string | `"Local"` | |
| fluent-bit.serviceMonitor.enabled | bool | `false` | | | fluent-bit.serviceMonitor.enabled | bool | `false` | |
| fluent-bit.testFramework.enabled | bool | `false` | | | fluent-bit.testFramework.enabled | bool | `false` | |
| fluent-bit.tolerations[0].effect | string | `"NoSchedule"` | | | fluent-bit.tolerations[0].effect | string | `"NoSchedule"` | |
@ -100,9 +107,6 @@ Kubernetes: `>= 1.26.0`
| fluentd.volumes[0].secret.items[0].path | string | `"ca.crt"` | | | fluentd.volumes[0].secret.items[0].path | string | `"ca.crt"` | |
| fluentd.volumes[0].secret.secretName | string | `"telemetry-nodes-http-tls"` | | | fluentd.volumes[0].secret.secretName | string | `"telemetry-nodes-http-tls"` | |
| jaeger.agent.enabled | bool | `false` | | | jaeger.agent.enabled | bool | `false` | |
| jaeger.collector.enabled | bool | `false` | |
| jaeger.collector.extraEnv[0].name | string | `"ES_TAGS_AS_FIELDS_ALL"` | |
| jaeger.collector.extraEnv[0].value | string | `"true"` | |
| jaeger.collector.service.otlp.grpc.name | string | `"otlp-grpc"` | | | jaeger.collector.service.otlp.grpc.name | string | `"otlp-grpc"` | |
| jaeger.collector.service.otlp.grpc.port | int | `4317` | | | jaeger.collector.service.otlp.grpc.port | int | `4317` | |
| jaeger.collector.service.otlp.http.name | string | `"otlp-http"` | | | jaeger.collector.service.otlp.http.name | string | `"otlp-http"` | |
@ -130,12 +134,8 @@ Kubernetes: `>= 1.26.0`
| opensearch.nodeSets | list | `[]` | | | opensearch.nodeSets | list | `[]` | |
| opensearch.prometheus | bool | `false` | | | opensearch.prometheus | bool | `false` | |
| opensearch.version | string | `"2.15.0"` | | | opensearch.version | string | `"2.15.0"` | |
| opentelemetry-collector.config.exporters.opensearch/trace.http.auth.authenticator | string | `"basicauth/client"` | | | opentelemetry-collector.config.exporters.otlp/jaeger.endpoint | string | `"telemetry-jaeger-collector:4317"` | |
| opentelemetry-collector.config.exporters.opensearch/trace.http.endpoint | string | `"https://telemetry:9200"` | | | opentelemetry-collector.config.exporters.otlp/jaeger.tls.insecure | bool | `true` | |
| opentelemetry-collector.config.exporters.opensearch/trace.http.tls.insecure | bool | `false` | |
| opentelemetry-collector.config.exporters.opensearch/trace.http.tls.insecure_skip_verify | bool | `true` | |
| opentelemetry-collector.config.extensions.basicauth/client.client_auth.password | string | `"admin"` | |
| opentelemetry-collector.config.extensions.basicauth/client.client_auth.username | string | `"admin"` | |
| opentelemetry-collector.config.extensions.health_check.endpoint | string | `"${env:MY_POD_IP}:13133"` | | | opentelemetry-collector.config.extensions.health_check.endpoint | string | `"${env:MY_POD_IP}:13133"` | |
| opentelemetry-collector.config.extensions.memory_ballast | object | `{}` | | | opentelemetry-collector.config.extensions.memory_ballast | object | `{}` | |
| opentelemetry-collector.config.processors.batch | object | `{}` | | | opentelemetry-collector.config.processors.batch | object | `{}` | |
@ -146,10 +146,9 @@ Kubernetes: `>= 1.26.0`
| opentelemetry-collector.config.receivers.zipkin | string | `nil` | | | opentelemetry-collector.config.receivers.zipkin | string | `nil` | |
| opentelemetry-collector.config.service.extensions[0] | string | `"health_check"` | | | opentelemetry-collector.config.service.extensions[0] | string | `"health_check"` | |
| opentelemetry-collector.config.service.extensions[1] | string | `"memory_ballast"` | | | opentelemetry-collector.config.service.extensions[1] | string | `"memory_ballast"` | |
| opentelemetry-collector.config.service.extensions[2] | string | `"basicauth/client"` | |
| opentelemetry-collector.config.service.pipelines.logs | string | `nil` | | | opentelemetry-collector.config.service.pipelines.logs | string | `nil` | |
| opentelemetry-collector.config.service.pipelines.metrics | string | `nil` | | | opentelemetry-collector.config.service.pipelines.metrics | string | `nil` | |
| opentelemetry-collector.config.service.pipelines.traces.exporters[0] | string | `"opensearch/trace"` | | | opentelemetry-collector.config.service.pipelines.traces.exporters[0] | string | `"otlp/jaeger"` | |
| opentelemetry-collector.config.service.pipelines.traces.processors[0] | string | `"memory_limiter"` | | | opentelemetry-collector.config.service.pipelines.traces.processors[0] | string | `"memory_limiter"` | |
| opentelemetry-collector.config.service.pipelines.traces.processors[1] | string | `"batch"` | | | opentelemetry-collector.config.service.pipelines.traces.processors[1] | string | `"batch"` | |
| opentelemetry-collector.config.service.pipelines.traces.receivers[0] | string | `"otlp"` | | | opentelemetry-collector.config.service.pipelines.traces.receivers[0] | string | `"otlp"` | |

View File

@ -1,3 +1,109 @@
# Values for the data-prepper subchart (gated by the data-prepper.enabled
# condition in Chart.yaml). Disabled by default.
# Defines three trace pipelines: otel-trace-pipeline receives traces and fans
# out to raw-traces-pipeline and otel-service-map-pipeline, both of which write
# to OpenSearch at https://telemetry:9200.
data-prepper:
enabled: false
# Drop all capabilities; the stricter settings below are left commented out.
securityContext:
capabilities:
drop:
- ALL
#readOnlyRootFilesystem: true
#runAsNonRoot: true
#runAsUser: 1000
pipelineConfig:
config:
# NOTE(review): presumably nulls out a sample pipeline shipped in the
# upstream chart defaults - confirm against the chart's values.yaml.
simple-sample-pipeline: null
# Entry pipeline: otel_trace_source in, two downstream pipelines out.
otel-trace-pipeline:
workers: 1
# NOTE(review): quoted string here, bare integers (3000) in the other two
# pipelines - confirm both forms are accepted.
delay: "100"
source:
otel_trace_source:
# Source accepts non-SSL connections.
ssl: false
buffer:
bounded_blocking:
# buffer_size: default value is 12800
#buffer_size: 25600
# batch_size: default is 200.
# Make sure buffer_size >= workers * batch_size
#batch_size: 400
sink:
- pipeline:
name: "raw-traces-pipeline"
- pipeline:
name: "otel-service-map-pipeline"
# Reads from otel-trace-pipeline, applies the otel_traces and
# otel_trace_group processors, and writes to OpenSearch with
# index_type trace-analytics-raw.
raw-traces-pipeline:
workers: 1
delay: 3000
source:
pipeline:
name: "otel-trace-pipeline"
buffer:
bounded_blocking:
#buffer_size: 25600
#batch_size: 400
processor:
- otel_traces:
- otel_trace_group:
hosts: [ "https://telemetry:9200" ]
# NOTE(review): insecure: true presumably disables TLS certificate
# verification, and the admin/admin credentials are hardcoded here and in
# both opensearch sinks below - consider sourcing them from a secret.
insecure: true
username: "admin"
password: "admin"
sink:
- opensearch:
hosts: ["https://telemetry:9200"]
username: "admin"
password: "admin"
insecure: true
index_type: trace-analytics-raw
# Reads from otel-trace-pipeline, applies the service_map processor, and
# writes to OpenSearch with index_type trace-analytics-service-map.
otel-service-map-pipeline:
workers: 1
delay: 3000
source:
pipeline:
name: "otel-trace-pipeline"
processor:
- service_map:
# The window duration is the maximum length of time Data Prepper keeps the most recent trace data to evaluate service-map relationships.
# The default is 3 minutes, which means relationships between services can be detected from spans reported in the last 3 minutes.
# Set a higher value if your applications have higher latency.
# NOTE(review): presumably in seconds (180 s = the 3 minute default above) - confirm.
window_duration: 180
buffer:
bounded_blocking:
#buffer_size: 25600
#batch_size: 400
sink:
- opensearch:
hosts: ["https://telemetry:9200"]
username: "admin"
password: "admin"
insecure: true
index_type: trace-analytics-service-map
#index: otel-v1-apm-span-%{yyyy.MM.dd}
#max_retries: 20
# NOTE(review): unit of bulk_size is not shown here - confirm against the
# opensearch sink documentation.
bulk_size: 4
# Logging configuration supplied as a log4j2 properties file: a single console
# appender named STDOUT, root logger at warn, and the dataprepper pipeline,
# parser and plugins loggers at info.
config:
log4j2-rolling.properties: |
status = error
dest = err
name = PropertiesConfig
appender.console.type = Console
appender.console.name = STDOUT
appender.console.layout.type = PatternLayout
appender.console.layout.pattern = %d{ISO8601} [%t] %-5p %40C - %m%n
rootLogger.level = warn
rootLogger.appenderRef.stdout.ref = STDOUT
logger.pipeline.name = org.opensearch.dataprepper.pipeline
logger.pipeline.level = info
logger.parser.name = org.opensearch.dataprepper.parser
logger.parser.level = info
logger.plugins.name = org.opensearch.dataprepper.plugins
logger.plugins.level = info
opentelemetry-collector: opentelemetry-collector:
enabled: false enabled: false
@ -17,6 +123,10 @@ opentelemetry-collector:
endpoint: telemetry-jaeger-collector:4317 endpoint: telemetry-jaeger-collector:4317
tls: tls:
insecure: true insecure: true
otlp/data-prepper:
endpoint: telemetry-data-prepper:21890
tls:
insecure: true
processors: processors:
batch: {} batch: {}
@ -45,6 +155,7 @@ opentelemetry-collector:
traces: traces:
exporters: exporters:
- otlp/jaeger - otlp/jaeger
- otlp/data-prepper
processors: processors:
- memory_limiter - memory_limiter
- batch - batch
@ -83,12 +194,9 @@ jaeger:
grpc: grpc:
name: otlp-grpc name: otlp-grpc
port: 4317 port: 4317
# http: http:
# name: otlp-http name: otlp-http
# port: 4318 port: 4318
#extraEnv:
# - name: ES_TAGS_AS_FIELDS_ALL
# value: "true"
serviceMonitor: serviceMonitor:
enabled: false enabled: false
@ -103,6 +211,9 @@ jaeger:
cmdlineParams: cmdlineParams:
es.tls.enabled: "" es.tls.enabled: ""
es.tls.skip-host-verify: "" es.tls.skip-host-verify: ""
es.num-replicas: 1
es.num-shards: 2
#es.tags-as-fields.all: ""
provisionDataStore: provisionDataStore:
cassandra: false cassandra: false

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: kubezero name: kubezero
description: KubeZero - Root App of Apps chart description: KubeZero - Root App of Apps chart
type: application type: application
version: 1.28.9-1 version: 1.28.9-2
home: https://kubezero.com home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords: keywords:

View File

@ -21,6 +21,23 @@ fluentd:
enabled: {{ .Values.metrics.enabled }} enabled: {{ .Values.metrics.enabled }}
{{- end }} {{- end }}
{{- if index .Values "telemetry" "data-prepper" }}
data-prepper:
{{- with index .Values "telemetry" "data-prepper" }}
{{- toYaml . | nindent 2 }}
{{- end }}
{{- end }}
{{- if index .Values "telemetry" "opentelemetry-collector" }}
opentelemetry-collector:
{{- with index .Values "telemetry" "opentelemetry-collector" }}
{{- toYaml . | nindent 2 }}
{{- end }}
serviceMonitor:
enabled: {{ .Values.metrics.enabled }}
{{- end }}
{{- if .Values.telemetry.jaeger }} {{- if .Values.telemetry.jaeger }}
jaeger: jaeger:
{{- with .Values.telemetry.jaeger }} {{- with .Values.telemetry.jaeger }}

View File

@ -85,12 +85,12 @@ falco:
telemetry: telemetry:
enabled: false enabled: false
namespace: telemetry namespace: telemetry
targetRevision: 0.3.0 targetRevision: 0.4.0
operators: operators:
enabled: false enabled: false
namespace: operators namespace: operators
targetRevision: 0.1.3 targetRevision: 0.1.4
metrics: metrics:
enabled: false enabled: false