Logging version bump, metrics fixes

This commit is contained in:
Stefan Reimer 2023-11-28 18:42:00 +00:00
parent 39db495adc
commit 1c076eab61
15 changed files with 122 additions and 52 deletions

View File

@ -15,6 +15,13 @@ def migrate(values):
if not values["addons"]:
values.pop("addons")
# fix argoCD CM
try:
if not values["argocd"]["configs"]["cm"]["url"].startswith("http"):
values["argocd"]["configs"]["cm"]["url"] = "https://" + values["argocd"]["configs"]["cm"]["url"]
except KeyError:
pass
# migrate eck operator to new operator module
try:
if values["logging"]["eck-operator"]["enabled"]:

View File

@ -1,4 +1,5 @@
{{- /* Feature gates for all control plane components */ -}}
{{- /* ToAdd: "PodAndContainerStatsFromCRI" */ -}}
{{- define "kubeadm.featuregates" }}
{{- $gates := list "CustomCPUCFSQuotaPeriod" "MemoryQoS" }}
{{- if eq .return "csv" }}

View File

@ -1,6 +1,6 @@
# kubezero-logging
![Version: 0.8.6](https://img.shields.io/badge/Version-0.8.6-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.6.0](https://img.shields.io/badge/AppVersion-1.6.0-informational?style=flat-square)
![Version: 0.8.9](https://img.shields.io/badge/Version-0.8.9-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.6.0](https://img.shields.io/badge/AppVersion-1.6.0-informational?style=flat-square)
KubeZero Umbrella Chart for complete EFK stack
@ -14,14 +14,13 @@ KubeZero Umbrella Chart for complete EFK stack
## Requirements
Kubernetes: `>= 1.24.0`
Kubernetes: `>= 1.26.0`
| Repository | Name | Version |
|------------|------|---------|
| | eck-operator | 2.4.0 |
| | fluent-bit | 0.24.0 |
| | fluentd | 0.3.9 |
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | >= 0.1.6 |
| https://fluent.github.io/helm-charts | fluent-bit | 0.40.0 |
| https://fluent.github.io/helm-charts | fluentd | 0.5.0 |
## Changes from upstream
### ECK
@ -88,9 +87,9 @@ Kubernetes: `>= 1.24.0`
| fluent-bit.daemonSetVolumes[1].hostPath.path | string | `"/var/lib/containers/logs"` | |
| fluent-bit.daemonSetVolumes[1].name | string | `"newlog"` | |
| fluent-bit.enabled | bool | `false` | |
| fluent-bit.image.tag | string | `"2.0.10"` | |
| fluent-bit.image | string | `nil` | |
| fluent-bit.luaScripts."kubezero.lua" | string | `"function nest_k8s_ns(tag, timestamp, record)\n if not record['kubernetes']['namespace_name'] then\n return 0, 0, 0\n end\n new_record = {}\n for key, val in pairs(record) do\n if key == 'kube' then\n new_record[key] = {}\n new_record[key][record['kubernetes']['namespace_name']] = record[key]\n else\n new_record[key] = record[key]\n end\n end\n return 1, timestamp, new_record\nend\n"` | |
| fluent-bit.resources.limits.memory | string | `"64Mi"` | |
| fluent-bit.resources.limits.memory | string | `"128Mi"` | |
| fluent-bit.resources.requests.cpu | string | `"20m"` | |
| fluent-bit.resources.requests.memory | string | `"32Mi"` | |
| fluent-bit.serviceMonitor.enabled | bool | `false` | |
@ -98,6 +97,7 @@ Kubernetes: `>= 1.24.0`
| fluent-bit.testFramework.enabled | bool | `false` | |
| fluent-bit.tolerations[0].effect | string | `"NoSchedule"` | |
| fluent-bit.tolerations[0].operator | string | `"Exists"` | |
| fluentd.configMapConfigs[0] | string | `"fluentd-prometheus-conf"` | |
| fluentd.dashboards.enabled | bool | `false` | |
| fluentd.enabled | bool | `false` | |
| fluentd.env[0].name | string | `"FLUENTD_CONF"` | |
@ -115,6 +115,8 @@ Kubernetes: `>= 1.24.0`
| fluentd.kind | string | `"Deployment"` | |
| fluentd.metrics.serviceMonitor.additionalLabels.release | string | `"metrics"` | |
| fluentd.metrics.serviceMonitor.enabled | bool | `false` | |
| fluentd.mountDockerContainersDirectory | bool | `false` | |
| fluentd.mountVarLogDirectory | bool | `false` | |
| fluentd.output.host | string | `"logging-es-http"` | |
| fluentd.podSecurityPolicy.enabled | bool | `false` | |
| fluentd.replicaCount | int | `1` | |
@ -128,16 +130,6 @@ Kubernetes: `>= 1.24.0`
| fluentd.service.ports[1].name | string | `"http-fluentd"` | |
| fluentd.service.ports[1].protocol | string | `"TCP"` | |
| fluentd.shared_key | string | `"cloudbender"` | |
| fluentd.volumeMounts[0].mountPath | string | `"/etc/fluent"` | |
| fluentd.volumeMounts[0].name | string | `"etcfluentd-main"` | |
| fluentd.volumeMounts[1].mountPath | string | `"/etc/fluent/config.d/"` | |
| fluentd.volumeMounts[1].name | string | `"etcfluentd-config"` | |
| fluentd.volumes[0].configMap.defaultMode | int | `511` | |
| fluentd.volumes[0].configMap.name | string | `"fluentd-main"` | |
| fluentd.volumes[0].name | string | `"etcfluentd-main"` | |
| fluentd.volumes[1].configMap.defaultMode | int | `511` | |
| fluentd.volumes[1].configMap.name | string | `"fluentd-config"` | |
| fluentd.volumes[1].name | string | `"etcfluentd-config"` | |
| kibana.count | int | `1` | |
| kibana.istio.enabled | bool | `false` | |
| kibana.istio.gateway | string | `"istio-system/ingressgateway"` | |

View File

@ -1,9 +1,9 @@
annotations:
artifacthub.io/changes: |
- kind: added
description: "Added events permission to ClusteRole"
- kind: changed
description: "Updated Fluent Bit OCI image to v2.2.0."
apiVersion: v1
appVersion: 2.1.8
appVersion: 2.2.0
description: Fast and lightweight log processor and forwarder for Linux, OSX and BSD
family operating systems.
home: https://fluentbit.io/
@ -24,4 +24,4 @@ maintainers:
name: fluent-bit
sources:
- https://github.com/fluent/fluent-bit/
version: 0.37.1
version: 0.40.0

View File

@ -5,7 +5,7 @@
"builtIn": 1,
"datasource": {
"type": "prometheus",
"uid": "prometheus"
"uid": "$DS_PROMETHEUS"
},
"enable": true,
"hide": true,
@ -28,7 +28,7 @@
"collapsed": false,
"datasource": {
"type": "prometheus",
"uid": "prometheus"
"uid": "$DS_PROMETHEUS"
},
"gridPos": {
"h": 1,
@ -42,7 +42,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
"uid": "$DS_PROMETHEUS"
},
"refId": "A"
}
@ -144,7 +144,7 @@
"collapsed": false,
"datasource": {
"type": "prometheus",
"uid": "prometheus"
"uid": "$DS_PROMETHEUS"
},
"gridPos": {
"h": 1,
@ -158,7 +158,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
"uid": "$DS_PROMETHEUS"
},
"refId": "A"
}
@ -1171,7 +1171,7 @@
"collapsed": false,
"datasource": {
"type": "prometheus",
"uid": "prometheus"
"uid": "$DS_PROMETHEUS"
},
"gridPos": {
"h": 1,
@ -1185,7 +1185,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
"uid": "$DS_PROMETHEUS"
},
"refId": "A"
}
@ -1321,7 +1321,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
"uid": "$DS_PROMETHEUS"
},
"fieldConfig": {
"defaults": {
@ -1420,7 +1420,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
"uid": "$DS_PROMETHEUS"
},
"editorMode": "code",
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\",pod=~\"$pod\",container=\"fluent-bit\"}) by (pod)",
@ -1432,7 +1432,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
"uid": "$DS_PROMETHEUS"
},
"editorMode": "code",
"expr": "avg(kube_pod_container_resource_requests{job=\"kube-state-metrics\",namespace=\"$namespace\",pod=~\"$pod\",container=\"fluent-bit\",resource=\"cpu\"})",

View File

@ -1,5 +1,5 @@
apiVersion: v2
appVersion: v1.15.2
appVersion: v1.16.2
description: A Helm chart for Kubernetes
home: https://www.fluentd.org/
icon: https://www.fluentd.org/images/miscellany/fluentd-logo_2x.png
@ -12,4 +12,4 @@ name: fluentd
sources:
- https://github.com/fluent/fluentd/
- https://github.com/fluent/fluentd-kubernetes-daemonset
version: 0.4.3
version: 0.5.0

View File

@ -1,5 +1,5 @@
{{- define "fluentd.pod" -}}
{{- $defaultTag := printf "%s-debian-elasticsearch7-1.0" (.Chart.AppVersion) -}}
{{- $defaultTag := printf "%s-debian-%s-1.0" (.Chart.AppVersion) (.Values.variant) -}}
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 2 }}

View File

@ -0,0 +1,44 @@
{{- /*
Optional Ingress (networking.k8s.io/v1); rendered only when .Values.ingress.enabled is set.
- .Values.labels and .Values.ingress.annotations are added to the metadata when present.
- Each entry in .Values.ingress.tls yields one tls item with its hosts and, if given, its secretName.
- Each entry in .Values.ingress.hosts yields one rule that sends path "/" (pathType Prefix) to the
  service named by "fluentd.fullname" on that entry's .port; the entry's .host is optional.
*/ -}}
{{- if .Values.ingress.enabled -}}
{{- $fullName := include "fluentd.fullname" . -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: {{ include "fluentd.fullname" . }}
labels:
{{- include "fluentd.labels" . | nindent 4 }}
{{- with .Values.labels }}
{{- toYaml . | nindent 4 }}
{{- end }}
{{- with .Values.ingress.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- if .Values.ingress.tls }}
tls:
{{- range .Values.ingress.tls }}
- hosts:
{{- range .hosts }}
- {{ . | quote }}
{{- end }}
{{- with .secretName }}
secretName: {{ . }}
{{- end }}
{{- end }}
{{- end }}
rules:
{{- range .Values.ingress.hosts }}
- http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: {{ $fullName }}
port:
number: {{ .port }}
{{ if .host -}}
host: {{ .host | quote }}
{{- end -}}
{{- end -}}
{{- end -}}

View File

@ -1,4 +1,4 @@
{{- if .Values.service.enabled -}}
apiVersion: v1
kind: Service
metadata:
@ -32,3 +32,4 @@ spec:
{{- end }}
selector:
{{- include "fluentd.selectorLabels" . | nindent 4 }}
{{- end -}}

View File

@ -3,7 +3,8 @@ fullnameOverride: ""
# DaemonSet, Deployment or StatefulSet
kind: "DaemonSet"
# azureblob, cloudwatch, elasticsearch7, elasticsearch8, gcs, graylog, kafka, kafka2, kinesis, opensearch
variant: elasticsearch7
# # Only applicable for Deployment or StatefulSet
# replicaCount: 1
@ -202,6 +203,7 @@ persistence:
## Fluentd service
##
service:
enabled: true
type: "ClusterIP"
annotations: {}
# loadBalancerIP:
@ -378,3 +380,17 @@ fileConfigs:
password changeme
</match>
</label>
ingress:
enabled: false
annotations: {}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
hosts:
# - host: fluentd.example.tld
- port: 9880
tls: []
# - secretName: fluentd-tls
# hosts:
# - fluentd.example.tld

View File

@ -18,3 +18,5 @@ rm -f charts/fluentd/templates/files.conf/systemd.yaml
# Fetch dashboards from Grafana.com and update ZDT CM
../kubezero-metrics/sync_grafana_dashboards.py dashboards.yaml templates/fluent-bit/grafana-dashboards.yaml
../kubezero-metrics/sync_grafana_dashboards.py dashboards-es.yaml templates/eck/grafana-dashboards.yaml
update_docs

View File

@ -1,15 +1,6 @@
# use this for backwards compatibility
# fullnameOverride: ""
eck-operator:
enabled: false
installCRDs: false
tolerations:
- key: node-role.kubernetes.io/control-plane
effect: NoSchedule
nodeSelector:
node-role.kubernetes.io/control-plane: ""
# Version for ElasticSearch and Kibana have to match so we define it at top-level
version: 7.17.3

View File

@ -331,11 +331,17 @@ prometheus-adapter:
default: false
resource:
cpu:
containerQuery: sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}[5m])) by (<<.GroupBy>>)
nodeQuery: sum(1 - irate(node_cpu_seconds_total{mode="idle"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)
containerQuery: |
sum by (<<.GroupBy>>) (
irate (
container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="",pod!=""}[60s]
)
)
nodeQuery: |
sum(1 - irate(node_cpu_seconds_total{<<.LabelMatchers>>, mode="idle"}[60s])) by (<<.GroupBy>>)
resources:
overrides:
node:
instance:
resource: node
namespace:
resource: namespace
@ -343,11 +349,15 @@ prometheus-adapter:
resource: pod
containerLabel: container
memory:
containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}) by (<<.GroupBy>>)
nodeQuery: sum(node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>}) by (<<.GroupBy>>)
containerQuery: |
sum by (<<.GroupBy>>) (
container_memory_working_set_bytes{<<.LabelMatchers>>,container!="",pod!="",container!="POD"}
)
nodeQuery: |
sum(node_memory_MemTotal_bytes{<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{<<.LabelMatchers>>}) by (<<.GroupBy>>)
resources:
overrides:
node:
instance:
resource: node
namespace:
resource: namespace

View File

@ -108,7 +108,7 @@ metrics:
logging:
enabled: false
namespace: logging
targetRevision: 0.8.8
targetRevision: 0.8.9
argocd:
enabled: false

View File

@ -9,9 +9,15 @@
- new, optional, OpenSearch operator
- all instances now enforce IMDSv2
## Fixes
- `kubectl top nodes` works now using node-exporter metrics rather than cadvisor
## Version upgrades
- cilium 1.14.4
- istio 1.19.4
- fluent-bit 2.2.0
- ArgoCD 2.9
- Prometheus / Grafana
### FeatureGates
- CustomCPUCFSQuotaPeriod