From da32f87d3cd5c8f43ff4b2053e28740c8b68439b Mon Sep 17 00:00:00 2001 From: Stefan Reimer Date: Wed, 12 Feb 2025 12:37:47 +0000 Subject: [PATCH] feat: reorg cluster upgrade logic, migrate aws-iam-authenticator to system service, basic network and addons for 1.31 --- Dockerfile | 6 +- admin/hooks-1.31.sh | 43 +++++ admin/kubezero.sh | 141 ++++++++-------- admin/libhelm.sh | 2 +- admin/upgrade_cluster.sh | 25 +-- charts/kubeadm/Chart.yaml | 2 +- .../templates/ClusterConfiguration.yaml | 134 ++++++++++----- .../kubeadm/templates/InitConfiguration.yaml | 11 +- .../templates/UpgradeConfiguration.yaml | 16 ++ charts/kubeadm/templates/admin-aws-iam.yaml | 27 --- .../templates/apiserver/authz-config.yaml | 32 ++++ .../apiserver/aws-iam-authenticator.yaml | 19 --- .../50-aws-iam-authenticator-crds.yaml | 46 ------ .../51-aws-iam-authenticator-deployment.yaml | 155 ------------------ .../52-aws-iam-authenticator-mappings.yaml | 23 --- charts/kubeadm/values.yaml | 5 +- charts/kubezero-addons/Chart.yaml | 2 +- charts/kubezero-addons/README.md | 12 +- .../aws-node-termination-handler/Chart.yaml | 4 +- .../templates/daemonset.linux.yaml | 2 + .../templates/daemonset.windows.yaml | 2 + .../templates/deployment.yaml | 4 + .../aws-node-termination-handler/values.yaml | 3 + charts/kubezero-addons/values.yaml | 7 +- charts/kubezero-ci/values.yaml | 2 + .../templates/neo4j/secrets.yaml | 9 + charts/kubezero-graph/values.yaml | 3 + .../templates/bootstrap-config.yaml | 2 +- .../templates/tests/test-connection.yaml | 4 +- .../charts/fluentd/values.yaml | 3 - charts/kubezero-logging/fluentd.patch | 33 ---- charts/kubezero-network/Chart.yaml | 2 +- charts/kubezero-network/README.md | 6 +- .../templates/multus/daemonset.yaml | 9 +- charts/kubezero-telemetry/README.md | 10 +- .../templates/tests/test-connection.yaml | 4 +- .../charts/fluentd/values.yaml | 3 - charts/kubezero-telemetry/fluentd.patch | 33 ---- charts/kubezero/Chart.yaml | 2 +- charts/kubezero/templates/_app.tpl | 1 + charts/kubezero/templates/addons.yaml | 8 +- charts/kubezero/templates/istio-ingress.yaml | 4 + charts/kubezero/values.yaml | 4 +- 43 files changed, 344 insertions(+), 521 deletions(-) create mode 100644 admin/hooks-1.31.sh create mode 100644 charts/kubeadm/templates/UpgradeConfiguration.yaml delete mode 100644 charts/kubeadm/templates/admin-aws-iam.yaml create mode 100644 charts/kubeadm/templates/apiserver/authz-config.yaml delete mode 100644 charts/kubeadm/templates/apiserver/aws-iam-authenticator.yaml delete mode 100644 charts/kubeadm/templates/resources/50-aws-iam-authenticator-crds.yaml delete mode 100644 charts/kubeadm/templates/resources/51-aws-iam-authenticator-deployment.yaml delete mode 100644 charts/kubeadm/templates/resources/52-aws-iam-authenticator-mappings.yaml create mode 100644 charts/kubezero-graph/templates/neo4j/secrets.yaml diff --git a/Dockerfile b/Dockerfile index f4ca5149..b2231cbe 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ ARG ALPINE_VERSION=3.21 FROM docker.io/alpine:${ALPINE_VERSION} ARG ALPINE_VERSION -ARG KUBE_VERSION=1.31.4 +ARG KUBE_VERSION=1.31 ARG SOPS_VERSION="3.9.1" ARG VALS_VERSION="0.37.6" @@ -41,8 +41,8 @@ RUN mkdir -p $(helm env HELM_PLUGINS) && \ # vals RUN wget -qO - https://github.com/helmfile/vals/releases/download/v${VALS_VERSION}/vals_${VALS_VERSION}_linux_amd64.tar.gz | tar -C /usr/local/bin -xzf- vals -ADD admin/kubezero.sh admin/libhelm.sh admin/migrate_argo_values.py /usr/bin -ADD admin/libhelm.sh /var/lib/kubezero +ADD admin/kubezero.sh admin/migrate_argo_values.py /usr/bin 
+ADD admin/libhelm.sh admin/hooks-$KUBE_VERSION.sh /var/lib/kubezero
 
 ADD charts/kubeadm /charts/kubeadm
 ADD charts/kubezero /charts/kubezero
diff --git a/admin/hooks-1.31.sh b/admin/hooks-1.31.sh
new file mode 100644
index 00000000..d5e0ea03
--- /dev/null
+++ b/admin/hooks-1.31.sh
@@ -0,0 +1,43 @@
+### v1.31
+
+# All things BEFORE the first controller / control plane upgrade
+pre_control_plane_upgrade_cluster() {
+  # add kubezero version label to existing controller nodes for aws-iam migration
+  for n in $(kubectl get nodes -l "node-role.kubernetes.io/control-plane=" | grep v1.30 | awk '{print $1}'); do
+    kubectl label node $n 'node.kubernetes.io/kubezero.version=v1.30.6' || true
+  done
+
+  # patch aws-iam-authenticator DS to NOT run pods on 1.31 controllers
+  kubectl patch ds aws-iam-authenticator -p '{"spec": {"template": {"spec": {"nodeSelector": {"node.kubernetes.io/kubezero.version": "v1.30.6"}}}}}' || true
+}
+
+
+# All things after the first controller / control plane upgrade
+post_control_plane_upgrade_cluster() {
+  echo
+}
+
+
+# All things AFTER all controllers are on the new version
+pre_cluster_upgrade_final() {
+
+  if [ "$PLATFORM" == "aws" ];then
+    # cleanup aws-iam-authenticator
+    kubectl delete clusterrolebinding aws-iam-authenticator || true
+    kubectl delete clusterrole aws-iam-authenticator || true
+    kubectl delete serviceaccount aws-iam-authenticator -n kube-system || true
+    kubectl delete cm aws-iam-authenticator -n kube-system || true
+    kubectl delete ds aws-iam-authenticator -n kube-system || true
+    kubectl delete IAMIdentityMapping kubezero-worker-nodes || true
+    kubectl delete IAMIdentityMapping kubernetes-admin || true
+    kubectl delete crd iamidentitymappings.iamauthenticator.k8s.aws || true
+
+    kubectl delete secret aws-iam-certs -n kube-system || true
+  fi
+}
+
+
+# Last call
+post_cluster_upgrade_final() {
+  echo
+}
diff --git a/admin/kubezero.sh b/admin/kubezero.sh
index 95cbd740..ab26d9e0 100755
--- a/admin/kubezero.sh
+++ b/admin/kubezero.sh
@@ -29,6 +29,9 @@ export ETCDCTL_KEY=${HOSTFS}/etc/kubernetes/pki/apiserver-etcd-client.key
 
 mkdir -p ${WORKDIR}
 
+# Import version specific hooks
+. /var/lib/kubezero/hooks-${KUBE_VERSION_MINOR##v}.sh
+
 # Generic retry utility
 retry() {
   local tries=$1
@@ -64,7 +67,9 @@ render_kubeadm() {
     cat ${WORKDIR}/kubeadm/templates/${f}Configuration.yaml >> ${HOSTFS}/etc/kubernetes/kubeadm.yaml
   done
 
-  if [[ "$phase" =~ ^(bootstrap|join|restore)$ ]]; then
+  if [[ "$phase" == "upgrade" ]]; then
+    cat ${WORKDIR}/kubeadm/templates/UpgradeConfiguration.yaml >> ${HOSTFS}/etc/kubernetes/kubeadm.yaml
+  elif [[ "$phase" =~ ^(bootstrap|join|restore)$ ]]; then
     cat ${WORKDIR}/kubeadm/templates/InitConfiguration.yaml >> ${HOSTFS}/etc/kubernetes/kubeadm.yaml
   fi
 
@@ -83,7 +88,6 @@ parse_kubezero() {
   export ETCD_NODENAME=$(yq eval '.etcd.nodeName' ${HOSTFS}/etc/kubernetes/kubeadm-values.yaml)
   export NODENAME=$(yq eval '.nodeName' ${HOSTFS}/etc/kubernetes/kubeadm-values.yaml)
   export PROVIDER_ID=$(yq eval '.providerID // ""' ${HOSTFS}/etc/kubernetes/kubeadm-values.yaml)
-  export AWS_IAM_AUTH=$(yq eval '.api.awsIamAuth.enabled // "false"' ${HOSTFS}/etc/kubernetes/kubeadm-values.yaml)
 }
 
 
@@ -92,20 +96,6 @@ pre_kubeadm() {
   # update all apiserver addons first
   cp -r ${WORKDIR}/kubeadm/templates/apiserver ${HOSTFS}/etc/kubernetes
 
-  # aws-iam-authenticator enabled ?
-  if [ "$AWS_IAM_AUTH" == "true" ]; then
-
-    # Initialize webhook
-    if [ !
-f ${HOSTFS}/etc/kubernetes/pki/aws-iam-authenticator.crt ]; then - ${HOSTFS}/usr/bin/aws-iam-authenticator init -i ${CLUSTERNAME} - mv key.pem ${HOSTFS}/etc/kubernetes/pki/aws-iam-authenticator.key - mv cert.pem ${HOSTFS}/etc/kubernetes/pki/aws-iam-authenticator.crt - fi - - # Patch the aws-iam-authenticator config with the actual cert.pem - yq eval -Mi ".clusters[0].cluster.certificate-authority-data = \"$(cat ${HOSTFS}/etc/kubernetes/pki/aws-iam-authenticator.crt| base64 -w0)\"" ${HOSTFS}/etc/kubernetes/apiserver/aws-iam-authenticator.yaml - fi - # copy patches to host to make --rootfs of kubeadm work cp -r ${WORKDIR}/kubeadm/templates/patches ${HOSTFS}/etc/kubernetes } @@ -120,60 +110,63 @@ post_kubeadm() { } -kubeadm_upgrade() { - # pre upgrade hook +# Control plane upgrade +control_plane_upgrade() { + CMD=$1 # get current values, argo app over cm get_kubezero_values $ARGOCD - # tumble new config through migrate.py - migrate_argo_values.py < "$WORKDIR"/kubezero-values.yaml > "$WORKDIR"/new-kubezero-values.yaml + if [[ "$CMD" =~ ^(cluster)$ ]]; then + # tumble new config through migrate.py + migrate_argo_values.py < "$WORKDIR"/kubezero-values.yaml > "$WORKDIR"/new-kubezero-values.yaml - # Update kubezero-values CM - kubectl get cm -n kubezero kubezero-values -o=yaml | \ - yq e '.data."values.yaml" |= load_str("/tmp/kubezero/new-kubezero-values.yaml")' | \ - kubectl apply --server-side --force-conflicts -f - + # Update kubezero-values CM + kubectl get cm -n kubezero kubezero-values -o=yaml | \ + yq e '.data."values.yaml" |= load_str("/tmp/kubezero/new-kubezero-values.yaml")' | \ + kubectl apply --server-side --force-conflicts -f - - if [ "$ARGOCD" == "True" ]; then - # update argo app - export kubezero_chart_version=$(yq .version $CHARTS/kubezero/Chart.yaml) - kubectl get application kubezero -n argocd -o yaml | \ - yq '.spec.source.helm.valuesObject |= load("/tmp/kubezero/new-kubezero-values.yaml") | .spec.source.targetRevision = strenv(kubezero_chart_version)' \ - > $WORKDIR/new-argocd-app.yaml - kubectl apply --server-side --force-conflicts -f $WORKDIR/new-argocd-app.yaml + if [ "$ARGOCD" == "True" ]; then + # update argo app + export kubezero_chart_version=$(yq .version $CHARTS/kubezero/Chart.yaml) + kubectl get application kubezero -n argocd -o yaml | \ + yq '.spec.source.helm.valuesObject |= load("/tmp/kubezero/new-kubezero-values.yaml") | .spec.source.targetRevision = strenv(kubezero_chart_version)' \ + > $WORKDIR/new-argocd-app.yaml + kubectl apply --server-side --force-conflicts -f $WORKDIR/new-argocd-app.yaml - # finally remove annotation to allow argo to sync again - kubectl patch app kubezero -n argocd --type json -p='[{"op": "remove", "path": "/metadata/annotations"}]' || true + # finally remove annotation to allow argo to sync again + kubectl patch app kubezero -n argocd --type json -p='[{"op": "remove", "path": "/metadata/annotations"}]' || true + fi + + # Local node upgrade + render_kubeadm upgrade + + pre_kubeadm + + _kubeadm init phase upload-config kubeadm + + _kubeadm upgrade apply $KUBE_VERSION + + post_kubeadm + + # install re-certed kubectl config for root + cp ${HOSTFS}/etc/kubernetes/super-admin.conf ${HOSTFS}/root/.kube/config + + echo "Successfully upgraded KubeZero control plane to $KUBE_VERSION using kubeadm." 
+ + elif [[ "$CMD" =~ ^(final)$ ]]; then + render_kubeadm upgrade + + # Finally upgrade addons last, with 1.32 we can ONLY call addon phase + #_kubeadm upgrade apply phase addon all $KUBE_VERSION + _kubeadm upgrade apply $KUBE_VERSION + + echo "Upgraded addons and applied final migrations" fi - # Local node upgrade - render_kubeadm upgrade - - pre_kubeadm - - # Upgrade - we upload the new config first so we can use --patch during 1.30 - _kubeadm init phase upload-config kubeadm - - kubeadm upgrade apply --yes --patches /etc/kubernetes/patches $KUBE_VERSION --rootfs ${HOSTFS} $LOG - - post_kubeadm - - # install re-certed kubectl config for root - cp ${HOSTFS}/etc/kubernetes/super-admin.conf ${HOSTFS}/root/.kube/config - - # post upgrade - # Cleanup after kubeadm on the host rm -rf ${HOSTFS}/etc/kubernetes/tmp - echo "Successfully upgraded kubeadm control plane." - - # TODO - # Send Notification currently done via CloudBender -> SNS -> Slack - # Better deploy https://github.com/opsgenie/kubernetes-event-exporter and set proper routes and labels on this Job - - # Removed: - # - update oidc do we need that ? } @@ -200,6 +193,10 @@ control_plane_node() { # Put PKI in place cp -r ${WORKDIR}/pki ${HOSTFS}/etc/kubernetes + ### 1.31 only to clean up previous aws-iam-auth certs + rm -f ${HOSTFS}/etc/kubernetes/pki/aws-iam-authenticator.key ${HOSTFS}/etc/kubernetes/pki/aws-iam-authenticator.crt + ### + # Always use kubeadm kubectl config to never run into chicken egg with custom auth hooks cp ${WORKDIR}/super-admin.conf ${HOSTFS}/root/.kube/config @@ -220,7 +217,7 @@ control_plane_node() { rm -f ${HOSTFS}/etc/kubernetes/pki/etcd/peer.* ${HOSTFS}/etc/kubernetes/pki/etcd/server.* ${HOSTFS}/etc/kubernetes/pki/etcd/healthcheck-client.* \ ${HOSTFS}/etc/kubernetes/pki/apiserver* ${HOSTFS}/etc/kubernetes/pki/front-proxy-client.* - # Issue all certs first, needed for eg. aws-iam-authenticator setup + # Issue all certs first _kubeadm init phase certs all pre_kubeadm @@ -286,6 +283,9 @@ control_plane_node() { -endpoint https://${ETCD_NODENAME}:2379 \ change-provider-id ${NODENAME} $PROVIDER_ID fi + + # update node label for single node control plane + kubectl label node $NODENAME "node.kubernetes.io/kubezero.version=$KUBE_VERSION" --overwrite=true fi _kubeadm init phase upload-config all @@ -305,17 +305,6 @@ control_plane_node() { _kubeadm init phase addon all fi - # Ensure aws-iam-authenticator secret is in place - if [ "$AWS_IAM_AUTH" == "true" ]; then - kubectl get secrets -n kube-system aws-iam-certs || \ - kubectl create secret generic aws-iam-certs -n kube-system \ - --from-file=key.pem=${HOSTFS}/etc/kubernetes/pki/aws-iam-authenticator.key \ - --from-file=cert.pem=${HOSTFS}/etc/kubernetes/pki/aws-iam-authenticator.crt - - # Store aws-iam-auth admin on SSM - yq eval -M ".clusters[0].cluster.certificate-authority-data = \"$(cat ${HOSTFS}/etc/kubernetes/pki/ca.crt | base64 -w0)\"" ${WORKDIR}/kubeadm/templates/admin-aws-iam.yaml > ${HOSTFS}/etc/kubernetes/admin-aws-iam.yaml - fi - post_kubeadm echo "${CMD}ed cluster $CLUSTERNAME successfully." 
@@ -413,7 +402,17 @@ for t in $@; do
   restore) control_plane_node restore;;
   kubeadm_upgrade)
     ARGOCD=$(argo_used)
-    kubeadm_upgrade;;
+    # call hooks
+    pre_control_plane_upgrade_cluster
+    control_plane_upgrade cluster
+    post_control_plane_upgrade_cluster
+    ;;
+  finalize_cluster_upgrade)
+    ARGOCD=$(argo_used)
+    pre_cluster_upgrade_final
+    control_plane_upgrade final
+    post_cluster_upgrade_final
+    ;;
   apply_*)
     ARGOCD=$(argo_used)
     apply_module "${t##apply_}";;
diff --git a/admin/libhelm.sh b/admin/libhelm.sh
index 27aa0758..9d4fa19f 100644
--- a/admin/libhelm.sh
+++ b/admin/libhelm.sh
@@ -148,7 +148,7 @@ for manifest in yaml.safe_load_all(sys.stdin):
 # helm template | kubectl apply -f -
 # confine to one namespace if possible
 function render() {
-  helm secrets --evaluate-templates template $(chart_location $chart) -n $namespace --name-template $module $targetRevision --skip-crds -f $WORKDIR/values.yaml $API_VERSIONS --kube-version $KUBE_VERSION $@ \
+  helm secrets --evaluate-templates template $(chart_location $chart) -n $namespace --name-template $module $targetRevision --skip-tests --skip-crds -f $WORKDIR/values.yaml $API_VERSIONS --kube-version $KUBE_VERSION $@ \
     | python3 -c '
 #!/usr/bin/python3
 import yaml
diff --git a/admin/upgrade_cluster.sh b/admin/upgrade_cluster.sh
index ab538728..d61507d1 100755
--- a/admin/upgrade_cluster.sh
+++ b/admin/upgrade_cluster.sh
@@ -19,22 +19,26 @@ echo "Checking that all pods in kube-system are running ..."
 
 [ "$ARGOCD" == "True" ] && disable_argo
 
-control_plane_upgrade kubeadm_upgrade
+# Check if we already have all controllers on the current version
+OLD_CONTROLLERS=$(kubectl get nodes -l "node-role.kubernetes.io/control-plane=" --no-headers=true | grep -cv $KUBE_VERSION || true)
 
-echo "Control plane upgraded, to continue"
-read -r
+# All controllers already on current version
+if [ "$OLD_CONTROLLERS" == "0" ]; then
+  control_plane_upgrade finalize_cluster_upgrade
+  exit
+
+# Otherwise run control plane upgrade
+else
+  control_plane_upgrade kubeadm_upgrade
+  echo " to continue"
+  read -r
+fi
 
 #echo "Adjust kubezero values as needed:"
 # shellcheck disable=SC2015
 #[ "$ARGOCD" == "True" ] && kubectl edit app kubezero -n argocd || kubectl edit cm kubezero-values -n kubezero
 
-### v1.31
-
 # upgrade modules
-#
-# Preload cilium images to running nodes, disabled till 1.31
-# all_nodes_upgrade "chroot /host crictl pull quay.io/cilium/cilium:v1.16.3; chroot /host crictl pull ghcr.io/k8snetworkplumbingwg/multus-cni:v3.9.3"
-
 control_plane_upgrade "apply_network, apply_addons, apply_storage, apply_operators"
 
 echo "Checking that all pods in kube-system are running ..."
@@ -45,8 +49,7 @@ echo "Applying remaining KubeZero modules..."
 control_plane_upgrade "apply_cert-manager, apply_istio, apply_istio-ingress, apply_istio-private-ingress, apply_logging, apply_metrics, apply_telemetry, apply_argo"
 
 # Final step is to commit the new argocd kubezero app
-# remove the del(.spec.source.helm.values) with 1.31
-kubectl get app kubezero -n argocd -o yaml | yq 'del(.spec.source.helm.values) | del(.status) | del(.metadata) | del(.operation) | .metadata.name="kubezero" | .metadata.namespace="argocd"' | yq 'sort_keys(..)' > $ARGO_APP
+kubectl get app kubezero -n argocd -o yaml | yq 'del(.status) | del(.metadata) | del(.operation) | .metadata.name="kubezero" | .metadata.namespace="argocd"' | yq 'sort_keys(..)' > $ARGO_APP
 
 # Trigger backup of upgraded cluster state
 kubectl create job --from=cronjob/kubezero-backup kubezero-backup-$KUBE_VERSION -n kube-system
diff --git a/charts/kubeadm/Chart.yaml b/charts/kubeadm/Chart.yaml
index 6af3c51e..b167a8ba 100644
--- a/charts/kubeadm/Chart.yaml
+++ b/charts/kubeadm/Chart.yaml
@@ -2,7 +2,7 @@ apiVersion: v2
 name: kubeadm
 description: KubeZero Kubeadm cluster config
 type: application
-version: 1.31.4
+version: 1.31.5
 home: https://kubezero.com
 icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
 keywords:
diff --git a/charts/kubeadm/templates/ClusterConfiguration.yaml b/charts/kubeadm/templates/ClusterConfiguration.yaml
index 16ef0a91..7461ee42 100644
--- a/charts/kubeadm/templates/ClusterConfiguration.yaml
+++ b/charts/kubeadm/templates/ClusterConfiguration.yaml
@@ -1,4 +1,4 @@
-apiVersion: kubeadm.k8s.io/v1beta3
+apiVersion: kubeadm.k8s.io/v1beta4
 kind: ClusterConfiguration
 kubernetesVersion: {{ .Chart.Version }}
 clusterName: {{ .Values.global.clusterName }}
@@ -11,20 +11,33 @@ etcd:
   local:
     # imageTag: 3.5.12-0
     extraArgs:
+      - name: advertise-client-urls
+        value: https://{{ .Values.etcd.nodeName }}:2379
+      - name: initial-advertise-peer-urls
+        value: https://{{ .Values.etcd.nodeName }}:2380
+      - name: initial-cluster
+        value: {{ include "kubeadm.etcd.initialCluster" .Values.etcd | quote }}
+      - name: initial-cluster-state
+        value: {{ .Values.etcd.state }}
+      - name: initial-cluster-token
+        value: etcd-{{ .Values.global.clusterName }}
+      - name: name
+        value: {{ .Values.etcd.nodeName }}
+      - name: listen-peer-urls
+        value: https://{{ .Values.listenAddress }}:2380
+      - name: listen-client-urls
+        value: https://{{ .Values.listenAddress }}:2379
+      - name: listen-metrics-urls
+        value: http://0.0.0.0:2381
+      - name: logger
+        value: zap
+      - name: log-level
+        value: warn
       ### DNS discovery
-      #discovery-srv: {{ .Values.domain }}
-      #discovery-srv-name: {{ .Values.global.clusterName }}
-      advertise-client-urls: https://{{ .Values.etcd.nodeName }}:2379
-      initial-advertise-peer-urls: https://{{ .Values.etcd.nodeName }}:2380
-      initial-cluster: {{ include "kubeadm.etcd.initialCluster" .Values.etcd | quote }}
-      initial-cluster-state: {{ .Values.etcd.state }}
-      initial-cluster-token: etcd-{{ .Values.global.clusterName }}
-      name: {{ .Values.etcd.nodeName }}
-      listen-peer-urls: https://{{ .Values.listenAddress }}:2380
-      listen-client-urls: https://{{ .Values.listenAddress }}:2379
-      listen-metrics-urls: http://0.0.0.0:2381
-      logger: zap
-      # log-level: "warn"
+      #- name: discovery-srv
+      #  value: {{ .Values.domain }}
+      #- name: discovery-srv-name
+      #  value: {{ .Values.global.clusterName }}
      {{- with .Values.etcd.extraArgs }}
      {{- toYaml .
| nindent 6 }} {{- end }} @@ -38,49 +51,82 @@ etcd: - "{{ .Values.domain }}" controllerManager: extraArgs: - profiling: "false" - terminated-pod-gc-threshold: "300" - leader-elect: {{ .Values.global.highAvailable | quote }} - logging-format: json - feature-gates: {{ include "kubeadm.featuregates" ( dict "return" "csv" ) | trimSuffix "," | quote }} + - name: profiling + value: "false" + - name: terminated-pod-gc-threshold + value: "300" + - name: leader-elect + value: {{ .Values.global.highAvailable | quote }} + - name: logging-format + value: json + - name: feature-gates + value: {{ include "kubeadm.featuregates" ( dict "return" "csv" ) | trimSuffix "," | quote }} scheduler: extraArgs: - profiling: "false" - leader-elect: {{ .Values.global.highAvailable | quote }} - logging-format: json - feature-gates: {{ include "kubeadm.featuregates" ( dict "return" "csv" ) | trimSuffix "," | quote }} + - name: feature-gates + value: {{ include "kubeadm.featuregates" ( dict "return" "csv" ) | trimSuffix "," | quote }} + - name: leader-elect + value: {{ .Values.global.highAvailable | quote }} + - name: logging-format + value: json + - name: profiling + value: "false" apiServer: certSANs: - {{ regexSplit ":" .Values.api.endpoint -1 | first }} extraArgs: - etcd-servers: {{ .Values.api.etcdServers }} - profiling: "false" - audit-log-path: "/var/log/kubernetes/audit.log" - audit-policy-file: /etc/kubernetes/apiserver/audit-policy.yaml - audit-log-maxage: "7" - audit-log-maxsize: "100" - audit-log-maxbackup: "1" - audit-log-compress: "true" + - name: profiling + value: "false" + - name: etcd-servers + value: {{ .Values.api.etcdServers }} + - name: audit-log-path + value: /var/log/kubernetes/audit.log + - name: audit-policy-file + value: /etc/kubernetes/apiserver/audit-policy.yaml + - name: audit-log-maxage + value: "7" + - name: audit-log-maxsize + value: "100" + - name: audit-log-maxbackup + value: "1" + - name: audit-log-compress + value: "true" {{- if .Values.api.falco.enabled }} - audit-webhook-config-file: /etc/kubernetes/apiserver/audit-webhook.yaml + - name: audit-webhook-config-file + value: /etc/kubernetes/apiserver/audit-webhook.yaml {{- end }} - tls-cipher-suites: "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384" - admission-control-config-file: /etc/kubernetes/apiserver/admission-configuration.yaml - api-audiences: {{ .Values.api.apiAudiences }} + - name: tls-cipher-suites + value: "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384" + - name: admission-control-config-file + value: /etc/kubernetes/apiserver/admission-configuration.yaml + - name: api-audiences + value: {{ .Values.api.apiAudiences }} {{- if .Values.api.serviceAccountIssuer }} - service-account-issuer: "{{ .Values.api.serviceAccountIssuer }}" - service-account-jwks-uri: "{{ .Values.api.serviceAccountIssuer }}/openid/v1/jwks" + - name: service-account-issuer + value: "{{ .Values.api.serviceAccountIssuer }}" + - name: service-account-jwks-uri + value: "{{ .Values.api.serviceAccountIssuer }}/openid/v1/jwks" {{- end }} - {{- if .Values.api.awsIamAuth.enabled }} - authentication-token-webhook-config-file: /etc/kubernetes/apiserver/aws-iam-authenticator.yaml - 
authentication-token-webhook-cache-ttl: 3600s + {{- if .Values.api.awsIamAuth }} + - name: authentication-token-webhook-config-file + value: /etc/kubernetes/apiserver/aws-iam-authenticator.yaml + - name: authentication-token-webhook-cache-ttl + value: 3600s + - name: authentication-token-webhook-version + value: v1 {{- end }} - feature-gates: {{ include "kubeadm.featuregates" ( dict "return" "csv" ) | trimSuffix "," | quote }} - enable-admission-plugins: DenyServiceExternalIPs,NodeRestriction,EventRateLimit,ExtendedResourceToleration + - name: feature-gates + value: {{ include "kubeadm.featuregates" ( dict "return" "csv" ) | trimSuffix "," | quote }} + - name: authorization-config + value: /etc/kubernetes/apiserver/authz-config.yaml + - name: enable-admission-plugins + value: DenyServiceExternalIPs,NodeRestriction,EventRateLimit,ExtendedResourceToleration {{- if .Values.global.highAvailable }} - goaway-chance: ".001" + - name: goaway-chance + value: ".001" {{- end }} - logging-format: json + - name: logging-format + value: json {{- with .Values.api.extraArgs }} {{- toYaml . | nindent 4 }} {{- end }} diff --git a/charts/kubeadm/templates/InitConfiguration.yaml b/charts/kubeadm/templates/InitConfiguration.yaml index 6f8b3caa..5eab7ba2 100644 --- a/charts/kubeadm/templates/InitConfiguration.yaml +++ b/charts/kubeadm/templates/InitConfiguration.yaml @@ -1,4 +1,4 @@ -apiVersion: kubeadm.k8s.io/v1beta3 +apiVersion: kubeadm.k8s.io/v1beta4 kind: InitConfiguration localAPIEndpoint: advertiseAddress: {{ .Values.listenAddress }} @@ -17,10 +17,13 @@ nodeRegistration: - Swap - KubeletVersion kubeletExtraArgs: - node-labels: {{ .Values.nodeLabels | quote }} + - name: node-labels + value: {{ .Values.nodeLabels | quote }} {{- with .Values.providerID }} - provider-id: {{ . }} + - name: provider-id + value: {{ . }} {{- end }} {{- if ne .Values.listenAddress "0.0.0.0" }} - node-ip: {{ .Values.listenAddress }} + - name: node-ip + value: {{ .Values.listenAddress }} {{- end }} diff --git a/charts/kubeadm/templates/UpgradeConfiguration.yaml b/charts/kubeadm/templates/UpgradeConfiguration.yaml new file mode 100644 index 00000000..5e736a26 --- /dev/null +++ b/charts/kubeadm/templates/UpgradeConfiguration.yaml @@ -0,0 +1,16 @@ +apiVersion: kubeadm.k8s.io/v1beta4 +kind: UpgradeConfiguration +apply: + forceUpgrade: true + {{- with .Values.patches }} + patches: + directory: {{ . 
}} + {{- end }} + ignorePreflightErrors: + - DirAvailable--var-lib-etcd + - DirAvailable--etc-kubernetes-manifests + - FileAvailable--etc-kubernetes-pki-ca.crt + - FileAvailable--etc-kubernetes-manifests-etcd.yaml + - Swap + - KubeletVersion + skipPhases: [] diff --git a/charts/kubeadm/templates/admin-aws-iam.yaml b/charts/kubeadm/templates/admin-aws-iam.yaml deleted file mode 100644 index fe316866..00000000 --- a/charts/kubeadm/templates/admin-aws-iam.yaml +++ /dev/null @@ -1,27 +0,0 @@ -{{- if .Values.api.awsIamAuth.enabled }} -apiVersion: v1 -kind: Config -clusters: -- cluster: - server: https://{{ .Values.api.endpoint }} - name: {{ .Values.global.clusterName }} -contexts: -- context: - cluster: {{ .Values.global.clusterName }} - user: kubernetes-admin - name: kubernetes-admin@{{ .Values.global.clusterName }} -current-context: kubernetes-admin@{{ .Values.global.clusterName }} -preferences: {} -users: -- name: kubernetes-admin - user: - exec: - apiVersion: client.authentication.k8s.io/v1beta1 - command: aws-iam-authenticator - args: - - "token" - - "-i" - - "{{ .Values.global.clusterName }}" - - "-r" - - "{{ .Values.api.awsIamAuth.kubeAdminRole }}" -{{- end }} diff --git a/charts/kubeadm/templates/apiserver/authz-config.yaml b/charts/kubeadm/templates/apiserver/authz-config.yaml new file mode 100644 index 00000000..7b625227 --- /dev/null +++ b/charts/kubeadm/templates/apiserver/authz-config.yaml @@ -0,0 +1,32 @@ +apiVersion: apiserver.config.k8s.io/v1beta1 +kind: AuthorizationConfiguration +authorizers: + - type: Node + name: node + - type: RBAC + name: rbac +# - type: Webhook +# name: Example +# webhook: +# authorizedTTL: 300s +# unauthorizedTTL: 30s +# timeout: 3s +# subjectAccessReviewVersion: v1 +# matchConditionSubjectAccessReviewVersion: v1 +# failurePolicy: NoOpinion +# connectionInfo: +# type: KubeConfigFile +# kubeConfigFile: /etc/kubernetes/apiserver/example.yaml +# matchConditions: +# # only send resource requests to the webhook +# - expression: has(request.resourceAttributes) +# # Don't intercept requests from kube-system service accounts +# - expression: "!('system:serviceaccounts:kube-system' in request.groups)" +# ## Below expressions avoid issues with kubeadm init and other system components that should be authorized by Node and RBAC +# # Don't process node and bootstrap token requests with the webhook +# - expression: "!('system:nodes' in request.groups)" +# - expression: "!('system:bootstrappers' in request.groups)" +# - expression: "!('system:bootstrappers:kubeadm:default-node-token' in request.groups)" +# # Don't process kubeadm requests with the webhook +# - expression: "!('kubeadm:cluster-admins' in request.groups)" +# - expression: "!('system:masters' in request.groups)" diff --git a/charts/kubeadm/templates/apiserver/aws-iam-authenticator.yaml b/charts/kubeadm/templates/apiserver/aws-iam-authenticator.yaml deleted file mode 100644 index 1a6818a2..00000000 --- a/charts/kubeadm/templates/apiserver/aws-iam-authenticator.yaml +++ /dev/null @@ -1,19 +0,0 @@ -{{- if .Values.api.awsIamAuth.enabled }} -# clusters refers to the remote service. -clusters: - - name: aws-iam-authenticator - cluster: - certificate-authority-data: "replaced at runtime" - server: https://localhost:21362/authenticate -# users refers to the API Server's webhook configuration -# (we don't need to authenticate the API server). -users: - - name: apiserver -# kubeconfig files require a context. Provide one for the API Server. 
-current-context: webhook -contexts: -- name: webhook - context: - cluster: aws-iam-authenticator - user: apiserver -{{- end }} diff --git a/charts/kubeadm/templates/resources/50-aws-iam-authenticator-crds.yaml b/charts/kubeadm/templates/resources/50-aws-iam-authenticator-crds.yaml deleted file mode 100644 index be771953..00000000 --- a/charts/kubeadm/templates/resources/50-aws-iam-authenticator-crds.yaml +++ /dev/null @@ -1,46 +0,0 @@ -{{- if .Values.api.awsIamAuth.enabled }} -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - name: iamidentitymappings.iamauthenticator.k8s.aws -spec: - group: iamauthenticator.k8s.aws - scope: Cluster - names: - plural: iamidentitymappings - singular: iamidentitymapping - kind: IAMIdentityMapping - categories: - - all - versions: - - name: v1alpha1 - served: true - storage: true - schema: - openAPIV3Schema: - type: object - properties: - spec: - type: object - required: - - arn - - username - properties: - arn: - type: string - username: - type: string - groups: - type: array - items: - type: string - status: - type: object - properties: - canonicalARN: - type: string - userID: - type: string - subresources: - status: {} -{{- end }} diff --git a/charts/kubeadm/templates/resources/51-aws-iam-authenticator-deployment.yaml b/charts/kubeadm/templates/resources/51-aws-iam-authenticator-deployment.yaml deleted file mode 100644 index a8f3c0c2..00000000 --- a/charts/kubeadm/templates/resources/51-aws-iam-authenticator-deployment.yaml +++ /dev/null @@ -1,155 +0,0 @@ -{{- if .Values.api.awsIamAuth.enabled }} -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: aws-iam-authenticator -rules: -- apiGroups: - - iamauthenticator.k8s.aws - resources: - - iamidentitymappings - verbs: - - get - - list - - watch -- apiGroups: - - iamauthenticator.k8s.aws - resources: - - iamidentitymappings/status - verbs: - - patch - - update -- apiGroups: - - "" - resources: - - events - verbs: - - create - - update - - patch -- apiGroups: - - "" - resources: - - configmaps - verbs: - - list - - watch -- apiGroups: - - "" - resources: - - configmaps - resourceNames: - - aws-auth - verbs: - - get - ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: aws-iam-authenticator - namespace: kube-system - ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: aws-iam-authenticator - namespace: kube-system -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: aws-iam-authenticator -subjects: -- kind: ServiceAccount - name: aws-iam-authenticator - namespace: kube-system - ---- -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: kube-system - name: aws-iam-authenticator - labels: - k8s-app: aws-iam-authenticator -data: - config.yaml: | - clusterID: {{ .Values.global.clusterName }} - ---- -apiVersion: apps/v1 -kind: DaemonSet -metadata: - namespace: kube-system - name: aws-iam-authenticator - labels: - k8s-app: aws-iam-authenticator -spec: - selector: - matchLabels: - k8s-app: aws-iam-authenticator - updateStrategy: - type: RollingUpdate - template: - metadata: - labels: - k8s-app: aws-iam-authenticator - spec: - securityContext: - seccompProfile: - type: RuntimeDefault - - priorityClassName: system-cluster-critical - - # use service account with access to - serviceAccountName: aws-iam-authenticator - - # run on the host network (don't depend on CNI) - hostNetwork: true - - # run on each controller - nodeSelector: - node-role.kubernetes.io/control-plane: "" - tolerations: - 
- effect: NoSchedule - key: node-role.kubernetes.io/control-plane - - containers: - - name: aws-iam-authenticator - image: public.ecr.aws/zero-downtime/aws-iam-authenticator:v0.6.27 - args: - - server - - --backend-mode=CRD,MountedFile - - --config=/etc/aws-iam-authenticator/config.yaml - - --state-dir=/var/aws-iam-authenticator - - --kubeconfig-pregenerated=true - - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - - resources: - requests: - memory: 32Mi - cpu: 10m - limits: - memory: 64Mi - #cpu: 100m - - volumeMounts: - - name: config - mountPath: /etc/aws-iam-authenticator/ - - name: state - mountPath: /var/aws-iam-authenticator/ - - volumes: - - name: config - configMap: - name: aws-iam-authenticator - - name: state - secret: - secretName: aws-iam-certs -{{- end }} diff --git a/charts/kubeadm/templates/resources/52-aws-iam-authenticator-mappings.yaml b/charts/kubeadm/templates/resources/52-aws-iam-authenticator-mappings.yaml deleted file mode 100644 index 31ef6e35..00000000 --- a/charts/kubeadm/templates/resources/52-aws-iam-authenticator-mappings.yaml +++ /dev/null @@ -1,23 +0,0 @@ -{{- if .Values.api.awsIamAuth.enabled }} -apiVersion: iamauthenticator.k8s.aws/v1alpha1 -kind: IAMIdentityMapping -metadata: - name: kubezero-worker-nodes -spec: - arn: {{ .Values.api.awsIamAuth.workerNodeRole }} - username: system:node:{{ "{{" }}EC2PrivateDNSName{{ "}}" }} - groups: - - system:bootstrappers:kubeadm:default-node-token ---- - -# Admin Role for remote access -apiVersion: iamauthenticator.k8s.aws/v1alpha1 -kind: IAMIdentityMapping -metadata: - name: kubernetes-admin -spec: - arn: {{ .Values.api.awsIamAuth.kubeAdminRole }} - username: kubernetes-admin - groups: - - system:masters -{{- end }} diff --git a/charts/kubeadm/values.yaml b/charts/kubeadm/values.yaml index 670d2f5a..5ac977dd 100644 --- a/charts/kubeadm/values.yaml +++ b/charts/kubeadm/values.yaml @@ -20,10 +20,7 @@ api: oidcEndpoint: "" apiAudiences: "istio-ca" - awsIamAuth: - enabled: false - workerNodeRole: "arn:aws:iam::000000000000:role/KubernetesNode" - kubeAdminRole: "arn:aws:iam::000000000000:role/KubernetesNode" + awsIamAuth: false falco: enabled: false diff --git a/charts/kubezero-addons/Chart.yaml b/charts/kubezero-addons/Chart.yaml index 42d6cab3..4a585b96 100644 --- a/charts/kubezero-addons/Chart.yaml +++ b/charts/kubezero-addons/Chart.yaml @@ -54,4 +54,4 @@ dependencies: version: 0.2.12 repository: https://caas-team.github.io/helm-charts/ condition: py-kube-downscaler.enabled -kubeVersion: ">= 1.26.0" +kubeVersion: ">= 1.30.0-0" diff --git a/charts/kubezero-addons/README.md b/charts/kubezero-addons/README.md index b6abbd16..cdead9e8 100644 --- a/charts/kubezero-addons/README.md +++ b/charts/kubezero-addons/README.md @@ -1,6 +1,6 @@ # kubezero-addons -![Version: 0.8.11](https://img.shields.io/badge/Version-0.8.11-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v1.30](https://img.shields.io/badge/AppVersion-v1.30-informational?style=flat-square) +![Version: 0.8.13](https://img.shields.io/badge/Version-0.8.13-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v1.30](https://img.shields.io/badge/AppVersion-v1.30-informational?style=flat-square) KubeZero umbrella chart for various optional cluster addons @@ -18,13 +18,13 @@ Kubernetes: `>= 1.26.0` | Repository | Name | Version | 
|------------|------|---------| -| https://bitnami-labs.github.io/sealed-secrets | sealed-secrets | 2.16.2 | -| https://caas-team.github.io/helm-charts/ | py-kube-downscaler | 0.2.11 | -| https://kubernetes-sigs.github.io/external-dns/ | external-dns | 1.15.0 | -| https://kubernetes.github.io/autoscaler | cluster-autoscaler | 9.43.2 | +| https://bitnami-labs.github.io/sealed-secrets | sealed-secrets | 2.17.1 | +| https://caas-team.github.io/helm-charts/ | py-kube-downscaler | 0.2.12 | +| https://kubernetes-sigs.github.io/external-dns/ | external-dns | 1.15.1 | +| https://kubernetes.github.io/autoscaler | cluster-autoscaler | 9.46.0 | | https://nvidia.github.io/k8s-device-plugin | nvidia-device-plugin | 0.17.0 | | https://twin.github.io/helm-charts | aws-eks-asg-rolling-update-handler | 1.5.0 | -| oci://public.ecr.aws/aws-ec2/helm | aws-node-termination-handler | 0.24.1 | +| oci://public.ecr.aws/aws-ec2/helm | aws-node-termination-handler | 0.26.0 | | oci://public.ecr.aws/neuron | neuron-helm-chart | 1.1.1 | # MetalLB diff --git a/charts/kubezero-addons/charts/aws-node-termination-handler/Chart.yaml b/charts/kubezero-addons/charts/aws-node-termination-handler/Chart.yaml index 52ac19fa..c14d15f5 100644 --- a/charts/kubezero-addons/charts/aws-node-termination-handler/Chart.yaml +++ b/charts/kubezero-addons/charts/aws-node-termination-handler/Chart.yaml @@ -1,5 +1,5 @@ apiVersion: v2 -appVersion: 1.22.1 +appVersion: 1.24.0 description: A Helm chart for the AWS Node Termination Handler. home: https://github.com/aws/aws-node-termination-handler/ icon: https://raw.githubusercontent.com/aws/eks-charts/master/docs/logo/aws.png @@ -21,4 +21,4 @@ name: aws-node-termination-handler sources: - https://github.com/aws/aws-node-termination-handler/ type: application -version: 0.24.1 +version: 0.26.0 diff --git a/charts/kubezero-addons/charts/aws-node-termination-handler/templates/daemonset.linux.yaml b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/daemonset.linux.yaml index 95e4b50f..be6385de 100644 --- a/charts/kubezero-addons/charts/aws-node-termination-handler/templates/daemonset.linux.yaml +++ b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/daemonset.linux.yaml @@ -143,6 +143,8 @@ spec: {{- end }} - name: ENABLE_SPOT_INTERRUPTION_DRAINING value: {{ .Values.enableSpotInterruptionDraining | quote }} + - name: ENABLE_ASG_LIFECYCLE_DRAINING + value: {{ .Values.enableASGLifecycleDraining | quote }} - name: ENABLE_SCHEDULED_EVENT_DRAINING value: {{ .Values.enableScheduledEventDraining | quote }} - name: ENABLE_REBALANCE_MONITORING diff --git a/charts/kubezero-addons/charts/aws-node-termination-handler/templates/daemonset.windows.yaml b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/daemonset.windows.yaml index 8a9db7bf..95af69d1 100644 --- a/charts/kubezero-addons/charts/aws-node-termination-handler/templates/daemonset.windows.yaml +++ b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/daemonset.windows.yaml @@ -143,6 +143,8 @@ spec: {{- end }} - name: ENABLE_SPOT_INTERRUPTION_DRAINING value: {{ .Values.enableSpotInterruptionDraining | quote }} + - name: ENABLE_ASG_LIFECYCLE_DRAINING + value: {{ .Values.enableASGLifecycleDraining | quote }} - name: ENABLE_SCHEDULED_EVENT_DRAINING value: {{ .Values.enableScheduledEventDraining | quote }} - name: ENABLE_REBALANCE_MONITORING diff --git a/charts/kubezero-addons/charts/aws-node-termination-handler/templates/deployment.yaml 
b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/deployment.yaml index d85b08a1..c0e2d285 100644 --- a/charts/kubezero-addons/charts/aws-node-termination-handler/templates/deployment.yaml +++ b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/deployment.yaml @@ -168,6 +168,10 @@ spec: value: {{ .Values.deleteSqsMsgIfNodeNotFound | quote }} - name: WORKERS value: {{ .Values.workers | quote }} + - name: HEARTBEAT_INTERVAL + value: {{ .Values.heartbeatInterval | quote }} + - name: HEARTBEAT_UNTIL + value: {{ .Values.heartbeatUntil | quote }} {{- with .Values.extraEnv }} {{- toYaml . | nindent 12 }} {{- end }} diff --git a/charts/kubezero-addons/charts/aws-node-termination-handler/values.yaml b/charts/kubezero-addons/charts/aws-node-termination-handler/values.yaml index 2d6c84f2..f6c7bf42 100644 --- a/charts/kubezero-addons/charts/aws-node-termination-handler/values.yaml +++ b/charts/kubezero-addons/charts/aws-node-termination-handler/values.yaml @@ -270,6 +270,9 @@ metadataTries: 3 # enableSpotInterruptionDraining If false, do not drain nodes when the spot interruption termination notice is received. Only used in IMDS mode. enableSpotInterruptionDraining: true +# enableASGLifecycleDraining If false, do not drain nodes when ASG target lifecycle state Terminated is received. Only used in IMDS mode. +enableASGLifecycleDraining: true + # enableScheduledEventDraining If false, do not drain nodes before the maintenance window starts for an EC2 instance scheduled event. Only used in IMDS mode. enableScheduledEventDraining: true diff --git a/charts/kubezero-addons/values.yaml b/charts/kubezero-addons/values.yaml index fd494e25..337ead08 100644 --- a/charts/kubezero-addons/values.yaml +++ b/charts/kubezero-addons/values.yaml @@ -134,7 +134,7 @@ aws-node-termination-handler: value: "regional" enablePrometheusServer: false - podMonitor: + serviceMonitor: create: false jsonLogging: true @@ -146,9 +146,6 @@ aws-node-termination-handler: nodeSelector: node-role.kubernetes.io/control-plane: "" - rbac: - pspEnabled: false - fuseDevicePlugin: enabled: false image: @@ -206,7 +203,7 @@ cluster-autoscaler: image: repository: registry.k8s.io/autoscaling/cluster-autoscaler - tag: v1.30.2 + tag: v1.31.1 autoDiscovery: clusterName: "" diff --git a/charts/kubezero-ci/values.yaml b/charts/kubezero-ci/values.yaml index 485cc7d8..f2c447b0 100644 --- a/charts/kubezero-ci/values.yaml +++ b/charts/kubezero-ci/values.yaml @@ -192,6 +192,8 @@ jenkins: annotations: container.apparmor.security.beta.kubernetes.io/jnlp: "unconfined" cluster-autoscaler.kubernetes.io/safe-to-evict: "false" + garbageCollection: + enabled: true customJenkinsLabels: - podman-aws-trivy idleMinutes: 30 diff --git a/charts/kubezero-graph/templates/neo4j/secrets.yaml b/charts/kubezero-graph/templates/neo4j/secrets.yaml new file mode 100644 index 00000000..d6fd5b98 --- /dev/null +++ b/charts/kubezero-graph/templates/neo4j/secrets.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: Secret +metadata: + name: neo4j-admin + labels: + {{- include "kubezero-lib.labels" . 
| nindent 4 }} +type: Opaque +data: + NEO4J_AUTH: {{.Values.neo4j.neo4j.password | printf "neo4j/%v" | b64enc -}} diff --git a/charts/kubezero-graph/values.yaml b/charts/kubezero-graph/values.yaml index d3bacc89..c15c4441 100644 --- a/charts/kubezero-graph/values.yaml +++ b/charts/kubezero-graph/values.yaml @@ -6,6 +6,9 @@ neo4j: neo4j: name: test-db + password: secret + passwordFromSecret: "neo4j-admin" + volumes: data: mode: defaultStorageClass diff --git a/charts/kubezero-istio-gateway/templates/bootstrap-config.yaml b/charts/kubezero-istio-gateway/templates/bootstrap-config.yaml index 9e3c88f9..28ecf1b9 100644 --- a/charts/kubezero-istio-gateway/templates/bootstrap-config.yaml +++ b/charts/kubezero-istio-gateway/templates/bootstrap-config.yaml @@ -51,7 +51,7 @@ data: { "name": "static_layer_0", "staticLayer": { - "overload.global_downstream_max_connections": 50000 + "envoy.resource_monitors.downstream_connections": 50000 } } ] diff --git a/charts/kubezero-logging/charts/fluentd/templates/tests/test-connection.yaml b/charts/kubezero-logging/charts/fluentd/templates/tests/test-connection.yaml index b6b3befa..2b07ec4f 100644 --- a/charts/kubezero-logging/charts/fluentd/templates/tests/test-connection.yaml +++ b/charts/kubezero-logging/charts/fluentd/templates/tests/test-connection.yaml @@ -4,7 +4,6 @@ fluentd is deployed with the default values If the fluentd config is overriden and the metrics server removed this will fail. */}} -{{- if .Values.testFramework.enabled }} {{ if empty .Values.service.ports }} apiVersion: v1 kind: Pod @@ -27,5 +26,4 @@ spec: while :; do nc -vz {{ include "fluentd.fullname" . }}:24231 && break; sleep 1; done wget '{{ include "fluentd.fullname" . }}:24231/metrics' restartPolicy: Never -{{ end }} -{{- end }} +{{ end }} \ No newline at end of file diff --git a/charts/kubezero-logging/charts/fluentd/values.yaml b/charts/kubezero-logging/charts/fluentd/values.yaml index c41c381f..ac05f8b2 100644 --- a/charts/kubezero-logging/charts/fluentd/values.yaml +++ b/charts/kubezero-logging/charts/fluentd/values.yaml @@ -13,9 +13,6 @@ image: pullPolicy: "IfNotPresent" tag: "" -testFramework: - enabled: false - ## Optional array of imagePullSecrets containing private registry credentials ## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ imagePullSecrets: [] diff --git a/charts/kubezero-logging/fluentd.patch b/charts/kubezero-logging/fluentd.patch index f6e11db5..f880f5f8 100644 --- a/charts/kubezero-logging/fluentd.patch +++ b/charts/kubezero-logging/fluentd.patch @@ -9,36 +9,3 @@ diff -rtuN charts/fluentd.orig/templates/fluentd-configurations-cm.yaml charts/f + {{- (tpl $value $) | nindent 4 }} {{- end }} {{- end }} - -diff -rtuN charts/fluentd.orig/templates/tests/test-connection.yaml charts/fluentd/templates/tests/test-connection.yaml ---- charts/fluentd.orig/templates/tests/test-connection.yaml 2024-04-08 11:00:03.030515998 +0000 -+++ charts/fluentd/templates/tests/test-connection.yaml 2024-04-08 11:03:16.254774985 +0000 -@@ -4,6 +4,7 @@ - If the fluentd config is overriden and the metrics server removed - this will fail. - */}} -+{{- if .Values.testFramework.enabled }} - {{ if empty .Values.service.ports }} - apiVersion: v1 - kind: Pod -@@ -26,4 +27,5 @@ - while :; do nc -vz {{ include "fluentd.fullname" . }}:24231 && break; sleep 1; done - wget '{{ include "fluentd.fullname" . 
}}:24231/metrics' - restartPolicy: Never --{{ end }} -\ No newline at end of file -+{{ end }} -+{{- end }} -diff -rtuN charts/fluentd.orig/values.yaml charts/fluentd/values.yaml ---- charts/fluentd.orig/values.yaml 2024-04-08 11:00:03.030515998 +0000 -+++ charts/fluentd/values.yaml 2024-04-08 11:00:03.040516045 +0000 -@@ -13,6 +13,9 @@ - pullPolicy: "IfNotPresent" - tag: "" - -+testFramework: -+ enabled: false -+ - ## Optional array of imagePullSecrets containing private registry credentials - ## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ - imagePullSecrets: [] diff --git a/charts/kubezero-network/Chart.yaml b/charts/kubezero-network/Chart.yaml index 3bb7f94c..45e77251 100644 --- a/charts/kubezero-network/Chart.yaml +++ b/charts/kubezero-network/Chart.yaml @@ -30,4 +30,4 @@ dependencies: version: 1.23.0 repository: https://haproxytech.github.io/helm-charts condition: haproxy.enabled -kubeVersion: ">= 1.26.0" +kubeVersion: ">= 1.29.0-0" diff --git a/charts/kubezero-network/README.md b/charts/kubezero-network/README.md index 77bac577..852f2e7d 100644 --- a/charts/kubezero-network/README.md +++ b/charts/kubezero-network/README.md @@ -1,6 +1,6 @@ # kubezero-network -![Version: 0.5.6](https://img.shields.io/badge/Version-0.5.6-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) +![Version: 0.5.7](https://img.shields.io/badge/Version-0.5.7-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) KubeZero umbrella chart for all things network @@ -14,13 +14,13 @@ KubeZero umbrella chart for all things network ## Requirements -Kubernetes: `>= 1.26.0` +Kubernetes: `>= 1.29.0-0` | Repository | Name | Version | |------------|------|---------| | https://cdn.zero-downtime.net/charts/ | kubezero-lib | >= 0.1.6 | | https://haproxytech.github.io/helm-charts | haproxy | 1.23.0 | -| https://helm.cilium.io/ | cilium | 1.16.5 | +| https://helm.cilium.io/ | cilium | 1.16.6 | | https://metallb.github.io/metallb | metallb | 0.14.9 | ## Values diff --git a/charts/kubezero-network/templates/multus/daemonset.yaml b/charts/kubezero-network/templates/multus/daemonset.yaml index f1313ae5..989a55db 100644 --- a/charts/kubezero-network/templates/multus/daemonset.yaml +++ b/charts/kubezero-network/templates/multus/daemonset.yaml @@ -30,7 +30,8 @@ spec: image: {{ .Values.multus.image.repository }}:{{ .Values.multus.image.tag }} # Always used cached images imagePullPolicy: {{ .Values.multus.image.pullPolicy }} - command: ["/entrypoint.sh"] + #command: ["/entrypoint.sh"] + command: ["/thin_entrypoint"] args: - "--multus-conf-file=/tmp/multus-conf/00-multus.conf" - "--rename-conf-file=false" @@ -39,10 +40,10 @@ spec: resources: requests: cpu: "10m" - # memory: "64Mi" - # limits: + memory: "32Mi" + limits: # cpu: "100m" - # memory: "256Mi" + memory: "64Mi" securityContext: privileged: true capabilities: diff --git a/charts/kubezero-telemetry/README.md b/charts/kubezero-telemetry/README.md index d6e7dd06..655a802c 100644 --- a/charts/kubezero-telemetry/README.md +++ b/charts/kubezero-telemetry/README.md @@ -29,8 +29,10 @@ Kubernetes: `>= 1.26.0` | Key | Type | Default | Description | |-----|------|---------|-------------| +| data-prepper.config."data-prepper-config.yaml" | string | `"ssl: false\npeer_forwarder:\n ssl: false\n"` | | | data-prepper.config."log4j2-rolling.properties" | string | `"status = error\ndest = err\nname 
= PropertiesConfig\n\nappender.console.type = Console\nappender.console.name = STDOUT\nappender.console.layout.type = PatternLayout\nappender.console.layout.pattern = %d{ISO8601} [%t] %-5p %40C - %m%n\n\nrootLogger.level = warn\nrootLogger.appenderRef.stdout.ref = STDOUT\n\nlogger.pipeline.name = org.opensearch.dataprepper.pipeline\nlogger.pipeline.level = info\n\nlogger.parser.name = org.opensearch.dataprepper.parser\nlogger.parser.level = info\n\nlogger.plugins.name = org.opensearch.dataprepper.plugins\nlogger.plugins.level = info\n"` | | | data-prepper.enabled | bool | `false` | | +| data-prepper.image.tag | string | `"2.10.1"` | | | data-prepper.pipelineConfig.config.otel-service-map-pipeline.buffer.bounded_blocking | string | `nil` | | | data-prepper.pipelineConfig.config.otel-service-map-pipeline.delay | int | `3000` | | | data-prepper.pipelineConfig.config.otel-service-map-pipeline.processor[0].service_map.window_duration | int | `180` | | @@ -72,7 +74,7 @@ Kubernetes: `>= 1.26.0` | fluent-bit.config.inputs | string | `"[INPUT]\n Name tail\n Path /var/log/containers/*.log\n # Exclude ourselves to current error spam, https://github.com/fluent/fluent-bit/issues/5769\n # Todo: Rather limit / filter spam message than exclude all together -> ideally locally, next dataprepper\n Exclude_Path *logging-fluent-bit*\n multiline.parser cri\n Tag cri.*\n Skip_Long_Lines On\n Skip_Empty_Lines On\n DB /var/log/flb_kube.db\n DB.Sync Normal\n DB.locking true\n # Buffer_Max_Size 1M\n {{- with .Values.config.input }}\n Mem_Buf_Limit {{ .memBufLimit }}\n Refresh_Interval {{ .refreshInterval }}\n {{- end }}\n\n[INPUT]\n Name opentelemetry\n Tag otel\n"` | | | fluent-bit.config.logLevel | string | `"info"` | | | fluent-bit.config.output.host | string | `"telemetry-fluentd"` | | -| fluent-bit.config.output.sharedKey | string | `"secretref+k8s://v1/Secret/kube-system/kubezero-secrets/telemetry.fluentd.source.sharedKey"` | | +| fluent-bit.config.output.sharedKey | string | `"secretref+k8s://v1/Secret/kubezero/kubezero-secrets/telemetry.fluentd.source.sharedKey"` | | | fluent-bit.config.output.tls | bool | `false` | | | fluent-bit.config.output_otel.host | string | `"telemetry-opentelemetry-collector"` | | | fluent-bit.config.outputs | string | `"[OUTPUT]\n Match kube.*\n Name forward\n Host {{ .Values.config.output.host }}\n Port 24224\n Shared_Key {{ .Values.config.output.sharedKey }}\n tls {{ ternary \"on\" \"off\" .Values.config.output.tls }}\n Send_options true\n Require_ack_response true\n\n[OUTPUT]\n Name opentelemetry\n Match otel\n Host {{ .Values.config.output_otel.host }}\n Port 4318\n #Metrics_uri /v1/metrics\n Traces_uri /v1/traces\n #Logs_uri /v1/logs\n"` | | @@ -133,7 +135,7 @@ Kubernetes: `>= 1.26.0` | fluentd.service.ports[1].containerPort | int | `9880` | | | fluentd.service.ports[1].name | string | `"http-fluentd"` | | | fluentd.service.ports[1].protocol | string | `"TCP"` | | -| fluentd.source.sharedKey | string | `"secretref+k8s://v1/Secret/kube-system/kubezero-secrets/telemetry.fluentd.source.sharedKey"` | | +| fluentd.source.sharedKey | string | `"secretref+k8s://v1/Secret/kubezero/kubezero-secrets/telemetry.fluentd.source.sharedKey"` | | | fluentd.volumeMounts[0].mountPath | string | `"/run/pki"` | | | fluentd.volumeMounts[0].name | string | `"trust-store"` | | | fluentd.volumeMounts[0].readOnly | bool | `true` | | @@ -164,6 +166,7 @@ Kubernetes: `>= 1.26.0` | jaeger.storage.elasticsearch.scheme | string | `"https"` | | | jaeger.storage.elasticsearch.user | string | `"admin"` | | | 
jaeger.storage.type | string | `"elasticsearch"` | | +| metrics.enabled | bool | `false` | | | opensearch.dashboard.enabled | bool | `false` | | | opensearch.dashboard.istio.enabled | bool | `false` | | | opensearch.dashboard.istio.gateway | string | `"istio-ingress/private-ingressgateway"` | | @@ -179,9 +182,6 @@ Kubernetes: `>= 1.26.0` | opentelemetry-collector.config.receivers.otlp.protocols.grpc.endpoint | string | `"${env:MY_POD_IP}:4317"` | | | opentelemetry-collector.config.receivers.otlp.protocols.http.endpoint | string | `"${env:MY_POD_IP}:4318"` | | | opentelemetry-collector.config.service.extensions[0] | string | `"health_check"` | | -| opentelemetry-collector.config.service.extensions[1] | string | `"memory_ballast"` | | -| opentelemetry-collector.config.service.pipelines.logs | string | `nil` | | -| opentelemetry-collector.config.service.pipelines.metrics | string | `nil` | | | opentelemetry-collector.config.service.pipelines.traces.exporters[0] | string | `"otlp/jaeger"` | | | opentelemetry-collector.config.service.pipelines.traces.exporters[1] | string | `"otlp/data-prepper"` | | | opentelemetry-collector.config.service.pipelines.traces.processors[0] | string | `"memory_limiter"` | | diff --git a/charts/kubezero-telemetry/charts/fluentd/templates/tests/test-connection.yaml b/charts/kubezero-telemetry/charts/fluentd/templates/tests/test-connection.yaml index b6b3befa..2b07ec4f 100644 --- a/charts/kubezero-telemetry/charts/fluentd/templates/tests/test-connection.yaml +++ b/charts/kubezero-telemetry/charts/fluentd/templates/tests/test-connection.yaml @@ -4,7 +4,6 @@ fluentd is deployed with the default values If the fluentd config is overriden and the metrics server removed this will fail. */}} -{{- if .Values.testFramework.enabled }} {{ if empty .Values.service.ports }} apiVersion: v1 kind: Pod @@ -27,5 +26,4 @@ spec: while :; do nc -vz {{ include "fluentd.fullname" . }}:24231 && break; sleep 1; done wget '{{ include "fluentd.fullname" . }}:24231/metrics' restartPolicy: Never -{{ end }} -{{- end }} +{{ end }} \ No newline at end of file diff --git a/charts/kubezero-telemetry/charts/fluentd/values.yaml b/charts/kubezero-telemetry/charts/fluentd/values.yaml index c41c381f..ac05f8b2 100644 --- a/charts/kubezero-telemetry/charts/fluentd/values.yaml +++ b/charts/kubezero-telemetry/charts/fluentd/values.yaml @@ -13,9 +13,6 @@ image: pullPolicy: "IfNotPresent" tag: "" -testFramework: - enabled: false - ## Optional array of imagePullSecrets containing private registry credentials ## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ imagePullSecrets: [] diff --git a/charts/kubezero-telemetry/fluentd.patch b/charts/kubezero-telemetry/fluentd.patch index f6e11db5..f880f5f8 100644 --- a/charts/kubezero-telemetry/fluentd.patch +++ b/charts/kubezero-telemetry/fluentd.patch @@ -9,36 +9,3 @@ diff -rtuN charts/fluentd.orig/templates/fluentd-configurations-cm.yaml charts/f + {{- (tpl $value $) | nindent 4 }} {{- end }} {{- end }} - -diff -rtuN charts/fluentd.orig/templates/tests/test-connection.yaml charts/fluentd/templates/tests/test-connection.yaml ---- charts/fluentd.orig/templates/tests/test-connection.yaml 2024-04-08 11:00:03.030515998 +0000 -+++ charts/fluentd/templates/tests/test-connection.yaml 2024-04-08 11:03:16.254774985 +0000 -@@ -4,6 +4,7 @@ - If the fluentd config is overriden and the metrics server removed - this will fail. 
- */}} -+{{- if .Values.testFramework.enabled }} - {{ if empty .Values.service.ports }} - apiVersion: v1 - kind: Pod -@@ -26,4 +27,5 @@ - while :; do nc -vz {{ include "fluentd.fullname" . }}:24231 && break; sleep 1; done - wget '{{ include "fluentd.fullname" . }}:24231/metrics' - restartPolicy: Never --{{ end }} -\ No newline at end of file -+{{ end }} -+{{- end }} -diff -rtuN charts/fluentd.orig/values.yaml charts/fluentd/values.yaml ---- charts/fluentd.orig/values.yaml 2024-04-08 11:00:03.030515998 +0000 -+++ charts/fluentd/values.yaml 2024-04-08 11:00:03.040516045 +0000 -@@ -13,6 +13,9 @@ - pullPolicy: "IfNotPresent" - tag: "" - -+testFramework: -+ enabled: false -+ - ## Optional array of imagePullSecrets containing private registry credentials - ## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ - imagePullSecrets: [] diff --git a/charts/kubezero/Chart.yaml b/charts/kubezero/Chart.yaml index acd84034..904bf743 100644 --- a/charts/kubezero/Chart.yaml +++ b/charts/kubezero/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: kubezero description: KubeZero - Root App of Apps chart type: application -version: 1.31.4-alpha +version: 1.31.5-alpha home: https://kubezero.com icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png keywords: diff --git a/charts/kubezero/templates/_app.tpl b/charts/kubezero/templates/_app.tpl index ffa278b9..1d77a43e 100644 --- a/charts/kubezero/templates/_app.tpl +++ b/charts/kubezero/templates/_app.tpl @@ -25,6 +25,7 @@ spec: repoURL: {{ .Values.kubezero.repoURL }} targetRevision: {{ default .Values.kubezero.targetRevision ( index .Values $name "targetRevision" ) | quote }} helm: + skipTests: true valuesObject: {{- include (print $name "-values") $ | nindent 8 }} diff --git a/charts/kubezero/templates/addons.yaml b/charts/kubezero/templates/addons.yaml index 8573b86d..6f1d3e6e 100644 --- a/charts/kubezero/templates/addons.yaml +++ b/charts/kubezero/templates/addons.yaml @@ -135,8 +135,8 @@ py-kube-downscaler: {{- toYaml . | nindent 2 }} {{- end }} -# AWS only {{- if eq .Values.global.platform "aws" }} +# AWS only aws-node-termination-handler: enabled: {{ default "true" (index .Values "addons" "aws-node-termination-handler" "enabled") }} @@ -146,6 +146,8 @@ aws-node-termination-handler: {{- with .Values.metrics }} enablePrometheusServer: {{ .enabled }} + serviceMonitor: + create: true {{- end }} queueURL: "https://sqs.{{ .Values.global.aws.region }}.amazonaws.com/{{ .Values.global.aws.accountId }}/{{ .Values.global.clusterName }}_Nth" @@ -187,8 +189,8 @@ aws-eks-asg-rolling-update-handler: - name: AWS_STS_REGIONAL_ENDPOINTS value: "regional" -{{- with .Values.addons.awsNeuron }} -awsNeuron: +{{- with (index .Values "addons" "neuron-helm-chart") }} +neuron-helm-chart: {{- toYaml . 
| nindent 2 }} {{- end }} diff --git a/charts/kubezero/templates/istio-ingress.yaml b/charts/kubezero/templates/istio-ingress.yaml index 1c53bdb2..a4dfa6db 100644 --- a/charts/kubezero/templates/istio-ingress.yaml +++ b/charts/kubezero/templates/istio-ingress.yaml @@ -1,5 +1,9 @@ {{- define "istio-ingress-values" }} +{{- if eq .Values.global.platform "aws" }} +{{- end }} +{{- $ingressLabel := "node.kubernetes.io/ingress.public" }} + gateway: name: istio-ingressgateway diff --git a/charts/kubezero/values.yaml b/charts/kubezero/values.yaml index d1597e50..7e514016 100644 --- a/charts/kubezero/values.yaml +++ b/charts/kubezero/values.yaml @@ -17,7 +17,7 @@ global: addons: enabled: true - targetRevision: 0.8.11 + targetRevision: 0.8.13 external-dns: enabled: false forseti: @@ -36,7 +36,7 @@ addons: network: enabled: true retain: true - targetRevision: 0.5.6 + targetRevision: 0.5.7 cilium: cluster: {}