Fix storage tests, add taint handling to CA, first docs draft

2023-04-26 10:26:30 +00:00 · 2023-04-26 10:26:30 +00:00 · 2cd33786ea
commit 2cd33786ea
parent 823aa70e35
6 changed files with 66 additions and 4 deletions
--- a/3
+++ b/3
@ -3,7 +3,8 @@ IMAGE := kubezero-admin
 REGION := us-east-1

 # Also tag as Kubernetes major version
-EXTRA_TAGS = $(shell echo $(TAG) | awk -F '.' '{ print $$1 "." $$2 }')
+MY_TAG = $(shell git describe --tags --match v*.*.* 2>/dev/null || git rev-parse --short HEAD 2>/dev/null)
+EXTRA_TAGS = $(shell echo $(MY_TAG) | awk -F '.' '{ print $$1 "." $$2 }')

 include .ci/podman.mk

--- a/charts/kubezero-addons/values.yaml
+++ b/charts/kubezero-addons/values.yaml
@ -209,6 +209,8 @@ cluster-autoscaler:
  extraArgs:
    scan-interval: 30s
    skip-nodes-with-local-storage: false
+    balance-similar-node-groups: true
+    ignore-taint: "node.cilium.io/agent-not-ready"

  #securityContext:
  #  runAsNonRoot: true
--- a/charts/kubezero-network/values.yaml
+++ b/charts/kubezero-network/values.yaml
@ -30,7 +30,7 @@ cilium:
  containerRuntime:
    integration: crio

-  # Until we figured out AppArmore on Alpine and Gentoo
+  # remove with 1.26
  securityContext:
    privileged: true

--- a/charts/kubezero-storage/Chart.yaml
+++ b/charts/kubezero-storage/Chart.yaml
@ -2,7 +2,7 @@ apiVersion: v2
 name: kubezero-storage
 description: KubeZero umbrella chart for all things storage incl. AWS EBS/EFS, openEBS-lvm, gemini
 type: application
-version: 0.8.1
+version: 0.8.2
 home: https://kubezero.com
 icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
 keywords:
--- a/charts/kubezero/values.yaml
+++ b/charts/kubezero/values.yaml
@ -40,7 +40,7 @@ cert-manager:

 storage:
  enabled: false
-  targetRevision: 0.8.1
+  targetRevision: 0.8.2
  aws-ebs-csi-driver:
    enabled: false
  aws-efs-csi-driver:
--- a/docs/v1.25.md
+++ b/docs/v1.25.md
@ -0,0 +1,59 @@
+# KubeZero 1.25
+
+## TODO
+
+## What's new - Major themes
+- AppArmor is now available and enabled on all controller and worker nodes
+- all KubeZero AMIs and containers updated to latest Alpine 3.17
+- improved worker initialization, ensure cilium is running before any other pod
+
+## Version upgrades
+- cilium 1.13
+- nvidia-device-plugin
+- aws-neuron-driver
+- aws-node-termination-handler
+- aws-ebs-csi-driver
+- aws-efs-csi-driver
+- istio 1.17
+- argocd 2.6.7
+- fluent-bit 2.0.10
+- fluentd 2.6
+- all things prometheus
+
+### FeatureGates
+- CustomCPUCFSQuotaPeriod
+- NodeOutOfServiceVolumeDetach
+
+# Upgrade
+`(No, really, you MUST read this before you upgrade)`
+
+Ensure your Kube context points to the correct cluster !
+
+1. Review CFN config for controller and workers, no mandatory changes during this release though
+
+2. Upgrade CFN stacks for the control plane *ONLY* !
+  Updating the workers CFN stacks would trigger rolling updates right away !
+
+3. Trigger cluster upgrade:  
+  `./admin/upgrade_cluster.sh <path to the argocd app kubezero yaml for THIS cluster>`
+
+4. Review the kubezero-config and, if all looks good, commit the ArgoApp resource for KubeZero via regular git  
+  git add / commit / push `<cluster/env/kubezero/application.yaml>`  
+  *DO NOT yet re-enable ArgoCD before all pre v1.25 workers have been replaced !!!*
+
+5. Reboot controller(s) one by one  
+Wait each time for the controller to join and for all pods to be running.
+Might take a while ...
+
+6. Upgrade CFN stacks for the workers.  
+  This in turn will trigger automated worker updates by evicting pods and launching new workers in a rolling fashion.
+  Grab a coffee and keep an eye on the cluster to be safe ...
+  Depending on your cluster size it might take a while to roll over all workers!
+
+7. Re-enable ArgoCD by hitting `<return>` on the still waiting upgrade script
+
+8. Quickly head over to ArgoCD and sync the KubeZero main module as soon as possible to reduce potential back and forth in case ArgoCD has legacy state
+
+
+## Known issues
+So far so good.