feat: add delete_module to admin, various cleanups
parent af5dc03e36
commit e5f77cc466
@@ -323,6 +323,21 @@ apply_module() {
}


delete_module() {
  MODULES=$1

  get_kubezero_values

  # Always use embedded kubezero chart
  helm template $CHARTS/kubezero -f $WORKDIR/kubezero-values.yaml --version ~$KUBE_VERSION --devel --output-dir $WORKDIR

  for t in $MODULES; do
    _helm delete $t
  done

  echo "Deleted KubeZero modules: $MODULES. Potential CRDs must be removed manually."
}

# backup etcd + /etc/kubernetes/pki
backup() {
  # Display all ENVs, careful this exposes the password !
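As the echo above notes, `delete_module` only removes the rendered resources; CRDs installed by a module are left behind. A minimal sketch of the manual cleanup, assuming the module's CRDs carry a recognizable API group (the `cert-manager.io` pattern below is only an illustration):

```bash
# List leftover CRDs for the deleted module and remove them by hand once you are
# sure nothing else still uses them. Adjust the grep pattern to your module.
kubectl get crd -o name | grep 'cert-manager.io' | xargs kubectl delete
```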
@@ -377,7 +392,8 @@ for t in $@; do
    bootstrap) control_plane_node bootstrap;;
    join) control_plane_node join;;
    restore) control_plane_node restore;;
    apply_*) apply_module ${t##apply_};;
    apply_*) apply_module "${t##apply_}";;
    delete_*) delete_module "${t##delete_}";;
    backup) backup;;
    debug_shell) debug_shell;;
    *) echo "Unknown command: '$t'";;
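With the new `delete_*` dispatch in place, modules can be removed the same way they are applied. A hedged usage sketch; the script path and module names are assumptions for illustration, not an exhaustive list:

```bash
# The dispatcher loops over all arguments, so multiple commands can be chained
# and are processed in order.
./kubezero.sh apply_network delete_metrics
```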
@@ -108,6 +108,7 @@ function _crds() {

  # Only apply if there are actually any crds
  if [ -s $WORKDIR/crds.yaml ]; then
    [ -n "$DEBUG" ] && cat $WORKDIR/crds.yaml
    kubectl apply -f $WORKDIR/crds.yaml --server-side --force-conflicts
  fi
}
@@ -115,7 +116,7 @@ function _crds() {

# helm template | kubectl apply -f -
# confine to one namespace if possible
function apply() {
function render() {
  helm template $(chart_location $chart) -n $namespace --name-template $module $targetRevision --skip-crds -f $WORKDIR/values.yaml $API_VERSIONS --kube-version $KUBE_VERSION $@ \
    | python3 -c '
#!/usr/bin/python3
@@ -128,8 +129,6 @@ for manifest in yaml.safe_load_all(sys.stdin):
      manifest["metadata"]["namespace"] = sys.argv[1]
    print("---")
    print(yaml.dump(manifest))' $namespace > $WORKDIR/helm.yaml

  kubectl $action -f $WORKDIR/helm.yaml --server-side --force-conflicts && rc=$? || rc=$?
}

@@ -164,13 +163,15 @@ function _helm() {
    # Optional pre hook
    declare -F ${module}-pre && ${module}-pre

    apply
    render
    kubectl $action -f $WORKDIR/helm.yaml --server-side --force-conflicts && rc=$? || rc=$?

    # Optional post hook
    declare -F ${module}-post && ${module}-post

  elif [ $action == "delete" ]; then
    apply
    render
    kubectl $action -f $WORKDIR/helm.yaml && rc=$? || rc=$?

    # Delete dedicated namespace if not kube-system
    [ -n "$DELETE_NS" ] && delete_ns $namespace

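The refactor separates rendering from applying: `render` only writes `$WORKDIR/helm.yaml`, and `_helm` decides what to do with it. A simplified sketch of the resulting flow, with the pre/post hooks and namespace handling from the diff omitted (the function name is hypothetical):

```bash
# Simplified restatement of the new _helm control flow after the apply/render split.
_helm_flow_sketch() {
  local action=$1

  if [ "$action" == "apply" ]; then
    render                                            # writes $WORKDIR/helm.yaml
    kubectl apply -f $WORKDIR/helm.yaml --server-side --force-conflicts
  elif [ "$action" == "delete" ]; then
    render                                            # render first so we know what to delete
    kubectl delete -f $WORKDIR/helm.yaml
  fi
}
```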
@@ -124,6 +124,10 @@ spec:
    node-role.kubernetes.io/control-plane: ""
  tolerations:
  - key: node-role.kubernetes.io/master
    operator: Exists
    effect: NoSchedule
  - key: node-role.kubernetes.io/control-plane
    operator: Exists
    effect: NoSchedule
  restartPolicy: Never
EOF

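The pod now tolerates both the legacy `node-role.kubernetes.io/master` taint and the newer `node-role.kubernetes.io/control-plane` taint, so it can still schedule on controllers during the naming transition. To check which taints your controllers actually carry (a plain kubectl query, nothing KubeZero-specific):

```bash
# Show the taints on all control-plane nodes.
kubectl get nodes -l node-role.kubernetes.io/control-plane \
  -o jsonpath='{range .items[*]}{.metadata.name}{": "}{.spec.taints}{"\n"}{end}'
```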
@@ -26,16 +26,22 @@ Kubernetes: `>= 1.24.0`

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| cilium.bpf.hostLegacyRouting | bool | `true` | |
| cilium.cgroup.autoMount.enabled | bool | `false` | |
| cilium.cgroup.hostRoot | string | `"/sys/fs/cgroup"` | |
| cilium.cluster.id | int | `240` | |
| cilium.cluster.name | string | `"default"` | |
| cilium.cni.binPath | string | `"/usr/libexec/cni"` | |
| cilium.cni.exclusive | bool | `false` | |
| cilium.cni.logFile | string | `"/var/log/cilium-cni.log"` | |
| cilium.containerRuntime.integration | string | `"crio"` | |
| cilium.enabled | bool | `false` | |
| cilium.hubble.enabled | bool | `false` | |
| cilium.hubble.relay.enabled | bool | `false` | |
| cilium.hubble.tls.auto.certManagerIssuerRef.group | string | `"cert-manager.io"` | |
| cilium.hubble.tls.auto.certManagerIssuerRef.kind | string | `"ClusterIssuer"` | |
| cilium.hubble.tls.auto.certManagerIssuerRef.name | string | `"kubezero-local-ca-issuer"` | |
| cilium.hubble.tls.auto.method | string | `"cert-manager"` | |
| cilium.hubble.ui.enabled | bool | `false` | |
| cilium.ipam.operator.clusterPoolIPv4PodCIDRList[0] | string | `"10.240.0.0/16"` | |
| cilium.l7Proxy | bool | `false` | |
| cilium.operator.nodeSelector."node-role.kubernetes.io/control-plane" | string | `""` | |
@@ -58,4 +64,5 @@ Kubernetes: `>= 1.24.0`
| multus.clusterNetwork | string | `"cilium"` | |
| multus.defaultNetworks | list | `[]` | |
| multus.enabled | bool | `false` | |
| multus.readinessindicatorfile | string | `"/etc/cni/net.d/05-cilium.conf"` | |
| multus.tag | string | `"v3.9.2"` | |

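The table above is the reference for the chart's tunables; in practice they are set through the cluster's KubeZero values file rather than passed directly. For a quick local check of what a given override produces, something like the following works (chart path and override values are illustrative):

```bash
# Render the kubezero-network chart locally with a couple of overrides to inspect
# the resulting manifests; nothing is applied to the cluster.
helm template network charts/kubezero-network \
  --namespace kube-system \
  --set cilium.enabled=true \
  --set multus.enabled=true \
  > /tmp/network-rendered.yaml
```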
@@ -40,8 +40,8 @@ cilium:
    #-- Ensure this is false if multus is enabled
    exclusive: false

  bpf:
    hostLegacyRouting: true
  #bpf:
  #  hostLegacyRouting: true
  #  tproxy: false

  cluster:
@@ -57,10 +57,10 @@ cilium:
      - 10.240.0.0/16

  # Keep it simple for now
  # nodePort:
  #   enabled: true
  l7Proxy: false

  #rollOutCiliumPods: true

  cgroup:
    autoMount:
      enabled: false
@@ -84,3 +84,14 @@ cilium:

  hubble:
    enabled: false
    relay:
      enabled: false
    ui:
      enabled: false
    tls:
      auto:
        method: cert-manager
        certManagerIssuerRef:
          group: cert-manager.io
          kind: ClusterIssuer
          name: kubezero-local-ca-issuer

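Hubble TLS is wired to cert-manager via the `kubezero-local-ca-issuer` ClusterIssuer, so that issuer has to exist before `hubble.enabled` is flipped on. A quick pre-flight check (assumes cert-manager and its CRDs are already installed in the cluster):

```bash
# Fails if the issuer referenced by cilium.hubble.tls.auto.certManagerIssuerRef is missing.
kubectl get clusterissuer kubezero-local-ca-issuer
```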
@@ -72,7 +72,7 @@ Kubernetes: `>= 1.24.0`
| storage.aws-ebs-csi-driver.enabled | bool | `false` | |
| storage.aws-efs-csi-driver.enabled | bool | `false` | |
| storage.enabled | bool | `false` | |
| storage.targetRevision | string | `"0.7.3"` | |
| storage.targetRevision | string | `"0.7.4"` | |

----------------------------------------------
Autogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0)

@@ -36,7 +36,7 @@ cert-manager:

storage:
  enabled: false
  targetRevision: 0.7.3
  targetRevision: 0.7.4
  aws-ebs-csi-driver:
    enabled: false
  aws-efs-csi-driver:

@@ -2,47 +2,41 @@

## TODO

### FeatureGates
- PodAndContainerStatsFromCRI
- DelegateFSGroupToCSIDriver

## What's new - Major themes

- Cilium added as second CNI to prepare full migration to Cilium with 1.24 upgrade
- support for Nvidia g5 instances incl. pre-installed kernel drivers, CUDA toolchain and CRI integration
- updated inf1 neuron drivers
- ExtendedResourceToleration AdmissionController and auto-taints allowing Neuron and Nvidia pods ONLY to be scheduled on dedicated workers
- full Cluster-Autoscaler integration
- Cilium is now the default CNI, Calico got removed
- cluster-autoscaler is enabled by default on AWS

## Version upgrades
- Istio to 1.14.4
- Logging: ECK operator to 2.4, fluent-bit 1.9.8
- Metrics: Prometheus and all Grafana charts to latest to match V1.23
- ArgoCD to V2.4 (access to pod via shell disabled by default)
- AWS EBS/EFS CSI drivers to latest versions
- cert-manager to V1.9.1
- cilium
- metallb
- nvidia-device-plugin
- aws-node-termination-handler
- aws-ebs-csi-driver
- aws-efs-csi-driver

### FeatureGates
- PodAndContainerStatsFromCRI
- DelegateFSGroupToCSIDriver

# Upgrade
`(No, really, you MUST read this before you upgrade)`

- Ensure your Kube context points to the correct cluster!

1. Enable `containerProxy` for NAT instances and upgrade NAT instances using the new V2 Pulumi stacks
1. Review CFN config for controllers and workers (enable containerProxy, remove legacy version settings, etc.)

2. Review CFN config for controllers and workers (enable containerProxy, remove legacy version settings, etc.)
2. Upgrade CFN stacks for the control plane and all worker groups

3. Upgrade CFN stacks for the control plane and all worker groups

4. Trigger fully-automated cluster upgrade:
3. Trigger fully-automated cluster upgrade:
   `./admin/upgrade_cluster.sh <path to the argocd app kubezero yaml for THIS cluster>`

5. Reboot controller(s) one by one
4. Reboot controller(s) one by one
   Wait each time for the controller to join and for all pods to be running.
   This might take a while ...

6. Launch a new set of workers, e.g. by doubling `desired` for each worker ASG
5. Launch a new set of workers, e.g. by doubling `desired` for each worker ASG
   Once the new workers are ready, cordon and drain all old workers.
   The cluster-autoscaler will remove the old workers automatically after about 10 min!

7. If all looks good, commit the ArgoApp resource for KubeZero, before re-enabling ArgoCD itself.
6. If all looks good, commit the ArgoApp resource for KubeZero, before re-enabling ArgoCD itself.
   git add / commit / push `<cluster/env/kubezero/application.yaml>`
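For reference, the command sequence behind steps 3 and 6 roughly looks as follows; the application path is a placeholder for your own cluster's file, exactly as in the docs above:

```bash
# Step 3: fully-automated cluster upgrade (path is a placeholder for THIS cluster's ArgoCD app).
./admin/upgrade_cluster.sh clusters/example/kubezero/application.yaml

# Step 6: once everything looks good, commit the ArgoApp resource and push,
# then re-enable ArgoCD.
git add clusters/example/kubezero/application.yaml
git commit -m "Upgrade KubeZero cluster"
git push
```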