feat: add delete_module to admin, various cleanups
commit e5f77cc466
parent af5dc03e36
@@ -323,6 +323,21 @@ apply_module() {
 }
 
 
+delete_module() {
+  MODULES=$1
+
+  get_kubezero_values
+
+  # Always use embedded kubezero chart
+  helm template $CHARTS/kubezero -f $WORKDIR/kubezero-values.yaml --version ~$KUBE_VERSION --devel --output-dir $WORKDIR
+
+  for t in $MODULES; do
+    _helm delete $t
+  done
+
+  echo "Deleted KubeZero modules: $MODULES. Potential CRDs must be removed manually."
+}
+
 # backup etcd + /etc/kubernetes/pki
 backup() {
   # Display all ENVs, careful this exposes the password !
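The new `delete_module` intentionally leaves CustomResourceDefinitions behind, as its final message states, because deleting a CRD also deletes every custom resource built on it. A hedged sketch of how leftover CRDs could be inspected and removed afterwards (the grep filter and CRD name are hypothetical examples, not something this commit installs):

```bash
# List CRDs that appear to belong to a module you just deleted (hypothetical filter).
kubectl get crds -o name | grep -i cert-manager

# Remove a specific CRD only once you are sure no other workload still needs it.
kubectl delete crd certificates.cert-manager.io   # hypothetical CRD name
```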
@@ -377,7 +392,8 @@ for t in $@; do
     bootstrap) control_plane_node bootstrap;;
     join) control_plane_node join;;
     restore) control_plane_node restore;;
-    apply_*) apply_module ${t##apply_};;
+    apply_*) apply_module "${t##apply_}";;
+    delete_*) delete_module "${t##delete_}";;
     backup) backup;;
     debug_shell) debug_shell;;
     *) echo "Unknown command: '$t'";;
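The dispatcher treats its positional arguments as commands, so `apply_<module>` and now `delete_<module>` both route through the same prefix-stripping branch. A usage sketch, assuming the admin entry script is called `kubezero.sh` (the file name is not shown in this diff) and using hypothetical module names:

```bash
./kubezero.sh apply_network       # render and apply the "network" module
./kubezero.sh delete_logging      # render, then delete the "logging" module (CRDs stay behind)
./kubezero.sh backup debug_shell  # commands can be chained as positional arguments
```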
@@ -108,6 +108,7 @@ function _crds() {
 
   # Only apply if there are actually any crds
   if [ -s $WORKDIR/crds.yaml ]; then
+    [ -n "$DEBUG" ] && cat $WORKDIR/crds.yaml
     kubectl apply -f $WORKDIR/crds.yaml --server-side --force-conflicts
   fi
 }
@@ -115,7 +116,7 @@ function _crds() {
 
 # helm template | kubectl apply -f -
 # confine to one namespace if possible
-function apply() {
+function render() {
   helm template $(chart_location $chart) -n $namespace --name-template $module $targetRevision --skip-crds -f $WORKDIR/values.yaml $API_VERSIONS --kube-version $KUBE_VERSION $@ \
     | python3 -c '
 #!/usr/bin/python3
@@ -128,8 +129,6 @@ for manifest in yaml.safe_load_all(sys.stdin):
       manifest["metadata"]["namespace"] = sys.argv[1]
     print("---")
     print(yaml.dump(manifest))' $namespace > $WORKDIR/helm.yaml
 
-  kubectl $action -f $WORKDIR/helm.yaml --server-side --force-conflicts && rc=$? || rc=$?
-
 }
 
@@ -164,13 +163,15 @@ function _helm() {
     # Optional pre hook
     declare -F ${module}-pre && ${module}-pre
 
-    apply
+    render
+    kubectl $action -f $WORKDIR/helm.yaml --server-side --force-conflicts && rc=$? || rc=$?
 
     # Optional post hook
     declare -F ${module}-post && ${module}-post
 
   elif [ $action == "delete" ]; then
-    apply
+    render
+    kubectl $action -f $WORKDIR/helm.yaml && rc=$? || rc=$?
 
     # Delete dedicated namespace if not kube-system
     [ -n "$DELETE_NS" ] && delete_ns $namespace
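The former `apply()` both rendered and applied; splitting it into `render()` plus an explicit `kubectl` step lets the delete path reuse the same rendered `$WORKDIR/helm.yaml` without the server-side apply flags. A condensed sketch of the resulting flow inside `_helm` (not the verbatim function):

```bash
render                                           # writes the module manifests to $WORKDIR/helm.yaml

if [ "$action" == "apply" ]; then
  kubectl apply -f $WORKDIR/helm.yaml --server-side --force-conflicts && rc=$? || rc=$?
elif [ "$action" == "delete" ]; then
  kubectl delete -f $WORKDIR/helm.yaml && rc=$? || rc=$?
  [ -n "$DELETE_NS" ] && delete_ns $namespace    # drop the dedicated namespace unless it is kube-system
fi
```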
@@ -124,6 +124,10 @@ spec:
     node-role.kubernetes.io/control-plane: ""
   tolerations:
   - key: node-role.kubernetes.io/master
+    operator: Exists
+    effect: NoSchedule
+  - key: node-role.kubernetes.io/control-plane
+    operator: Exists
     effect: NoSchedule
   restartPolicy: Never
 EOF
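The pod spec now tolerates both the legacy `node-role.kubernetes.io/master` taint and the newer `node-role.kubernetes.io/control-plane` taint, so it still schedules onto controllers regardless of which taint a node carries during the 1.24 transition. A quick way to check what your controllers actually advertise:

```bash
# Print each control-plane node together with its taint keys.
kubectl get nodes -l node-role.kubernetes.io/control-plane \
  -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.taints[*].key}{"\n"}{end}'
```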
@@ -26,16 +26,22 @@ Kubernetes: `>= 1.24.0`
 
 | Key | Type | Default | Description |
 |-----|------|---------|-------------|
-| cilium.bpf.hostLegacyRouting | bool | `true` | |
 | cilium.cgroup.autoMount.enabled | bool | `false` | |
 | cilium.cgroup.hostRoot | string | `"/sys/fs/cgroup"` | |
 | cilium.cluster.id | int | `240` | |
 | cilium.cluster.name | string | `"default"` | |
 | cilium.cni.binPath | string | `"/usr/libexec/cni"` | |
+| cilium.cni.exclusive | bool | `false` | |
 | cilium.cni.logFile | string | `"/var/log/cilium-cni.log"` | |
 | cilium.containerRuntime.integration | string | `"crio"` | |
 | cilium.enabled | bool | `false` | |
 | cilium.hubble.enabled | bool | `false` | |
+| cilium.hubble.relay.enabled | bool | `false` | |
+| cilium.hubble.tls.auto.certManagerIssuerRef.group | string | `"cert-manager.io"` | |
+| cilium.hubble.tls.auto.certManagerIssuerRef.kind | string | `"ClusterIssuer"` | |
+| cilium.hubble.tls.auto.certManagerIssuerRef.name | string | `"kubezero-local-ca-issuer"` | |
+| cilium.hubble.tls.auto.method | string | `"cert-manager"` | |
+| cilium.hubble.ui.enabled | bool | `false` | |
 | cilium.ipam.operator.clusterPoolIPv4PodCIDRList[0] | string | `"10.240.0.0/16"` | |
 | cilium.l7Proxy | bool | `false` | |
 | cilium.operator.nodeSelector."node-role.kubernetes.io/control-plane" | string | `""` | |
@@ -58,4 +64,5 @@ Kubernetes: `>= 1.24.0`
 | multus.clusterNetwork | string | `"cilium"` | |
 | multus.defaultNetworks | list | `[]` | |
 | multus.enabled | bool | `false` | |
+| multus.readinessindicatorfile | string | `"/etc/cni/net.d/05-cilium.conf"` | |
 | multus.tag | string | `"v3.9.2"` | |
@@ -40,8 +40,8 @@ cilium:
     #-- Ensure this is false if multus is enabled
     exclusive: false
 
-  bpf:
-    hostLegacyRouting: true
+  #bpf:
+  #  hostLegacyRouting: true
     # tproxy: false
 
   cluster:
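With `bpf.hostLegacyRouting` commented out, the chart no longer pins that setting and Cilium's own default takes effect. If a cluster still needs the old behaviour it can be re-pinned per cluster through a values override; a minimal sketch, assuming the chart lives under `charts/kubezero-network` and its dependencies are already vendored:

```bash
# Hypothetical per-cluster override file re-enabling the old routing mode.
cat > my-cluster-values.yaml <<'EOF'
cilium:
  bpf:
    hostLegacyRouting: true
EOF

# Pass it as an extra values file when rendering, e.g.:
helm template charts/kubezero-network -f my-cluster-values.yaml --output-dir ./rendered
```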
@@ -57,10 +57,10 @@ cilium:
       - 10.240.0.0/16
 
   # Keep it simple for now
-  # nodePort:
-  #   enabled: true
   l7Proxy: false
 
+  #rollOutCiliumPods: true
+
   cgroup:
     autoMount:
       enabled: false
@@ -84,3 +84,14 @@ cilium:
 
   hubble:
     enabled: false
+    relay:
+      enabled: false
+    ui:
+      enabled: false
+    tls:
+      auto:
+        method: cert-manager
+        certManagerIssuerRef:
+          group: cert-manager.io
+          kind: ClusterIssuer
+          name: kubezero-local-ca-issuer
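Hubble relay and UI stay disabled by default, but the TLS bootstrap is now wired to cert-manager via the cluster-local issuer, matching the new README rows above. To see exactly what defaults the chart ships (the chart path is an assumption):

```bash
# Show the packaged hubble defaults; adjust the path to wherever the chart lives.
helm show values charts/kubezero-network | grep -A 14 'hubble:'
```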
@@ -72,7 +72,7 @@ Kubernetes: `>= 1.24.0`
 | storage.aws-ebs-csi-driver.enabled | bool | `false` | |
 | storage.aws-efs-csi-driver.enabled | bool | `false` | |
 | storage.enabled | bool | `false` | |
-| storage.targetRevision | string | `"0.7.3"` | |
+| storage.targetRevision | string | `"0.7.4"` | |
 
 ----------------------------------------------
 Autogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0)
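The table above is autogenerated by helm-docs, so the 0.7.3 to 0.7.4 bump simply mirrors the values change further down; regenerating the docs after editing values keeps the two in sync. Something like the following, with the search-root path being an assumption:

```bash
# Regenerate chart READMEs from chart metadata and values.
helm-docs --chart-search-root charts/kubezero
```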
@@ -36,7 +36,7 @@ cert-manager:
 
 storage:
   enabled: false
-  targetRevision: 0.7.3
+  targetRevision: 0.7.4
   aws-ebs-csi-driver:
     enabled: false
   aws-efs-csi-driver:
@@ -2,47 +2,41 @@
 
 ## TODO
 
-### FeatureGates
-- PodAndContainerStatsFromCRI
-- DelegateFSGroupToCSIDriver
-
 ## What's new - Major themes
-
-- Cilium added as second CNI to prepare full migration to Cilium with 1.24 upgrade
-- support for Nvidia g5 instances incl. pre-installed kernel drivers, cudo toolchain and CRI intergration
-- updated inf1 neuron drivers
-- ExtendedResourceToleration AdmissionController and auto-taints allowing Neuron and Nvidia pods ONLY to be scheduled on dedicated workers
-- full Cluster-Autoscaler integration
+- Cilium is now the default CNI, calico got removed
+- cluster-autoscaler is enabled by default on AWS
 
 ## Version upgrades
-- Istio to 1.14.4
-- Logging: ECK operator to 2.4, fluent-bit 1.9.8
-- Metrics: Prometheus and all Grafana charts to latest to match V1.23
-- ArgoCD to V2.4 ( access to pod via shell disabled by default )
-- AWS EBS/EFS CSI drivers to latest versions
-- cert-manager to V1.9.1
+- cilium
+- metallb
+- nvidia-device-plugin
+- aws-node-termination-handler
+- aws-ebs-csi-driver
+- aws-efs-csi-driver
+
+### FeatureGates
+- PodAndContainerStatsFromCRI
+- DelegateFSGroupToCSIDriver
 
 # Upgrade
 `(No, really, you MUST read this before you upgrade)`
 
 - Ensure your Kube context points to the correct cluster !
 
-1. Enable `containerProxy` for NAT instances and upgrade NAT instance using the new V2 Pulumi stacks
+1. Review CFN config for controller and workers ( enable containerProxy, remove legacy version settings etc )
 
-2. Review CFN config for controller and workers ( enable containerProxy, remove legacy version settings etc )
+2. Upgrade CFN stacks for the control plane and all worker groups
 
-3. Upgrade CFN stacks for the control plane and all worker groups
-
-4. Trigger fully-automated cluster upgrade:
+3. Trigger fully-automated cluster upgrade:
 `./admin/upgrade_cluster.sh <path to the argocd app kubezero yaml for THIS cluster>`
 
-5. Reboot controller(s) one by one
+4. Reboot controller(s) one by one
 Wait each time for controller to join and all pods running.
 Might take a while ...
 
-6. Launch new set of workers eg. by doubling `desired` for each worker ASG
+5. Launch new set of workers eg. by doubling `desired` for each worker ASG
 once new workers are ready, cordon and drain all old workers
 The cluster-autoscaler will remove the old workers automatically after about 10min !
 
-7. If all looks good, commit the ArgoApp resouce for Kubezero, before re-enabling ArgoCD itself.
+6. If all looks good, commit the ArgoApp resouce for Kubezero, before re-enabling ArgoCD itself.
 git add / commit / push `<cluster/env/kubezero/application.yaml>`
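Step 5 of the upgrade relies on the cluster-autoscaler to reap the old workers once they are drained; the cordon/drain part can be scripted. A hedged sketch, where the node-selection label is an assumption and should be whatever actually distinguishes the old launch template or ASG:

```bash
# Select the old workers (hypothetical label), then cordon and drain them one by one.
OLD_NODES=$(kubectl get nodes -l 'worker-group=old' -o name)   # hypothetical selector
for n in $OLD_NODES; do
  kubectl cordon "$n"
  kubectl drain "$n" --ignore-daemonsets --delete-emptydir-data --timeout=10m
done
```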