diff --git a/charts/kubezero-addons/Chart.yaml b/charts/kubezero-addons/Chart.yaml index d89e253..9cabf83 100644 --- a/charts/kubezero-addons/Chart.yaml +++ b/charts/kubezero-addons/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: kubezero-addons description: KubeZero umbrella chart for various optional cluster addons type: application -version: 0.2.0 +version: 0.2.1 home: https://kubezero.com icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png keywords: @@ -16,6 +16,6 @@ maintainers: dependencies: - name: aws-node-termination-handler version: 0.16.0 - repository: https://aws.github.io/eks-charts + # repository: https://aws.github.io/eks-charts condition: aws-node-termination-handler.enabled kubeVersion: ">= 1.20.0" diff --git a/charts/kubezero-addons/README.md b/charts/kubezero-addons/README.md index d35dda1..60eb0eb 100644 --- a/charts/kubezero-addons/README.md +++ b/charts/kubezero-addons/README.md @@ -1,6 +1,6 @@ # kubezero-addons -![Version: 0.1.0](https://img.shields.io/badge/Version-0.1.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) +![Version: 0.2.1](https://img.shields.io/badge/Version-0.2.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) KubeZero umbrella chart for various optional cluster addons @@ -10,15 +10,15 @@ KubeZero umbrella chart for various optional cluster addons | Name | Email | Url | | ---- | ------ | --- | -| Quarky9 | | | +| Stefan Reimer | stefan@zero-downtime.net | | ## Requirements -Kubernetes: `>= 1.18.0` +Kubernetes: `>= 1.20.0` | Repository | Name | Version | |------------|------|---------| -| https://metallb.github.io/metallb | metallb | 0.10.2 | +| | aws-node-termination-handler | 0.16.0 | # MetalLB @@ -41,11 +41,27 @@ Create secret with the IAM user credential for ecr-renew to use, using the crede | Key | Type | Default | Description | |-----|------|---------|-------------| +| aws-node-termination-handler.deleteLocalData | bool | `true` | | +| aws-node-termination-handler.enablePrometheusServer | bool | `false` | | +| aws-node-termination-handler.enableSqsTerminationDraining | bool | `true` | | +| aws-node-termination-handler.enabled | bool | `false` | | +| aws-node-termination-handler.extraEnv.AWS_ROLE_ARN | string | `""` | "arn:aws:iam::${AWS::AccountId}:role/${AWS::Region}.${ClusterName}.awsNth" | +| aws-node-termination-handler.extraEnv.AWS_STS_REGIONAL_ENDPOINTS | string | `"regional"` | | +| aws-node-termination-handler.extraEnv.AWS_WEB_IDENTITY_TOKEN_FILE | string | `"/var/run/secrets/sts.amazonaws.com/serviceaccount/token"` | | +| aws-node-termination-handler.fullnameOverride | string | `"aws-node-termination-handler"` | | +| aws-node-termination-handler.jsonLogging | bool | `true` | | +| aws-node-termination-handler.metadataTries | int | `0` | | +| aws-node-termination-handler.nodeSelector."node-role.kubernetes.io/control-plane" | string | `""` | | +| aws-node-termination-handler.podMonitor.create | bool | `false` | | +| aws-node-termination-handler.queueURL | string | `""` | https://sqs.${AWS::Region}.amazonaws.com/${AWS::AccountId}/${ClusterName}_Nth | +| aws-node-termination-handler.rbac.pspEnabled | bool | `false` | | +| aws-node-termination-handler.taintNode | bool | `true` | | +| aws-node-termination-handler.tolerations[0].effect | string | `"NoSchedule"` | | +| aws-node-termination-handler.tolerations[0].key | string | `"node-role.kubernetes.io/master"` 
| | +| clusterBackup.enabled | bool | `false` | | +| clusterBackup.image.name | string | `"public.ecr.aws/zero-downtime/kubezero-admin"` | | +| clusterBackup.image.tag | string | `"v1.21.7"` | | +| clusterBackup.password | string | `""` | | +| clusterBackup.repository | string | `""` | | | fuseDevicePlugin.enabled | bool | `false` | | | k8sEcrLoginRenew.enabled | bool | `false` | | -| metallb.configInline | object | `{}` | | -| metallb.controller.nodeSelector."node-role.kubernetes.io/master" | string | `""` | | -| metallb.controller.tolerations[0].effect | string | `"NoSchedule"` | | -| metallb.controller.tolerations[0].key | string | `"node-role.kubernetes.io/master"` | | -| metallb.enabled | bool | `false` | | -| metallb.psp.create | bool | `false` | | diff --git a/charts/kubezero-addons/charts/aws-node-termination-handler/.helmignore b/charts/kubezero-addons/charts/aws-node-termination-handler/.helmignore new file mode 100644 index 0000000..50af031 --- /dev/null +++ b/charts/kubezero-addons/charts/aws-node-termination-handler/.helmignore @@ -0,0 +1,22 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/kubezero-addons/charts/aws-node-termination-handler/Chart.yaml b/charts/kubezero-addons/charts/aws-node-termination-handler/Chart.yaml new file mode 100644 index 0000000..832f402 --- /dev/null +++ b/charts/kubezero-addons/charts/aws-node-termination-handler/Chart.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +appVersion: 1.14.0 +description: A Helm chart for the AWS Node Termination Handler +home: https://github.com/aws/eks-charts +icon: https://raw.githubusercontent.com/aws/eks-charts/master/docs/logo/aws.png +keywords: +- eks +- ec2 +- node-termination +- spot +maintainers: +- email: bwagner5@users.noreply.github.com + name: Brandon Wagner + url: https://github.com/bwagner5 +- email: jillmon@users.noreply.github.com + name: Jillian Montalvo + url: https://github.com/jillmon +name: aws-node-termination-handler +sources: +- https://github.com/aws/eks-charts +version: 0.16.0 diff --git a/charts/kubezero-addons/charts/aws-node-termination-handler/README.md b/charts/kubezero-addons/charts/aws-node-termination-handler/README.md new file mode 100644 index 0000000..186109e --- /dev/null +++ b/charts/kubezero-addons/charts/aws-node-termination-handler/README.md @@ -0,0 +1,172 @@ +# AWS Node Termination Handler + +AWS Node Termination Handler Helm chart for Kubernetes. For more information on this project see the project repo at https://github.com/aws/aws-node-termination-handler. 
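As a rough illustration of the kubezero-addons values documented further up (not part of the upstream chart), enabling the bundled handler in queue-processor mode could look like the following override; the account ID, region, and cluster name are hypothetical placeholders:

```yaml
# Hypothetical kubezero-addons override values; substitute your own account, region and cluster name
aws-node-termination-handler:
  enabled: true
  queueURL: "https://sqs.eu-central-1.amazonaws.com/123456789012/mycluster_Nth"
  extraEnv:
    AWS_ROLE_ARN: "arn:aws:iam::123456789012:role/eu-central-1.mycluster.awsNth"
    AWS_STS_REGIONAL_ENDPOINTS: "regional"
    AWS_WEB_IDENTITY_TOKEN_FILE: "/var/run/secrets/sts.amazonaws.com/serviceaccount/token"
```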
+ +## Prerequisites + +* Kubernetes >= 1.14 + +## Installing the Chart + +Add the EKS repository to Helm: + +```sh +helm repo add eks https://aws.github.io/eks-charts +``` + +Install AWS Node Termination Handler: + +To install the chart with the release name aws-node-termination-handler and default configuration: + +```sh +helm upgrade --install aws-node-termination-handler \ + --namespace kube-system \ + eks/aws-node-termination-handler +``` + +To install into an EKS cluster where the Node Termination Handler is already installed, you can run: + +```sh +helm upgrade --install --recreate-pods --force \ + aws-node-termination-handler --namespace kube-system eks/aws-node-termination-handler +``` + +If you receive an error similar to `Error: release aws-node-termination-handler +failed: "aws-node-termination-handler" already exists`, simply rerun +the above command. + +The [configuration](#configuration) section lists the parameters that can be configured during installation. + +## Uninstalling the Chart + +To uninstall/delete the `aws-node-termination-handler` deployment: + +```sh +helm delete --purge aws-node-termination-handler +``` + +The command removes all the Kubernetes components associated with the chart and deletes the release. + +## Configuration + +The following tables lists the configurable parameters of the chart and their default values. + +### AWS Node Termination Handler Common Configuration + +The configuration in this table applies to both queue-processor mode and IMDS mode. + +Parameter | Description | Default +--- | --- | --- +`deleteLocalData` | Tells kubectl to continue even if there are pods using emptyDir (local data that will be deleted when the node is drained). | `true` +`gracePeriod` | (DEPRECATED: Renamed to podTerminationGracePeriod) The time in seconds given to each pod to terminate gracefully. If negative, the default value specified in the pod will be used, which defaults to 30 seconds if not specified. | `-1` +`podTerminationGracePeriod` | The time in seconds given to each pod to terminate gracefully. If negative, the default value specified in the pod will be used, which defaults to 30 seconds if not specified. | `-1` +`nodeTerminationGracePeriod` | Period of time in seconds given to each NODE to terminate gracefully. Node draining will be scheduled based on this value to optimize the amount of compute time, but still safely drain the node before an event. | `120` +`ignoreDaemonSets` | Causes kubectl to skip daemon set managed pods | `true` +`instanceMetadataURL` | The URL of EC2 instance metadata. This shouldn't need to be changed unless you are testing. | `http://169.254.169.254:80` +`webhookURL` | Posts event data to URL upon instance interruption action | `` +`webhookURLSecretName` | Pass Webhook URL as a secret. Secret Key: `webhookurl`, Value: `` | None +`webhookProxy` | Uses the specified HTTP(S) proxy for sending webhooks | `` +`webhookHeaders` | Replaces the default webhook headers. | `{"Content-type":"application/json"}` +`webhookTemplate` | Replaces the default webhook message template. | `{"text":"[NTH][Instance Interruption] EventID: {{ .EventID }} - Kind: {{ .Kind }} - Instance: {{ .InstanceID }} - Node: {{ .NodeName }} - Description: {{ .Description }} - Start Time: {{ .StartTime }}"}` +`webhookTemplateConfigMapName` | Pass Webhook template file as configmap | None +`webhookTemplateConfigMapKey` | Name of the template file stored in the configmap| None +`metadataTries` | The number of times to try requesting metadata. 
If you would like 2 retries, set metadata-tries to 3. | `3` +`cordonOnly` | If true, nodes will be cordoned but not drained when an interruption event occurs. | `false` +`taintNode` | If true, nodes will be tainted when an interruption event occurs. Currently used taint keys are `aws-node-termination-handler/scheduled-maintenance`, `aws-node-termination-handler/spot-itn`, `aws-node-termination-handler/asg-lifecycle-termination` and `aws-node-termination-handler/rebalance-recommendation`| `false` +`jsonLogging` | If true, use JSON-formatted logs instead of human readable logs. | `false` +`logLevel` | Sets the log level (INFO, DEBUG, or ERROR) | `INFO` +`enablePrometheusServer` | If true, start an http server exposing `/metrics` endpoint for prometheus. | `false` +`prometheusServerPort` | Replaces the default HTTP port for exposing prometheus metrics. | `9092` +`enableProbesServer` | If true, start an http server exposing `/healthz` endpoint for probes. | `false` +`probesServerPort` | Replaces the default HTTP port for exposing probes endpoint. | `8080` +`probesServerEndpoint` | Replaces the default endpoint for exposing probes endpoint. | `/healthz` +`emitKubernetesEvents` | If `true`, Kubernetes events will be emitted when interruption events are received and when actions are taken on Kubernetes nodes. In IMDS Processor mode a default set of annotations with all the node metadata gathered from IMDS will be attached to each event. More information [here](https://github.com/aws/aws-node-termination-handler/blob/main/docs/kubernetes_events.md) | `false` +`kubernetesExtraEventsAnnotations` | A comma-separated list of `key=value` extra annotations to attach to all emitted Kubernetes events. Example: `first=annotation,sample.annotation/number=two"` | None + +### AWS Node Termination Handler - Queue-Processor Mode Configuration + +Parameter | Description | Default +--- | --- | --- +`enableSqsTerminationDraining` | If true, this turns on queue-processor mode which drains nodes when an SQS termination event is received. 
| `false` +`queueURL` | Listens for messages on the specified SQS queue URL | None +`awsRegion` | If specified, use the AWS region for AWS API calls, else NTH will try to find the region through AWS_REGION env var, IMDS, or the specified queue URL | `` +`checkASGTagBeforeDraining` | If true, check that the instance is tagged with "aws-node-termination-handler/managed" as the key before draining the node | `true` +`managedAsgTag` | The tag to ensure is on a node if checkASGTagBeforeDraining is true | `aws-node-termination-handler/managed` +`workers` | The maximum amount of parallel event processors | `10` +`replicas` | The number of replicas in the NTH deployment when using queue-processor mode (NOTE: increasing replicas may cause duplicate webhooks since NTH pods are stateless) | `1` +`podDisruptionBudget` | Limit the disruption for controller pods, requires at least 2 controller replicas | `{}` +`serviceMonitor.create` | If `true`, create a ServiceMonitor (this requires enableSqsTerminationDraining and enablePrometheusServer to be set) | `false` +`serviceMonitor.interval` | Prometheus scrape interval | `30s` +`serviceMonitor.sampleLimit` | Number of scraped samples accepted | `5000` +`serviceMonitor.labels` | Additional ServiceMonitor metadata labels | `{}` +`serviceMonitor.namespace` | Override ServiceMonitor Helm release namespace | `{{ .Release.Namespace }}` + +### AWS Node Termination Handler - IMDS Mode Configuration + +Parameter | Description | Default +--- | --- | --- +`enableScheduledEventDraining` | [EXPERIMENTAL] If true, drain nodes before the maintenance window starts for an EC2 instance scheduled event | `false` +`enableSpotInterruptionDraining` | If true, drain nodes when the spot interruption termination notice is received | `true` +`enableRebalanceDraining` | If true, drain nodes when the rebalance recommendation notice is received | `false` +`enableRebalanceMonitoring` | If true, cordon nodes when the rebalance recommendation notice is received. If you'd like to drain the node in addition to cordoning, then also set `enableRebalanceDraining`. | `false` +`useHostNetwork` | If `true`, enables `hostNetwork` for the Linux DaemonSet. 
NOTE: setting this to `false` may cause issues accessing IMDSv2 if your account is not configured with an IP hop count of 2 | `true` +`podMonitor.create` | If `true`, create a PodMonitor (this requires enableSqsTerminationDraining to not be set and enablePrometheusServer to be set) | `false` +`podMonitor.interval` | Prometheus scrape interval | `30s` +`podMonitor.sampleLimit` | Number of scraped samples accepted | `5000` +`podMonitor.labels` | Additional PodMonitor metadata labels | `{}` +`podMonitor.namespace` | Override PodMonitor Helm release namespace | `{{ .Release.Namespace }}` + +### Kubernetes Configuration + +Parameter | Description | Default +--- | --- | --- +`image.repository` | image repository | `public.ecr.aws/aws-ec2/aws-node-termination-handler` +`image.tag` | image tag | `` +`image.pullPolicy` | image pull policy | `IfNotPresent` +`image.pullSecrets` | image pull secrets (for private docker registries) | `[]` +`affinity` | node/pod affinities | None +`linuxAffinity` | Linux node/pod affinities | None +`windowsAffinity` | Windows node/pod affinities | None +`podAnnotations` | annotations to add to each pod | `{}` +`linuxPodAnnotations` | Linux annotations to add to each pod | `{}` +`windowsPodAnnotations` | Windows annotations to add to each pod | `{}` +`podLabels` | labels to add to each pod | `{}` +`linuxPodLabels` | labels to add to each Linux pod | `{}` +`windowsPodLabels` | labels to add to each Windows pod | `{}` +`priorityClassName` | Name of the priorityClass | `system-node-critical` +`resources` | Resources for the pods | `requests.cpu: 50m, requests.memory: 64Mi, limits.cpu: 100m, limits.memory: 128Mi` +`dnsPolicy` | DaemonSet DNS policy | Linux: `ClusterFirstWithHostNet`, Windows: `ClusterFirst` +`nodeSelector` | Tells the all daemon sets where to place the node-termination-handler pods. For example: `lifecycle: "Ec2Spot"`, `on-demand: "false"`, `aws.amazon.com/purchaseType: "spot"`, etc. Value must be a valid yaml expression. | `{}` +`linuxNodeSelector` | Tells the Linux daemon set where to place the node-termination-handler pods. For example: `lifecycle: "Ec2Spot"`, `on-demand: "false"`, `aws.amazon.com/purchaseType: "spot"`, etc. Value must be a valid yaml expression. | `{}` +`windowsNodeSelector` | Tells the Windows daemon set where to place the node-termination-handler pods. For example: `lifecycle: "Ec2Spot"`, `on-demand: "false"`, `aws.amazon.com/purchaseType: "spot"`, etc. Value must be a valid yaml expression. | `{}` +`tolerations` | list of node taints to tolerate | `[ {"operator": "Exists"} ]` +`rbac.create` | if `true`, create and use RBAC resources | `true` +`rbac.pspEnabled` | If `true`, create and use a restricted pod security policy | `true` +`serviceAccount.create` | If `true`, create a new service account | `true` +`serviceAccount.name` | Service account to be used | None +`serviceAccount.annotations` | Specifies the annotations for ServiceAccount | `{}` +`securityContext.runAsUserID` | User ID to run the container | `1000` +`securityContext.runAsGroupID` | Group ID to run the container | `1000` +`nodeSelectorTermsOs` | Operating System Node Selector Key | `kubernetes.io/os` +`nodeSelectorTermsArch` | CPU Architecture Node Selector Key | `kubernetes.io/arch` +`targetNodeOs` | Space separated list of node OS's to target, e.g. "linux", "windows", "linux windows". Note: Windows support is experimental. 
| `"linux"` +`updateStrategy` | Update strategy for the all DaemonSets (Linux and Windows) | `type=RollingUpdate,rollingUpdate.maxUnavailable=1` +`linuxUpdateStrategy` | Update strategy for the Linux DaemonSet | `type=RollingUpdate,rollingUpdate.maxUnavailable=1` +`windowsUpdateStrategy` | Update strategy for the Windows DaemonSet | `type=RollingUpdate,rollingUpdate.maxUnavailable=1` +`extraEnv` | Additional environment variables to inject into pod configuration | `[]` + +### Testing Configuration (NOT RECOMMENDED FOR PROD DEPLOYMENTS) + +Parameter | Description | Default +--- | --- | --- +`procUptimeFile` | (Used for Testing) Specify the uptime file | `/proc/uptime` +`awsEndpoint` | (Used for testing) If specified, use the AWS endpoint to make API calls | None +`awsSecretAccessKey` | (Used for testing) Pass-thru env var | None +`awsAccessKeyID` | (Used for testing) Pass-thru env var | None +`dryRun` | If true, only log if a node would be drained | `false` + +## Metrics endpoint consideration + +NTH in IMDS mode runs as a DaemonSet w/ `host_networking=true` by default. If the prometheus server is enabled, nothing else will be able to bind to the configured port (by default `:9092`) in the root network namespace. Therefore, it will need to have a firewall/security group configured on the nodes to block access to the `/metrics` endpoint. + +You can switch NTH in IMDS mode to run w/ `host_networking=false`, but you will need to make sure that IMDSv1 is enabled or IMDSv2 IP hop count will need to be incremented to 2. https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-instance-metadata-service.html diff --git a/charts/kubezero-addons/charts/aws-node-termination-handler/templates/NOTES.txt b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/NOTES.txt new file mode 100644 index 0000000..f2dd1ce --- /dev/null +++ b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/NOTES.txt @@ -0,0 +1,3 @@ +{{ .Release.Name }} has been installed or updated. To check the status of pods, run: + +kubectl get pods --namespace {{ .Values.namespace }} diff --git a/charts/kubezero-addons/charts/aws-node-termination-handler/templates/_helpers.tpl b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/_helpers.tpl new file mode 100644 index 0000000..249a9c9 --- /dev/null +++ b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/_helpers.tpl @@ -0,0 +1,106 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "aws-node-termination-handler.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "aws-node-termination-handler.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Equivalent to "aws-node-termination-handler.fullname" except that "-win" indicator is appended to the end. +Name will not exceed 63 characters. 
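Illustration (not in the upstream chart): with fullnameOverride "aws-node-termination-handler", this helper renders as "aws-node-termination-handler-win".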
+*/}} +{{- define "aws-node-termination-handler.fullname.windows" -}} +{{- include "aws-node-termination-handler.fullname" . | trunc 59 | trimSuffix "-" | printf "%s-win" -}} +{{- end -}} + +{{/* +Common labels +*/}} +{{- define "aws-node-termination-handler.labels" -}} +helm.sh/chart: {{ include "aws-node-termination-handler.chart" . }} +{{ include "aws-node-termination-handler.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end -}} + +{{/* +Selector labels +*/}} +{{- define "aws-node-termination-handler.selectorLabels" -}} +app.kubernetes.io/name: {{ include "aws-node-termination-handler.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "aws-node-termination-handler.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create the name of the service account to use +*/}} +{{- define "aws-node-termination-handler.serviceAccountName" -}} +{{- if .Values.serviceAccount.create -}} + {{ default (include "aws-node-termination-handler.fullname" .) .Values.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.serviceAccount.name }} +{{- end -}} +{{- end -}} + +{{/* +Get the default node selector term prefix. +*/}} +{{- define "aws-node-termination-handler.defaultNodeSelectorTermsPrefix" -}} +kubernetes.io +{{- end -}} + +{{/* +Get the default node selector OS term. +*/}} +{{- define "aws-node-termination-handler.defaultNodeSelectorTermsOs" -}} + {{- list (include "aws-node-termination-handler.defaultNodeSelectorTermsPrefix" .) "os" | join "/" -}} +{{- end -}} + +{{/* +Get the default node selector Arch term. +*/}} +{{- define "aws-node-termination-handler.defaultNodeSelectorTermsArch" -}} + {{- list (include "aws-node-termination-handler.defaultNodeSelectorTermsPrefix" .) "arch" | join "/" -}} +{{- end -}} + +{{/* +Get the node selector OS term. +*/}} +{{- define "aws-node-termination-handler.nodeSelectorTermsOs" -}} + {{- or .Values.nodeSelectorTermsOs (include "aws-node-termination-handler.defaultNodeSelectorTermsOs" .) -}} +{{- end -}} + +{{/* +Get the node selector Arch term. +*/}} +{{- define "aws-node-termination-handler.nodeSelectorTermsArch" -}} + {{- or .Values.nodeSelectorTermsArch (include "aws-node-termination-handler.defaultNodeSelectorTermsArch" .) -}} +{{- end -}} diff --git a/charts/kubezero-addons/charts/aws-node-termination-handler/templates/clusterrole.yaml b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/clusterrole.yaml new file mode 100644 index 0000000..8418ff3 --- /dev/null +++ b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/clusterrole.yaml @@ -0,0 +1,48 @@ +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ include "aws-node-termination-handler.fullname" . 
}} +rules: +- apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list + - patch + - update +- apiGroups: + - "" + resources: + - pods + verbs: + - list + - get +- apiGroups: + - "" + resources: + - pods/eviction + verbs: + - create +- apiGroups: + - extensions + resources: + - daemonsets + verbs: + - get +- apiGroups: + - apps + resources: + - daemonsets + verbs: + - get +{{- if .Values.emitKubernetesEvents }} +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch +{{- end }} diff --git a/charts/kubezero-addons/charts/aws-node-termination-handler/templates/clusterrolebinding.yaml b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/clusterrolebinding.yaml new file mode 100644 index 0000000..b5c2532 --- /dev/null +++ b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/clusterrolebinding.yaml @@ -0,0 +1,12 @@ +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ include "aws-node-termination-handler.fullname" . }} +subjects: +- kind: ServiceAccount + name: {{ template "aws-node-termination-handler.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ include "aws-node-termination-handler.fullname" . }} + apiGroup: rbac.authorization.k8s.io diff --git a/charts/kubezero-addons/charts/aws-node-termination-handler/templates/daemonset.linux.yaml b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/daemonset.linux.yaml new file mode 100644 index 0000000..0a09aa6 --- /dev/null +++ b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/daemonset.linux.yaml @@ -0,0 +1,226 @@ +{{- if and (lower .Values.targetNodeOs | contains "linux") (not .Values.enableSqsTerminationDraining) -}} +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "aws-node-termination-handler.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "aws-node-termination-handler.labels" . | nindent 4 }} +spec: + {{- if (or .Values.updateStrategy .Values.linuxUpdateStrategy) }} + updateStrategy: + {{- with .Values.updateStrategy }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.linuxUpdateStrategy }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- end }} + selector: + matchLabels: + {{- include "aws-node-termination-handler.selectorLabels" . | nindent 6 }} + {{ include "aws-node-termination-handler.nodeSelectorTermsOs" . }}: linux + template: + metadata: + {{- if (or .Values.podAnnotations .Values.linuxPodAnnotations) }} + annotations: + {{- range $key, $value := (mergeOverwrite (dict) .Values.podAnnotations .Values.linuxPodAnnotations) }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} + labels: + {{- include "aws-node-termination-handler.selectorLabels" . | nindent 8 }} + k8s-app: aws-node-termination-handler + {{ include "aws-node-termination-handler.nodeSelectorTermsOs" . 
}}: linux + {{- range $key, $value := (mergeOverwrite (dict) .Values.podLabels .Values.linuxPodLabels) }} + {{ $key }}: {{ $value | quote }} + {{- end }} + spec: + volumes: + - name: "uptime" + hostPath: + path: {{ .Values.procUptimeFile | default "/proc/uptime" | quote }} + {{- if and .Values.webhookTemplateConfigMapName .Values.webhookTemplateConfigMapKey }} + - name: "webhook-template" + configMap: + name: {{ .Values.webhookTemplateConfigMapName }} + {{- end }} + priorityClassName: {{ .Values.priorityClassName | quote }} + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: {{ include "aws-node-termination-handler.nodeSelectorTermsOs" . | quote }} + operator: In + values: + - linux + - key: {{ include "aws-node-termination-handler.nodeSelectorTermsArch" . | quote }} + operator: In + values: + - amd64 + - arm64 + - arm + - key: "eks.amazonaws.com/compute-type" + operator: NotIn + values: + - fargate + {{- with .Values.affinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.linuxAffinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ template "aws-node-termination-handler.serviceAccountName" . }} + hostNetwork: {{ .Values.useHostNetwork }} + dnsPolicy: {{ .Values.dnsPolicy | default "ClusterFirstWithHostNet" | quote }} + containers: + - name: {{ include "aws-node-termination-handler.name" . }} + image: {{ .Values.image.repository }}:{{ .Values.image.tag }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + securityContext: + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: {{ .Values.securityContext.runAsUserID }} + runAsGroup: {{ .Values.securityContext.runAsGroupID }} + allowPrivilegeEscalation: false + volumeMounts: + - name: "uptime" + mountPath: {{ .Values.procUptimeFile | default "/proc/uptime" | quote }} + readOnly: true + {{- if and .Values.webhookTemplateConfigMapName .Values.webhookTemplateConfigMapKey }} + - name: "webhook-template" + mountPath: "/config/" + {{- end }} + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: DELETE_LOCAL_DATA + value: {{ .Values.deleteLocalData | quote }} + - name: IGNORE_DAEMON_SETS + value: {{ .Values.ignoreDaemonSets | quote }} + - name: GRACE_PERIOD + value: {{ .Values.gracePeriod | quote }} + - name: POD_TERMINATION_GRACE_PERIOD + value: {{ .Values.podTerminationGracePeriod | quote }} + - name: INSTANCE_METADATA_URL + value: {{ .Values.instanceMetadataURL | quote }} + - name: NODE_TERMINATION_GRACE_PERIOD + value: {{ .Values.nodeTerminationGracePeriod | quote }} + - name: WEBHOOK_URL + {{- if .Values.webhookURLSecretName }} + valueFrom: + secretKeyRef: + name: {{ .Values.webhookURLSecretName }} + key: webhookurl + {{- else }} + value: {{ .Values.webhookURL | quote }} + {{- end }} + - name: WEBHOOK_HEADERS + value: {{ .Values.webhookHeaders | quote }} + {{- if and .Values.webhookTemplateConfigMapName .Values.webhookTemplateConfigMapKey }} + - name: WEBHOOK_TEMPLATE_FILE + value: {{ print "/config/" .Values.webhookTemplateConfigMapKey | quote }} + {{- end }} + - name: WEBHOOK_TEMPLATE + value: {{ .Values.webhookTemplate | quote }} + - name: DRY_RUN + value: {{ .Values.dryRun | quote }} + - name: ENABLE_SPOT_INTERRUPTION_DRAINING + value: {{ .Values.enableSpotInterruptionDraining | quote }} + - name: ENABLE_SCHEDULED_EVENT_DRAINING + 
value: {{ .Values.enableScheduledEventDraining | quote }} + - name: ENABLE_REBALANCE_MONITORING + value: {{ .Values.enableRebalanceMonitoring | quote }} + - name: ENABLE_REBALANCE_DRAINING + value: {{ .Values.enableRebalanceDraining | quote }} + - name: CHECK_ASG_TAG_BEFORE_DRAINING + value: {{ .Values.checkASGTagBeforeDraining | quote }} + - name: MANAGED_ASG_TAG + value: {{ .Values.managedAsgTag | quote }} + - name: METADATA_TRIES + value: {{ .Values.metadataTries | quote }} + - name: CORDON_ONLY + value: {{ .Values.cordonOnly | quote }} + - name: TAINT_NODE + value: {{ .Values.taintNode | quote }} + - name: JSON_LOGGING + value: {{ .Values.jsonLogging | quote }} + - name: LOG_LEVEL + value: {{ .Values.logLevel | quote }} + - name: WEBHOOK_PROXY + value: {{ .Values.webhookProxy | quote }} + - name: UPTIME_FROM_FILE + value: {{ .Values.procUptimeFile | quote }} + - name: ENABLE_PROMETHEUS_SERVER + value: {{ .Values.enablePrometheusServer | quote }} + - name: PROMETHEUS_SERVER_PORT + value: {{ .Values.prometheusServerPort | quote }} + - name: ENABLE_PROBES_SERVER + value: {{ .Values.enableProbesServer | quote }} + - name: PROBES_SERVER_PORT + value: {{ .Values.probesServerPort | quote }} + - name: PROBES_SERVER_ENDPOINT + value: {{ .Values.probesServerEndpoint | quote }} + - name: EMIT_KUBERNETES_EVENTS + value: {{ .Values.emitKubernetesEvents | quote }} + - name: KUBERNETES_EVENTS_EXTRA_ANNOTATIONS + value: {{ .Values.kubernetesEventsExtraAnnotations | quote }} +{{- range $key, $value := .Values.extraEnv }} + - name: {{ $key }} + value: {{ $value | quote }} +{{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + {{- if or .Values.enablePrometheusServer .Values.enableProbesServer }} + ports: + {{- end }} + {{- if .Values.enablePrometheusServer }} + - containerPort: {{ .Values.prometheusServerPort }} + {{- if .Values.useHostNetwork }} + hostPort: {{ .Values.prometheusServerPort }} + {{- end }} + name: http-metrics + protocol: TCP + {{- end }} + {{- if .Values.enableProbesServer }} + - containerPort: {{ .Values.probesServerPort }} + {{- if .Values.useHostNetwork }} + hostPort: {{ .Values.probesServerPort }} + {{- end }} + name: liveness-probe + protocol: TCP + {{- end }} + {{- if .Values.enableProbesServer }} + livenessProbe: + {{- toYaml .Values.probes | nindent 12 }} + {{- end }} + nodeSelector: + {{ include "aws-node-termination-handler.nodeSelectorTermsOs" . }}: linux + {{- with .Values.nodeSelector }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.linuxNodeSelector }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.image.pullSecrets }} + imagePullSecrets: + {{- range .Values.image.pullSecrets }} + - name: {{ . }} + {{- end }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end -}} diff --git a/charts/kubezero-addons/charts/aws-node-termination-handler/templates/daemonset.windows.yaml b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/daemonset.windows.yaml new file mode 100644 index 0000000..d5dfa6f --- /dev/null +++ b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/daemonset.windows.yaml @@ -0,0 +1,196 @@ +{{- if and (lower .Values.targetNodeOs | contains "windows") (not .Values.enableSqsTerminationDraining) -}} +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "aws-node-termination-handler.fullname.windows" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "aws-node-termination-handler.labels" . 
| nindent 4 }} +spec: + {{- if (or .Values.updateStrategy .Values.windowsUpdateStrategy) }} + updateStrategy: + {{- with .Values.updateStrategy }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.windowsUpdateStrategy }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- end }} + selector: + matchLabels: + {{- include "aws-node-termination-handler.selectorLabels" . | nindent 6 }} + {{ include "aws-node-termination-handler.nodeSelectorTermsOs" . }}: windows + template: + metadata: + {{- if (or .Values.podAnnotations .Values.windowsPodAnnotations) }} + annotations: + {{- range $key, $value := (mergeOverwrite (dict) .Values.podAnnotations .Values.windowsPodAnnotations) }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} + labels: + {{- include "aws-node-termination-handler.selectorLabels" . | nindent 8 }} + k8s-app: aws-node-termination-handler + {{ include "aws-node-termination-handler.nodeSelectorTermsOs" . }}: windows + {{- range $key, $value := (mergeOverwrite (dict) .Values.podLabels .Values.windowsPodLabels) }} + {{ $key }}: {{ $value | quote }} + {{- end }} + spec: + {{- if and .Values.webhookTemplateConfigMapName .Values.webhookTemplateConfigMapKey }} + volumes: + - name: "webhook-template" + configMap: + name: {{ .Values.webhookTemplateConfigMapName }} + {{- end }} + priorityClassName: {{ .Values.priorityClassName | quote }} + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: {{ include "aws-node-termination-handler.nodeSelectorTermsOs" . | quote }} + operator: In + values: + - windows + - key: {{ include "aws-node-termination-handler.nodeSelectorTermsArch" . | quote }} + operator: In + values: + - amd64 + {{- with .Values.affinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.windowsAffinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ template "aws-node-termination-handler.serviceAccountName" . }} + dnsPolicy: {{ .Values.dnsPolicy | default "ClusterFirst" | quote }} + containers: + - name: {{ include "aws-node-termination-handler.name" . 
}} + image: {{ .Values.image.repository }}:{{ .Values.image.tag }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- if and .Values.webhookTemplateConfigMapName .Values.webhookTemplateConfigMapKey }} + volumeMounts: + - name: "webhook-template" + mountPath: "/config/" + {{- end }} + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: DELETE_LOCAL_DATA + value: {{ .Values.deleteLocalData | quote }} + - name: IGNORE_DAEMON_SETS + value: {{ .Values.ignoreDaemonSets | quote }} + - name: GRACE_PERIOD + value: {{ .Values.gracePeriod | quote }} + - name: POD_TERMINATION_GRACE_PERIOD + value: {{ .Values.podTerminationGracePeriod | quote }} + - name: INSTANCE_METADATA_URL + value: {{ .Values.instanceMetadataURL | quote }} + - name: NODE_TERMINATION_GRACE_PERIOD + value: {{ .Values.nodeTerminationGracePeriod | quote }} + - name: WEBHOOK_URL + value: {{ .Values.webhookURL | quote }} + - name: WEBHOOK_HEADERS + value: {{ .Values.webhookHeaders | quote }} + {{- if and .Values.webhookTemplateConfigMapName .Values.webhookTemplateConfigMapKey }} + - name: WEBHOOK_TEMPLATE_FILE + value: {{ print "/config/" .Values.webhookTemplateConfigMapKey | quote }} + {{- end }} + - name: WEBHOOK_TEMPLATE + value: {{ .Values.webhookTemplate | quote }} + - name: DRY_RUN + value: {{ .Values.dryRun | quote }} + - name: ENABLE_SPOT_INTERRUPTION_DRAINING + value: {{ .Values.enableSpotInterruptionDraining | quote }} + - name: ENABLE_SCHEDULED_EVENT_DRAINING + value: {{ .Values.enableScheduledEventDraining | quote }} + - name: ENABLE_REBALANCE_MONITORING + value: {{ .Values.enableRebalanceMonitoring | quote }} + - name: ENABLE_REBALANCE_DRAINING + value: {{ .Values.enableRebalanceDraining | quote }} + - name: CHECK_ASG_TAG_BEFORE_DRAINING + value: {{ .Values.checkASGTagBeforeDraining | quote }} + - name: MANAGED_ASG_TAG + value: {{ .Values.managedAsgTag | quote }} + - name: METADATA_TRIES + value: {{ .Values.metadataTries | quote }} + - name: CORDON_ONLY + value: {{ .Values.cordonOnly | quote }} + - name: TAINT_NODE + value: {{ .Values.taintNode | quote }} + - name: JSON_LOGGING + value: {{ .Values.jsonLogging | quote }} + - name: LOG_LEVEL + value: {{ .Values.logLevel | quote }} + - name: WEBHOOK_PROXY + value: {{ .Values.webhookProxy | quote }} + - name: UPTIME_FROM_FILE + value: {{ .Values.procUptimeFile | quote }} + - name: ENABLE_PROMETHEUS_SERVER + value: {{ .Values.enablePrometheusServer | quote }} + - name: PROMETHEUS_SERVER_PORT + value: {{ .Values.prometheusServerPort | quote }} + - name: ENABLE_PROBES_SERVER + value: {{ .Values.enableProbesServer | quote }} + - name: PROBES_SERVER_PORT + value: {{ .Values.probesServerPort | quote }} + - name: PROBES_SERVER_ENDPOINT + value: {{ .Values.probesServerEndpoint | quote }} + - name: EMIT_KUBERNETES_EVENTS + value: {{ .Values.emitKubernetesEvents | quote }} + - name: KUBERNETES_EVENTS_EXTRA_ANNOTATIONS + value: {{ .Values.kubernetesEventsExtraAnnotations | quote }} +{{- range $key, $value := .Values.extraEnv }} + - name: {{ $key }} + value: {{ $value | quote }} +{{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + {{- if or .Values.enablePrometheusServer .Values.enableProbesServer }} + ports: + {{- end }} + {{- if .Values.enablePrometheusServer }} + - containerPort: {{ .Values.prometheusServerPort }} + hostPort: {{ .Values.prometheusServerPort }} + name: http-metrics + 
protocol: TCP + {{- end }} + {{- if .Values.enableProbesServer }} + - containerPort: {{ .Values.probesServerPort }} + hostPort: {{ .Values.probesServerPort }} + name: liveness-probe + protocol: TCP + {{- end }} + {{- if .Values.enableProbesServer }} + livenessProbe: + {{- toYaml .Values.probes | nindent 12 }} + {{- end }} + nodeSelector: + {{ include "aws-node-termination-handler.nodeSelectorTermsOs" . }}: windows + {{- with .Values.nodeSelector }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.windowsNodeSelector }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.image.pullSecrets }} + imagePullSecrets: + {{- range .Values.image.pullSecrets }} + - name: {{ . }} + {{- end }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end -}} diff --git a/charts/kubezero-addons/charts/aws-node-termination-handler/templates/deployment.yaml b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/deployment.yaml new file mode 100644 index 0000000..292a299 --- /dev/null +++ b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/deployment.yaml @@ -0,0 +1,218 @@ +{{- if .Values.enableSqsTerminationDraining }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "aws-node-termination-handler.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "aws-node-termination-handler.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicas }} + selector: + matchLabels: + {{- include "aws-node-termination-handler.selectorLabels" . | nindent 6 }} + {{ include "aws-node-termination-handler.nodeSelectorTermsOs" . }}: linux + template: + metadata: + annotations: + {{- range $key, $value := .Values.podAnnotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + labels: + {{- include "aws-node-termination-handler.selectorLabels" . | nindent 8 }} + k8s-app: aws-node-termination-handler + {{ include "aws-node-termination-handler.nodeSelectorTermsOs" . }}: linux + {{- range $key, $value := .Values.podLabels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + spec: + volumes: + {{- if and .Values.webhookTemplateConfigMapName .Values.webhookTemplateConfigMapKey }} + - name: "webhook-template" + configMap: + name: {{ .Values.webhookTemplateConfigMapName }} + {{- end }} + - name: aws-token + projected: + sources: + - serviceAccountToken: + path: token + expirationSeconds: 86400 + audience: "sts.amazonaws.com" + priorityClassName: {{ .Values.priorityClassName | quote }} + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: {{ include "aws-node-termination-handler.nodeSelectorTermsOs" . | quote }} + operator: In + values: + - linux + - key: {{ include "aws-node-termination-handler.nodeSelectorTermsArch" . | quote }} + operator: In + values: + - amd64 + - arm64 + - arm + {{- with .Values.affinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ template "aws-node-termination-handler.serviceAccountName" . }} + hostNetwork: false + dnsPolicy: {{ .Values.dnsPolicy | quote }} + securityContext: + fsGroup: {{ .Values.securityContext.runAsGroupID }} + containers: + - name: {{ include "aws-node-termination-handler.name" . 
}} + image: {{ .Values.image.repository }}:{{ .Values.image.tag }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + securityContext: + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: {{ .Values.securityContext.runAsUserID }} + runAsGroup: {{ .Values.securityContext.runAsGroupID }} + allowPrivilegeEscalation: false + volumeMounts: + {{- if and .Values.webhookTemplateConfigMapName .Values.webhookTemplateConfigMapKey }} + - name: "webhook-template" + mountPath: "/config/" + {{- end }} + - name: aws-token + mountPath: "/var/run/secrets/sts.amazonaws.com/serviceaccount/" + readOnly: true + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: DELETE_LOCAL_DATA + value: {{ .Values.deleteLocalData | quote }} + - name: IGNORE_DAEMON_SETS + value: {{ .Values.ignoreDaemonSets | quote }} + - name: POD_TERMINATION_GRACE_PERIOD + value: {{ .Values.podTerminationGracePeriod | quote }} + - name: INSTANCE_METADATA_URL + value: {{ .Values.instanceMetadataURL | quote }} + - name: NODE_TERMINATION_GRACE_PERIOD + value: {{ .Values.nodeTerminationGracePeriod | quote }} + - name: WEBHOOK_URL + {{- if .Values.webhookURLSecretName }} + valueFrom: + secretKeyRef: + name: {{ .Values.webhookURLSecretName }} + key: webhookurl + {{- else }} + value: {{ .Values.webhookURL | quote }} + {{- end }} + - name: WEBHOOK_HEADERS + value: {{ .Values.webhookHeaders | quote }} + {{- if and .Values.webhookTemplateConfigMapName .Values.webhookTemplateConfigMapKey }} + - name: WEBHOOK_TEMPLATE_FILE + value: {{ print "/config/" .Values.webhookTemplateConfigMapKey | quote }} + {{- end }} + - name: WEBHOOK_TEMPLATE + value: {{ .Values.webhookTemplate | quote }} + - name: DRY_RUN + value: {{ .Values.dryRun | quote }} + - name: METADATA_TRIES + value: {{ .Values.metadataTries | quote }} + - name: CORDON_ONLY + value: {{ .Values.cordonOnly | quote }} + - name: TAINT_NODE + value: {{ .Values.taintNode | quote }} + - name: JSON_LOGGING + value: {{ .Values.jsonLogging | quote }} + - name: LOG_LEVEL + value: {{ .Values.logLevel | quote }} + - name: WEBHOOK_PROXY + value: {{ .Values.webhookProxy | quote }} + - name: ENABLE_PROMETHEUS_SERVER + value: {{ .Values.enablePrometheusServer | quote }} + - name: ENABLE_PROBES_SERVER + value: {{ .Values.enableProbesServer | quote }} + - name: ENABLE_SPOT_INTERRUPTION_DRAINING + value: "false" + - name: ENABLE_SCHEDULED_EVENT_DRAINING + value: "false" + - name: ENABLE_REBALANCE_MONITORING + value: "false" + - name: ENABLE_REBALANCE_DRAINING + value: "false" + - name: ENABLE_SQS_TERMINATION_DRAINING + value: "true" + - name: QUEUE_URL + value: {{ .Values.queueURL | quote }} + - name: PROMETHEUS_SERVER_PORT + value: {{ .Values.prometheusServerPort | quote }} + - name: PROBES_SERVER_PORT + value: {{ .Values.probesServerPort | quote }} + - name: PROBES_SERVER_ENDPOINT + value: {{ .Values.probesServerEndpoint | quote }} + - name: AWS_REGION + value: {{ .Values.awsRegion | quote }} + - name: AWS_ENDPOINT + value: {{ .Values.awsEndpoint | quote }} + {{- if .Values.awsSecretAccessKey }} + - name: AWS_SECRET_ACCESS_KEY + value: {{ .Values.awsSecretAccessKey | quote }} + - name: AWS_ACCESS_KEY_ID + value: {{ .Values.awsAccessKeyID | quote }} + {{- end }} + - name: CHECK_ASG_TAG_BEFORE_DRAINING + value: {{ .Values.checkASGTagBeforeDraining | quote }} + - name: MANAGED_ASG_TAG + value: {{ .Values.managedAsgTag | quote }} 
+ - name: WORKERS + value: {{ .Values.workers | quote }} + - name: EMIT_KUBERNETES_EVENTS + value: {{ .Values.emitKubernetesEvents | quote }} + - name: KUBERNETES_EVENTS_EXTRA_ANNOTATIONS + value: {{ .Values.kubernetesEventsExtraAnnotations | quote }} +{{- range $key, $value := .Values.extraEnv }} + - name: {{ $key }} + value: {{ $value | quote }} +{{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + {{- if or .Values.enablePrometheusServer .Values.enableProbesServer }} + ports: + {{- end }} + {{- if .Values.enablePrometheusServer }} + - containerPort: {{ .Values.prometheusServerPort }} + name: http-metrics + protocol: TCP + {{- end }} + {{- if .Values.enableProbesServer }} + - containerPort: {{ .Values.probesServerPort }} + name: liveness-probe + protocol: TCP + {{- end }} + {{- if .Values.enableProbesServer }} + livenessProbe: + {{- toYaml .Values.probes | nindent 12 }} + {{- end }} + nodeSelector: + {{ include "aws-node-termination-handler.nodeSelectorTermsOs" . }}: linux + {{- with .Values.nodeSelector }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.image.pullSecrets }} + imagePullSecrets: + {{- range .Values.image.pullSecrets }} + - name: {{ . }} + {{- end }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/charts/kubezero-addons/charts/aws-node-termination-handler/templates/pdb.yaml b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/pdb.yaml new file mode 100644 index 0000000..1c88ef5 --- /dev/null +++ b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/pdb.yaml @@ -0,0 +1,13 @@ +{{- if and .Values.enableSqsTerminationDraining (and .Values.podDisruptionBudget (gt (int .Values.replicas) 1)) }} +apiVersion: policy/v1beta1 +kind: PodDisruptionBudget +metadata: + name: {{ include "aws-node-termination-handler.fullname" . }} + labels: + {{- include "aws-node-termination-handler.labels" . | nindent 4 }} +spec: + selector: + matchLabels: + {{- include "aws-node-termination-handler.selectorLabels" . | nindent 6 }} + {{- toYaml .Values.podDisruptionBudget | nindent 2 }} +{{- end }} diff --git a/charts/kubezero-addons/charts/aws-node-termination-handler/templates/podmonitor.yaml b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/podmonitor.yaml new file mode 100644 index 0000000..1c497d6 --- /dev/null +++ b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/podmonitor.yaml @@ -0,0 +1,33 @@ +{{- if and (not .Values.enableSqsTerminationDraining) (and .Values.enablePrometheusServer .Values.podMonitor.create) -}} +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: {{ template "aws-node-termination-handler.fullname" . }} + {{- if .Values.podMonitor.namespace }} + namespace: {{ .Values.podMonitor.namespace }} + {{- else }} + namespace: {{ .Release.Namespace }} + {{- end }} + labels: + {{- include "aws-node-termination-handler.labels" . | nindent 4 }} + {{- with .Values.podMonitor.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + jobLabel: {{ include "aws-node-termination-handler.name" . }} + namespaceSelector: + matchNames: + - {{ .Release.Namespace }} + podMetricsEndpoints: + - port: http-metrics + path: /metrics + {{- with .Values.podMonitor.interval }} + interval: {{ . }} + {{- end }} + {{- with .Values.podMonitor.sampleLimit }} + sampleLimit: {{ . }} + {{- end }} + selector: + matchLabels: + {{- include "aws-node-termination-handler.selectorLabels" . 
| nindent 6 }} +{{- end -}} diff --git a/charts/kubezero-addons/charts/aws-node-termination-handler/templates/psp.yaml b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/psp.yaml new file mode 100644 index 0000000..ea953f8 --- /dev/null +++ b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/psp.yaml @@ -0,0 +1,70 @@ +{{- if .Values.rbac.pspEnabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "aws-node-termination-handler.fullname" . }} + labels: + {{- include "aws-node-termination-handler.labels" . | nindent 4 }} + annotations: + seccomp.security.alpha.kubernetes.io/allowedProfileNames: '*' +spec: + privileged: false + hostIPC: false + hostNetwork: {{ .Values.useHostNetwork }} + hostPID: false +{{- if and (and (not .Values.enableSqsTerminationDraining) .Values.useHostNetwork ) (or .Values.enablePrometheusServer .Values.enableProbesServer) }} + hostPorts: +{{- if .Values.enablePrometheusServer }} + - min: {{ .Values.prometheusServerPort }} + max: {{ .Values.prometheusServerPort }} +{{- end }} +{{- if .Values.enableProbesServer }} + - min: {{ .Values.probesServerPort }} + max: {{ .Values.probesServerPort }} +{{- end }} +{{- end }} + readOnlyRootFilesystem: false + allowPrivilegeEscalation: false + allowedCapabilities: + - '*' + fsGroup: + rule: RunAsAny + runAsUser: + rule: RunAsAny + seLinux: + rule: RunAsAny + supplementalGroups: + rule: RunAsAny + volumes: + - '*' +--- +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ template "aws-node-termination-handler.fullname" . }}-psp + namespace: {{ .Release.Namespace }} + labels: + {{- include "aws-node-termination-handler.labels" . | nindent 4 }} +rules: + - apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "aws-node-termination-handler.fullname" . }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ template "aws-node-termination-handler.fullname" . }}-psp + namespace: {{ .Release.Namespace }} + labels: + {{- include "aws-node-termination-handler.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ template "aws-node-termination-handler.fullname" . }}-psp +subjects: + - kind: ServiceAccount + name: {{ template "aws-node-termination-handler.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} +{{- end }} diff --git a/charts/kubezero-addons/charts/aws-node-termination-handler/templates/service.yaml b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/service.yaml new file mode 100644 index 0000000..5534b0b --- /dev/null +++ b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/service.yaml @@ -0,0 +1,17 @@ +{{- if and .Values.enableSqsTerminationDraining .Values.enablePrometheusServer -}} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "aws-node-termination-handler.fullname" . }} + labels: + {{- include "aws-node-termination-handler.labels" . | nindent 4 }} +spec: + type: ClusterIP + selector: + {{- include "aws-node-termination-handler.selectorLabels" . 
| nindent 4 }} + ports: + - name: http-metrics + port: {{ .Values.prometheusServerPort }} + targetPort: http-metrics + protocol: TCP +{{- end -}} diff --git a/charts/kubezero-addons/charts/aws-node-termination-handler/templates/serviceaccount.yaml b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/serviceaccount.yaml new file mode 100644 index 0000000..40daa39 --- /dev/null +++ b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/serviceaccount.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "aws-node-termination-handler.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} +{{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} +{{- end }} + labels: + {{- include "aws-node-termination-handler.labels" . | nindent 4 }} +{{- end -}} diff --git a/charts/kubezero-addons/charts/aws-node-termination-handler/templates/servicemonitor.yaml b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/servicemonitor.yaml new file mode 100644 index 0000000..52ff799 --- /dev/null +++ b/charts/kubezero-addons/charts/aws-node-termination-handler/templates/servicemonitor.yaml @@ -0,0 +1,33 @@ +{{- if and .Values.enableSqsTerminationDraining (and .Values.enablePrometheusServer .Values.serviceMonitor.create) -}} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "aws-node-termination-handler.fullname" . }} + {{- if .Values.serviceMonitor.namespace }} + namespace: {{ .Values.serviceMonitor.namespace }} + {{- else }} + namespace: {{ .Release.Namespace }} + {{- end }} + labels: + {{- include "aws-node-termination-handler.labels" . | nindent 4 }} + {{- with .Values.serviceMonitor.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + jobLabel: {{ include "aws-node-termination-handler.name" . }} + namespaceSelector: + matchNames: + - {{ .Release.Namespace }} + endpoints: + - port: http-metrics + path: /metrics + {{- with .Values.serviceMonitor.interval }} + interval: {{ . }} + {{- end }} + {{- with .Values.serviceMonitor.sampleLimit }} + sampleLimit: {{ . }} + {{- end }} + selector: + matchLabels: + {{- include "aws-node-termination-handler.selectorLabels" . | nindent 6 }} +{{- end -}} diff --git a/charts/kubezero-addons/charts/aws-node-termination-handler/test.yaml b/charts/kubezero-addons/charts/aws-node-termination-handler/test.yaml new file mode 100644 index 0000000..a24efaa --- /dev/null +++ b/charts/kubezero-addons/charts/aws-node-termination-handler/test.yaml @@ -0,0 +1,175 @@ +# Test values for aws-node-termination-handler. +# This is a YAML-formatted file. +# Declare variables to test template rendering functionality. 
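# Sketch (not part of the upstream file): these values are typically rendered locally to
# check the templates, e.g. `helm template aws-node-termination-handler . -f test.yaml`.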
+ +image: + repository: amazon/aws-node-termination-handler + tag: v1.6.1 + pullPolicy: IfNotPresent + pullSecrets: ["test"] + +securityContext: + runAsUserID: 1000 + runAsGroupID: 1000 + +nameOverride: "test-nth" +fullnameOverride: "test-aws-node-termination-handler" + +priorityClassName: system-node-critical + +podAnnotations: { + test: test +} +linuxPodAnnotations: { + test: test +} +windowsPodAnnotations: { + test: test +} + +podLabels: { + test: test +} +linuxPodLabels: { + test: test +} +windowsPodLabels: { + test: test +} + +resources: + requests: + memory: "64Mi" + cpu: "50m" + limits: + memory: "128Mi" + cpu: "100m" + +## enableSpotInterruptionDraining If false, do not drain nodes when the spot interruption termination notice is received +enableSpotInterruptionDraining: true + +## enableScheduledEventDraining [EXPERIMENTAL] If true, drain nodes before the maintenance window starts for an EC2 instance scheduled event +enableScheduledEventDraining: true + +# Total number of times to try making the metadata request before failing. +metadataTries: 3 + +# Cordon but do not drain nodes upon spot interruption termination notice. +cordonOnly: false + +# Taint node upon spot interruption termination notice. +taintNode: false + +# Log messages in JSON format. +jsonLogging: false + +## dryRun tells node-termination-handler to only log calls to kubernetes control plane +dryRun: false + +# deleteLocalData tells kubectl to continue even if there are pods using +# emptyDir (local data that will be deleted when the node is drained). +deleteLocalData: true + +# ignoreDaemonSets causes kubectl to skip Daemon Set managed pods. +ignoreDaemonSets: true + +# gracePeriod (DEPRECATED - use podTerminationGracePeriod instead) is time in seconds given to each pod to terminate gracefully. +# If negative, the default value specified in the pod will be used. +gracePeriod: 1 +podTerminationGracePeriod: 1 + +# nodeTerminationGracePeriod specifies the period of time in seconds given to each NODE to terminate gracefully. Node draining will be scheduled based on this value to optimize the amount of compute time, but still safely drain the node before an event. +nodeTerminationGracePeriod: 1 + +# webhookURL if specified, posts event data to URL upon instance interruption action. +webhookURL: https://localhost:1338 + +# Webhook URL will be fetched from the secret store using the given name. +webhookURLSecretName: test + +# webhookProxy if specified, uses this HTTP(S) proxy configuration. +webhookProxy: tcp://localhost:1338 + +# webhookHeaders if specified, replaces the default webhook headers. +webhookHeaders: "Content-Type: json" + +# webhookTemplate if specified, replaces the default webhook message template. +webhookTemplate: "{\"Content\":\"[NTH][Instance Interruption] InstanceId\"}" + +# instanceMetadataURL is used to override the default metadata URL (default: http://169.254.169.254:80) +instanceMetadataURL: "https://localhost:1338" + +# (TESTING USE): Mount path for uptime file +procUptimeFile: "/proc/uptime" + +# Create node OS specific daemonset(s). (e.g. "linux", "windows", "linux windows") +targetNodeOs: "linux" + +# nodeSelector tells both linux and windows daemonsets where to place the node-termination-handler +# pods. By default, this value is empty and every node will receive a pod. +nodeSelector: { + test: test +} +# linuxNodeSelector tells the linux daemonset where to place the node-termination-handler +# pods. By default, this value is empty and every linux node will receive a pod. 
+linuxNodeSelector: { + test: test +} +# windowsNodeSelector tells the windows daemonset where to place the node-termination-handler +# pods. By default, this value is empty and every windows node will receive a pod. +windowsNodeSelector: { + test: test +} + +enablePrometheusServer: true +prometheusServerPort: 9092 + +tolerations: +- operator: "Exists" + +affinity: { + test: test +} +linuxAffinity: { + test: test +} +windowsAffinity: { + test: test +} + +serviceAccount: + # Specifies whether a service account should be created + create: true + # The name of the service account to use. If name is not set and create is true, + # a name is generated using fullname template + name: test + annotations: { + test: test + } + # eks.amazonaws.com/role-arn: arn:aws:iam::AWS_ACCOUNT_ID:role/IAM_ROLE_NAME + +rbac: + # rbac.pspEnabled: `true` if PodSecurityPolicy resources should be created + pspEnabled: true + +dnsPolicy: "ClusterFirstWithHostNet" + +podMonitor: + # Specifies whether PodMonitor should be created + create: true + # The Prometheus scrape interval + interval: 30s + # The number of scraped samples that will be accepted + sampleLimit: 5000 + # Additional labels to add to the metadata + labels: { + test: test + } + +# K8s DaemonSet update strategy. +updateStrategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 1 + linuxUpdateStrategy: "RollingUpdate" + windowsUpdateStrategy: "RollingUpdate" diff --git a/charts/kubezero-addons/charts/aws-node-termination-handler/values.yaml b/charts/kubezero-addons/charts/aws-node-termination-handler/values.yaml new file mode 100644 index 0000000..22355c2 --- /dev/null +++ b/charts/kubezero-addons/charts/aws-node-termination-handler/values.yaml @@ -0,0 +1,245 @@ +# Default values for aws-node-termination-handler. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +image: + repository: public.ecr.aws/aws-ec2/aws-node-termination-handler + tag: v1.14.0 + pullPolicy: IfNotPresent + pullSecrets: [] + +securityContext: + runAsUserID: 1000 + runAsGroupID: 1000 + +nameOverride: "" +fullnameOverride: "" + +extraEnv: {} + +priorityClassName: system-node-critical + +podAnnotations: {} +linuxPodAnnotations: {} +windowsPodAnnotations: {} + +podLabels: {} +linuxPodLabels: {} +windowsPodLabels: {} + +# liveness probe settings. +probes: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 5 + +resources: + requests: + memory: "64Mi" + cpu: "50m" + limits: + memory: "128Mi" + cpu: "100m" + +# enableSqsTerminationDraining If true, this turns on queue-processor mode which drains nodes when an SQS termination event is received +enableSqsTerminationDraining: false + +# enableRebalanceMonitoring If true, cordon nodes when the rebalance recommendation notice is received +enableRebalanceMonitoring: false + +# enableRebalanceDraining If true, drain nodes when the rebalance recommendation notice is received +enableRebalanceDraining: false + +# queueURL Listens for messages on the specified SQS queue URL +queueURL: "" + +# checkASGTagBeforeDraining If true, check that the instance is tagged with "aws-node-termination-handler/managed" as the key before draining the node +checkASGTagBeforeDraining: true + +# managedAsgTag The tag to ensure is on a node if checkASGTagBeforeDraining is true +managedAsgTag: "aws-node-termination-handler/managed" + +# awsRegion If specified, use the AWS region for AWS API calls +awsRegion: "" + +# awsEndpoint If specified, use the AWS endpoint to make API calls. 
+awsEndpoint: "" + +# These should only be used for testing w/ localstack! +awsSecretAccessKey: +awsAccessKeyID: + +# enableSpotInterruptionDraining If false, do not drain nodes when the spot interruption termination notice is received +enableSpotInterruptionDraining: "" + +# enableScheduledEventDraining [EXPERIMENTAL] If true, drain nodes before the maintenance window starts for an EC2 instance scheduled event +enableScheduledEventDraining: "" + +# Total number of times to try making the metadata request before failing. +metadataTries: 3 + +# Cordon but do not drain nodes upon spot interruption termination notice. +cordonOnly: false + +# Taint node upon spot interruption termination notice. +taintNode: false + +# Log messages in JSON format. +jsonLogging: false + +# Sets the log level +logLevel: "info" + +# dryRun tells node-termination-handler to only log calls to kubernetes control plane +dryRun: false + +# deleteLocalData tells kubectl to continue even if there are pods using +# emptyDir (local data that will be deleted when the node is drained). +deleteLocalData: "" + +# ignoreDaemonSets causes kubectl to skip Daemon Set managed pods. +ignoreDaemonSets: "" + +# gracePeriod (DEPRECATED - use podTerminationGracePeriod instead) is time in seconds given to each pod to terminate gracefully. +# If negative, the default value specified in the pod will be used. +gracePeriod: "" +podTerminationGracePeriod: "" + +# nodeTerminationGracePeriod specifies the period of time in seconds given to each NODE to terminate gracefully. Node draining will be scheduled based on this value to optimize the amount of compute time, but still safely drain the node before an event. +nodeTerminationGracePeriod: "" + +# webhookURL if specified, posts event data to URL upon instance interruption action. +webhookURL: "" + +# Webhook URL will be fetched from the secret store using the given name. +webhookURLSecretName: "" + +# webhookProxy if specified, uses this HTTP(S) proxy configuration. +webhookProxy: "" + +# webhookHeaders if specified, replaces the default webhook headers. +webhookHeaders: "" + +# webhook template file will be fetched from given config map name +# if specified, replaces the default webhook message with the content of the template file +webhookTemplateConfigMapName: "" + +# template file name stored in configmap +webhookTemplateConfigMapKey: "" + +# webhookTemplate if specified, replaces the default webhook message template. +webhookTemplate: "" + +# instanceMetadataURL is used to override the default metadata URL (default: http://169.254.169.254:80) +instanceMetadataURL: "" + +# (TESTING USE): Mount path for uptime file +procUptimeFile: "" + +# Create node OS specific daemonset(s). (e.g. "linux", "windows", "linux windows") +targetNodeOs: "linux" + +# nodeSelector tells both linux and windows daemonsets where to place the node-termination-handler +# pods. By default, this value is empty and every node will receive a pod. +nodeSelector: {} +# linuxNodeSelector tells the linux daemonset where to place the node-termination-handler +# pods. By default, this value is empty and every linux node will receive a pod. +linuxNodeSelector: {} +# windowsNodeSelector tells the windows daemonset where to place the node-termination-handler +# pods. By default, this value is empty and every windows node will receive a pod. 
+windowsNodeSelector: {}
+
+nodeSelectorTermsOs: ""
+nodeSelectorTermsArch: ""
+
+enablePrometheusServer: false
+prometheusServerPort: 9092
+
+enableProbesServer: false
+probesServerPort: 8080
+probesServerEndpoint: "/healthz"
+
+# emitKubernetesEvents If true, Kubernetes events will be emitted when interruption events are received and when actions are taken on Kubernetes nodes. In IMDS Processor mode a default set of annotations with all the node metadata gathered from IMDS will be attached to each event
+emitKubernetesEvents: false
+
+# kubernetesEventsExtraAnnotations A comma-separated list of key=value extra annotations to attach to all emitted Kubernetes events
+# Example: "first=annotation,sample.annotation/number=two"
+kubernetesEventsExtraAnnotations: ""
+
+tolerations:
+  - operator: "Exists"
+
+affinity: {}
+linuxAffinity: {}
+windowsAffinity: {}
+
+serviceAccount:
+  # Specifies whether a service account should be created
+  create: true
+  # The name of the service account to use. If name is not set and create is true,
+  # a name is generated using fullname template
+  name:
+  annotations: {}
+  # eks.amazonaws.com/role-arn: arn:aws:iam::AWS_ACCOUNT_ID:role/IAM_ROLE_NAME
+
+rbac:
+  # rbac.pspEnabled: `true` if PodSecurityPolicy resources should be created
+  pspEnabled: true
+
+dnsPolicy: ""
+
+podMonitor:
+  # Specifies whether PodMonitor should be created
+  # this needs enableSqsTerminationDraining: false
+  # and enablePrometheusServer: true
+  create: false
+  # Specifies whether the PodMonitor should be created in a different namespace than
+  # the Helm release
+  namespace:
+  # Additional labels to add to the metadata
+  labels: {}
+  # The Prometheus scrape interval
+  interval: 30s
+  # The number of scraped samples that will be accepted
+  sampleLimit: 5000
+
+serviceMonitor:
+  # Specifies whether ServiceMonitor should be created
+  # this needs enableSqsTerminationDraining: true
+  # and enablePrometheusServer: true
+  create: false
+  # Specifies whether the ServiceMonitor should be created in a different namespace than
+  # the Helm release
+  namespace:
+  # Additional labels to add to the metadata
+  labels: {}
+  # The Prometheus scrape interval
+  interval: 30s
+  # The number of scraped samples that will be accepted
+  sampleLimit: 5000
+
+# K8s DaemonSet update strategy.
+updateStrategy:
+  type: RollingUpdate
+  rollingUpdate:
+    maxUnavailable: 1
+linuxUpdateStrategy: ""
+windowsUpdateStrategy: ""
+
+# Determines if NTH uses host networking for Linux when running the DaemonSet (only IMDS mode; queue-processor never runs with host networking)
+# If you have disabled IMDSv1 and are relying on IMDSv2, you'll need to increase the IP hop count to 2 before switching this to false
+# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-instance-metadata-service.html
+useHostNetwork: true
+
+# The maximal amount of parallel event processors to handle concurrent events
+workers: 10
+
+# The number of replicas in the NTH deployment when using queue-processor mode (NOTE: increasing this may cause duplicate webhooks since NTH pods are stateless)
+replicas: 1
+
+# podDisruptionBudget specifies the disruption budget for the controller pods.
+# Disruption budget will be configured only when the replicaCount is greater than 1 +podDisruptionBudget: {} +# maxUnavailable: 1 diff --git a/charts/kubezero-addons/nth.patch b/charts/kubezero-addons/nth.patch new file mode 100644 index 0000000..d18ff15 --- /dev/null +++ b/charts/kubezero-addons/nth.patch @@ -0,0 +1,39 @@ +diff -tuNr charts/aws-node-termination-handler.orig/templates/deployment.yaml charts/aws-node-termination-handler/templates/deployment.yaml +--- charts/aws-node-termination-handler.orig/templates/deployment.yaml 2021-12-01 16:41:46.713472250 +0100 ++++ charts/aws-node-termination-handler/templates/deployment.yaml 2021-12-01 16:41:54.276883046 +0100 +@@ -32,6 +32,13 @@ + configMap: + name: {{ .Values.webhookTemplateConfigMapName }} + {{- end }} ++ - name: aws-token ++ projected: ++ sources: ++ - serviceAccountToken: ++ path: token ++ expirationSeconds: 86400 ++ audience: "sts.amazonaws.com" + priorityClassName: {{ .Values.priorityClassName | quote }} + affinity: + nodeAffinity: +@@ -71,6 +78,9 @@ + - name: "webhook-template" + mountPath: "/config/" + {{- end }} ++ - name: aws-token ++ mountPath: "/var/run/secrets/sts.amazonaws.com/serviceaccount/" ++ readOnly: true + env: + - name: NODE_NAME + valueFrom: +diff -tuNr charts/aws-node-termination-handler.orig/values.yaml charts/aws-node-termination-handler/values.yaml +--- charts/aws-node-termination-handler.orig/values.yaml 2021-12-01 16:41:46.713472250 +0100 ++++ charts/aws-node-termination-handler/values.yaml 2021-12-01 16:42:02.350299065 +0100 +@@ -15,7 +15,7 @@ + nameOverride: "" + fullnameOverride: "" + +-extraEnv: [] ++extraEnv: {} + + priorityClassName: system-node-critical + diff --git a/charts/kubezero-addons/update.sh b/charts/kubezero-addons/update.sh new file mode 100755 index 0000000..9b1de65 --- /dev/null +++ b/charts/kubezero-addons/update.sh @@ -0,0 +1,10 @@ +#!/bin/bash +set -ex + +NTH_VERSION=$(yq eval '.dependencies[] | select(.name=="aws-node-termination-handler") | .version' Chart.yaml) + +rm -rf charts/aws-node-termination-handler +helm pull eks/aws-node-termination-handler --untar --untardir charts --version $NTH_VERSION + +# diff -tuNr charts/aws-node-termination-handler.orig charts/aws-node-termination-handler > nth.patch +patch -p0 -i nth.patch --no-backup-if-mismatch diff --git a/charts/kubezero-addons/values.yaml b/charts/kubezero-addons/values.yaml index 41274bd..e6f30f6 100644 --- a/charts/kubezero-addons/values.yaml +++ b/charts/kubezero-addons/values.yaml @@ -15,9 +15,16 @@ aws-node-termination-handler: enableSqsTerminationDraining: true - # -- SQS queue ARN from kube controller stack + # -- https://sqs.${AWS::Region}.amazonaws.com/${AWS::AccountId}/${ClusterName}_Nth queueURL: "" + metadataTries: 0 + extraEnv: + # -- "arn:aws:iam::${AWS::AccountId}:role/${AWS::Region}.${ClusterName}.awsNth" + AWS_ROLE_ARN: "" + AWS_WEB_IDENTITY_TOKEN_FILE: "/var/run/secrets/sts.amazonaws.com/serviceaccount/token" + AWS_STS_REGIONAL_ENDPOINTS: "regional" + deleteLocalData: true taintNode: true diff --git a/charts/kubezero/Chart.yaml b/charts/kubezero/Chart.yaml index 32bd07a..ec9475b 100644 --- a/charts/kubezero/Chart.yaml +++ b/charts/kubezero/Chart.yaml @@ -1,17 +1,18 @@ apiVersion: v2 name: kubezero -description: KubeZero - Bootstrap and ArgoCD Root App of Apps chart +description: KubeZero - Root App of Apps chart type: application -version: 1.20.11-4 +version: 1.21.7-0 home: https://kubezero.com icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png keywords: - kubezero - argocd 
maintainers: - - name: Quarky9 + - name: Stefan Reimer + email: stefan@zero-downtime.net dependencies: - name: kubezero-lib version: ">= 0.1.4" - repository: https://zero-down-time.github.io/kubezero/ -kubeVersion: ">= 1.18.0" + repository: https://cdn.zero-downtime.net/charts +kubeVersion: ">= 1.20.0" diff --git a/charts/kubezero/README.md b/charts/kubezero/README.md index 3b92708..4a92685 100644 --- a/charts/kubezero/README.md +++ b/charts/kubezero/README.md @@ -1,8 +1,8 @@ # kubezero -![Version: 1.20.11-4](https://img.shields.io/badge/Version-1.20.11--4-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) +![Version: 1.21.7-0](https://img.shields.io/badge/Version-1.21.7--0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) -KubeZero - Bootstrap and ArgoCD Root App of Apps chart +KubeZero - Root App of Apps chart **Homepage:** @@ -10,15 +10,15 @@ KubeZero - Bootstrap and ArgoCD Root App of Apps chart | Name | Email | Url | | ---- | ------ | --- | -| Quarky9 | | | +| Stefan Reimer | stefan@zero-downtime.net | | ## Requirements -Kubernetes: `>= 1.18.0` +Kubernetes: `>= 1.20.0` | Repository | Name | Version | |------------|------|---------| -| https://zero-down-time.github.io/kubezero/ | kubezero-lib | >= 0.1.4 | +| https://cdn.zero-downtime.net/charts | kubezero-lib | >= 0.1.4 | ## Values @@ -35,9 +35,6 @@ Kubernetes: `>= 1.18.0` | aws-ebs-csi-driver.targetRevision | string | `"0.6.4"` | | | aws-efs-csi-driver.enabled | bool | `false` | | | aws-efs-csi-driver.targetRevision | string | `"0.4.2"` | | -| calico.enabled | bool | `false` | | -| calico.retain | bool | `true` | | -| calico.targetRevision | string | `"0.2.2"` | | | cert-manager.enabled | bool | `false` | | | cert-manager.namespace | string | `"cert-manager"` | | | cert-manager.targetRevision | string | `"0.7.3"` | | @@ -51,7 +48,7 @@ Kubernetes: `>= 1.18.0` | kiam.targetRevision | string | `"0.3.5"` | | | kubezero.defaultTargetRevision | string | `"*"` | | | kubezero.gitSync | object | `{}` | | -| kubezero.repoURL | string | `"https://zero-down-time.github.io/kubezero"` | | +| kubezero.repoURL | string | `"https://cdn.zero-downtime.net/charts"` | | | kubezero.server | string | `"https://kubernetes.default.svc"` | | | logging.enabled | bool | `false` | | | logging.namespace | string | `"logging"` | | @@ -61,6 +58,9 @@ Kubernetes: `>= 1.18.0` | metrics.istio.prometheus | object | `{}` | | | metrics.namespace | string | `"monitoring"` | | | metrics.targetRevision | string | `"0.5.4"` | | +| network.enabled | bool | `false` | | +| network.retain | bool | `true` | | +| network.targetRevision | string | `"0.1.0"` | | | storage.enabled | bool | `false` | | | storage.targetRevision | string | `"0.3.2"` | | diff --git a/charts/kubezero/bootstrap.sh b/charts/kubezero/bootstrap.sh index 0b28c01..bd46815 100755 --- a/charts/kubezero/bootstrap.sh +++ b/charts/kubezero/bootstrap.sh @@ -91,7 +91,10 @@ function _helm() { local chart="kubezero-${module}" local namespace=$(yq r $TMPDIR/kubezero/templates/${module}.yaml spec.destination.namespace) - local targetRevision="--version $(yq r $TMPDIR/kubezero/templates/${module}.yaml spec.source.targetRevision)" + targetRevision="" + _version="$(yq r $TMPDIR/kubezero/templates/${module}.yaml spec.source.targetRevision)" + + [ -n "$_version" ] && targetRevision="--version $_version" yq r $TMPDIR/kubezero/templates/${module}.yaml 
'spec.source.helm.values' > $TMPDIR/values.yaml @@ -141,18 +144,13 @@ function cert-manager-post() { } -######## -# Kiam # -######## -function kiam-pre() { - # Certs only first - apply --set kiam.enabled=false - kubectl wait --timeout=120s --for=condition=Ready -n kube-system Certificate/kiam-server -} - -function kiam-post() { - wait_for 'kubectl get daemonset -n kube-system kiam-agent' - kubectl rollout status daemonset -n kube-system kiam-agent +########### +# ArgoCD # +########### +function argocd-pre() { + for f in $CLUSTER/secrets/argocd-*.yaml; do + kubectl apply -f $f + done } diff --git a/charts/kubezero/docs/app.yaml b/charts/kubezero/docs/app.yaml index 0ed65b5..ea7307d 100644 --- a/charts/kubezero/docs/app.yaml +++ b/charts/kubezero/docs/app.yaml @@ -7,7 +7,7 @@ metadata: spec: project: kubezero source: - repoURL: https://zero-down-time.github.io/kubezero + repoURL: https://cdn.zero-downtime.net/charts chart: kubezero targetRevision: {{ .Values.kubezero.version }} diff --git a/charts/kubezero/docs/applicationSet.yaml b/charts/kubezero/docs/applicationSet.yaml index 4d13d1b..49b2049 100644 --- a/charts/kubezero/docs/applicationSet.yaml +++ b/charts/kubezero/docs/applicationSet.yaml @@ -17,7 +17,7 @@ spec: spec: project: kubezero source: - repoURL: https://zero-down-time.github.io/kubezero + repoURL: https://cdn.zero-downtime.net/charts chart: kubezero targetRevision: '{{ "{{" }} kubezero.version {{ "}}" }}' helm: diff --git a/charts/kubezero/templates/calico.yaml b/charts/kubezero/templates/calico.yaml deleted file mode 100644 index 432da71..0000000 --- a/charts/kubezero/templates/calico.yaml +++ /dev/null @@ -1,17 +0,0 @@ -{{- define "calico-values" }} -network: {{ default "vxlan" .Values.calico.network }} -mtu: {{ default "8941" .Values.calico.mtu }} -prometheus: {{ .Values.metrics.enabled }} -{{- end }} - - -{{- define "calico-argo" }} - - ignoreDifferences: - - group: apiextensions.k8s.io - kind: CustomResourceDefinition - jsonPointers: - - /status -{{- end }} - -{{ include "kubezero-app.app" . }} diff --git a/charts/kubezero/templates/network.yaml b/charts/kubezero/templates/network.yaml new file mode 100644 index 0000000..6a444ed --- /dev/null +++ b/charts/kubezero/templates/network.yaml @@ -0,0 +1,22 @@ +{{- define "network-values" }} + +{{- with index .Values "network" "metallb" }} +metallb: + {{- toYaml . | nindent 2 }} +{{- end }} +{{- with index .Values "network" "multus" }} +multus: + {{- toYaml . | nindent 2 }} +{{- end }} +{{- with index .Values "network" "cilium" }} +cilium: + {{- toYaml . | nindent 2 }} +{{- end }} + +{{- end }} + + +{{- define "network-argo" }} +{{- end }} + +{{ include "kubezero-app.app" . 
}} diff --git a/charts/kubezero/values.yaml b/charts/kubezero/values.yaml index 399022e..fe1c8b4 100644 --- a/charts/kubezero/values.yaml +++ b/charts/kubezero/values.yaml @@ -1,6 +1,6 @@ kubezero: server: https://kubernetes.default.svc - repoURL: https://zero-down-time.github.io/kubezero + repoURL: https://cdn.zero-downtime.net/charts defaultTargetRevision: '*' gitSync: {} @@ -10,10 +10,10 @@ addons: enabled: false targetRevision: 0.1.0 -calico: +network: enabled: false retain: true - targetRevision: 0.2.2 + targetRevision: 0.1.0 cert-manager: enabled: false diff --git a/containers/admin/v1.21/kubezero.sh b/containers/admin/v1.21/kubezero.sh index f12b2d8..c6ab54e 100755 --- a/containers/admin/v1.21/kubezero.sh +++ b/containers/admin/v1.21/kubezero.sh @@ -147,7 +147,7 @@ if [ "$1" == 'upgrade' ]; then # migrate backup if [ -f ${HOSTFS}/usr/local/sbin/backup_control_plane.sh ]; then _repo=$(grep "export RESTIC_REPOSITORY" ${HOSTFS}/usr/local/sbin/backup_control_plane.sh) - helm template kubezero/kubezero-addons --version 0.2.0 --include-crds --namespace kube-system --kube-version $KUBE_VERSION --name-template addons \ + helm template kubezero/kubezero-addons --version 0.2.1 --include-crds --namespace kube-system --kube-version $KUBE_VERSION --name-template addons \ --set clusterBackup.enabled=true \ --set clusterBackup.repository="${_repo##*=}" \ --set clusterBackup.password="$(cat ${HOSTFS}/etc/kubernetes/clusterBackup.passphrase)" \ @@ -234,7 +234,7 @@ elif [[ "$1" =~ "^(bootstrap|recover|join)$" ]]; then # addons yq eval '.addons // ""' ${HOSTFS}/etc/kubernetes/kubezero.yaml > _values.yaml - helm template kubezero/kubezero-addons --version 0.2.0 --include-crds --namespace kube-system --name-template addons \ + helm template kubezero/kubezero-addons --version 0.2.1 --include-crds --namespace kube-system --name-template addons \ -f _values.yaml --kube-version $KUBE_VERSION | kubectl apply -f - $LOG fi
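As a quick local sanity check of the bumped addons chart (a sketch only, not part of the diff: the region, account ID, cluster name and queue URL below are placeholders, and it assumes the kubezero Helm repo alias used above by kubezero.sh), rendering the chart with NTH enabled should surface the extraEnv entries and the projected sts.amazonaws.com token volume introduced by nth.patch:

# hypothetical values; substitute the cluster's real SQS queue URL and IAM role ARN
helm repo add kubezero https://cdn.zero-downtime.net/charts
helm template kubezero/kubezero-addons --version 0.2.1 --include-crds \
  --namespace kube-system --kube-version 1.21.7 --name-template addons \
  --set aws-node-termination-handler.enabled=true \
  --set aws-node-termination-handler.queueURL="https://sqs.eu-central-1.amazonaws.com/123456789012/example_Nth" \
  --set aws-node-termination-handler.extraEnv.AWS_ROLE_ARN="arn:aws:iam::123456789012:role/eu-central-1.example.awsNth" \
  | grep -E -A2 'AWS_ROLE_ARN|aws-token'

Adding --set aws-node-termination-handler.enablePrometheusServer=true and --set aws-node-termination-handler.serviceMonitor.create=true to the same command should additionally render the new http-metrics Service and ServiceMonitor templates.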