{{- if .Values.awsNeuron.enabled }}
# DaemonSet for the AWS Neuron device plugin.
# Rendered only when .Values.awsNeuron.enabled is truthy; the container image
# is taken from .Values.awsNeuron.image.name / .Values.awsNeuron.image.tag.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: neuron-device-plugin
  namespace: kube-system
spec:
  selector:
    matchLabels:
      name: neuron-device-plugin-ds
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      annotations:
        # Legacy critical-pod marker, kept for parity with the existing
        # manifest. Pod priority is expressed via priorityClassName below.
        scheduler.alpha.kubernetes.io/critical-pod: ""
      labels:
        # Must match spec.selector.matchLabels above.
        name: neuron-device-plugin-ds
    spec:
      # serviceAccountName is the current PodSpec field; the former
      # `serviceAccount` key is its deprecated alias.
      serviceAccountName: neuron-device-plugin
      tolerations:
        - key: CriticalAddonsOnly
          operator: Exists
        - key: aws.amazon.com/neuron
          operator: Exists
          effect: NoSchedule
        - key: kubezero-workergroup
          effect: NoSchedule
          operator: Exists
      # Run with node-critical priority so the plugin pod is scheduled ahead
      # of, and evicted after, regular workloads.
      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
      priorityClassName: "system-node-critical"
      affinity:
        nodeAffinity:
          # Only schedule onto the instance types listed below.
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: "node.kubernetes.io/instance-type"
                    operator: In
                    values:
                      - inf1.xlarge
                      - inf1.2xlarge
                      - inf1.6xlarge
                      - inf1.24xlarge
                      - inf2.xlarge
                      - inf2.4xlarge
                      - inf2.8xlarge
                      - inf2.24xlarge
                      - inf2.48xlarge
                      - trn1.2xlarge
                      - trn1.32xlarge
                      - trn1n.32xlarge
      containers:
        - image: "{{ .Values.awsNeuron.image.name }}:{{ .Values.awsNeuron.image.tag }}"
          imagePullPolicy: IfNotPresent
          name: neuron-device-plugin
          env:
            - name: KUBECONFIG
              value: /etc/kubernetes/kubelet.conf
            # Downward API: name of the node this pod is running on.
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop: ["ALL"]
          volumeMounts:
            - name: device-plugin
              mountPath: /var/lib/kubelet/device-plugins
            - name: infa-map
              mountPath: /run
      volumes:
        # Host directory shared with the kubelet for device-plugin sockets.
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins
        # Host /run, mounted at the same path inside the container.
        - name: infa-map
          hostPath:
            path: /run
{{- end }}