♻️ intel-device-plugin

This commit is contained in:
auricom
2023-08-27 19:26:06 +02:00
parent a4c46ba5fa
commit 6a063c062e
11 changed files with 116 additions and 118 deletions

View File

@@ -0,0 +1,32 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/helmrelease-helm-v2beta1.json
# Flux HelmRelease that installs the Intel device plugins operator into
# kube-system from the `intel` HelmRepository.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: intel-device-plugin-operator
  namespace: kube-system
spec:
  interval: 30m
  chart:
    spec:
      chart: intel-device-plugins-operator
      version: 0.27.1
      sourceRef:
        kind: HelmRepository
        name: intel
        namespace: flux-system
  maxHistory: 2
  install:
    # CRDs shipped with the chart are created or replaced on install.
    crds: CreateReplace
    remediation:
      retries: 3
  upgrade:
    cleanupOnFail: true
    # Same CRD policy on upgrade so CRD changes in a new chart version apply.
    crds: CreateReplace
    remediation:
      retries: 3
  uninstall:
    keepHistory: false
  # Reconciled only after the node-feature-discovery HelmRelease is ready.
  # NOTE(review): the upstream operator chart lists cert-manager as a
  # prerequisite - confirm it is deployed before this release reconciles.
  dependsOn:
    - name: node-feature-discovery
      namespace: kube-system

View File

@@ -51,20 +51,13 @@ spec:
targetLabel: node
securityContext:
privileged: true
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: feature.node.kubernetes.io/custom-intel-gpu
operator: In
values:
- "true"
nodeSelector:
intel.feature.node.kubernetes.io/gpu: "true"
resources:
requests:
gpu.intel.com/i915: 1
cpu: 15m
memory: 105Mi
gpu.intel.com/i915_monitoring: 1
cpu: 100m
memory: 100Mi
limits:
gpu.intel.com/i915: 1
memory: 105Mi
gpu.intel.com/i915_monitoring: 1
memory: 500Mi

View File

@@ -0,0 +1,34 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/helmrelease-helm-v2beta1.json
# Flux HelmRelease that installs the Intel GPU device plugin chart into
# kube-system from the `intel` HelmRepository.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: intel-device-plugin-gpu
  namespace: kube-system
spec:
  interval: 30m
  chart:
    spec:
      chart: intel-device-plugins-gpu
      version: 0.27.1
      sourceRef:
        kind: HelmRepository
        name: intel
        namespace: flux-system
  maxHistory: 2
  install:
    remediation:
      retries: 3
  upgrade:
    cleanupOnFail: true
    remediation:
      retries: 3
  uninstall:
    keepHistory: false
  # Reconciled only after the operator HelmRelease in kube-system is ready.
  dependsOn:
    - name: intel-device-plugin-operator
      namespace: kube-system
  values:
    name: intel-device-plugin-gpu
    # Same sharing factor the previous app-template release passed as
    # `-shared-dev-num 4`.
    sharedDevNum: 4
    # NOTE(review): presumably makes the chart ship its own NodeFeatureRule
    # for GPU node labelling - confirm against the chart's values reference.
    nodeFeatureRule: true

View File

@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/SchemaStore/schemastore/master/src/schemas/json/kustomization.json
# Kustomize entry point for this directory: renders the HelmRelease manifest
# with the kube-system namespace applied.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: kube-system
resources:
  - ./helmrelease.yaml

View File

@@ -3,48 +3,52 @@
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: cluster-apps-intel-gpu-plugin
name: cluster-apps-intel-device-plugin
namespace: flux-system
labels:
substitution.flux.home.arpa/enabled: "true"
spec:
dependsOn:
- name: cluster-apps-node-feature-discovery
path: ./kubernetes/apps/kube-system/intel-gpu/plugin
path: ./kubernetes/apps/kube-system/intel-device-plugin/app
prune: true
sourceRef:
kind: GitRepository
name: home-ops-kubernetes
healthChecks:
- apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
name: intel-gpu-plugin
namespace: kube-system
interval: 30m
retryInterval: 1m
timeout: 3m
timeout: 5m
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/kustomization-kustomize-v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: cluster-apps-intel-gpu-exporter
name: cluster-apps-intel-device-plugin-gpu
namespace: flux-system
labels:
substitution.flux.home.arpa/enabled: "true"
spec:
dependsOn:
- name: cluster-apps-intel-gpu-plugin
path: ./kubernetes/apps/kube-system/intel-gpu/exporter
path: ./kubernetes/apps/kube-system/intel-device-plugin/gpu
prune: true
sourceRef:
kind: GitRepository
name: home-ops-kubernetes
healthChecks:
- apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
name: intel-gpu-exporter
namespace: kube-system
interval: 30m
retryInterval: 1m
timeout: 3m
timeout: 5m
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/kustomization-kustomize-v1.json
# Flux Kustomization that applies the manifests under
# kubernetes/apps/kube-system/intel-device-plugin/exporter from the
# home-ops-kubernetes GitRepository.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: cluster-apps-intel-device-plugin-exporter
  namespace: flux-system
  labels:
    substitution.flux.home.arpa/enabled: "true"
spec:
  # NOTE(review): no dependsOn is declared here, so this reconciles
  # independently of the other intel-device-plugin Kustomizations - confirm
  # that is intended.
  path: ./kubernetes/apps/kube-system/intel-device-plugin/exporter
  prune: true
  sourceRef:
    kind: GitRepository
    name: home-ops-kubernetes
  interval: 30m
  retryInterval: 1m
  timeout: 5m

View File

@@ -1,83 +0,0 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/helmrelease-helm-v2beta1.json
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
name: &app intel-gpu-plugin
namespace: kube-system
spec:
interval: 30m
chart:
spec:
chart: app-template
version: 1.5.1
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
maxHistory: 2
install:
createNamespace: true
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
retries: 3
uninstall:
keepHistory: false
values:
controller:
type: daemonset
strategy: RollingUpdate
image:
repository: docker.io/intel/intel-gpu-plugin
tag: 0.27.1
pullPolicy: IfNotPresent
args:
- -shared-dev-num
- "4"
service:
main:
enabled: false
# TODO(intel-gpu-plugin): Write probes to check for something to tell if it's working
probes:
liveness:
enabled: false
readiness:
enabled: false
startup:
enabled: false
persistence:
devfs:
enabled: true
type: hostPath
hostPath: /dev/dri
hostPathType: Directory
readOnly: true
sysfs:
enabled: true
type: hostPath
hostPath: /sys/class/drm
hostPathType: Directory
readOnly: true
kubeletsockets:
enabled: true
type: hostPath
hostPathType: Directory
hostPath: /var/lib/kubelet/device-plugins
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: feature.node.kubernetes.io/custom-intel-gpu
operator: In
values:
- "true"
resources:
requests:
cpu: 15m
memory: 105Mi
limits:
memory: 105Mi

View File

@@ -9,7 +9,7 @@ resources:
- ./cilium/ks.yaml
- ./descheduler/ks.yaml
- ./external-secrets/ks.yaml
- ./intel-gpu/ks.yaml
- ./intel-device-plugin/ks.yaml
- ./kubelet-csr-approver/ks.yaml
- ./metrics-server/ks.yaml
- ./node-feature-discovery/ks.yaml

View File

@@ -0,0 +1,10 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.devbu.io/helmrepository_v1beta2.json
# Flux HelmRepository for the Intel Helm charts index; referenced as
# `intel` / `flux-system` by HelmRelease sourceRefs.
apiVersion: source.toolkit.fluxcd.io/v1beta2
kind: HelmRepository
metadata:
  name: intel
  namespace: flux-system
spec:
  # The repository index is re-fetched every two hours.
  interval: 2h
  url: https://intel.github.io/helm-charts

View File

@@ -21,6 +21,7 @@ resources:
- ./grafana.yaml
- ./hajimari.yaml
- ./ingress-nginx.yaml
- ./intel.yaml
- ./jetstack.yaml
- ./kyverno.yaml
- ./metrics-server.yaml