♻️ intel-device-plugin

This commit is contained in:
auricom
2023-08-27 19:26:06 +02:00
parent a4c46ba5fa
commit 6a063c062e
11 changed files with 116 additions and 118 deletions

View File

@@ -0,0 +1,32 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/helmrelease-helm-v2beta1.json
# Flux HelmRelease that installs the intel-device-plugins-operator chart
# (0.27.1) from the `intel` HelmRepository into kube-system.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: intel-device-plugin-operator
  namespace: kube-system
spec:
  interval: 30m
  chart:
    spec:
      chart: intel-device-plugins-operator
      version: 0.27.1
      sourceRef:
        kind: HelmRepository
        name: intel
        namespace: flux-system
  maxHistory: 2
  install:
    # CRD policy is set explicitly for both install and upgrade.
    crds: CreateReplace
    remediation:
      retries: 3
  upgrade:
    cleanupOnFail: true
    crds: CreateReplace
    remediation:
      retries: 3
  uninstall:
    keepHistory: false
  # Ordering: this release is reconciled after the node-feature-discovery
  # HelmRelease in kube-system.
  dependsOn:
    - name: node-feature-discovery
      namespace: kube-system

View File

@@ -51,20 +51,13 @@ spec:
targetLabel: node targetLabel: node
securityContext: securityContext:
privileged: true privileged: true
affinity: nodeSelector:
nodeAffinity: intel.feature.node.kubernetes.io/gpu: "true"
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: feature.node.kubernetes.io/custom-intel-gpu
operator: In
values:
- "true"
resources: resources:
requests: requests:
gpu.intel.com/i915: 1 gpu.intel.com/i915_monitoring: 1
cpu: 15m cpu: 100m
memory: 105Mi memory: 100Mi
limits: limits:
gpu.intel.com/i915: 1 gpu.intel.com/i915_monitoring: 1
memory: 105Mi memory: 500Mi

View File

@@ -0,0 +1,34 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/helmrelease-helm-v2beta1.json
# Flux HelmRelease that installs the intel-device-plugins-gpu chart (0.27.1)
# from the `intel` HelmRepository into kube-system.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: intel-device-plugin-gpu
  namespace: kube-system
spec:
  interval: 30m
  chart:
    spec:
      chart: intel-device-plugins-gpu
      version: 0.27.1
      sourceRef:
        kind: HelmRepository
        name: intel
        namespace: flux-system
  maxHistory: 2
  install:
    remediation:
      retries: 3
  upgrade:
    cleanupOnFail: true
    remediation:
      retries: 3
  uninstall:
    keepHistory: false
  # Ordering: reconciled after the intel-device-plugin-operator HelmRelease
  # in kube-system.
  dependsOn:
    - name: intel-device-plugin-operator
      namespace: kube-system
  values:
    name: intel-device-plugin-gpu
    # Same value (4) the removed app-template release passed to the plugin
    # via `-shared-dev-num`.
    sharedDevNum: 4
    # NOTE(review): presumably makes the chart create the NodeFeatureRule
    # behind the intel.feature.node.kubernetes.io/gpu node label — confirm
    # against the chart's values documentation.
    nodeFeatureRule: true

View File

@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/SchemaStore/schemastore/master/src/schemas/json/kustomization.json
# Kustomize entry point for this directory: it lists a single manifest,
# helmrelease.yaml, and sets the kube-system namespace for it.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: kube-system
resources:
  - ./helmrelease.yaml

View File

@@ -3,48 +3,52 @@
apiVersion: kustomize.toolkit.fluxcd.io/v1 apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization kind: Kustomization
metadata: metadata:
name: cluster-apps-intel-gpu-plugin name: cluster-apps-intel-device-plugin
namespace: flux-system namespace: flux-system
labels: labels:
substitution.flux.home.arpa/enabled: "true" substitution.flux.home.arpa/enabled: "true"
spec: spec:
dependsOn: path: ./kubernetes/apps/kube-system/intel-device-plugin/app
- name: cluster-apps-node-feature-discovery
path: ./kubernetes/apps/kube-system/intel-gpu/plugin
prune: true prune: true
sourceRef: sourceRef:
kind: GitRepository kind: GitRepository
name: home-ops-kubernetes name: home-ops-kubernetes
healthChecks:
- apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
name: intel-gpu-plugin
namespace: kube-system
interval: 30m interval: 30m
retryInterval: 1m retryInterval: 1m
timeout: 3m timeout: 5m
--- ---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/kustomization-kustomize-v1.json # yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/kustomization-kustomize-v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1 apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization kind: Kustomization
metadata: metadata:
name: cluster-apps-intel-gpu-exporter name: cluster-apps-intel-device-plugin-gpu
namespace: flux-system namespace: flux-system
labels: labels:
substitution.flux.home.arpa/enabled: "true" substitution.flux.home.arpa/enabled: "true"
spec: spec:
dependsOn: path: ./kubernetes/apps/kube-system/intel-device-plugin/gpu
- name: cluster-apps-intel-gpu-plugin
path: ./kubernetes/apps/kube-system/intel-gpu/exporter
prune: true prune: true
sourceRef: sourceRef:
kind: GitRepository kind: GitRepository
name: home-ops-kubernetes name: home-ops-kubernetes
healthChecks:
- apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
name: intel-gpu-exporter
namespace: kube-system
interval: 30m interval: 30m
retryInterval: 1m retryInterval: 1m
timeout: 3m timeout: 5m
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/kustomization-kustomize-v1.json
# Flux Kustomization that applies the manifests under
# kube-system/intel-device-plugin/exporter from the home-ops-kubernetes
# GitRepository.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: cluster-apps-intel-device-plugin-exporter
  namespace: flux-system
  labels:
    # Quoted so the label value stays a string rather than a boolean.
    substitution.flux.home.arpa/enabled: "true"
spec:
  path: ./kubernetes/apps/kube-system/intel-device-plugin/exporter
  prune: true
  sourceRef:
    kind: GitRepository
    name: home-ops-kubernetes
  interval: 30m
  retryInterval: 1m
  timeout: 5m

View File

@@ -1,83 +0,0 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/helmrelease-helm-v2beta1.json
# Flux HelmRelease that ran docker.io/intel/intel-gpu-plugin as a DaemonSet
# through the bjw-s app-template chart (1.5.1). This is the manifest the
# commit removes.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: &app intel-gpu-plugin
  namespace: kube-system
spec:
  interval: 30m
  chart:
    spec:
      chart: app-template
      version: 1.5.1
      sourceRef:
        kind: HelmRepository
        name: bjw-s
        namespace: flux-system
  maxHistory: 2
  install:
    createNamespace: true
    remediation:
      retries: 3
  upgrade:
    cleanupOnFail: true
    remediation:
      retries: 3
  uninstall:
    keepHistory: false
  values:
    controller:
      type: daemonset
      strategy: RollingUpdate
    image:
      repository: docker.io/intel/intel-gpu-plugin
      tag: 0.27.1
      pullPolicy: IfNotPresent
    # "4" is quoted so the argument is passed as a string, not an integer.
    args:
      - -shared-dev-num
      - "4"
    service:
      main:
        enabled: false
    # TODO(intel-gpu-plugin): Write probes to check for something to tell if it's working
    probes:
      liveness:
        enabled: false
      readiness:
        enabled: false
      startup:
        enabled: false
    # Host directories mounted into the plugin pod.
    persistence:
      devfs:
        enabled: true
        type: hostPath
        hostPath: /dev/dri
        hostPathType: Directory
        readOnly: true
      sysfs:
        enabled: true
        type: hostPath
        hostPath: /sys/class/drm
        hostPathType: Directory
        readOnly: true
      # Unlike the two mounts above, this one does not set readOnly.
      kubeletsockets:
        enabled: true
        type: hostPath
        hostPathType: Directory
        hostPath: /var/lib/kubelet/device-plugins
    # Schedule only on nodes labelled custom-intel-gpu="true".
    affinity:
      nodeAffinity:
        requiredDuringSchedulingIgnoredDuringExecution:
          nodeSelectorTerms:
            - matchExpressions:
                - key: feature.node.kubernetes.io/custom-intel-gpu
                  operator: In
                  values:
                    - "true"
    resources:
      requests:
        cpu: 15m
        memory: 105Mi
      limits:
        memory: 105Mi

View File

@@ -9,7 +9,7 @@ resources:
- ./cilium/ks.yaml - ./cilium/ks.yaml
- ./descheduler/ks.yaml - ./descheduler/ks.yaml
- ./external-secrets/ks.yaml - ./external-secrets/ks.yaml
- ./intel-gpu/ks.yaml - ./intel-device-plugin/ks.yaml
- ./kubelet-csr-approver/ks.yaml - ./kubelet-csr-approver/ks.yaml
- ./metrics-server/ks.yaml - ./metrics-server/ks.yaml
- ./node-feature-discovery/ks.yaml - ./node-feature-discovery/ks.yaml

View File

@@ -0,0 +1,10 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.devbu.io/helmrepository_v1beta2.json
# Flux HelmRepository named `intel` in flux-system, pointing at
# https://intel.github.io/helm-charts and refreshed every 2h.
apiVersion: source.toolkit.fluxcd.io/v1beta2
kind: HelmRepository
metadata:
  name: intel
  namespace: flux-system
spec:
  interval: 2h
  url: https://intel.github.io/helm-charts

View File

@@ -21,6 +21,7 @@ resources:
- ./grafana.yaml - ./grafana.yaml
- ./hajimari.yaml - ./hajimari.yaml
- ./ingress-nginx.yaml - ./ingress-nginx.yaml
- ./intel.yaml
- ./jetstack.yaml - ./jetstack.yaml
- ./kyverno.yaml - ./kyverno.yaml
- ./metrics-server.yaml - ./metrics-server.yaml