mirror of
https://github.com/auricom/home-cluster.git
synced 2025-10-01 16:05:55 +02:00
feat: add kured
This commit is contained in:
@@ -6,7 +6,6 @@ resources:
|
|||||||
- cert-manager-webhook-ovh.yaml
|
- cert-manager-webhook-ovh.yaml
|
||||||
- coredns-charts.yaml
|
- coredns-charts.yaml
|
||||||
- drone-charts.yaml
|
- drone-charts.yaml
|
||||||
- fairwinds-charts.yaml
|
|
||||||
- falco-security-charts.yaml
|
- falco-security-charts.yaml
|
||||||
- gitea-charts.yaml
|
- gitea-charts.yaml
|
||||||
- grafana-loki-charts.yaml
|
- grafana-loki-charts.yaml
|
||||||
@@ -25,3 +24,4 @@ resources:
|
|||||||
- stakater-charts.yaml
|
- stakater-charts.yaml
|
||||||
- twuni-charts.yaml
|
- twuni-charts.yaml
|
||||||
- vernemq-charts.yaml
|
- vernemq-charts.yaml
|
||||||
|
- weaveworks-kured-charts.yaml
|
||||||
|
@@ -2,9 +2,9 @@
|
|||||||
apiVersion: source.toolkit.fluxcd.io/v1beta1
|
apiVersion: source.toolkit.fluxcd.io/v1beta1
|
||||||
kind: HelmRepository
|
kind: HelmRepository
|
||||||
metadata:
|
metadata:
|
||||||
name: fairwinds-charts
|
name: weaveworks-kured-charts
|
||||||
namespace: flux-system
|
namespace: flux-system
|
||||||
spec:
|
spec:
|
||||||
interval: 10m
|
interval: 10m
|
||||||
url: https://charts.fairwinds.com/stable
|
url: https://weaveworks.github.io/kured
|
||||||
timeout: 3m
|
timeout: 3m
|
31
cluster/core/infrastructure/kured/helm-release.yaml
Normal file
31
cluster/core/infrastructure/kured/helm-release.yaml
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
---
# Flux HelmRelease that installs the kured chart into kube-system.
# NOTE(review): nesting was rebuilt from a flattened diff view; confirm it
# against the file in the repository before applying.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: kured
  namespace: kube-system
spec:
  interval: 5m
  chart:
    spec:
      # renovate: registryUrl=https://weaveworks.github.io/kured
      chart: kured
      version: 2.4.3
      sourceRef:
        kind: HelmRepository
        name: weaveworks-kured-charts
        namespace: flux-system
      interval: 5m
  values:
    updateStrategy: RollingUpdate
    # Reboot window, going by the key names: Wednesdays only, between
    # 2:00 and 5:00 in the Europe/Paris time zone.
    configuration:
      rebootDays:
        - we
      # Quoted so the values stay strings (digits-and-colon scalars can be
      # read as sexagesimal integers by YAML 1.1 parsers).
      startTime: "2:00"
      endTime: "5:00"
      timeZone: "Europe/Paris"
    # Tolerate the master node-role taint; presumably so kured also runs on
    # master nodes -- verify against the cluster's taints.
    tolerations:
      - key: "node-role.kubernetes.io/master"
        operator: "Exists"
    metrics:
      create: true
|
5
cluster/core/infrastructure/kured/kustomization.yaml
Normal file
5
cluster/core/infrastructure/kured/kustomization.yaml
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
# Kustomization for the kured directory: bundles the Helm release and its
# alerting rules so the parent kustomization can include `kured` as one unit.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - helm-release.yaml
  - prometheus-rule.yaml
|
29
cluster/core/infrastructure/kured/prometheus-rule.yaml
Normal file
29
cluster/core/infrastructure/kured/prometheus-rule.yaml
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
---
# Alerting rules built on the kured_reboot_required metric.
# NOTE(review): nesting was rebuilt from a flattened diff view; confirm it
# against the file in the repository before applying.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    prometheus: k8s
    role: alert-rules
  name: kured-rules
  namespace: kube-system
spec:
  groups:
    - name: kured.rules
      rules:
        # Fires when the maximum of kured_reboot_required has been non-zero
        # for a continuous 24h.
        - alert: RebootRequired
          annotations:
            description: Node(s) require a manual reboot
            summary: Reboot daemon has failed to do so for 24 hours
          expr: max(kured_reboot_required) != 0
          for: 24h
          labels:
            severity: warning
        # Fires per series once kured_reboot_required has been above zero
        # for 5m.
        - alert: RebootScheduled
          annotations:
            description: Node Reboot Scheduled
            summary: Node {{$labels.node}} has been scheduled to reboot
          expr: kured_reboot_required > 0
          for: 5m
          labels:
            severity: warning
|
@@ -5,6 +5,7 @@ resources:
|
|||||||
- descheduler
|
- descheduler
|
||||||
- flux
|
- flux
|
||||||
- intel-gpu-plugin
|
- intel-gpu-plugin
|
||||||
|
- kured
|
||||||
- node-feature-discovery
|
- node-feature-discovery
|
||||||
- rook-ceph
|
- rook-ceph
|
||||||
- system-upgrade
|
- system-upgrade
|
||||||
|
Reference in New Issue
Block a user