feat: add kured

This commit is contained in:
auricom
2021-04-27 01:21:48 +02:00
parent 58ac3aee01
commit 3d22c6ef81
6 changed files with 69 additions and 3 deletions

View File

@@ -6,7 +6,6 @@ resources:
- cert-manager-webhook-ovh.yaml
- coredns-charts.yaml
- drone-charts.yaml
- fairwinds-charts.yaml
- falco-security-charts.yaml
- gitea-charts.yaml
- grafana-loki-charts.yaml
@@ -25,3 +24,4 @@ resources:
- stakater-charts.yaml
- twuni-charts.yaml
- vernemq-charts.yaml
- weaveworks-kured-charts.yaml

View File

@@ -2,9 +2,9 @@
# Flux HelmRepository that serves the kured chart.
# Post-commit state of the renamed file: the former fairwinds-charts
# name/url pair is replaced rather than kept as duplicate keys.
apiVersion: source.toolkit.fluxcd.io/v1beta1
kind: HelmRepository
metadata:
  name: weaveworks-kured-charts
  namespace: flux-system
spec:
  interval: 10m
  url: https://weaveworks.github.io/kured
  timeout: 3m

View File

@@ -0,0 +1,31 @@
---
# Flux HelmRelease that installs kured into kube-system from the
# weaveworks-kured-charts HelmRepository declared in flux-system.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: kured
  namespace: kube-system
spec:
  interval: 5m
  chart:
    spec:
      # renovate: registryUrl=https://weaveworks.github.io/kured
      chart: kured
      version: 2.4.3
      sourceRef:
        kind: HelmRepository
        name: weaveworks-kured-charts
        namespace: flux-system
      interval: 5m
  # Values handed to the kured chart.
  values:
    updateStrategy: RollingUpdate
    # Reboot window: the "we" day entry, from 2:00 to 5:00, Europe/Paris time.
    configuration:
      rebootDays:
        - we
      startTime: "2:00"
      endTime: "5:00"
      timeZone: "Europe/Paris"
    # Tolerate the master taint (any effect, since only key + Exists is given).
    tolerations:
      - key: "node-role.kubernetes.io/master"
        operator: "Exists"
    # NOTE(review): presumably makes the chart create its Prometheus
    # scrape resource; confirm against the chart's values documentation.
    metrics:
      create: true

View File

@@ -0,0 +1,5 @@
---
# Kustomization for the kured directory: bundles the Helm release and
# its alerting rules.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - helm-release.yaml
  - prometheus-rule.yaml

View File

@@ -0,0 +1,29 @@
---
# Alerting rules built on the kured_reboot_required metric.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  # NOTE(review): these labels are presumably what the Prometheus
  # instance's rule selector matches on; verify against its configuration.
  labels:
    prometheus: k8s
    role: alert-rules
  name: kured-rules
  namespace: kube-system
spec:
  groups:
    - name: kured.rules
      rules:
        # Fires when the maximum of kured_reboot_required has been
        # non-zero for a full 24 hours.
        - alert: RebootRequired
          annotations:
            description: Node(s) require a manual reboot
            summary: Reboot daemon has failed to do so for 24 hours
          expr: max(kured_reboot_required) != 0
          for: 24h
          labels:
            severity: warning
        # Fires per series once kured_reboot_required has been above
        # zero for 5 minutes.
        - alert: RebootScheduled
          annotations:
            description: Node Reboot Scheduled
            # Quoted so the template braces can never be read as YAML flow syntax.
            summary: 'Node {{$labels.node}} has been scheduled to reboot'
          expr: kured_reboot_required > 0
          for: 5m
          labels:
            severity: warning

View File

@@ -5,6 +5,7 @@ resources:
- descheduler
- flux
- intel-gpu-plugin
- kured
- node-feature-discovery
- rook-ceph
- system-upgrade