Files
auricom-home-cluster/cluster/core/kube-system/kured/prometheus-rule.yaml
2021-07-28 02:40:10 +02:00

30 lines
800 B
YAML

---
# PrometheusRule that defines two alerts on the `kured_reboot_required`
# metric: one for a reboot that has been pending for a long time, and one
# for a reboot that has recently become pending.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    # NOTE(review): presumably these labels are what the Prometheus
    # resource's ruleSelector matches on -- confirm against the Prometheus
    # custom resource deployed in this cluster.
    prometheus: k8s
    role: alert-rules
  name: kured-rules
  namespace: kube-system
spec:
  groups:
    - name: kured.rules
      rules:
        # Cluster-wide alert: max() collapses all series into one, so this
        # fires once when any series has been non-zero for 24 hours
        # continuously.
        - alert: RebootRequired
          annotations:
            description: Node(s) require a manual reboot
            summary: Reboot daemon has failed to do so for 24 hours
          expr: max(kured_reboot_required) != 0
          for: 24h
          labels:
            severity: warning
        # Per-series alert: no aggregation, so each series that stays above
        # zero for 5 minutes raises its own alert and keeps its labels.
        - alert: RebootScheduled
          annotations:
            description: Node Reboot Scheduled
            # Quoted so the template expression is always read as a plain
            # string. Assumes the metric carries a `node` label -- TODO
            # confirm against the exporter.
            summary: 'Node {{$labels.node}} has been scheduled to reboot'
          expr: kured_reboot_required > 0
          for: 5m
          labels:
            severity: warning