Files
auricom-home-cluster/cluster/apps/storage/k10/monitoring/prometheus-rule.yaml
auricom 98450e4359 🚀 logs
2022-09-15 05:42:56 +02:00

21 lines
529 B
YAML

---
# PrometheusRule (Prometheus Operator CRD) holding alerting rules for
# Kasten K10 in the kasten-io namespace.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    # NOTE(review): presumably these labels are what the Prometheus
    # resource's ruleSelector matches on - confirm against the cluster's
    # Prometheus spec.
    prometheus: k8s
    role: alert-rules
  name: kasten-io
  namespace: kasten-io
spec:
  groups:
    - name: kasten.rules
      rules:
        - alert: JobsFailing
          annotations:
            # The expression below matches on any positive increase, so the
            # wording says "one or more" rather than "more than 1".
            # Folded (>-) so the templated text stays one logical line.
            summary: >-
              One or more failed K10 jobs occurred for the
              {{ $labels.policy }} policy in the last 10 minutes
          # Positive increase of the failed-action counter over a 10-minute
          # window, evaluated per label set.
          expr: increase(catalog_actions_count{status="failed"}[10m]) > 0
          # The condition must hold for 1 minute before the alert fires.
          for: 1m
          labels:
            severity: critical