Files
auricom-home-cluster/cluster/apps/kasten-io/k10/monitoring/prometheus-rule.yaml
2022-01-23 00:50:12 +01:00

21 lines
529 B
YAML

---
# PrometheusRule (Prometheus Operator CRD) holding the alerting rules for
# Kasten K10 in the kasten-io namespace.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    # NOTE(review): presumably these labels are what the Prometheus
    # ruleSelector matches on — confirm against the Prometheus resource.
    prometheus: k8s
    role: alert-rules
  name: kasten-io
  namespace: kasten-io
spec:
  groups:
    - name: kasten.rules
      rules:
        # Fires when catalog_actions_count{status="failed"} has increased
        # within a 10-minute window and that condition has held for 1 minute.
        - alert: JobsFailing
          annotations:
            # Folded scalar: rendered as a single line. The wording matches
            # the "> 0" threshold below (any failure, not "more than 1").
            summary: >-
              One or more failed K10 jobs occurred for the
              {{ $labels.policy }} policy in the last 10 minutes
          expr: increase(catalog_actions_count{status="failed"}[10m]) > 0
          for: 1m
          labels:
            severity: critical