Files
auricom-home-cluster/cluster/apps/kasten-io/k10/prometheus-rule.yaml
2021-05-26 11:17:58 +02:00

23 lines
597 B
YAML

---
# PrometheusRule for the kasten-io namespace: raises a critical alert when
# the catalog_actions_count{status="failed"} counter has increased within
# the last 30 minutes.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  # NOTE(review): presumably these labels are what the Prometheus
  # ruleSelector matches on -- confirm against the Prometheus resource.
  labels:
    prometheus: k8s
    role: alert-rules
  name: kasten-io
  namespace: kasten-io
spec:
  groups:
    - name: kasten.rules
      rules:
        - alert: JobsFailing
          annotations:
            description: Jobs failure
            # Folded scalar: rendered as a single line, without literal
            # quote characters wrapped around the message.
            summary: >-
              {{ $labels.app }} jobs amount of errors for the last 30 mins
              {{ $value }} for {{ $labels.policy }} policy
          expr: increase(catalog_actions_count{status="failed"}[30m]) > 0
          # Keep this well below the 30m range used in expr. After a single
          # failure the expression stays true for less than 30 minutes, so
          # a hold time equal to the range would leave the alert stuck in
          # pending and it would never fire.
          for: 5m
          labels:
            severity: critical