---
# PrometheusRule (Prometheus Operator CRD) that defines alerting rules for
# Kasten K10 job failures. The rule object lives in the kasten-io namespace.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  # NOTE(review): these labels presumably have to match the ruleSelector of
  # the Prometheus resource that should load this rule - confirm.
  labels:
    prometheus: k8s
    role: alert-rules
  name: kasten-io
  namespace: kasten-io
spec:
  groups:
    - name: kasten.rules
      rules:
        - alert: JobsFailing
          annotations:
            # Folded scalar: the two lines below join with a single space
            # into one line of text. {{ $labels.policy }} is filled in from
            # the "policy" label of the series that triggered the alert.
            # NOTE(review): the text says "More than 1" but the expression
            # below uses "> 0" - confirm which threshold is intended.
            summary: >-
              More than 1 failed K10 jobs occurred for the
              {{ $labels.policy }} policy in the last 10 minutes
          # True when the failed-action counter grew during the trailing
          # 10-minute window. Quoted so the braces, brackets, double quotes
          # and ">" in the PromQL are never read as YAML syntax.
          expr: 'increase(catalog_actions_count{status="failed"}[10m]) > 0'
          # The condition must hold for 1 minute before the alert fires.
          for: 1m
          labels:
            severity: critical