# Mirror of https://github.com/auricom/home-cluster.git
# Synced 2025-09-17 18:24:14 +02:00 (73 lines, 2.5 KiB, YAML)
---
# PrometheusRule holding the alerting rules for the Traefik reverse proxy.
# Every rule must hold for 5 minutes (`for: 5m`) before it fires and is
# labelled `severity: critical`.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    app: traefik
  name: traefik.rules
  namespace: networking
spec:
  groups:
    - name: traefik.rules
      rules:
        # Fires when no `up` series exists at all for job="traefik".
        - alert: TraefikAbsent
          annotations:
            summary: "Traefik has disappeared from Prometheus service discovery."
            description: "Ingresses will be down until the Traefik reverse proxy is back up."
          expr: |
            absent(up{job="traefik"})
          for: 5m
          labels:
            severity: critical

        # Fires while the last configuration reload is reported as failed.
        - alert: TraefikConfigError
          annotations:
            summary: "Traefik config error."
            description: >-
              Traefik has failed to load the config file. Check Traefik
              logs for exact parsing error.
          expr: |
            traefik_config_last_reload_failure{job="traefik"} == 1
          for: 5m
          labels:
            severity: critical

        # Ratio of 4xx responses to all responses, per exported_service,
        # over a 1-minute rate window; fires above 10%.
        - alert: TraefikHighHttp4xxErrorRateService
          annotations:
            summary: "Traefik has a high HTTP 4xx error rate."
            description: >-
              Traefik is reporting {{ $value | humanizePercentage }} of 4xx
              errors on {{ $labels.exported_service }}
          expr: |
            sum(rate(traefik_service_requests_total{code=~"4.*"}[1m])) by (exported_service)
              /
            sum(rate(traefik_service_requests_total[1m])) by (exported_service)
              > .10
          for: 5m
          labels:
            severity: critical

        # Ratio of 5xx responses to all responses, per exported_service,
        # over a 1-minute rate window; fires above 10%.
        - alert: TraefikHighHttp5xxErrorRateService
          annotations:
            summary: "Traefik has a high HTTP 5xx error rate."
            description: >-
              Traefik is reporting {{ $value | humanizePercentage }} of 5xx
              errors on {{ $labels.exported_service }}
          expr: |
            sum(rate(traefik_service_requests_total{code=~"5.*"}[1m])) by (exported_service)
              /
            sum(rate(traefik_service_requests_total[1m])) by (exported_service)
              > .10
          for: 5m
          labels:
            severity: critical

        # Average number of open connections per entrypoint; fires above 5.
        # The aggregation keeps the `entrypoint` label (`by (entrypoint)`):
        # a bare `avg(...)` drops every label, which would leave
        # `{{ $labels.entrypoint }}` in the description empty.
        - alert: TraefikTooManyRequest
          annotations:
            summary: "Traefik has too many open connections"
            description: >-
              Traefik is reporting {{ $value }} of open connections on entrypoint
              {{ $labels.entrypoint }}
          expr: |
            avg by (entrypoint) (traefik_entrypoint_open_connections{job="traefik"})
              > 5
          for: 5m
          labels:
            severity: critical