Files
auricom-home-cluster/cluster/apps/monitoring/loki-stack/prometheus-rule.yaml
2021-08-02 01:34:57 +02:00

110 lines
4.9 KiB
YAML

---
# PrometheusRule (Prometheus Operator CRD) holding the alerting and recording
# rules for Loki. Everything lives in a single rule group named `loki.rules`.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: loki.rules
  namespace: monitoring
spec:
  groups:
    - name: loki.rules
      rules:
        # ------------------------------------------------------------------
        # Alerts
        # ------------------------------------------------------------------

        # Fires when, per (namespace, job, route), more than 10% of requests
        # have a 5xx status code, sustained for 15 minutes.
        - alert: LokiRequestErrors
          annotations:
            # $value is already a percentage (the expression multiplies the
            # ratio by 100), so it is formatted directly. `humanizePercentage`
            # expects a 0-1 ratio and would scale the value by 100 again.
            message: >-
              {{ $labels.job }} {{ $labels.route }} is experiencing
              {{ printf "%.2f" $value }}% errors.
          expr: |
            100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[1m])) by (namespace, job, route)
              /
            sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route)
              > 10
          for: 15m
          labels:
            severity: critical

        # Fires as soon as loki_panic_total has increased within the last
        # 10 minutes for a (namespace, job); there is no `for` hold period.
        - alert: LokiRequestPanics
          annotations:
            # $value is the increase of a counter (a count of panics), not a
            # ratio, so it is reported as a plain number.
            message: >-
              {{ $labels.job }} is experiencing an increase of
              {{ printf "%.2f" $value }} panics over the last 10 minutes.
          expr: |
            sum(increase(loki_panic_total[10m])) by (namespace, job)
              > 0
          labels:
            severity: critical

        # Fires when the recorded 99th percentile request duration (see the
        # namespace_job_route 99quantile recording rule in this group) stays
        # above 1 for 15 minutes. Routes whose name contains "tail"
        # (case-insensitive) are excluded.
        # NOTE(review): presumably because tail requests are long-lived
        # streams - confirm.
        - alert: LokiRequestLatency
          annotations:
            message: "{{ $labels.job }} {{ $labels.route }} is experiencing {{ $value }}s 99th percentile latency."
          expr: |
            namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*"}
              > 1
          for: 15m
          labels:
            severity: critical

        # ------------------------------------------------------------------
        # Recording rules: loki_request_duration_seconds aggregated by job
        # (99th/50th percentile, average, and bucket/sum/count rates).
        # ------------------------------------------------------------------
        - expr: |
            histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m]))
              by (le, job))
          record: job:loki_request_duration_seconds:99quantile
        - expr: |
            histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m]))
              by (le, job))
          record: job:loki_request_duration_seconds:50quantile
        - expr: |
            sum(rate(loki_request_duration_seconds_sum[1m])) by (job)
              /
            sum(rate(loki_request_duration_seconds_count[1m])) by (job)
          record: job:loki_request_duration_seconds:avg
        - expr: |
            sum(rate(loki_request_duration_seconds_bucket[1m]))
              by (le, job)
          record: job:loki_request_duration_seconds_bucket:sum_rate
        - expr: |
            sum(rate(loki_request_duration_seconds_sum[1m])) by (job)
          record: job:loki_request_duration_seconds_sum:sum_rate
        - expr: |
            sum(rate(loki_request_duration_seconds_count[1m])) by (job)
          record: job:loki_request_duration_seconds_count:sum_rate

        # ------------------------------------------------------------------
        # Recording rules: the same aggregations, by (job, route).
        # ------------------------------------------------------------------
        - expr: |
            histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m]))
              by (le, job, route))
          record: job_route:loki_request_duration_seconds:99quantile
        - expr: |
            histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m]))
              by (le, job, route))
          record: job_route:loki_request_duration_seconds:50quantile
        - expr: |
            sum(rate(loki_request_duration_seconds_sum[1m])) by (job, route)
              /
            sum(rate(loki_request_duration_seconds_count[1m])) by (job, route)
          record: job_route:loki_request_duration_seconds:avg
        - expr: |
            sum(rate(loki_request_duration_seconds_bucket[1m]))
              by (le, job, route)
          record: job_route:loki_request_duration_seconds_bucket:sum_rate
        - expr: |
            sum(rate(loki_request_duration_seconds_sum[1m])) by (job, route)
          record: job_route:loki_request_duration_seconds_sum:sum_rate
        - expr: |
            sum(rate(loki_request_duration_seconds_count[1m])) by (job, route)
          record: job_route:loki_request_duration_seconds_count:sum_rate

        # ------------------------------------------------------------------
        # Recording rules: the same aggregations, by (namespace, job, route).
        # The 99quantile series here is what LokiRequestLatency alerts on.
        # ------------------------------------------------------------------
        - expr: |
            histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m]))
              by (le, namespace, job, route))
          record: namespace_job_route:loki_request_duration_seconds:99quantile
        - expr: |
            histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m]))
              by (le, namespace, job, route))
          record: namespace_job_route:loki_request_duration_seconds:50quantile
        - expr: |
            sum(rate(loki_request_duration_seconds_sum[1m])) by (namespace, job, route)
              /
            sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route)
          record: namespace_job_route:loki_request_duration_seconds:avg
        - expr: |
            sum(rate(loki_request_duration_seconds_bucket[1m]))
              by (le, namespace, job, route)
          record: namespace_job_route:loki_request_duration_seconds_bucket:sum_rate
        - expr: |
            sum(rate(loki_request_duration_seconds_sum[1m]))
              by (namespace, job, route)
          record: namespace_job_route:loki_request_duration_seconds_sum:sum_rate
        - expr: |
            sum(rate(loki_request_duration_seconds_count[1m]))
              by (namespace, job, route)
          record: namespace_job_route:loki_request_duration_seconds_count:sum_rate