mirror of
https://github.com/auricom/home-cluster.git
synced 2025-09-17 18:24:14 +02:00
64 lines
2.9 KiB
YAML
64 lines
2.9 KiB
YAML
---
# PrometheusRule (prometheus-operator CRD) defining alerts for cert-manager.
# Two rule groups:
#   - cert-manager: is the cert-manager scrape target present at all?
#   - certificates: expiry, readiness and ACME rate-limit conditions.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: cert-manager.rules
  namespace: cert-manager
spec:
  groups:
    # Availability of cert-manager itself.
    - name: cert-manager
      rules:
        # Fires when no `up` series with job="cert-manager" has existed
        # for 10 minutes.
        - alert: CertManagerAbsent
          expr: |
            absent(up{job="cert-manager"})
          for: 10m
          labels:
            severity: critical
          annotations:
            description: >-
              New certificates will not be able to be minted, and existing
              ones can't be renewed until cert-manager is back.
            runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagerabsent
            summary: "Cert Manager has disappeared from Prometheus service discovery."

    # Health of the certificates reported by cert-manager.
    - name: certificates
      rules:
        # Fires when the averaged (expiration timestamp - now) for a
        # certificate stays below 21 * 24 * 3600 seconds (21 days) for 1 hour.
        # $value is that remaining time in seconds, rendered in the
        # annotations through humanizeDuration.
        - alert: CertManagerCertExpirySoon
          expr: |
            avg by (exported_namespace, namespace, name) (
            certmanager_certificate_expiration_timestamp_seconds - time())
            < (21 * 24 * 3600)
          for: 1h
          labels:
            severity: warning
          annotations:
            description: >-
              The domain that this cert covers will be unavailable after
              {{ $value | humanizeDuration }}. Clients using endpoints that this cert
              protects will start to fail in {{ $value | humanizeDuration }}.
            runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagercertexpirysoon
            summary: >-
              The cert {{ $labels.name }} is {{ $value | humanizeDuration }}
              from expiry, it should have renewed over a week ago.

        # Fires when a ready-status series whose condition is anything other
        # than "True" has value 1 for 10 minutes.
        - alert: CertManagerCertNotReady
          expr: |
            max by (name, exported_namespace, namespace, condition) (
            certmanager_certificate_ready_status{condition!="True"} == 1)
          for: 10m
          labels:
            severity: critical
          annotations:
            description: >-
              This certificate has not been ready to serve traffic for at least
              10m. If the cert is being renewed or there is another valid cert, the ingress
              controller _may_ be able to serve that instead.
            runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagercertnotready
            summary: "The cert {{ $labels.name }} is not ready to serve traffic."

        # Fires when, per host, the 5-minute rate of ACME client requests
        # recorded with status="429" stays above zero for 5 minutes.
        - alert: CertManagerHittingRateLimits
          expr: |
            sum by (host) (rate(certmanager_http_acme_client_request_count{status="429"}[5m]))
            > 0
          for: 5m
          labels:
            severity: critical
          annotations:
            description: >-
              Depending on the rate limit, cert-manager may be unable to generate
              certificates for up to a week.
            runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagerhittingratelimits
            summary: "Cert manager hitting LetsEncrypt rate limits."