---
# PrometheusRule (Prometheus Operator CRD) defining alerts for the Thanos
# compactor and its object-storage bucket. All three alerts use `for: 0m`,
# so they fire on the first evaluation where the expression returns a result.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: thanos.rules
  namespace: monitoring
spec:
  groups:
    - name: thanos.rules
      rules:
        # Fires when the compactor reports that it has halted.
        # The gauge exported by Thanos is `thanos_compact_halted`; the
        # previous name `thanos_compactor_halted` matched no series, so the
        # alert could never fire.
        - alert: ThanosCompactionHalted
          expr: |
            thanos_compact_halted == 1
          for: 0m
          labels:
            severity: critical
          annotations:
            summary: "Thanos compaction halted on {{ $labels.instance }}"
            description: "Thanos compaction has failed to run and is now halted.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

        # Fires when any bucket operation failure was recorded over the last
        # minute.
        # NOTE(review): a 1m rate() window needs at least two samples inside
        # it; confirm the scrape interval is well below 1m, or widen the
        # window.
        # NOTE(review): the expression has no job/component matcher, so it is
        # presumably not limited to the compactor - confirm this is intended.
        - alert: ThanosCompactBucketOperationFailure
          expr: |
            rate(thanos_objstore_bucket_operation_failures_total[1m]) > 0
          for: 0m
          labels:
            severity: critical
          annotations:
            summary: "Thanos compact bucket operation failure on {{ $labels.instance }}"
            description: "Thanos compaction has failing storage operations\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

        # Fires when the last successful bucket upload is older than 24 hours
        # (24*60*60 seconds).
        # NOTE(review): this measures upload recency, used here as a proxy
        # for "compaction ran"; verify that the series selected really belong
        # to the compactor.
        - alert: ThanosCompactNotRun
          expr: |
            (time() - thanos_objstore_bucket_last_successful_upload_time) > 24*60*60
          for: 0m
          labels:
            severity: critical
          annotations:
            summary: "Thanos compact not run on {{ $labels.instance }}"
            description: "Thanos compaction has not run in 24 hours.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"