Files
auricom-home-cluster/kubernetes/apps/observability/kube-prometheus-stack/app/helmrelease.yaml
2025-08-19 03:16:10 +02:00

223 lines
6.3 KiB
YAML

---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/helm.toolkit.fluxcd.io/helmrelease_v2.json
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: kube-prometheus-stack
spec:
interval: 30m
chart:
spec:
chart: kube-prometheus-stack
version: 75.6.0
sourceRef:
kind: HelmRepository
name: prometheus-community
interval: 5m
install:
crds: Skip
remediation:
retries: 3
upgrade:
cleanupOnFail: true
crds: Skip
remediation:
strategy: rollback
retries: 3
dependsOn:
- name: kube-prometheus-stack-crds
namespace: observability
- name: rook-ceph-cluster
namespace: rook-ceph
values:
crds:
enabled: false
###
### Component values
###
kubeApiServer:
enabled: true
serviceMonitor:
metricRelabelings:
- action: replace
sourceLabels:
- node
targetLabel: instance
kubeProxy:
enabled: false
kubeControllerManager:
enabled: false
kubeEtcd:
enabled: false
kubeScheduler:
enabled: false
kubeStateMetrics:
metricLabelsAllowlist:
- persistentvolumeclaims=[*]
prometheus:
monitor:
enabled: true
relabelings:
- action: replace
regex: (.*)
replacement: $1
sourceLabels:
- __meta_kubernetes_pod_node_name
targetLabel: kubernetes_node
grafana:
enabled: false
forceDeployDashboards: true
prometheus-node-exporter:
resources:
requests:
cpu: 23m
memory: 64M
limits:
memory: 64M
prometheus:
monitor:
enabled: true
relabelings:
- action: replace
regex: (.*)
replacement: $1
sourceLabels:
- __meta_kubernetes_pod_node_name
targetLabel: kubernetes_node
###
### Prometheus operator values
###
prometheusOperator:
prometheusConfigReloader:
resources:
requests:
cpu: 100m
memory: 50Mi
limits:
cpu: 300m
memory: 100Mi
###
### Prometheus instance values
###
prometheus:
route:
main:
enabled: true
hostnames: ["prometheus.${SECRET_EXTERNAL_DOMAIN}"]
parentRefs:
- name: internal
namespace: network
sectionName: https
prometheusSpec:
replicas: 2
replicaExternalLabelName: replica
scrapeInterval: 1m # Must match interval in Grafana Helm chart
podMonitorSelector: &selector
matchLabels: null
probeSelector: *selector
ruleSelector: *selector
scrapeConfigSelector: *selector
serviceMonitorSelector: *selector
retention: 14d
retentionSize: 50GB
enableAdminAPI: true
walCompression: true
storageSpec:
volumeClaimTemplate:
spec:
storageClassName: rook-ceph-block
resources:
requests:
storage: 60Gi
alertmanager:
config:
global:
resolve_timeout: 5m
receivers:
- name: "null"
- name: pushover
pushover_configs:
- user_key: ${SECRET_KUBE_PROMETHEUS_STACK_ALERTMANAGER_PUSHOVER_USER_KEY}
token: ${SECRET_KUBE_PROMETHEUS_STACK_ALERTMANAGER_PUSHOVER_TOKEN}
send_resolved: true
html: true
priority: |-
{{ if eq .Status "firing" }}1{{ else }}0{{ end }}
url_title: View in Alert Manager
title: |-
[{{ .Status | toUpper -}}
{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{- end -}}
] {{ .CommonLabels.alertname }}
message: |-
{{- range .Alerts }}
{{- if ne .Labels.severity "" }}
<b>Severity:</b> <i>{{ .Labels.severity }}</i>
{{- else }}
<b>Severity:</b> <i>N/A</i>
{{- end }}
{{- if ne .Annotations.description "" }}
<b>Description:</b> <i>{{ .Annotations.description }}</i>
{{- else if ne .Annotations.summary "" }}
<b>Summary:</b> <i>{{ .Annotations.summary }}</i>
{{- else if ne .Annotations.message "" }}
<b>Message:</b> <i>{{ .Annotations.message }}</i>
{{- else }}
<b>Description:</b> <i>N/A</i>
{{- end }}
{{- if gt (len .Labels.SortedPairs) 0 }}
<b>Details:</b>
{{- range .Labels.SortedPairs }}
• <b>{{ .Name }}:</b> <i>{{ .Value }}</i>
{{- end }}
{{- end }}
{{- end }}
route:
receiver: pushover
routes:
- receiver: "null"
matchers:
- alertname =~ "InfoInhibitor|Watchdog|RebootScheduled"
- receiver: pushover
matchers:
- severity = "critical"
continue: true
inhibit_rules:
- source_matchers:
- severity = "critical"
target_matchers:
- severity = "warning"
equal: [alertname, namespace]
alertmanagerSpec:
replicas: 1
podAntiAffinity: hard
storage:
volumeClaimTemplate:
spec:
storageClassName: rook-ceph-block
resources:
requests:
storage: 1Gi
route:
main:
enabled: true
hostnames: ["alertmanager.${SECRET_EXTERNAL_DOMAIN}"]
parentRefs:
- name: internal
namespace: network
sectionName: https
prometheus:
monitor:
enabled: true
relabelings:
- action: replace
regex: (.*)
replacement: $1
sourceLabels:
- __meta_kubernetes_pod_node_name
targetLabel: kubernetes_node