---
# Flux HelmRelease: deploys the kube-prometheus-stack chart (Prometheus,
# Alertmanager, Grafana, node-exporter, kubelet monitoring) into the
# `monitoring` namespace. `${...}` placeholders are Flux post-build
# substitutions resolved at reconcile time.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: kube-prometheus-stack
  namespace: monitoring
spec:
  interval: 5m
  chart:
    spec:
      # renovate: registryUrl=https://prometheus-community.github.io/helm-charts
      chart: kube-prometheus-stack
      version: 35.0.3
      sourceRef:
        kind: HelmRepository
        name: prometheus-community-charts
        namespace: flux-system
      interval: 5m
  install:
    createNamespace: true
    remediation:
      retries: 5
  upgrade:
    remediation:
      retries: 5
  values:
    alertmanager:
      config:
        global:
          resolve_timeout: 5m
        receivers:
          # The "null" receiver silently drops anything routed to it.
          - name: "null"
          - name: "pushover"
            pushover_configs:
              - user_key: ${SECRET_KUBE_PROMETHEUS_STACK_ALERTMANAGER_PUSHOVER_USER_KEY}
                token: ${SECRET_KUBE_PROMETHEUS_STACK_ALERTMANAGER_PUSHOVER_TOKEN}
                send_resolved: true
                html: true
                # Pushover priority: 1 (high) while firing, 0 (normal) on resolve.
                priority: |-
                  {{ if eq .Status "firing" }}1{{ else }}0{{ end }}
                url_title: View in Alert Manager
                title: |-
                  [{{ .Status | toUpper -}}
                  {{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{- end -}}
                  ] {{ .CommonLabels.alertname }}
                message: |-
                  {{- range .Alerts }}
                    {{- if ne .Labels.severity "" }}
                      Severity: {{ .Labels.severity }}
                    {{- else }}
                      Severity: N/A
                    {{- end }}
                    {{- if ne .Annotations.description "" }}
                      Description: {{ .Annotations.description }}
                    {{- else if ne .Annotations.summary "" }}
                      Summary: {{ .Annotations.summary }}
                    {{- else if ne .Annotations.message "" }}
                      Message: {{ .Annotations.message }}
                    {{- else }}
                      Description: N/A
                    {{- end }}
                    {{- if gt (len .Labels.SortedPairs) 0 }}
                      Details:
                      {{- range .Labels.SortedPairs }}
                        • {{ .Name }}: {{ .Value }}
                      {{- end }}
                    {{- end }}
                  {{- end }}
        route:
          receiver: "pushover"
          routes:
            # Known-noise alerts are discarded via the null receiver.
            - receiver: "null"
              matchers:
                - alertname =~ "InfoInhibitor|Watchdog|RebootScheduled"
            - receiver: "pushover"
              matchers:
                - severity = "critical"
              continue: true
        inhibit_rules:
          # A firing critical alert mutes the matching warning for the same
          # alertname/namespace pair.
          - source_matchers:
              - severity = "critical"
            target_matchers:
              - severity = "warning"
            equal: ["alertname", "namespace"]
      alertmanagerSpec:
        replicas: 2
        podAntiAffinity: hard
        storage:
          volumeClaimTemplate:
            spec:
              storageClassName: rook-ceph-block
              resources:
                requests:
                  storage: 10Gi
      ingress:
        enabled: true
        pathType: Prefix
        ingressClassName: "nginx"
        annotations:
          # Authelia forward-auth protects the Alertmanager UI.
          nginx.ingress.kubernetes.io/auth-url: "http://authelia.networking.svc.cluster.local/api/verify"
          nginx.ingress.kubernetes.io/auth-signin: "https://login.${SECRET_CLUSTER_DOMAIN}"
        hosts: ["alert-manager.${SECRET_CLUSTER_DOMAIN}"]
        tls:
          - hosts:
              - "alert-manager.${SECRET_CLUSTER_DOMAIN}"
    prometheusOperator:
      createCustomResource: true
      prometheusConfigReloader:
        resources:
          requests:
            cpu: 150m
            memory: 50Mi
          limits:
            cpu: 300m
            memory: 50Mi
    nodeExporter:
      enabled: true
      serviceMonitor:
        relabelings:
          # Copy the scheduled node's name into a stable `kubernetes_node` label.
          - action: replace
            regex: (.*)
            replacement: $1
            sourceLabels:
              - __meta_kubernetes_pod_node_name
            targetLabel: kubernetes_node
    kubelet:
      enabled: true
      serviceMonitor:
        metricRelabelings:
          # Use the node name (not pod IP) as the instance label.
          - action: replace
            sourceLabels:
              - node
            targetLabel: instance
    grafana:
      adminPassword: ${SECRET_KUBE_PROMETHEUS_STACK_GRAFANA_ADMIN_PASSWORD}
      dashboards:
        default:
          # NOTE(review): this URL spells "temparatures" while the
          # homelab-temperatures entry below uses the corrected spelling —
          # confirm which filename actually exists in the repo.
          kubernetes-custom:
            url: https://raw.githubusercontent.com/auricom/home-cluster/main/cluster/apps/monitoring/kube-prometheus-stack/grafana-dashboards/homelab-temparatures.json
            datasource: Prometheus
          # Ref: https://grafana.com/grafana/dashboards/2842
          ceph-cluster:
            gnetId: 2842
            revision: 14
            datasource: Prometheus
          # Ref: https://grafana.com/grafana/dashboards/5336
          ceph-osd:
            gnetId: 5336
            revision: 5
            datasource: Prometheus
          # Ref: https://grafana.com/grafana/dashboards/5342
          ceph-pools:
            gnetId: 5342
            revision: 5
            datasource: Prometheus
          # Ref: https://grafana.com/grafana/dashboards/11315
          flux-cluster:
            url: https://raw.githubusercontent.com/fluxcd/flux2/main/manifests/monitoring/grafana/dashboards/cluster.json
            datasource: Prometheus
          flux-control-plane:
            url: https://raw.githubusercontent.com/fluxcd/flux2/main/manifests/monitoring/grafana/dashboards/control-plane.json
            datasource: Prometheus
          home-assistant:
            url: https://raw.githubusercontent.com/auricom/home-cluster/main/cluster/apps/monitoring/kube-prometheus-stack/grafana-dashboards/home-assistant.json
            datasource: Prometheus
          homelab-temperatures:
            url: https://raw.githubusercontent.com/auricom/home-cluster/main/cluster/apps/monitoring/kube-prometheus-stack/grafana-dashboards/homelab-temperatures.json
            datasource: Prometheus
          truenas:
            url: https://raw.githubusercontent.com/auricom/home-cluster/main/cluster/apps/monitoring/kube-prometheus-stack/grafana-dashboards/truenas.json
            datasource: Prometheus
          lidarr:
            url: https://raw.githubusercontent.com/k8s-at-home/grafana-dashboards/main/lidarr.json
            datasource: Prometheus
          radarr:
            url: https://raw.githubusercontent.com/k8s-at-home/grafana-dashboards/main/radarr.json
            datasource: Prometheus
          sonarr:
            url: https://raw.githubusercontent.com/k8s-at-home/grafana-dashboards/main/sonarr.json
            datasource: Prometheus
      deploymentStrategy:
        type: Recreate
      persistence:
        enabled: false
      env:
        # Container env values must be strings — quote boolean-looking values
        # so they don't render as YAML booleans in the pod spec.
        GF_EXPLORE_ENABLED: "true"
        GF_DISABLE_SANITIZE_HTML: "true"
        GF_PANELS_DISABLE_SANITIZE_HTML: "true"
      plugins:
        - natel-discrete-panel
        - pr0ps-trackmap-panel
        - grafana-piechart-panel
        - vonage-status-panel
        - grafana-worldmap-panel
        - grafana-clock-panel
        - camptocamp-prometheus-alertmanager-datasource
      dashboardProviders:
        dashboardproviders.yaml:
          apiVersion: 1
          providers:
            - name: "default"
              orgId: 1
              folder: ""
              type: file
              disableDeletion: false
              editable: true
              options:
                path: /var/lib/grafana/dashboards/default
      sidecar:
        datasources:
          enabled: true
          # The sidecar-provisioned datasource is not the default; Thanos
          # Query (below) is.
          defaultDatasourceEnabled: false
        dashboards:
          enabled: true
          searchNamespace: ALL
      additionalDataSources:
        # Query through Thanos so Grafana sees the deduplicated global view.
        - name: Prometheus
          type: prometheus
          access: proxy
          url: http://thanos-query:9090/
          isDefault: true
        - name: Alertmanager
          type: camptocamp-prometheus-alertmanager-datasource
          # type: alertmanager
          access: proxy
          url: http://alertmanager-operated:9093/
      grafana.ini:
        server:
          root_url: https://grafana.${SECRET_CLUSTER_DOMAIN}
        paths:
          data: /var/lib/grafana/data
          logs: /var/log/grafana
          plugins: /var/lib/grafana/plugins
          provisioning: /etc/grafana/provisioning
        analytics:
          check_for_updates: true
        log:
          mode: console
        grafana_net:
          url: https://grafana.net
        smtp:
          enabled: false
        date_formats:
          default_timezone: Europe/Paris
        feature_toggles:
          enable: ngalert
      ingress:
        enabled: true
        pathType: Prefix
        ingressClassName: "nginx"
        # annotations:
        #   traefik.ingress.kubernetes.io/router.entrypoints: "websecure"
        hosts: ["grafana.${SECRET_CLUSTER_DOMAIN}"]
        tls:
          - hosts:
              - "grafana.${SECRET_CLUSTER_DOMAIN}"
    # Control-plane components not scrapeable on this cluster.
    kubeEtcd:
      enabled: false
    kubeControllerManager:
      enabled: false
    kubeScheduler:
      enabled: false
    kubeProxy:
      enabled: false
    # FIX: `prometheus:` previously appeared twice under `values` (once with
    # only `monitor:`, once with the rest) — duplicate mapping keys are
    # invalid YAML and most parsers keep only the last, silently dropping the
    # monitor relabelings. Both sections are merged here.
    prometheus:
      # NOTE(review): `monitor` is not a documented kube-prometheus-stack
      # `prometheus` value; it matches the kube-state-metrics subchart's
      # `prometheus.monitor` schema — verify intended placement.
      monitor:
        enabled: true
        relabelings:
          - action: replace
            regex: (.*)
            replacement: $1
            sourceLabels:
              - __meta_kubernetes_pod_node_name
            targetLabel: kubernetes_node
      ingress:
        enabled: true
        pathType: Prefix
        ingressClassName: "nginx"
        annotations:
          # Authelia forward-auth protects the Prometheus UI.
          nginx.ingress.kubernetes.io/auth-url: "http://authelia.networking.svc.cluster.local/api/verify"
          nginx.ingress.kubernetes.io/auth-signin: "https://login.${SECRET_CLUSTER_DOMAIN}"
        hosts: ["prometheus.${SECRET_CLUSTER_DOMAIN}"]
        tls:
          - hosts:
              - "prometheus.${SECRET_CLUSTER_DOMAIN}"
      prometheusSpec:
        resources:
          requests:
            memory: 2000Mi
            cpu: 400m
          limits:
            memory: 6000Mi
        replicas: 2
        replicaExternalLabelName: "replica"
        podAntiAffinity: hard
        # Empty selectors + *NilUsesHelmValues: false => pick up every
        # rule/ServiceMonitor/PodMonitor in the cluster, not just the chart's.
        ruleSelector: {}
        ruleNamespaceSelector: {}
        ruleSelectorNilUsesHelmValues: false
        serviceMonitorSelector: {}
        serviceMonitorNamespaceSelector: {}
        serviceMonitorSelectorNilUsesHelmValues: false
        podMonitorSelector: {}
        podMonitorNamespaceSelector: {}
        podMonitorSelectorNilUsesHelmValues: false
        # Short local retention — long-term storage is handled by Thanos.
        retention: 2d
        retentionSize: "6GB"
        enableAdminAPI: true
        walCompression: true
        storageSpec:
          volumeClaimTemplate:
            spec:
              storageClassName: rook-ceph-block
              resources:
                requests:
                  storage: 10Gi
        thanos:
          image: quay.io/thanos/thanos:v0.26.0
          # FIX: was v0.25.2, which contradicted the pinned image tag above.
          version: v0.26.0
          objectStorageConfig:
            name: thanos-objstore-secret
            key: objstore.yml
        additionalScrapeConfigs:
          - job_name: "opnsense"
            scrape_interval: 60s
            metrics_path: "/metrics"
            static_configs:
              - targets: ["${LOCAL_LAN_OPNSENSE}:9273"]
                labels:
                  app: "opnsense"
          - job_name: "truenas"
            scrape_interval: 60s
            metrics_path: "/metrics"
            static_configs:
              - targets: ["${LOCAL_LAN_TRUENAS}:9273"]
                labels:
                  app: "truenas"
          - job_name: "truenas-remote"
            scrape_interval: 60s
            metrics_path: "/metrics"
            static_configs:
              - targets: ["${LOCAL_LAN_TRUENAS_REMOTE}:9273"]
                labels:
                  app: "truenas-remote"
          # Example scrape config for probing ingresses via the Blackbox Exporter.
          #
          # The relabeling allows the actual ingress scrape endpoint to be configured
          # via the following annotations:
          #
          # * `prometheus.io/probe`: Only probe ingresses that have a value of `true`
          - job_name: "kubernetes-ingresses"
            metrics_path: /probe
            scrape_interval: 60s
            params:
              module: [http_2xx]
            kubernetes_sd_configs:
              - role: ingress
            relabel_configs:
              - source_labels: [__meta_kubernetes_ingress_annotation_prometheus_io_probe]
                action: keep
                regex: true
              - source_labels:
                  [
                    __meta_kubernetes_ingress_scheme,
                    __address__,
                    __meta_kubernetes_ingress_path,
                  ]
                regex: (.+);(.+);(.+)
                replacement: ${1}://${2}${3}
                target_label: __param_target
              - target_label: __address__
                replacement: blackbox-exporter-prometheus-blackbox-exporter:9115
              - source_labels: [__param_target]
                target_label: instance
              - action: labelmap
                regex: __meta_kubernetes_ingress_label_(.+)
              - source_labels: [__meta_kubernetes_namespace]
                target_label: kubernetes_namespace
              - source_labels: [__meta_kubernetes_ingress_name]
                target_label: kubernetes_name
          - job_name: "kubernetes-services-http"
            metrics_path: /probe
            scrape_interval: 60s
            params:
              module: [http_2xx]
            kubernetes_sd_configs:
              - role: service
            relabel_configs:
              - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
                action: keep
                regex: true
              - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_protocol]
                action: keep
                regex: http
              - source_labels: [__address__]
                target_label: __param_target
              - target_label: __address__
                replacement: blackbox-exporter-prometheus-blackbox-exporter:9115
              - source_labels: [__param_target]
                target_label: instance
              - action: labelmap
                regex: __meta_kubernetes_service_label_(.+)
              - source_labels: [__meta_kubernetes_namespace]
                target_label: kubernetes_namespace
              - source_labels: [__meta_kubernetes_service_name]
                target_label: kubernetes_name
          - job_name: "kubernetes-services-tcp"
            metrics_path: /probe
            scrape_interval: 60s
            params:
              module: [tcp_connect]
            kubernetes_sd_configs:
              - role: service
            relabel_configs:
              - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
                action: keep
                regex: true
              - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_protocol]
                action: keep
                regex: tcp
              - source_labels: [__address__]
                target_label: __param_target
              - target_label: __address__
                replacement: blackbox-exporter-prometheus-blackbox-exporter:9115
              - source_labels: [__param_target]
                target_label: instance
              - action: labelmap
                regex: __meta_kubernetes_service_label_(.+)
              - source_labels: [__meta_kubernetes_namespace]
                target_label: kubernetes_namespace
              - source_labels: [__meta_kubernetes_service_name]
                target_label: kubernetes_name
      # Expose the Thanos sidecar gRPC endpoint for remote queriers.
      thanosService:
        enabled: true
      thanosServiceMonitor:
        enabled: true
      thanosIngress:
        enabled: true
        pathType: Prefix
        ingressClassName: "nginx"
        annotations:
          nginx.ingress.kubernetes.io/ssl-redirect: "true"
          nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
        hosts:
          - &host "thanos-sidecar.${SECRET_CLUSTER_DOMAIN}"
        tls:
          - hosts:
              - *host