---
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: kube-prometheus-stack
  namespace: monitoring
spec:
  interval: 5m
  chart:
    spec:
      # renovate: registryUrl=https://prometheus-community.github.io/helm-charts
      chart: kube-prometheus-stack
      version: 20.0.1
      sourceRef:
        kind: HelmRepository
        name: prometheus-community-charts
        namespace: flux-system
      interval: 5m
  timeout: 20m
  values:
    server:
      resources:
        requests:
          memory: 1500Mi
          cpu: 200m
        limits:
          memory: 2000Mi
    prometheusOperator:
      createCustomResource: true
      configReloaderCpu: 200m
    alertmanager:
      ingress:
        enabled: true
        pathType: Prefix
        ingressClassName: "nginx"
        annotations:
          nginx.ingress.kubernetes.io/auth-url: "http://authelia.networking.svc.cluster.local/api/verify"
          nginx.ingress.kubernetes.io/auth-signin: "https://login.${SECRET_CLUSTER_DOMAIN}"
          # traefik.ingress.kubernetes.io/router.entrypoints: "websecure"
          # traefik.ingress.kubernetes.io/router.middlewares: networking-forward-auth@kubernetescrd
        hosts: ["alert-manager.${SECRET_CLUSTER_DOMAIN}"]
        tls:
          - hosts:
              - "alert-manager.${SECRET_CLUSTER_DOMAIN}"
      config:
        global:
          resolve_timeout: 5m
        receivers:
          - name: "null"
          - name: "pushover"
            pushover_configs:
              - user_key: ${SECRET_KUBE_PROMETHEUS_STACK_ALERTMANAGER_PUSHOVER_USER_KEY}
                token: ${SECRET_KUBE_PROMETHEUS_STACK_ALERTMANAGER_PUSHOVER_TOKEN}
        route:
          receiver: "pushover"
          routes:
            - match:
                alertname: Watchdog
              receiver: "null"
            - receiver: "pushover"
        inhibit_rules:
          - source_match:
              severity: "critical"
            target_match:
              severity: "warning"
            # Apply inhibition if the alertname is the same.
            equal: ["alertname", "namespace"]
      alertmanagerSpec:
        storage:
          volumeClaimTemplate:
            spec:
              storageClassName: rook-ceph-block
              resources:
                requests:
                  storage: 10Gi
    nodeExporter:
      serviceMonitor:
        relabelings:
          - action: replace
            regex: (.*)
            replacement: $1
            sourceLabels:
              - __meta_kubernetes_pod_node_name
            targetLabel: kubernetes_node
    kubelet:
      serviceMonitor:
        metricRelabelings:
          - action: replace
            sourceLabels:
              - node
            targetLabel: instance
    grafana:
      adminPassword: ${SECRET_KUBE_PROMETHEUS_STACK_GRAFANA_ADMIN_PASSWORD}
      dashboards:
        default:
          kubernetes-custom:
            url: https://raw.githubusercontent.com/auricom/home-cluster/main/cluster/apps/monitoring/kube-prometheus-stack/grafana-dashboards/homelab-temparatures.json
            datasource: Prometheus
          # Ref: https://grafana.com/grafana/dashboards/2842
          ceph-cluster:
            gnetId: 2842
            revision: 14
            datasource: Prometheus
          # Ref: https://grafana.com/grafana/dashboards/5336
          ceph-osd:
            gnetId: 5336
            revision: 5
            datasource: Prometheus
          # Ref: https://grafana.com/grafana/dashboards/5342
          ceph-pools:
            gnetId: 5342
            revision: 5
            datasource: Prometheus
          # Ref: https://grafana.com/grafana/dashboards/11315
          flux-cluster:
            url: https://raw.githubusercontent.com/fluxcd/flux2/main/manifests/monitoring/grafana/dashboards/cluster.json
            datasource: Prometheus
          flux-control-plane:
            url: https://raw.githubusercontent.com/fluxcd/flux2/main/manifests/monitoring/grafana/dashboards/control-plane.json
            datasource: Prometheus
          home-assistant:
            url: https://raw.githubusercontent.com/auricom/home-cluster/main/cluster/apps/monitoring/kube-prometheus-stack/grafana-dashboards/home-assistant.json
            datasource: Prometheus
          homelab-temperatures:
            url: https://raw.githubusercontent.com/auricom/home-cluster/main/cluster/apps/monitoring/kube-prometheus-stack/grafana-dashboards/homelab-temperatures.json
            datasource: Prometheus
          truenas:
            url: https://raw.githubusercontent.com/auricom/home-cluster/main/cluster/apps/monitoring/kube-prometheus-stack/grafana-dashboards/truenas.json
            datasource: Prometheus
          lidarr:
            url: https://raw.githubusercontent.com/k8s-at-home/grafana-dashboards/main/lidarr.json
            datasource: Prometheus
          radarr:
            url: https://raw.githubusercontent.com/k8s-at-home/grafana-dashboards/main/radarr.json
            datasource: Prometheus
          sonarr:
            url: https://raw.githubusercontent.com/k8s-at-home/grafana-dashboards/main/sonarr.json
            datasource: Prometheus
          terra:
            url: https://raw.githubusercontent.com/auricom/home-cluster/main/cluster/apps/monitoring/kube-prometheus-stack/grafana-dashboards/terra.json
            datasource: Prometheus
      deploymentStrategy:
        type: Recreate
      persistence:
        enabled: false
      env:
        GF_EXPLORE_ENABLED: true
        GF_DISABLE_SANITIZE_HTML: true
        GF_PANELS_DISABLE_SANITIZE_HTML: true
      plugins:
        - natel-discrete-panel
        - pr0ps-trackmap-panel
        - grafana-piechart-panel
        - vonage-status-panel
        - grafana-worldmap-panel
        - grafana-clock-panel
        - camptocamp-prometheus-alertmanager-datasource
      dashboardProviders:
        dashboardproviders.yaml:
          apiVersion: 1
          providers:
            - name: "default"
              orgId: 1
              folder: ""
              type: file
              disableDeletion: false
              editable: true
              options:
                path: /var/lib/grafana/dashboards/default
      sidecar:
        datasources:
          enabled: true
          defaultDatasourceEnabled: false
        dashboards:
          enabled: true
          searchNamespace: ALL
      additionalDataSources:
        - name: Prometheus
          type: prometheus
          access: proxy
          url: http://thanos-query:9090/
          isDefault: true
        - name: Alertmanager
          type: camptocamp-prometheus-alertmanager-datasource
          # type: alertmanager
          access: proxy
          url: http://alertmanager-operated:9093/
      grafana.ini:
        server:
          root_url: https://grafana.${SECRET_CLUSTER_DOMAIN}
        paths:
          data: /var/lib/grafana/data
          logs: /var/log/grafana
          plugins: /var/lib/grafana/plugins
          provisioning: /etc/grafana/provisioning
        analytics:
          check_for_updates: true
        log:
          mode: console
        grafana_net:
          url: https://grafana.net
        smtp:
          enabled: false
        date_formats:
          default_timezone: Europe/Paris
        feature_toggles:
          enable: ngalert
      ingress:
        enabled: true
        pathType: Prefix
        ingressClassName: "nginx"
        # annotations:
        #   traefik.ingress.kubernetes.io/router.entrypoints: "websecure"
        hosts: ["grafana.${SECRET_CLUSTER_DOMAIN}"]
        tls:
          - hosts:
              - "grafana.${SECRET_CLUSTER_DOMAIN}"
    kubeEtcd:
      enabled: false
    kubeControllerManager:
      enabled: false
    kubeScheduler:
      enabled: false
    kubeProxy:
      enabled: false
    prometheus:
      ingress:
        enabled: true
        pathType: Prefix
        ingressClassName: "nginx"
        annotations:
          nginx.ingress.kubernetes.io/auth-url: "http://authelia.networking.svc.cluster.local/api/verify"
          nginx.ingress.kubernetes.io/auth-signin: "https://login.${SECRET_CLUSTER_DOMAIN}"
          # traefik.ingress.kubernetes.io/router.entrypoints: "websecure"
          # traefik.ingress.kubernetes.io/router.middlewares: networking-forward-auth@kubernetescrd
        hosts: ["prometheus.${SECRET_CLUSTER_DOMAIN}"]
        tls:
          - hosts:
              - "prometheus.${SECRET_CLUSTER_DOMAIN}"
      prometheusSpec:
        replicas: 2
        replicaExternalLabelName: "replica"
        ruleSelector: {}
        ruleNamespaceSelector: {}
        ruleSelectorNilUsesHelmValues: false
        serviceMonitorSelector: {}
        serviceMonitorNamespaceSelector: {}
        serviceMonitorSelectorNilUsesHelmValues: false
        podMonitorSelector: {}
        podMonitorNamespaceSelector: {}
        podMonitorSelectorNilUsesHelmValues: false
        retention: 6h
        enableAdminAPI: true
        walCompression: true
        storageSpec:
          volumeClaimTemplate:
            spec:
              storageClassName: rook-ceph-block
              resources:
                requests:
                  storage: 10Gi
        additionalScrapeConfigs:
          - job_name: "opnsense"
            scrape_interval: 60s
            metrics_path: "/metrics"
            static_configs:
              - targets: ["${LOCAL_LAN_OPNSENSE}:9273"]
                labels:
                  app: "opnsense"
          - job_name: "truenas"
            scrape_interval: 60s
            metrics_path: "/metrics"
            static_configs:
              - targets: ["${LOCAL_LAN_TRUENAS}:9273"]
                labels:
                  app: "truenas"
          # - job_name: "truenas-remote"
          #   scrape_interval: 60s
          #   metrics_path: "/metrics"
          #   static_configs:
          #     - targets: ["${LOCAL_LAN_TRUENAS_REMOTE}:9273"]
          #       labels:
          #         app: "truenas-remote"
          # Example scrape config for probing ingresses via the Blackbox Exporter.
          #
          # The relabeling allows the actual ingress scrape endpoint to be configured
          # via the following annotations:
          #
          # * `prometheus.io/probe`: Only probe ingresses that have a value of `true`
          - job_name: "kubernetes-ingresses"
            metrics_path: /probe
            scrape_interval: 60s
            params:
              module: [http_2xx]
            kubernetes_sd_configs:
              - role: ingress
            relabel_configs:
              - source_labels: [__meta_kubernetes_ingress_annotation_prometheus_io_probe]
                action: keep
                regex: true
              - source_labels:
                  [
                    __meta_kubernetes_ingress_scheme,
                    __address__,
                    __meta_kubernetes_ingress_path,
                  ]
                regex: (.+);(.+);(.+)
                replacement: ${1}://${2}${3}
                target_label: __param_target
              - target_label: __address__
                replacement: blackbox-exporter-prometheus-blackbox-exporter:9115
              - source_labels: [__param_target]
                target_label: instance
              - action: labelmap
                regex: __meta_kubernetes_ingress_label_(.+)
              - source_labels: [__meta_kubernetes_namespace]
                target_label: kubernetes_namespace
              - source_labels: [__meta_kubernetes_ingress_name]
                target_label: kubernetes_name
          - job_name: "kubernetes-services-http"
            metrics_path: /probe
            scrape_interval: 60s
            params:
              module: [http_2xx]
            kubernetes_sd_configs:
              - role: service
            relabel_configs:
              - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
                action: keep
                regex: true
              - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_protocol]
                action: keep
                regex: http
              - source_labels: [__address__]
                target_label: __param_target
              - target_label: __address__
                replacement: blackbox-exporter-prometheus-blackbox-exporter:9115
              - source_labels: [__param_target]
                target_label: instance
              - action: labelmap
                regex: __meta_kubernetes_service_label_(.+)
              - source_labels: [__meta_kubernetes_namespace]
                target_label: kubernetes_namespace
              - source_labels: [__meta_kubernetes_service_name]
                target_label: kubernetes_name
          - job_name: "kubernetes-services-tcp"
            metrics_path: /probe
            scrape_interval: 60s
            params:
              module: [tcp_connect]
            kubernetes_sd_configs:
              - role: service
            relabel_configs:
              - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
                action: keep
                regex: true
              - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_protocol]
                action: keep
                regex: tcp
              - source_labels: [__address__]
                target_label: __param_target
              - target_label: __address__
                replacement: blackbox-exporter-prometheus-blackbox-exporter:9115
              - source_labels: [__param_target]
                target_label: instance
              - action: labelmap
                regex: __meta_kubernetes_service_label_(.+)
              - source_labels: [__meta_kubernetes_namespace]
                target_label: kubernetes_namespace
              - source_labels: [__meta_kubernetes_service_name]
                target_label: kubernetes_name
        affinity:
          podAntiAffinity:
            requiredDuringSchedulingIgnoredDuringExecution:
              - labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - prometheus
                topologyKey: "kubernetes.io/hostname"
        thanos:
          image: quay.io/thanos/thanos:v0.23.1
          objectStorageConfig:
            name: thanos-objstore-secret
            key: objstore.yml
      thanosService:
        enabled: true