---
# Flux HelmRelease that installs the kube-prometheus-stack chart (pinned to
# 39.13.3) into the `monitoring` namespace.
#
# `${NAME}` placeholders in this file are expected to be filled in before the
# manifest is applied (presumably Flux post-build variable substitution —
# confirm against the owning Kustomization). Regex capture groups are
# therefore always written brace-less (`$1`), never `${1}`, so they cannot be
# mistaken for a placeholder.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: kube-prometheus-stack
  namespace: monitoring
spec:
  interval: 15m
  chart:
    spec:
      chart: kube-prometheus-stack
      version: 39.13.3
      sourceRef:
        kind: HelmRepository
        name: prometheus-community-charts
        namespace: flux-system
      interval: 5m
  install:
    createNamespace: true
    remediation:
      retries: 5
  upgrade:
    remediation:
      retries: 5
  values:
    ###
    ### Component values
    ###

    # Control-plane scrape targets: only the API server and the kubelet are
    # enabled; controller-manager, etcd, kube-proxy and scheduler are off.
    kubeApiServer:
      enabled: true
    kubeControllerManager:
      enabled: false
    kubeEtcd:
      enabled: false
    kubelet:
      enabled: true
      serviceMonitor:
        metricRelabelings:
          # Copy the `node` label into `instance` on kubelet metrics.
          - action: replace
            sourceLabels:
              - node
            targetLabel: instance
    kubeProxy:
      enabled: false
    kubeScheduler:
      enabled: false

    kubeStateMetrics:
      enabled: true
    kube-state-metrics:
      metricLabelsAllowlist:
        - "persistentvolumeclaims=[*]"
      prometheus:
        monitor:
          enabled: true
          relabelings:
            # Record the node the pod runs on as `kubernetes_node`.
            - action: replace
              regex: (.*)
              replacement: $1
              sourceLabels:
                - __meta_kubernetes_pod_node_name
              targetLabel: kubernetes_node
      resources:
        requests:
          cpu: 15m
          memory: 127M
        limits:
          memory: 153M

    # Grafana itself is not deployed by this release, but the chart is still
    # asked to deploy its dashboards.
    grafana:
      enabled: false
      forceDeployDashboards: true

    nodeExporter:
      enabled: true
    prometheus-node-exporter:
      resources:
        requests:
          cpu: 23m
          memory: 64M
        limits:
          memory: 64M
      prometheus:
        monitor:
          enabled: true
          relabelings:
            # Record the node the pod runs on as `kubernetes_node`.
            - action: replace
              regex: (.*)
              replacement: $1
              sourceLabels:
                - __meta_kubernetes_pod_node_name
              targetLabel: kubernetes_node

    ###
    ### Prometheus operator values
    ###
    prometheusOperator:
      prometheusConfigReloader:
        resources:
          requests:
            cpu: 150m
            memory: 50Mi
          limits:
            cpu: 300m
            memory: 50Mi

    ###
    ### Prometheus instance values
    ###
    prometheus:
      # Ingress in front of Prometheus; requests are checked against the
      # `auth-url` endpoint and redirected to `auth-signin` to log in.
      ingress:
        enabled: true
        pathType: Prefix
        ingressClassName: "nginx"
        annotations:
          nginx.ingress.kubernetes.io/auth-url: "http://authelia.default.svc.cluster.local/api/verify"
          nginx.ingress.kubernetes.io/auth-signin: "https://auth.${SECRET_CLUSTER_DOMAIN}"
        hosts:
          - "prometheus.${SECRET_CLUSTER_DOMAIN}"
        tls:
          - hosts:
              - "prometheus.${SECRET_CLUSTER_DOMAIN}"
      prometheusSpec:
        resources:
          requests:
            memory: 2000Mi
            cpu: 400m
          limits:
            memory: 6000Mi
        replicas: 1
        replicaExternalLabelName: "replica"
        podAntiAffinity: hard
        # Empty selectors combined with `*NilUsesHelmValues: false` —
        # presumably so rules and monitors are picked up regardless of
        # namespace or Helm release labels; verify against the chart docs.
        ruleSelector: {}
        ruleNamespaceSelector: {}
        ruleSelectorNilUsesHelmValues: false
        serviceMonitorSelector: {}
        serviceMonitorNamespaceSelector: {}
        serviceMonitorSelectorNilUsesHelmValues: false
        podMonitorSelector: {}
        podMonitorNamespaceSelector: {}
        podMonitorSelectorNilUsesHelmValues: false
        probeSelectorNilUsesHelmValues: false
        retention: 14d
        retentionSize: "45GB"
        enableAdminAPI: true
        walCompression: true
        storageSpec:
          volumeClaimTemplate:
            spec:
              storageClassName: rook-ceph-block
              resources:
                requests:
                  storage: 50Gi
        thanos:
          image: quay.io/thanos/thanos:v0.28.0
          version: v0.28.0
        additionalScrapeConfigs:
          # Static targets outside the cluster, each scraped on port 9273.
          - job_name: "opnsense"
            scrape_interval: 60s
            metrics_path: "/metrics"
            static_configs:
              - targets:
                  - "${LOCAL_LAN_OPNSENSE}:9273"
                labels:
                  app: "opnsense"
          - job_name: "truenas"
            scrape_interval: 60s
            metrics_path: "/metrics"
            static_configs:
              - targets:
                  - "${LOCAL_LAN_TRUENAS}:9273"
                labels:
                  app: "truenas"
          - job_name: "truenas-remote"
            scrape_interval: 60s
            metrics_path: "/metrics"
            static_configs:
              - targets:
                  - "${LOCAL_LAN_TRUENAS_REMOTE}:9273"
                labels:
                  app: "truenas-remote"
          # Example scrape config for probing ingresses via the Blackbox Exporter.
          #
          # The relabeling allows the actual ingress scrape endpoint to be configured
          # via the following annotations:
          #
          # * `prometheus.io/probe`: Only probe ingresses that have a value of `true`
          - job_name: "kubernetes-ingresses"
            metrics_path: /probe
            scrape_interval: 60s
            params:
              module: [http_2xx]
            kubernetes_sd_configs:
              - role: ingress
            relabel_configs:
              - source_labels: [__meta_kubernetes_ingress_annotation_prometheus_io_probe]
                action: keep
                regex: "true"
              # Build the probe URL as <scheme>://<address><path>. The capture
              # groups are deliberately brace-less: `${1}` has the same shape
              # as the `${NAME}` placeholders used throughout this file and
              # would not reach Prometheus intact.
              - source_labels:
                  - __meta_kubernetes_ingress_scheme
                  - __address__
                  - __meta_kubernetes_ingress_path
                regex: (.+);(.+);(.+)
                replacement: $1://$2$3
                target_label: __param_target
              # Send the scrape itself to the blackbox exporter; the probed
              # URL travels in the `target` parameter and becomes `instance`.
              - target_label: __address__
                replacement: blackbox-exporter:9115
              - source_labels: [__param_target]
                target_label: instance
              - action: labelmap
                regex: __meta_kubernetes_ingress_label_(.+)
              - source_labels: [__meta_kubernetes_namespace]
                target_label: kubernetes_namespace
              - source_labels: [__meta_kubernetes_ingress_name]
                target_label: kubernetes_name
          # Probe services annotated `prometheus.io/probe: true` and
          # `prometheus.io/protocol: http` with the `http_2xx` module.
          - job_name: "kubernetes-services-http"
            metrics_path: /probe
            scrape_interval: 60s
            params:
              module: [http_2xx]
            kubernetes_sd_configs:
              - role: service
            relabel_configs:
              - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
                action: keep
                regex: "true"
              - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_protocol]
                action: keep
                regex: http
              - source_labels: [__address__]
                target_label: __param_target
              - target_label: __address__
                replacement: blackbox-exporter:9115
              - source_labels: [__param_target]
                target_label: instance
              - action: labelmap
                regex: __meta_kubernetes_service_label_(.+)
              - source_labels: [__meta_kubernetes_namespace]
                target_label: kubernetes_namespace
              - source_labels: [__meta_kubernetes_service_name]
                target_label: kubernetes_name
          # Probe services annotated `prometheus.io/probe: true` and
          # `prometheus.io/protocol: tcp` with the `tcp_connect` module.
          - job_name: "kubernetes-services-tcp"
            metrics_path: /probe
            scrape_interval: 60s
            params:
              module: [tcp_connect]
            kubernetes_sd_configs:
              - role: service
            relabel_configs:
              - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
                action: keep
                regex: "true"
              - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_protocol]
                action: keep
                regex: tcp
              - source_labels: [__address__]
                target_label: __param_target
              - target_label: __address__
                replacement: blackbox-exporter:9115
              - source_labels: [__param_target]
                target_label: instance
              - action: labelmap
                regex: __meta_kubernetes_service_label_(.+)
              - source_labels: [__meta_kubernetes_namespace]
                target_label: kubernetes_namespace
              - source_labels: [__meta_kubernetes_service_name]
                target_label: kubernetes_name
      thanosService:
        enabled: true
      thanosServiceMonitor:
        enabled: true
      # gRPC ingress for the Thanos sidecar.
      thanosIngress:
        enabled: true
        pathType: Prefix
        ingressClassName: "nginx"
        annotations:
          nginx.ingress.kubernetes.io/ssl-redirect: "true"
          nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
        hosts:
          - "thanos-sidecar.${SECRET_CLUSTER_DOMAIN}"
        tls:
          - hosts:
              - "thanos-sidecar.${SECRET_CLUSTER_DOMAIN}"

    alertmanager:
      config:
        global:
          resolve_timeout: 5m
        receivers:
          # Sink for alerts that should never notify anyone.
          - name: "null"
          - name: "pushover"
            pushover_configs:
              - user_key: "${SECRET_KUBE_PROMETHEUS_STACK_ALERTMANAGER_PUSHOVER_USER_KEY}"
                token: "${SECRET_KUBE_PROMETHEUS_STACK_ALERTMANAGER_PUSHOVER_TOKEN}"
                send_resolved: true
                html: true
                # Priority 1 while firing, 0 once resolved.
                priority: |-
                  {{ if eq .Status "firing" }}1{{ else }}0{{ end }}
                url_title: View in Alert Manager
                # NOTE(review): line breaks and indentation inside the two
                # templates below were reconstructed from a whitespace-
                # collapsed source — compare with the rendered notification.
                title: |-
                  [{{ .Status | toUpper -}}
                  {{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{- end -}}
                  ] {{ .CommonLabels.alertname }}
                message: |-
                  {{- range .Alerts }}
                    {{- if ne .Labels.severity "" }}
                      Severity: {{ .Labels.severity }}
                    {{- else }}
                      Severity: N/A
                    {{- end }}
                    {{- if ne .Annotations.description "" }}
                      Description: {{ .Annotations.description }}
                    {{- else if ne .Annotations.summary "" }}
                      Summary: {{ .Annotations.summary }}
                    {{- else if ne .Annotations.message "" }}
                      Message: {{ .Annotations.message }}
                    {{- else }}
                      Description: N/A
                    {{- end }}
                    {{- if gt (len .Labels.SortedPairs) 0 }}
                      Details:
                      {{- range .Labels.SortedPairs }}
                        • {{ .Name }}: {{ .Value }}
                      {{- end }}
                    {{- end }}
                  {{- end }}
        route:
          # Anything not matched by a child route goes to Pushover.
          receiver: "pushover"
          routes:
            # Housekeeping alerts are dropped into the "null" receiver.
            - receiver: "null"
              matchers:
                - alertname =~ "InfoInhibitor|Watchdog|RebootScheduled"
            - receiver: "pushover"
              matchers:
                - severity = "critical"
              continue: true
        inhibit_rules:
          # A critical alert mutes the warning that shares its alertname and
          # namespace.
          - source_matchers:
              - severity = "critical"
            target_matchers:
              - severity = "warning"
            equal:
              - "alertname"
              - "namespace"
      alertmanagerSpec:
        replicas: 1
        podAntiAffinity: hard
        storage:
          volumeClaimTemplate:
            spec:
              storageClassName: rook-ceph-block
              resources:
                requests:
                  storage: 1Gi
      ingress:
        enabled: true
        pathType: Prefix
        ingressClassName: "nginx"
        annotations:
          nginx.ingress.kubernetes.io/auth-url: "http://authelia.default.svc.cluster.local/api/verify"
          nginx.ingress.kubernetes.io/auth-signin: "https://auth.${SECRET_CLUSTER_DOMAIN}"
        hosts:
          - "alert-manager.${SECRET_CLUSTER_DOMAIN}"
        tls:
          - hosts:
              - "alert-manager.${SECRET_CLUSTER_DOMAIN}"
      # NOTE(review): the parent of this stanza could not be recovered from
      # the whitespace-collapsed source. It is kept under `alertmanager`
      # because a second top-level `prometheus` key would duplicate the one
      # above. Confirm the chart actually reads `alertmanager.prometheus.monitor`
      # (the same shape is used by the kube-state-metrics and node-exporter
      # sub-charts above); it may have been meant for another key.
      prometheus:
        monitor:
          enabled: true
          relabelings:
            - action: replace
              regex: (.*)
              replacement: $1
              sourceLabels:
                - __meta_kubernetes_pod_node_name
              targetLabel: kubernetes_node