# Mirror of https://github.com/auricom/home-cluster.git
# Synced 2025-09-17 18:24:14 +02:00 (YAML, 382 lines, 12 KiB)
---
# Flux HelmRelease installing the kube-prometheus-stack chart (pinned to
# 39.13.3) into the `monitoring` namespace.
#
# NOTE(review): `${UPPER_CASE}` placeholders (e.g. SECRET_CLUSTER_DOMAIN) are
# presumably filled in by Flux post-build variable substitution - confirm.
# Because of that, regex capture groups in this file are written as `$1`, never
# as `${1}`, so they cannot be mistaken for substitution variables.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: kube-prometheus-stack
  namespace: monitoring
spec:
  interval: 15m
  chart:
    spec:
      chart: kube-prometheus-stack
      version: 39.13.3
      sourceRef:
        kind: HelmRepository
        name: prometheus-community-charts
        namespace: flux-system
      interval: 5m
  install:
    createNamespace: true
    remediation:
      retries: 5
  upgrade:
    remediation:
      retries: 5
  values:
    ###
    ### Component values
    ###
    kubeApiServer:
      enabled: true
    kubeControllerManager:
      enabled: false

    kubeEtcd:
      enabled: false

    kubelet:
      enabled: true
      serviceMonitor:
        metricRelabelings:
          # Copy the value of the `node` label into `instance`.
          - action: replace
            sourceLabels:
              - node
            targetLabel: instance

    kubeProxy:
      enabled: false

    kubeScheduler:
      enabled: false

    kubeStateMetrics:
      enabled: true
    kube-state-metrics:
      metricLabelsAllowlist:
        - "persistentvolumeclaims=[*]"
      prometheus:
        monitor:
          enabled: true
          relabelings:
            # Copy the pod's node name into a `kubernetes_node` label.
            - action: replace
              regex: (.*)
              replacement: $1
              sourceLabels:
                - __meta_kubernetes_pod_node_name
              targetLabel: kubernetes_node
      resources:
        requests:
          cpu: 15m
          memory: 127M
        limits:
          memory: 153M

    grafana:
      enabled: false
      forceDeployDashboards: true

    nodeExporter:
      enabled: true

    prometheus-node-exporter:
      resources:
        requests:
          cpu: 23m
          memory: 64M
        limits:
          memory: 64M
      prometheus:
        monitor:
          enabled: true
          relabelings:
            # Copy the pod's node name into a `kubernetes_node` label.
            - action: replace
              regex: (.*)
              replacement: $1
              sourceLabels:
                - __meta_kubernetes_pod_node_name
              targetLabel: kubernetes_node

    ###
    ### Prometheus operator values
    ###
    prometheusOperator:
      prometheusConfigReloader:
        resources:
          requests:
            cpu: 150m
            memory: 50Mi
          limits:
            cpu: 300m
            memory: 50Mi

    ###
    ### Prometheus instance values
    ###
    prometheus:
      ingress:
        enabled: true
        pathType: Prefix
        ingressClassName: "nginx"
        annotations:
          # nginx forwards auth checks to the Authelia service and sends
          # unauthenticated users to the sign-in host.
          nginx.ingress.kubernetes.io/auth-url: "http://authelia.default.svc.cluster.local/api/verify"
          nginx.ingress.kubernetes.io/auth-signin: "https://auth.${SECRET_CLUSTER_DOMAIN}"
        hosts: ["prometheus.${SECRET_CLUSTER_DOMAIN}"]
        tls:
          - hosts:
              - "prometheus.${SECRET_CLUSTER_DOMAIN}"
      prometheusSpec:
        resources:
          requests:
            memory: 2000Mi
            cpu: 400m
          limits:
            memory: 6000Mi
        replicas: 1
        replicaExternalLabelName: "replica"
        podAntiAffinity: hard
        ruleSelector: {}
        ruleNamespaceSelector: {}
        ruleSelectorNilUsesHelmValues: false
        serviceMonitorSelector: {}
        serviceMonitorNamespaceSelector: {}
        serviceMonitorSelectorNilUsesHelmValues: false
        podMonitorSelector: {}
        podMonitorNamespaceSelector: {}
        podMonitorSelectorNilUsesHelmValues: false
        probeSelectorNilUsesHelmValues: false
        retention: 14d
        # Size cap sits below the 50Gi volume requested in storageSpec.
        retentionSize: "45GB"
        enableAdminAPI: true
        walCompression: true
        storageSpec:
          volumeClaimTemplate:
            spec:
              storageClassName: rook-ceph-block
              resources:
                requests:
                  storage: 50Gi
        thanos:
          image: quay.io/thanos/thanos:v0.28.0
          version: v0.28.0
        additionalScrapeConfigs:
          # Static targets outside the cluster, each scraped on port 9273.
          - job_name: "opnsense"
            scrape_interval: 60s
            metrics_path: "/metrics"
            static_configs:
              - targets: ["${LOCAL_LAN_OPNSENSE}:9273"]
                labels:
                  app: "opnsense"
          - job_name: "truenas"
            scrape_interval: 60s
            metrics_path: "/metrics"
            static_configs:
              - targets: ["${LOCAL_LAN_TRUENAS}:9273"]
                labels:
                  app: "truenas"
          - job_name: "truenas-remote"
            scrape_interval: 60s
            metrics_path: "/metrics"
            static_configs:
              - targets: ["${LOCAL_LAN_TRUENAS_REMOTE}:9273"]
                labels:
                  app: "truenas-remote"
          # Example scrape config for probing ingresses via the Blackbox Exporter.
          #
          # The relabeling allows the actual ingress scrape endpoint to be configured
          # via the following annotations:
          #
          # * `prometheus.io/probe`: Only probe ingresses that have a value of `true`
          - job_name: "kubernetes-ingresses"
            metrics_path: /probe
            scrape_interval: 60s
            params:
              module: [http_2xx]
            kubernetes_sd_configs:
              - role: ingress
            relabel_configs:
              # Quoted so the pattern is a string, not a YAML boolean.
              - source_labels:
                  - __meta_kubernetes_ingress_annotation_prometheus_io_probe
                action: keep
                regex: "true"
              # Build the probe URL as <scheme>://<address><path>. Capture
              # groups use the unbraced `$N` form so they are not confused
              # with `${VAR}` substitution placeholders.
              - source_labels:
                  - __meta_kubernetes_ingress_scheme
                  - __address__
                  - __meta_kubernetes_ingress_path
                regex: (.+);(.+);(.+)
                replacement: $1://$2$3
                target_label: __param_target
              # Send the scrape itself to the blackbox exporter.
              - target_label: __address__
                replacement: blackbox-exporter:9115
              - source_labels: [__param_target]
                target_label: instance
              - action: labelmap
                regex: __meta_kubernetes_ingress_label_(.+)
              - source_labels: [__meta_kubernetes_namespace]
                target_label: kubernetes_namespace
              - source_labels: [__meta_kubernetes_ingress_name]
                target_label: kubernetes_name
          # Probe services annotated with prometheus.io/probe=true and
          # prometheus.io/protocol=http through the blackbox exporter.
          - job_name: "kubernetes-services-http"
            metrics_path: /probe
            scrape_interval: 60s
            params:
              module: [http_2xx]
            kubernetes_sd_configs:
              - role: service
            relabel_configs:
              - source_labels:
                  - __meta_kubernetes_service_annotation_prometheus_io_probe
                action: keep
                regex: "true"
              - source_labels:
                  - __meta_kubernetes_service_annotation_prometheus_io_protocol
                action: keep
                regex: http
              - source_labels: [__address__]
                target_label: __param_target
              - target_label: __address__
                replacement: blackbox-exporter:9115
              - source_labels: [__param_target]
                target_label: instance
              - action: labelmap
                regex: __meta_kubernetes_service_label_(.+)
              - source_labels: [__meta_kubernetes_namespace]
                target_label: kubernetes_namespace
              - source_labels: [__meta_kubernetes_service_name]
                target_label: kubernetes_name
          # Same as above for services annotated with
          # prometheus.io/protocol=tcp, using the tcp_connect module.
          - job_name: "kubernetes-services-tcp"
            metrics_path: /probe
            scrape_interval: 60s
            params:
              module: [tcp_connect]
            kubernetes_sd_configs:
              - role: service
            relabel_configs:
              - source_labels:
                  - __meta_kubernetes_service_annotation_prometheus_io_probe
                action: keep
                regex: "true"
              - source_labels:
                  - __meta_kubernetes_service_annotation_prometheus_io_protocol
                action: keep
                regex: tcp
              - source_labels: [__address__]
                target_label: __param_target
              - target_label: __address__
                replacement: blackbox-exporter:9115
              - source_labels: [__param_target]
                target_label: instance
              - action: labelmap
                regex: __meta_kubernetes_service_label_(.+)
              - source_labels: [__meta_kubernetes_namespace]
                target_label: kubernetes_namespace
              - source_labels: [__meta_kubernetes_service_name]
                target_label: kubernetes_name
      thanosService:
        enabled: true
      thanosServiceMonitor:
        enabled: true
      thanosIngress:
        enabled: true
        pathType: Prefix
        ingressClassName: "nginx"
        annotations:
          nginx.ingress.kubernetes.io/ssl-redirect: "true"
          nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
        hosts:
          - &host "thanos-sidecar.${SECRET_CLUSTER_DOMAIN}"
        tls:
          - hosts:
              - *host

    alertmanager:
      config:
        global:
          resolve_timeout: 5m
        receivers:
          - name: "null"
          - name: "pushover"
            pushover_configs:
              - user_key: ${SECRET_KUBE_PROMETHEUS_STACK_ALERTMANAGER_PUSHOVER_USER_KEY}
                token: ${SECRET_KUBE_PROMETHEUS_STACK_ALERTMANAGER_PUSHOVER_TOKEN}
                send_resolved: true
                html: true
                # Priority 1 while firing, 0 otherwise.
                priority: |-
                  {{ if eq .Status "firing" }}1{{ else }}0{{ end }}
                url_title: View in Alert Manager
                title: |-
                  [{{ .Status | toUpper -}}
                  {{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{- end -}}
                  ] {{ .CommonLabels.alertname }}
                # NOTE(review): leading whitespace inside the template lines
                # below is part of the rendered message; confirm it against
                # the upstream file if exact spacing matters.
                message: |-
                  {{- range .Alerts }}
                  {{- if ne .Labels.severity "" }}
                  <b>Severity:</b> <i>{{ .Labels.severity }}</i>
                  {{- else }}
                  <b>Severity:</b> <i>N/A</i>
                  {{- end }}
                  {{- if ne .Annotations.description "" }}
                  <b>Description:</b> <i>{{ .Annotations.description }}</i>
                  {{- else if ne .Annotations.summary "" }}
                  <b>Summary:</b> <i>{{ .Annotations.summary }}</i>
                  {{- else if ne .Annotations.message "" }}
                  <b>Message:</b> <i>{{ .Annotations.message }}</i>
                  {{- else }}
                  <b>Description:</b> <i>N/A</i>
                  {{- end }}
                  {{- if gt (len .Labels.SortedPairs) 0 }}
                  <b>Details:</b>
                  {{- range .Labels.SortedPairs }}
                  • <b>{{ .Name }}:</b> <i>{{ .Value }}</i>
                  {{- end }}
                  {{- end }}
                  {{- end }}
        route:
          receiver: "pushover"
          routes:
            # These alert names are routed to the "null" receiver.
            - receiver: "null"
              matchers:
                - alertname =~ "InfoInhibitor|Watchdog|RebootScheduled"
            - receiver: "pushover"
              matchers:
                - severity = "critical"
              continue: true
        inhibit_rules:
          # A critical alert mutes warning alerts that share the same
          # alertname and namespace.
          - source_matchers:
              - severity = "critical"
            target_matchers:
              - severity = "warning"
            equal: ["alertname", "namespace"]
      alertmanagerSpec:
        replicas: 1
        podAntiAffinity: hard
        storage:
          volumeClaimTemplate:
            spec:
              storageClassName: rook-ceph-block
              resources:
                requests:
                  storage: 1Gi
      ingress:
        enabled: true
        pathType: Prefix
        ingressClassName: "nginx"
        annotations:
          nginx.ingress.kubernetes.io/auth-url: "http://authelia.default.svc.cluster.local/api/verify"
          nginx.ingress.kubernetes.io/auth-signin: "https://auth.${SECRET_CLUSTER_DOMAIN}"
        hosts: ["alert-manager.${SECRET_CLUSTER_DOMAIN}"]
        tls:
          - hosts:
              - "alert-manager.${SECRET_CLUSTER_DOMAIN}"
      # NOTE(review): kept nested under `alertmanager`; one level up it would
      # be a duplicate of the `prometheus` key above. Confirm the intended
      # nesting and whether the chart reads `prometheus.monitor` at this level.
      prometheus:
        monitor:
          enabled: true
          relabelings:
            - action: replace
              regex: (.*)
              replacement: $1
              sourceLabels:
                - __meta_kubernetes_pod_node_name
              targetLabel: kubernetes_node