From 7c44be33f3ac2ad9f9ebe0c668d55f18eaaec6a9 Mon Sep 17 00:00:00 2001
From: auricom <27022259+auricom@users.noreply.github.com>
Date: Sat, 30 Apr 2022 15:20:37 +0200
Subject: [PATCH] fix: kube-prometheus-stack
---
.../kube-prometheus-stack/helm-release.yaml | 192 +++++++++++-------
1 file changed, 123 insertions(+), 69 deletions(-)
diff --git a/cluster/apps/monitoring/kube-prometheus-stack/helm-release.yaml b/cluster/apps/monitoring/kube-prometheus-stack/helm-release.yaml
index b6045e864..dafd827ef 100644
--- a/cluster/apps/monitoring/kube-prometheus-stack/helm-release.yaml
+++ b/cluster/apps/monitoring/kube-prometheus-stack/helm-release.yaml
@@ -16,8 +16,103 @@ spec:
name: prometheus-community-charts
namespace: flux-system
interval: 5m
- timeout: 20m
+ install:
+ createNamespace: true
+ remediation:
+ retries: 5
+ upgrade:
+ remediation:
+ retries: 5
values:
+ alertmanager:
+ config:
+ global:
+ resolve_timeout: 5m
+ receivers:
+ - name: "null"
+ - name: "pushover"
+ pushover_configs:
+ - user_key: ${SECRET_KUBE_PROMETHEUS_STACK_ALERTMANAGER_PUSHOVER_USER_KEY}
+ token: ${SECRET_KUBE_PROMETHEUS_STACK_ALERTMANAGER_PUSHOVER_TOKEN}
+ send_resolved: true
+ html: true
+ priority: |-
+ {{ if eq .Status "firing" }}1{{ else }}0{{ end }}
+ url_title: View in Alert Manager
+ title: |-
+ [{{ .Status | toUpper -}}
+ {{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{- end -}}
+ ] {{ .CommonLabels.alertname }}
+ message: |-
+ {{- range .Alerts }}
+ {{- if ne .Labels.severity "" }}
+ Severity: {{ .Labels.severity }}
+ {{- else }}
+ Severity: N/A
+ {{- end }}
+ {{- if ne .Annotations.description "" }}
+ Description: {{ .Annotations.description }}
+ {{- else if ne .Annotations.summary "" }}
+ Summary: {{ .Annotations.summary }}
+ {{- else if ne .Annotations.message "" }}
+ Message: {{ .Annotations.message }}
+ {{- else }}
+ Description: N/A
+ {{- end }}
+ {{- if gt (len .Labels.SortedPairs) 0 }}
+ Details:
+ {{- range .Labels.SortedPairs }}
+ • {{ .Name }}: {{ .Value }}
+ {{- end }}
+ {{- end }}
+ {{- end }}
+ route:
+ receiver: "pushover"
+ routes:
+ - receiver: "null"
+ matchers:
+ - alertname =~ "InfoInhibitor|Watchdog"
+ - receiver: "pushover"
+ matchers:
+ - severity = "critical"
+ continue: true
+ inhibit_rules:
+ - source_matchers:
+ - severity = "critical"
+ target_matchers:
+ - severity = "warning"
+ equal: ["alertname", "namespace"]
+ alertmanagerSpec:
+ replicas: 2
+ podAntiAffinity: hard
+ storage:
+ volumeClaimTemplate:
+ spec:
+ storageClassName: rook-ceph-block
+ resources:
+ requests:
+ storage: 10Gi
+ ingress:
+ enabled: true
+ pathType: Prefix
+ ingressClassName: "nginx"
+ annotations:
+ nginx.ingress.kubernetes.io/auth-url: "http://authelia.networking.svc.cluster.local/api/verify"
+ nginx.ingress.kubernetes.io/auth-signin: "https://login.${SECRET_CLUSTER_DOMAIN}"
+ hosts: ["alert-manager.${SECRET_CLUSTER_DOMAIN}"]
+ tls:
+ - hosts:
+ - "alert-manager.${SECRET_CLUSTER_DOMAIN}"
+ prometheus:
+ monitor:
+ enabled: true
+ relabelings:
+ - action: replace
+ regex: (.*)
+ replacement: $1
+ sourceLabels:
+ - __meta_kubernetes_pod_node_name
+ targetLabel: kubernetes_node
prometheusOperator:
createCustomResource: true
prometheusConfigReloader:
@@ -28,55 +123,8 @@ spec:
limits:
cpu: 300m
memory: 50Mi
- alertmanager:
- ingress:
- enabled: true
- pathType: Prefix
- ingressClassName: "nginx"
- annotations:
- nginx.ingress.kubernetes.io/auth-url: "http://authelia.networking.svc.cluster.local/api/verify"
- nginx.ingress.kubernetes.io/auth-signin: "https://login.${SECRET_CLUSTER_DOMAIN}"
- # traefik.ingress.kubernetes.io/router.entrypoints: "websecure"
- # traefik.ingress.kubernetes.io/router.middlewares: networking-forward-auth@kubernetescrd
- hosts: ["alert-manager.${SECRET_CLUSTER_DOMAIN}"]
- tls:
- - hosts:
- - "alert-manager.${SECRET_CLUSTER_DOMAIN}"
- config:
- global:
- resolve_timeout: 5m
- receivers:
- - name: "null"
- - name: "pushover"
- pushover_configs:
- - user_key: ${SECRET_KUBE_PROMETHEUS_STACK_ALERTMANAGER_PUSHOVER_USER_KEY}
- token: ${SECRET_KUBE_PROMETHEUS_STACK_ALERTMANAGER_PUSHOVER_TOKEN}
- route:
- receiver: "pushover"
- routes:
- - receiver: "null"
- match:
- alertname: InfoInhibitor
- - match:
- alertname: Watchdog
- receiver: "null"
- - receiver: "pushover"
- inhibit_rules:
- - source_match:
- severity: "critical"
- target_match:
- severity: "warning"
- # Apply inhibition if the alertname is the same.
- equal: ["alertname", "namespace"]
- alertmanagerSpec:
- storage:
- volumeClaimTemplate:
- spec:
- storageClassName: rook-ceph-block
- resources:
- requests:
- storage: 10Gi
nodeExporter:
+ enabled: true
serviceMonitor:
relabelings:
- action: replace
@@ -86,6 +134,7 @@ spec:
- __meta_kubernetes_pod_node_name
targetLabel: kubernetes_node
kubelet:
+ enabled: true
serviceMonitor:
metricRelabelings:
- action: replace
@@ -231,8 +280,6 @@ spec:
annotations:
nginx.ingress.kubernetes.io/auth-url: "http://authelia.networking.svc.cluster.local/api/verify"
nginx.ingress.kubernetes.io/auth-signin: "https://login.${SECRET_CLUSTER_DOMAIN}"
- # traefik.ingress.kubernetes.io/router.entrypoints: "websecure"
- # traefik.ingress.kubernetes.io/router.middlewares: networking-forward-auth@kubernetescrd
hosts: ["prometheus.${SECRET_CLUSTER_DOMAIN}"]
tls:
- hosts:
@@ -240,12 +287,13 @@ spec:
prometheusSpec:
resources:
requests:
- memory: 1500Mi
+ memory: 2000Mi
cpu: 400m
limits:
- memory: 2000Mi
+ memory: 6000Mi
replicas: 2
replicaExternalLabelName: "replica"
+ podAntiAffinity: hard
ruleSelector: {}
ruleNamespaceSelector: {}
ruleSelectorNilUsesHelmValues: false
@@ -255,7 +303,8 @@ spec:
podMonitorSelector: {}
podMonitorNamespaceSelector: {}
podMonitorSelectorNilUsesHelmValues: false
- retention: 6h
+ retention: 2d
+ retentionSize: "6GB"
enableAdminAPI: true
walCompression: true
storageSpec:
@@ -265,6 +314,12 @@ spec:
resources:
requests:
storage: 10Gi
+ thanos:
+ image: quay.io/thanos/thanos:v0.25.2
+ version: v0.25.2
+ objectStorageConfig:
+ name: thanos-objstore-secret
+ key: objstore.yml
additionalScrapeConfigs:
- job_name: "opnsense"
scrape_interval: 60s
@@ -380,20 +435,19 @@ spec:
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
target_label: kubernetes_name
- affinity:
- podAntiAffinity:
- requiredDuringSchedulingIgnoredDuringExecution:
- - labelSelector:
- matchExpressions:
- - key: app
- operator: In
- values:
- - prometheus
- topologyKey: "kubernetes.io/hostname"
- thanos:
- image: quay.io/thanos/thanos:v0.25.2
- objectStorageConfig:
- name: thanos-objstore-secret
- key: objstore.yml
thanosService:
enabled: true
+ thanosServiceMonitor:
+ enabled: true
+ thanosIngress:
+ enabled: true
+ pathType: Prefix
+ ingressClassName: "nginx"
+ annotations:
+ nginx.ingress.kubernetes.io/ssl-redirect: "true"
+ nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
+ hosts:
+ - &host "thanos-sidecar.${SECRET_CLUSTER_DOMAIN}"
+ tls:
+ - hosts:
+ - *host