# auricom/home-cluster: cluster/apps/monitoring/kube-prometheus-stack/helm-release.yaml
---
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: kube-prometheus-stack
  namespace: monitoring
spec:
  interval: 5m
  chart:
    spec:
      # renovate: registryUrl=https://prometheus-community.github.io/helm-charts
      chart: kube-prometheus-stack
      version: 16.9.1
      sourceRef:
        kind: HelmRepository
        name: prometheus-community-charts
        namespace: flux-system
      interval: 5m
  timeout: 20m
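  # Note: the ${SECRET_...} placeholders below are not Helm templating; they are
  # expected to be filled in by Flux's post-build variable substitution, for
  # example a Kustomization with postBuild.substituteFrom pointing at a secret.
  # A minimal sketch, assuming a secret named "cluster-secrets" holds the values:
  #
  #   postBuild:
  #     substituteFrom:
  #       - kind: Secret
  #         name: cluster-secrets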
  values:
    server:
      resources:
        requests:
          memory: 1500Mi
          cpu: 200m
        limits:
          memory: 2000Mi
    prometheusOperator:
      createCustomResource: true
      configReloaderCpu: 200m
    alertmanager:
      ingress:
        enabled: true
        pathType: Prefix
        annotations:
          kubernetes.io/ingress.class: "nginx"
          nginx.ingress.kubernetes.io/auth-url: "http://authelia.networking.svc.cluster.local/api/verify"
          nginx.ingress.kubernetes.io/auth-signin: "https://login.${SECRET_CLUSTER_DOMAIN}/"
        hosts: ["alert-manager.${SECRET_CLUSTER_DOMAIN}"]
        tls:
          - hosts:
              - alert-manager.${SECRET_CLUSTER_DOMAIN}
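      # The nginx auth annotations on the ingress above delegate authentication
      # to an Authelia instance (assumed to run in the "networking" namespace):
      # ingress-nginx calls auth-url for every request and redirects
      # unauthenticated users to auth-signin. The Prometheus ingress further
      # down reuses the same pattern.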
      config:
        global:
          resolve_timeout: 5m
        receivers:
          - name: "null"
          - name: "pushover"
            pushover_configs:
              - user_key: ${SECRET_KUBE_PROMETHEUS_STACK_ALERTMANAGER_PUSHOVER_USER_KEY}
                token: ${SECRET_KUBE_PROMETHEUS_STACK_ALERTMANAGER_PUSHOVER_TOKEN}
        route:
          receiver: "pushover"
          routes:
            - match:
                alertname: Watchdog
              receiver: "null"
            - receiver: "pushover"
        inhibit_rules:
          - source_match:
              severity: "critical"
            target_match:
              severity: "warning"
            # Apply inhibition only when both the alertname and namespace match.
            equal: ["alertname", "namespace"]
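      # Routing summary: the Watchdog heartbeat alert is swallowed by the
      # "null" receiver, every other alert falls through to Pushover, and a
      # firing critical alert suppresses warning-level alerts that share the
      # same alertname and namespace.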
      alertmanagerSpec:
        storage:
          volumeClaimTemplate:
            spec:
              storageClassName: rook-ceph-block
              resources:
                requests:
                  storage: 10Gi
    nodeExporter:
      serviceMonitor:
        relabelings:
          - action: replace
            regex: (.*)
            replacement: $1
            sourceLabels:
              - __meta_kubernetes_pod_node_name
            targetLabel: kubernetes_node
    kubelet:
      serviceMonitor:
        metricRelabelings:
          - action: replace
            sourceLabels:
              - node
            targetLabel: instance
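    # Both relabelings above normalise node identity: node-exporter targets get
    # a "kubernetes_node" label taken from the pod's node name, and kubelet
    # metrics have their "node" label copied into "instance", so node-centric
    # dashboards can join the two metric sets on a common label.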
    grafana:
      adminPassword: ${SECRET_KUBE_PROMETHEUS_STACK_GRAFANA_ADMIN_PASSWORD}
      dashboards:
        default:
          kubernetes-custom:
            url: https://raw.githubusercontent.com/auricom/home-cluster/main/cluster/apps/monitoring/kube-prometheus-stack/grafana-dashboards/homelab-temparatures.json
            datasource: Prometheus
          # Ref: https://grafana.com/grafana/dashboards/12175
          calico-felix:
            gnetId: 12175
            revision: 5
            datasource: Prometheus
          # Ref: https://grafana.com/grafana/dashboards/2842
          ceph-cluster:
            gnetId: 2842
            revision: 14
            datasource: Prometheus
          # Ref: https://grafana.com/grafana/dashboards/5336
          ceph-osd:
            gnetId: 5336
            revision: 5
            datasource: Prometheus
          # Ref: https://grafana.com/grafana/dashboards/5342
          ceph-pools:
            gnetId: 5342
            revision: 5
            datasource: Prometheus
          # Ref: https://grafana.com/grafana/dashboards/11315
          flux-cluster:
            url: https://raw.githubusercontent.com/fluxcd/flux2/main/manifests/monitoring/grafana/dashboards/cluster.json
            datasource: Prometheus
          flux-control-plane:
            url: https://raw.githubusercontent.com/fluxcd/flux2/main/manifests/monitoring/grafana/dashboards/control-plane.json
            datasource: Prometheus
          vernemq:
            url: https://raw.githubusercontent.com/vernemq/vernemq/master/metrics_scripts/grafana/VerneMQ%20Node%20Metrics.json
            datasource: Prometheus
          home-assistant:
            url: https://raw.githubusercontent.com/auricom/home-cluster/main/cluster/apps/monitoring/kube-prometheus-stack/grafana-dashboards/home-assistant.json
            datasource: Prometheus
          truenas:
            url: https://raw.githubusercontent.com/auricom/home-cluster/main/cluster/apps/monitoring/kube-prometheus-stack/grafana-dashboards/truenas.json
            datasource: Prometheus
          lidarr:
            url: https://raw.githubusercontent.com/k8s-at-home/grafana-dashboards/main/lidarr.json
            datasource: Prometheus
          radarr:
            url: https://raw.githubusercontent.com/k8s-at-home/grafana-dashboards/main/radarr.json
            datasource: Prometheus
          sonarr:
            url: https://raw.githubusercontent.com/k8s-at-home/grafana-dashboards/main/sonarr.json
            datasource: Prometheus
      deploymentStrategy:
        type: Recreate
      persistence:
        enabled: false
      env:
        GF_EXPLORE_ENABLED: true
        GF_DISABLE_SANITIZE_HTML: true
        GF_PANELS_DISABLE_SANITIZE_HTML: true
      plugins:
        - natel-discrete-panel
        - pr0ps-trackmap-panel
        - grafana-piechart-panel
        - vonage-status-panel
        - grafana-worldmap-panel
        - grafana-clock-panel
      dashboardProviders:
        dashboardproviders.yaml:
          apiVersion: 1
          providers:
            - name: "default"
              orgId: 1
              folder: ""
              type: file
              disableDeletion: false
              editable: true
              options:
                path: /var/lib/grafana/dashboards/default
      sidecar:
        datasources:
          enabled: true
          defaultDatasourceEnabled: false
        dashboards:
          enabled: true
          searchNamespace: ALL
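      # With searchNamespace set to ALL, the dashboard sidecar loads any
      # ConfigMap carrying the grafana_dashboard label (the chart's default
      # label) from every namespace, in addition to the dashboards listed
      # statically above. defaultDatasourceEnabled is off because the
      # Prometheus datasource is declared explicitly below.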
      additionalDataSources:
        - name: Prometheus
          type: prometheus
          access: proxy
          url: http://thanos-query-http:10902/
          isDefault: true
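      # The datasource is named "Prometheus" but points at a Thanos Query HTTP
      # endpoint, so every dashboard above queries the deduplicated, long-term
      # view rather than a single Prometheus replica. The thanos-query-http
      # service is assumed to be deployed separately in this namespace.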
      grafana.ini:
        server:
          root_url: https://grafana.${SECRET_CLUSTER_DOMAIN}
        paths:
          data: /var/lib/grafana/data
          logs: /var/log/grafana
          plugins: /var/lib/grafana/plugins
          provisioning: /etc/grafana/provisioning
        analytics:
          check_for_updates: true
        log:
          mode: console
        grafana_net:
          url: https://grafana.net
        smtp:
          enabled: false
      ingress:
        enabled: true
        pathType: Prefix
        annotations:
          kubernetes.io/ingress.class: "nginx"
        hosts: ["grafana.${SECRET_CLUSTER_DOMAIN}"]
        tls:
          - hosts:
              - grafana.${SECRET_CLUSTER_DOMAIN}
    kubeEtcd:
      enabled: false
    kubeControllerManager:
      enabled: false
    kubeScheduler:
      enabled: false
    kubeProxy:
      enabled: false
    prometheus:
      ingress:
        enabled: true
        pathType: Prefix
        annotations:
          kubernetes.io/ingress.class: "nginx"
          nginx.ingress.kubernetes.io/auth-url: "http://authelia.networking.svc.cluster.local/api/verify"
          nginx.ingress.kubernetes.io/auth-signin: "https://login.${SECRET_CLUSTER_DOMAIN}/"
        hosts: ["prometheus.${SECRET_CLUSTER_DOMAIN}"]
        tls:
          - hosts:
              - prometheus.${SECRET_CLUSTER_DOMAIN}
      prometheusSpec:
        replicas: 2
        replicaExternalLabelName: "replica"
        ruleSelector: {}
        ruleNamespaceSelector: {}
        ruleSelectorNilUsesHelmValues: false
        serviceMonitorSelector: {}
        serviceMonitorNamespaceSelector: {}
        serviceMonitorSelectorNilUsesHelmValues: false
        podMonitorSelector: {}
        podMonitorNamespaceSelector: {}
        podMonitorSelectorNilUsesHelmValues: false
        retention: 6h
        enableAdminAPI: true
        walCompression: true
        storageSpec:
          volumeClaimTemplate:
            spec:
              storageClassName: rook-ceph-block
              resources:
                requests:
                  storage: 10Gi
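        # Two Prometheus replicas scrape the same targets; each adds a distinct
        # "replica" external label, which Thanos Query is expected to use for
        # deduplication. Local retention can stay short (6h) because the Thanos
        # sidecar configured further down uploads blocks to object storage for
        # long-term history.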
        additionalScrapeConfigs:
          - job_name: "opnsense"
            scrape_interval: 60s
            metrics_path: "/metrics"
            static_configs:
              - targets: ["opnsense.${SECRET_CLUSTER_DOMAIN_ROOT}:9273"]
                labels:
                  app: "opnsense"
          - job_name: "hass"
            scrape_interval: 60s
            metrics_path: "/api/prometheus"
            bearer_token: "${SECRET_HASS_TOKEN}"
            static_configs:
              - targets: ["home-assistant.home.svc.cluster.local:8123"]
                labels:
                  app: "hass"
          - job_name: "truenas"
            scrape_interval: 60s
            metrics_path: "/metrics"
            static_configs:
              - targets: ["truenas.${SECRET_CLUSTER_DOMAIN_ROOT}:9273"]
                labels:
                  app: "truenas"
          - job_name: "truenas-remote"
            scrape_interval: 60s
            metrics_path: "/metrics"
            static_configs:
              - targets: ["truenas-remote.${SECRET_CLUSTER_DOMAIN_ROOT}:9273"]
                labels:
                  app: "truenas-remote"
          # Example scrape config for probing ingresses via the Blackbox Exporter.
          #
          # The relabeling allows the actual ingress scrape endpoint to be configured
          # via the following annotations:
          #
          # * `prometheus.io/probe`: Only probe ingresses that have a value of `true`
          - job_name: "kubernetes-ingresses"
            metrics_path: /probe
            scrape_interval: 60s
            params:
              module: [http_2xx]
            kubernetes_sd_configs:
              - role: ingress
            relabel_configs:
              - source_labels:
                  [__meta_kubernetes_ingress_annotation_prometheus_io_probe]
                action: keep
                regex: true
              - source_labels:
                  [
                    __meta_kubernetes_ingress_scheme,
                    __address__,
                    __meta_kubernetes_ingress_path,
                  ]
                regex: (.+);(.+);(.+)
                replacement: ${1}://${2}${3}
                target_label: __param_target
              - target_label: __address__
                replacement: blackbox-exporter-prometheus-blackbox-exporter:9115
              - source_labels: [__param_target]
                target_label: instance
              - action: labelmap
                regex: __meta_kubernetes_ingress_label_(.+)
              - source_labels: [__meta_kubernetes_namespace]
                target_label: kubernetes_namespace
              - source_labels: [__meta_kubernetes_ingress_name]
                target_label: kubernetes_name
- job_name: "kubernetes-services-http"
metrics_path: /probe
scrape_interval: 60s
params:
module: [http_2xx]
kubernetes_sd_configs:
- role: service
relabel_configs:
- source_labels:
[__meta_kubernetes_service_annotation_prometheus_io_probe]
action: keep
regex: true
- source_labels:
[__meta_kubernetes_service_annotation_prometheus_io_protocol]
action: keep
regex: http
- source_labels: [__address__]
target_label: __param_target
- target_label: __address__
replacement: blackbox-exporter-prometheus-blackbox-exporter:9115
- source_labels: [__param_target]
target_label: instance
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
target_label: kubernetes_name
- job_name: "kubernetes-services-tcp"
metrics_path: /probe
scrape_interval: 60s
params:
module: [tcp_connect]
kubernetes_sd_configs:
- role: service
relabel_configs:
- source_labels:
[__meta_kubernetes_service_annotation_prometheus_io_probe]
action: keep
regex: true
- source_labels:
[__meta_kubernetes_service_annotation_prometheus_io_protocol]
action: keep
regex: tcp
- source_labels: [__address__]
target_label: __param_target
- target_label: __address__
replacement: blackbox-exporter-prometheus-blackbox-exporter:9115
- source_labels: [__param_target]
target_label: instance
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
target_label: kubernetes_name
        affinity:
          podAntiAffinity:
            requiredDuringSchedulingIgnoredDuringExecution:
              - labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - prometheus
                topologyKey: "kubernetes.io/hostname"
        thanos:
          image: quay.io/thanos/thanos:v0.21.1
          objectStorageConfig:
            name: thanos-objstore-secret
            key: objstore.yml
      thanosService:
        enabled: true
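      # The required pod anti-affinity keeps the two Prometheus replicas on
      # separate nodes. The Thanos sidecar injected above ships TSDB blocks
      # using the object storage credentials in the thanos-objstore-secret
      # Secret (expected to exist in this namespace), and thanosService exposes
      # the sidecar's gRPC StoreAPI so a Thanos Query deployment can fan in
      # both replicas.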