# auricom-home-cluster/cluster/monitoring/kube-prometheus-stack.yaml
---
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: kube-prometheus-stack
  namespace: monitoring
spec:
  interval: 5m
  chart:
    spec:
      # renovate: registryUrl=https://prometheus-community.github.io/helm-charts
      chart: kube-prometheus-stack
      version: 14.1.2
      sourceRef:
        kind: HelmRepository
        name: prometheus-community-charts
        namespace: flux-system
      interval: 5m
  timeout: 20m
  values:
    server:
      resources:
        requests:
          memory: 1500Mi
          cpu: 200m
        limits:
          memory: 2000Mi
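    # NOTE: the `server` block above is a values key from the standalone
    # prometheus chart; kube-prometheus-stack does not appear to read it, so
    # these resource settings are likely ignored (the equivalent knob here
    # would be prometheus.prometheusSpec.resources).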
    prometheusOperator:
      createCustomResource: true
    alertmanager:
      ingress:
        enabled: true
        annotations:
          kubernetes.io/ingress.class: "nginx"
          nginx.ingress.kubernetes.io/auth-url: "http://authelia.auth.svc.cluster.local/api/verify"
          nginx.ingress.kubernetes.io/auth-signin: "https://login.k3s.xpander.ovh/"
        hosts: [alert-manager.k3s.xpander.ovh]
        tls:
          - hosts:
              - alert-manager.k3s.xpander.ovh
      config:
        global:
          resolve_timeout: 5m
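        # The Watchdog alert fires continuously by design as a dead man's
        # switch; routing it to the "null" receiver keeps it from notifying.
        # The "pushover" and "null" receivers themselves are presumably
        # defined in the values merged from the prometheus-stack-helmrelease
        # ConfigMap referenced under valuesFrom below.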
        route:
          receiver: "pushover"
          routes:
            - match:
                alertname: Watchdog
              receiver: "null"
            - receiver: "pushover"
        inhibit_rules:
          - source_match:
              severity: "critical"
            target_match:
              severity: "warning"
            # Apply inhibition only when both alertname and namespace match.
            equal: ["alertname", "namespace"]
      alertmanagerSpec:
        storage:
          volumeClaimTemplate:
            spec:
              storageClassName: longhorn
              resources:
                requests:
                  storage: 10Gi
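    # Copy the node name from service discovery into a kubernetes_node label
    # so dashboards can join node-exporter series on the node's name.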
    nodeExporter:
      serviceMonitor:
        relabelings:
          - action: replace
            regex: (.*)
            replacement: $1
            sourceLabels:
              - __meta_kubernetes_pod_node_name
            targetLabel: kubernetes_node
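    # Rewrite the kubelet metrics' instance label to the node name so it
    # lines up with the node-exporter labelling above.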
    kubelet:
      serviceMonitor:
        metricRelabelings:
          - action: replace
            sourceLabels:
              - node
            targetLabel: instance
    grafana:
      dashboards:
        default:
          kubernetes-custom:
            url: https://raw.githubusercontent.com/auricom/home-cluster/main/cluster/monitoring/grafana-dashboards/kubernetes-custom.json
            datasource: Prometheus
          longhorn:
            url: https://raw.githubusercontent.com/auricom/home-cluster/main/cluster/monitoring/grafana-dashboards/longhorn.json
            datasource: Prometheus
          pfsense-dashboard:
            url: https://raw.githubusercontent.com/auricom/home-cluster/main/cluster/monitoring/grafana-dashboards/pfsense.json
            datasource: influxdb-pfsense
          truenas-dashboard:
            url: https://raw.githubusercontent.com/auricom/home-cluster/main/cluster/monitoring/grafana-dashboards/truenas.json
            datasource: influxdb-graphite
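      # Dashboards and datasources are provisioned from code, so Grafana runs
      # stateless: no PVC, and the Recreate strategy avoids two pods running
      # side by side during upgrades.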
      deploymentStrategy:
        type: Recreate
      persistence:
        enabled: false
      env:
        GF_EXPLORE_ENABLED: true
        GF_DISABLE_SANITIZE_HTML: true
        GF_PANELS_DISABLE_SANITIZE_HTML: true
      plugins:
        - natel-discrete-panel
        - pr0ps-trackmap-panel
        - grafana-piechart-panel
        - vonage-status-panel
        - https://github.com/panodata/grafana-map-panel/releases/download/0.9.0/grafana-map-panel-0.9.0.zip;grafana-worldmap-panel-ng
      dashboardProviders:
        dashboardproviders.yaml:
          apiVersion: 1
          providers:
            - name: "default"
              orgId: 1
              folder: ""
              type: file
              disableDeletion: false
              editable: true
              options:
                path: /var/lib/grafana/dashboards/default
      sidecar:
        datasources:
          enabled: true
          defaultDatasourceEnabled: false
        dashboards:
          enabled: true
          searchNamespace: ALL
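      # The sidecar's default Prometheus datasource is disabled above so that
      # the "Prometheus" datasource below can point at Thanos Query instead,
      # giving dashboards the deduplicated, long-retention view across both
      # Prometheus replicas.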
      additionalDataSources:
        - name: Prometheus
          type: prometheus
          access: proxy
          url: http://thanos-query-http:10902/
          isDefault: true
        - name: loki
          type: loki
          access: proxy
          url: http://loki:3100/
        - name: influxdb-pfsense
          type: influxdb
          access: proxy
          url: http://influxdb:8086/
          database: pfsense
          user: pfsense
        - name: influxdb-rpi-os
          type: influxdb
          access: proxy
          url: http://influxdb:8086/
          database: rpi-os
          user: rpi-os
        - name: influxdb-graphite
          type: influxdb
          database: graphite
          access: proxy
          url: http://influxdb:8086/
        - name: influxdb-home_assistant
          type: influxdb
          access: proxy
          url: http://influxdb:8086/
          database: home_assistant
      grafana.ini:
        server:
          root_url: https://grafana.k3s.xpander.ovh
        paths:
          data: /var/lib/grafana/data
          logs: /var/log/grafana
          plugins: /var/lib/grafana/plugins
          provisioning: /etc/grafana/provisioning
        analytics:
          check_for_updates: true
        log:
          mode: console
        grafana_net:
          url: https://grafana.net
        smtp:
          enabled: false
      ingress:
        enabled: true
        annotations:
          kubernetes.io/ingress.class: "nginx"
          nginx.ingress.kubernetes.io/auth-url: "http://authelia.auth.svc.cluster.local/api/verify"
          nginx.ingress.kubernetes.io/auth-signin: "https://login.k3s.xpander.ovh/"
        hosts: [grafana.k3s.xpander.ovh]
        tls:
          - hosts:
              - grafana.k3s.xpander.ovh
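    # k3s runs the control-plane components embedded in the server binary, so
    # their dedicated scrape targets are disabled here.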
    kubeEtcd:
      enabled: false
    kubeControllerManager:
      enabled: false
    kubeScheduler:
      enabled: false
    kubeProxy:
      enabled: false
    prometheus:
      ingress:
        enabled: true
        annotations:
          kubernetes.io/ingress.class: "nginx"
          nginx.ingress.kubernetes.io/auth-url: "http://authelia.auth.svc.cluster.local/api/verify"
          nginx.ingress.kubernetes.io/auth-signin: "https://login.k3s.xpander.ovh/"
        hosts: [prometheus.k3s.xpander.ovh]
        tls:
          - hosts:
              - prometheus.k3s.xpander.ovh
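      # Two replicas scrape identically and are distinguished only by the
      # "replica" external label, which Thanos Query uses to deduplicate
      # series. The empty selectors with *NilUsesHelmValues: false make
      # Prometheus pick up rules, ServiceMonitors and PodMonitors from every
      # namespace, not just those installed by this chart. Local retention can
      # stay short (6h) because the Thanos sidecar below ships blocks to
      # object storage.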
      prometheusSpec:
        replicas: 2
        replicaExternalLabelName: "replica"
        ruleSelector: {}
        ruleNamespaceSelector: {}
        ruleSelectorNilUsesHelmValues: false
        serviceMonitorSelector: {}
        serviceMonitorNamespaceSelector: {}
        serviceMonitorSelectorNilUsesHelmValues: false
        podMonitorSelector: {}
        podMonitorNamespaceSelector: {}
        podMonitorSelectorNilUsesHelmValues: false
        retention: 6h
        enableAdminAPI: true
        walCompression: true
        storageSpec:
          volumeClaimTemplate:
            spec:
              storageClassName: longhorn
              resources:
                requests:
                  storage: 10Gi
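        # The Thanos sidecar uploads completed TSDB blocks to object storage;
        # the bucket configuration is read from the "thanos" Secret, key
        # object-store.yaml.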
        thanos:
          image: quay.io/thanos/thanos:v0.18.0
          objectStorageConfig:
            name: thanos
            key: object-store.yaml
        additionalScrapeConfigs:
          # Example scrape config for probing ingresses via the Blackbox Exporter.
          #
          # The relabeling allows the actual ingress scrape endpoint to be
          # configured via the following annotations:
          #
          # * `prometheus.io/probe`: Only probe ingresses that have a value of `true`
          - job_name: "kubernetes-ingresses"
            metrics_path: /probe
            scrape_interval: 60s
            params:
              module: [http_2xx]
            kubernetes_sd_configs:
              - role: ingress
            relabel_configs:
              - source_labels:
                  [__meta_kubernetes_ingress_annotation_prometheus_io_probe]
                action: keep
                regex: true
              - source_labels:
                  [
                    __meta_kubernetes_ingress_scheme,
                    __address__,
                    __meta_kubernetes_ingress_path,
                  ]
                regex: (.+);(.+);(.+)
                replacement: ${1}://${2}${3}
                target_label: __param_target
              - target_label: __address__
                replacement: blackbox-exporter-prometheus-blackbox-exporter:9115
              - source_labels: [__param_target]
                target_label: instance
              - action: labelmap
                regex: __meta_kubernetes_ingress_label_(.+)
              - source_labels: [__meta_kubernetes_namespace]
                target_label: kubernetes_namespace
              - source_labels: [__meta_kubernetes_ingress_name]
                target_label: kubernetes_name
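          # Services opt in to probing the same way, via the annotations
          # `prometheus.io/probe: "true"` and `prometheus.io/protocol`
          # (http or tcp), which select between the two jobs below.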
- job_name: "kubernetes-services-http"
metrics_path: /probe
scrape_interval: 60s
params:
module: [http_2xx]
kubernetes_sd_configs:
- role: service
relabel_configs:
- source_labels:
[__meta_kubernetes_service_annotation_prometheus_io_probe]
action: keep
regex: true
- source_labels:
[__meta_kubernetes_service_annotation_prometheus_io_protocol]
action: keep
regex: http
- source_labels: [__address__]
target_label: __param_target
- target_label: __address__
replacement: blackbox-exporter-prometheus-blackbox-exporter:9115
- source_labels: [__param_target]
target_label: instance
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
target_label: kubernetes_name
- job_name: "kubernetes-services-tcp"
metrics_path: /probe
scrape_interval: 60s
params:
module: [tcp_connect]
kubernetes_sd_configs:
- role: service
relabel_configs:
- source_labels:
[__meta_kubernetes_service_annotation_prometheus_io_probe]
action: keep
regex: true
- source_labels:
[__meta_kubernetes_service_annotation_prometheus_io_protocol]
action: keep
regex: tcp
- source_labels: [__address__]
target_label: __param_target
- target_label: __address__
replacement: blackbox-exporter-prometheus-blackbox-exporter:9115
- source_labels: [__param_target]
target_label: instance
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
target_label: kubernetes_name
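  # Extra values (presumably secrets such as the pushover receiver settings
  # referenced in the Alertmanager config above) are merged in from this
  # ConfigMap at reconcile time.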
  valuesFrom:
    - kind: ConfigMap
      name: prometheus-stack-helmrelease