mirror of
https://github.com/auricom/home-cluster.git
synced 2025-09-17 18:24:14 +02:00
♻️ flux kustomizations
This commit is contained in:
130
kubernetes/apps/monitoring/loki/app/config-map.yaml
Normal file
130
kubernetes/apps/monitoring/loki/app/config-map.yaml
Normal file
@@ -0,0 +1,130 @@
|
||||
---
# Log-based alerting rules for Loki, shipped as a ConfigMap in the
# `monitoring` namespace. The single data key holds a rules file with one
# group per application. Every rule counts matching log lines over a 2m
# window and fires (severity: critical, category: logs) once the count has
# stayed above zero for 2m.
#
# NOTE(review): this ConfigMap only takes effect if the Loki ruler mounts it
# into its local rule directory - confirm against the Loki HelmRelease.
#
# Everything below `|-` is the literal rules file, including its `#` lines;
# do not add commentary inside the block scalar, it would become part of the
# stored data.
apiVersion: v1
kind: ConfigMap
metadata:
  name: loki-alerting-rules
  namespace: monitoring
data:
  loki-alerting-rules.yaml: |-
    groups:
      #
      # SMART Failures
      #
      - name: smart-failure
        rules:
          - alert: SmartFailures
            expr: |
              sum by (hostname) (count_over_time({hostname=~".+"} | json | _SYSTEMD_UNIT = "smartmontools.service" !~ "(?i)previous self-test completed without error" !~ "(?i)Prefailure" |~ "(?i)(error|fail)"[2m])) > 0
            for: 2m
            labels:
              severity: critical
              category: logs
            annotations:
              summary: "SMART has reported failures on host {{ $labels.hostname }}"
      #
      # zigbee2mqtt
      #
      - name: zigbee2mqtt
        rules:
          - alert: ZigbeeUnableToReachMQTT
            expr: |
              sum(count_over_time({app="zigbee2mqtt"} |~ "(?i)not connected to mqtt server"[2m])) > 0
            for: 2m
            labels:
              severity: critical
              category: logs
            annotations:
              summary: "Zigbee2mqtt is unable to reach MQTT"
      #
      # zwavejs2mqtt
      #
      - name: zwavejs2mqtt
        rules:
          - alert: ZwaveUnableToReachMQTT
            expr: |
              sum(count_over_time({app="zwavejs2mqtt"} |~ "(?i)error while connecting mqtt"[2m])) > 0
            for: 2m
            labels:
              severity: critical
              category: logs
            annotations:
              summary: "Zwavejs2mqtt is unable to reach MQTT"
      #
      # frigate
      #
      - name: frigate
        rules:
          - alert: FrigateUnableToReachMQTT
            expr: |
              sum(count_over_time({app="frigate"} |~ "(?i)unable to connect to mqtt server"[2m])) > 0
            for: 2m
            labels:
              severity: critical
              category: logs
            annotations:
              summary: "Frigate is unable to reach MQTT"
      #
      # *arr
      #
      - name: arr
        rules:
          - alert: ArrDatabaseIsLocked
            expr: |
              sum by (app) (count_over_time({app=~".*arr"} |~ "(?i)database is locked"[2m])) > 0
            for: 2m
            labels:
              severity: critical
              category: logs
            annotations:
              summary: "{{ $labels.app }} is experiencing locked database issues"
          - alert: ArrDatabaseIsMalformed
            expr: |
              sum by (app) (count_over_time({app=~".*arr"} |~ "(?i)database disk image is malformed"[2m])) > 0
            for: 2m
            labels:
              severity: critical
              category: logs
            annotations:
              summary: "{{ $labels.app }} is experiencing malformed database disk image issues"
      #
      # home-assistant
      #
      - name: home-assistant
        rules:
          - alert: HomeAssistantUnableToReachPostgresql
            expr: |
              sum by (app) (count_over_time({app="home-assistant"} |~ "(?i)error in database connectivity"[2m])) > 0
            for: 2m
            labels:
              severity: critical
              category: logs
            annotations:
              summary: "Home Assistant is unable to connect to postgresql"
      #
      # valetudo
      #
      - name: valetudo
        rules:
          - alert: ValetudoUnableToReachMQTT
            expr: |
              sum by (hostname) (count_over_time({hostname="valetudo"} |~ "(?i).*error.*mqtt.*"[2m])) > 0
            for: 2m
            labels:
              severity: critical
              category: logs
            annotations:
              summary: "Valetudo is unable to connect to mqtt"
      #
      # node-red
      #
      - name: node-red
        rules:
          - alert: NodeRedUnableToReachHomeAssistant
            expr: |
              sum by (app) (count_over_time({app="node-red"} |~ "(?i)home assistant.*connecting to undefined"[2m])) > 0
            for: 2m
            labels:
              severity: critical
              category: logs
            annotations:
              summary: "Node-Red is unable to connect to Home Assistant"
|
189
kubernetes/apps/monitoring/loki/app/helmrelease.yaml
Normal file
189
kubernetes/apps/monitoring/loki/app/helmrelease.yaml
Normal file
@@ -0,0 +1,189 @@
|
||||
---
# yaml-language-server: $schema=https://kubernetes-schemas.devbu.io/helmrelease_v2beta1.json
# Flux HelmRelease installing the `loki` chart (3.8.0) from the `grafana`
# HelmRepository into the `monitoring` namespace, with gateway, write and read
# components at three replicas each.
#
# S3 bucket name, endpoint and credentials are not stored here; `valuesFrom`
# at the bottom injects them from the `loki-chunks-bucket` ConfigMap/Secret.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: loki
  namespace: monitoring
spec:
  interval: 15m
  chart:
    spec:
      chart: loki
      version: 3.8.0
      sourceRef:
        kind: HelmRepository
        name: grafana
        namespace: flux-system
  install:
    createNamespace: true
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3
  values:
    loki:
      structuredConfig:
        auth_enabled: false
        server:
          log_level: info
          http_listen_port: 3100
          grpc_listen_port: 9095
        memberlist:
          join_members: ["loki-memberlist"]
        limits_config:
          retention_period: 14d
          enforce_metric_name: false
          reject_old_samples: true
          reject_old_samples_max_age: 168h
          max_cache_freshness_per_query: 10m
          split_queries_by_interval: 15m
          ingestion_rate_mb: 8
          ingestion_burst_size_mb: 16
        schema_config:
          configs:
            - from: "2021-08-01"
              store: boltdb-shipper
              object_store: s3
              schema: v11
              index:
                prefix: loki_index_
                period: 24h
        common:
          path_prefix: /var/loki
          replication_factor: 3
          storage:
            s3:
              # bucketnames, endpoint, access_key_id and secret_access_key are
              # filled in by `spec.valuesFrom` below.
              s3: null
              insecure: true
              s3forcepathstyle: true
          ring:
            kvstore:
              store: memberlist
        ruler:
          enable_api: true
          enable_alertmanager_v2: true
          alertmanager_url: http://kube-prometheus-stack-alertmanager:9093
          storage:
            type: local
            local:
              # Rule files are read from here; see `read.extraVolumeMounts`,
              # which mounts the rules volume at /rules/fake.
              directory: /rules
          rule_path: /tmp/scratch
          ring:
            kvstore:
              store: memberlist
        distributor:
          ring:
            kvstore:
              store: memberlist
        compactor:
          working_directory: /var/loki/boltdb-shipper-compactor
          shared_store: s3
          compaction_interval: 10m
          retention_enabled: true
          retention_delete_delay: 2h
          retention_delete_worker_count: 150
        ingester:
          max_chunk_age: 1h
          lifecycler:
            ring:
              kvstore:
                store: memberlist
        analytics:
          reporting_enabled: false
    gateway:
      enabled: true
      replicas: 3
      # The affinity strings contain Helm template expressions that the chart
      # renders; keep their inner indentation in step with the `nindent` value.
      affinity: |
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 1
              podAffinityTerm:
                labelSelector:
                  matchLabels:
                    {{- include "loki.gatewaySelectorLabels" . | nindent 12 }}
                topologyKey: kubernetes.io/hostname
      ingress:
        enabled: true
        ingressClassName: "nginx"
        hosts:
          - host: &host "loki.${SECRET_CLUSTER_DOMAIN}"
            paths:
              - path: /
                pathType: Prefix
        tls:
          - hosts:
              - *host
    write:
      replicas: 3
      affinity: |
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 1
              podAffinityTerm:
                labelSelector:
                  matchLabels:
                    {{- include "loki.writeSelectorLabels" . | nindent 12 }}
                topologyKey: kubernetes.io/hostname
      persistence:
        size: 10Gi
        storageClass: rook-ceph-block
    read:
      replicas: 3
      affinity: |
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 1
              podAffinityTerm:
                labelSelector:
                  matchLabels:
                    {{- include "loki.readSelectorLabels" . | nindent 12 }}
                topologyKey: kubernetes.io/hostname
      extraVolumeMounts:
        - name: loki-rules
          mountPath: /rules/fake
        - name: loki-rules-tmp
          mountPath: /tmp/scratch
        - name: loki-tmp
          mountPath: /tmp/loki-tmp
      extraVolumes:
        # Source the rule files from the `loki-alerting-rules` ConfigMap so
        # its `loki-alerting-rules.yaml` key appears under /rules/fake. An
        # emptyDir here leaves the rule directory empty and no alert is ever
        # evaluated.
        - name: loki-rules
          configMap:
            name: loki-alerting-rules
        - name: loki-rules-tmp
          emptyDir: {}
        - name: loki-tmp
          emptyDir: {}
      persistence:
        size: 10Gi
        storageClass: rook-ceph-block
    monitoring:
      serviceMonitor:
        enabled: false
        metricsInstance:
          enabled: false
      selfMonitoring:
        enabled: false
        grafanaAgent:
          installOperator: false
        # NOTE(review): nesting was reconstructed; `lokiCanary` is assumed to
        # live under `selfMonitoring` for chart 3.8.0 - confirm against the
        # chart's values.yaml.
        lokiCanary:
          enabled: false
    test:
      enabled: false
  valuesFrom:
    - kind: ConfigMap
      name: loki-chunks-bucket
      valuesKey: BUCKET_NAME
      targetPath: loki.structuredConfig.common.storage.s3.bucketnames
    - kind: ConfigMap
      name: loki-chunks-bucket
      valuesKey: BUCKET_HOST
      targetPath: loki.structuredConfig.common.storage.s3.endpoint
    - kind: Secret
      name: loki-chunks-bucket
      valuesKey: AWS_ACCESS_KEY_ID
      targetPath: loki.structuredConfig.common.storage.s3.access_key_id
    - kind: Secret
      name: loki-chunks-bucket
      valuesKey: AWS_SECRET_ACCESS_KEY
      targetPath: loki.structuredConfig.common.storage.s3.secret_access_key
|
9
kubernetes/apps/monitoring/loki/app/kustomization.yaml
Normal file
9
kubernetes/apps/monitoring/loki/app/kustomization.yaml
Normal file
@@ -0,0 +1,9 @@
|
||||
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
# Kustomize entry point for the Loki app directory: bundles the bucket claim,
# the alerting-rules ConfigMap and the HelmRelease, and sets the `monitoring`
# namespace on them.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: monitoring
resources:
  - ./object-bucket-claim.yaml
  - ./config-map.yaml
  - ./helmrelease.yaml
|
11
kubernetes/apps/monitoring/loki/app/object-bucket-claim.yaml
Normal file
11
kubernetes/apps/monitoring/loki/app/object-bucket-claim.yaml
Normal file
@@ -0,0 +1,11 @@
|
||||
---
# Claims an object-storage bucket named `loki-chunks` from the
# `rook-ceph-bucket` storage class, with `maxSize` set to 50G.
#
# The Loki HelmRelease reads a ConfigMap and a Secret that share this claim's
# name (`loki-chunks-bucket`) for its S3 settings; presumably both are
# generated by the bucket provisioner once the claim is bound - confirm.
apiVersion: objectbucket.io/v1alpha1
kind: ObjectBucketClaim
metadata:
  name: loki-chunks-bucket
  namespace: monitoring
spec:
  bucketName: loki-chunks
  storageClassName: rook-ceph-bucket
  additionalConfig:
    maxSize: "50G"
|
25
kubernetes/apps/monitoring/loki/ks.yaml
Normal file
25
kubernetes/apps/monitoring/loki/ks.yaml
Normal file
@@ -0,0 +1,25 @@
|
||||
---
# yaml-language-server: $schema=https://kubernetes-schemas.devbu.io/kustomization_v1beta2.json
# Flux Kustomization that reconciles ./kubernetes/apps/monitoring/loki/app
# from the `home-ops-kubernetes` GitRepository every 30m. It waits for
# `cluster-apps-rook-ceph-cluster` first and is considered healthy once the
# `loki` HelmRelease in `monitoring` is ready (5m timeout, retry after 1m).
apiVersion: kustomize.toolkit.fluxcd.io/v1beta2
kind: Kustomization
metadata:
  name: cluster-apps-loki-app
  namespace: flux-system
  labels:
    # NOTE(review): presumably a repo-level selector that opts this
    # Kustomization into variable substitution (e.g. for
    # `${SECRET_CLUSTER_DOMAIN}`) - confirm where the label is consumed.
    substitution.flux.home.arpa/enabled: "true"
spec:
  dependsOn:
    - name: cluster-apps-rook-ceph-cluster
  path: ./kubernetes/apps/monitoring/loki/app
  prune: true
  sourceRef:
    kind: GitRepository
    name: home-ops-kubernetes
  healthChecks:
    - apiVersion: helm.toolkit.fluxcd.io/v2beta1
      kind: HelmRelease
      name: loki
      namespace: monitoring
  interval: 30m
  retryInterval: 1m
  timeout: 5m
|
Reference in New Issue
Block a user