Mirror of https://github.com/auricom/home-cluster.git (synced 2025-09-17 18:24:14 +02:00)

Commit: new gitops template
@@ -0,0 +1,41 @@
---
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: longhorn
  namespace: longhorn-system
spec:
  interval: 5m
  chart:
    spec:
      # renovate: registryUrl=https://charts.longhorn.io
      chart: longhorn
      version: 1.1.0
      sourceRef:
        kind: HelmRepository
        name: longhorn-charts
        namespace: flux-system
      interval: 5m
  values:
    defaultSettings:
      backupTarget: s3://longhorn@us-east-1/
      backupTargetCredentialSecret: minio-truenas-credentials
      createDefaultDiskLabeledNodes: true
      defaultDataPath: /var/lib/longhorn/
      replicaSoftAntiAffinity: false
      storageOverProvisioningPercentage: 300
      storageMinimalAvailablePercentage: 25
      upgradeChecker: true
      defaultReplicaCount: 3
      guaranteedEngineCPU: 0.25
      defaultLonghornStaticStorageClass: longhorn-backups
      backupstorePollInterval: 10800
      autoSalvage: true
      disableSchedulingOnCordonedNode: true
      replicaZoneSoftAntiAffinity: true
      volumeAttachmentRecoveryPolicy: wait
    csi:
      kubeletRootDir: /var/lib/kubelet
    tls: true
    ingress:
      enabled: false
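The chart sourceRef above points at a HelmRepository named longhorn-charts in the flux-system namespace, which is defined elsewhere in the repository. A minimal sketch of what that object would look like, taking the URL from the renovate hint and using an illustrative refresh interval:

---
apiVersion: source.toolkit.fluxcd.io/v1beta1
kind: HelmRepository
metadata:
  name: longhorn-charts
  namespace: flux-system
spec:
  interval: 10m                    # illustrative; not taken from this commit
  url: https://charts.longhorn.io  # from the renovate registryUrl comment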
cluster/core/infrastructure/longhorn-system/ingress.yaml (new file, 26 lines)
@@ -0,0 +1,26 @@
---
kind: Ingress
apiVersion: networking.k8s.io/v1
metadata:
  name: longhorn-ui
  namespace: longhorn-system
  annotations:
    kubernetes.io/ingress.class: "nginx"
    ingress.kubernetes.io/secure-backends: "true"
    nginx.ingress.kubernetes.io/auth-url: "http://authelia.networking.svc.cluster.local/api/verify"
    nginx.ingress.kubernetes.io/auth-signin: "https://login.${SECRET_CLUSTER_DOMAIN_CERT}/"
spec:
  tls:
    - hosts:
        - longhorn.${SECRET_CLUSTER_DOMAIN}
  rules:
    - host: longhorn.${SECRET_CLUSTER_DOMAIN}
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: longhorn-frontend
                port:
                  number: 80
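The ${SECRET_CLUSTER_DOMAIN} and ${SECRET_CLUSTER_DOMAIN_CERT} variables are not resolved by the Ingress itself; they are left for Flux's post-build variable substitution when this directory is reconciled. A minimal sketch of such a Flux Kustomization, assuming it is named longhorn-system, points at a GitRepository named flux-system, and reads the variables from a Secret called cluster-secrets (none of these names are defined in this commit):

---
apiVersion: kustomize.toolkit.fluxcd.io/v1beta1
kind: Kustomization
metadata:
  name: longhorn-system                 # assumed name
  namespace: flux-system
spec:
  interval: 10m                         # illustrative
  path: ./cluster/core/infrastructure/longhorn-system
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system                   # assumed GitRepository name
  postBuild:
    substituteFrom:
      - kind: Secret
        name: cluster-secrets           # hypothetical Secret holding SECRET_CLUSTER_DOMAIN*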
@@ -0,0 +1,8 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - helm-release.yaml
  - ingress.yaml
  - monitoring.yaml
  - storageclass.yaml
  - secret.enc.yaml
cluster/core/infrastructure/longhorn-system/monitoring.yaml (new file, 109 lines)
@@ -0,0 +1,109 @@
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: longhorn-prometheus-servicemonitor
  namespace: longhorn-system
  labels:
    name: longhorn-prometheus-servicemonitor
spec:
  selector:
    matchLabels:
      app: longhorn-manager
  namespaceSelector:
    matchNames:
      - longhorn-system
  endpoints:
    - port: manager
---
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    prometheus: longhorn
    role: alert-rules
  name: prometheus-longhorn-rules
  namespace: monitoring
spec:
  groups:
    - name: longhorn.rules
      rules:
        #- alert: LonghornVolumeActualSpaceUsedWarning
        #  annotations:
        #    description: The actual space used by Longhorn volume {{$labels.volume}} on {{$labels.node}} has been at {{$value}}% of capacity for
        #      more than 5 minutes.
        #    summary: The actual used space of a Longhorn volume is over 90% of its capacity.
        #  expr: (longhorn_volume_actual_size_bytes / longhorn_volume_capacity_bytes) * 100 > 90
        #  for: 5m
        #  labels:
        #    issue: The actual used space of Longhorn volume {{$labels.volume}} on {{$labels.node}} is high.
        #    severity: warning
        - alert: LonghornVolumeStatusCritical
          annotations:
            description: Longhorn volume {{$labels.volume}} on {{$labels.node}} has been Faulted for
              more than 5 minutes.
            summary: Longhorn volume {{$labels.volume}} is Faulted
          expr: longhorn_volume_robustness == 3
          for: 5m
          labels:
            issue: Longhorn volume {{$labels.volume}} is Faulted.
            severity: critical
        - alert: LonghornVolumeStatusWarning
          annotations:
            description: Longhorn volume {{$labels.volume}} on {{$labels.node}} has been Degraded for
              more than 5 minutes.
            summary: Longhorn volume {{$labels.volume}} is Degraded
          expr: longhorn_volume_robustness == 2
          for: 5m
          labels:
            issue: Longhorn volume {{$labels.volume}} is Degraded.
            severity: warning
        - alert: LonghornNodeStorageWarning
          annotations:
            description: The used storage of node {{$labels.node}} has been at {{$value}}% of capacity for
              more than 5 minutes.
            summary: The used storage of a node is over 70% of its capacity.
          expr: (longhorn_node_storage_usage_bytes / longhorn_node_storage_capacity_bytes) * 100 > 70
          for: 5m
          labels:
            issue: The used storage of node {{$labels.node}} is high.
            severity: warning
        - alert: LonghornDiskStorageWarning
          annotations:
            description: The used storage of disk {{$labels.disk}} on node {{$labels.node}} has been at {{$value}}% of capacity for
              more than 5 minutes.
            summary: The used storage of a disk is over 70% of its capacity.
          expr: (longhorn_disk_usage_bytes / longhorn_disk_capacity_bytes) * 100 > 70
          for: 5m
          labels:
            issue: The used storage of disk {{$labels.disk}} on node {{$labels.node}} is high.
            severity: warning
        - alert: LonghornNodeDown
          annotations:
            description: There are {{$value}} Longhorn nodes which have been offline for more than 5 minutes.
            summary: One or more Longhorn nodes are offline
          expr: longhorn_node_total - (count(longhorn_node_status{condition="ready"}==1) OR on() vector(0))
          for: 5m
          labels:
            issue: There are {{$value}} Longhorn nodes offline.
            severity: critical
        - alert: LonghornInstanceManagerCPUUsageWarning
          annotations:
            description: Longhorn instance manager {{$labels.instance_manager}} on {{$labels.node}} has had CPU usage at {{$value}}% of its CPU request for
              more than 5 minutes.
            summary: Longhorn instance manager {{$labels.instance_manager}} on {{$labels.node}} is using more than 300% of its CPU request.
          expr: (longhorn_instance_manager_cpu_usage_millicpu/longhorn_instance_manager_cpu_requests_millicpu) * 100 > 300
          for: 5m
          labels:
            issue: Longhorn instance manager {{$labels.instance_manager}} on {{$labels.node}} consumes 3 times its CPU request.
            severity: warning
        - alert: LonghornNodeCPUUsageWarning
          annotations:
            description: Longhorn node {{$labels.node}} has had CPU usage at {{$value}}% of capacity for
              more than 5 minutes.
            summary: Longhorn node {{$labels.node}} has experienced high CPU pressure for more than 5m.
          expr: (longhorn_node_cpu_usage_millicpu / longhorn_node_cpu_capacity_millicpu) * 100 > 90
          for: 5m
          labels:
            issue: Longhorn node {{$labels.node}} experiences high CPU pressure.
            severity: warning
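The ServiceMonitor above discovers any Service in longhorn-system labelled app: longhorn-manager that exposes a port named manager; Longhorn ships such a Service as part of its own manifests, so nothing extra is added in this commit. For orientation only, a rough sketch of the shape it matches, with the Service name and port number being illustrative rather than taken from the chart:

---
apiVersion: v1
kind: Service
metadata:
  name: longhorn-backend          # illustrative; matching is done by labels, not by name
  namespace: longhorn-system
  labels:
    app: longhorn-manager         # must match spec.selector.matchLabels in the ServiceMonitor
spec:
  selector:
    app: longhorn-manager
  ports:
    - name: manager               # must match the endpoint port name in the ServiceMonitor
      port: 9500                  # illustrative metrics port
      targetPort: 9500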
cluster/core/infrastructure/longhorn-system/secret.enc.yaml (new file, 39 lines)
@@ -0,0 +1,39 @@
kind: Secret
apiVersion: v1
metadata:
  name: minio-truenas-credentials
  namespace: longhorn-system
data:
  AWS_ACCESS_KEY_ID: ENC[AES256_GCM,data:uE5CV9wcWg8=,iv:l41hwC+43JWRbcsqpRwukwkpHcWjMmGf9eNtR8kV0VM=,tag:TrHP2GlnSbqWE7TS9neGfw==,type:str]
  AWS_SECRET_ACCESS_KEY: ENC[AES256_GCM,data:Jhg/KgZzOmU8jB3K0pMuke8BuUIWRVoQ1US3cw==,iv:lRidTSpintFfwd4/W32FGHEMy/v06ILrN62nPoMB3ew=,tag:NYT3ST+lsp6QkvjTEeXHBw==,type:str]
  AWS_ENDPOINTS: ENC[AES256_GCM,data:SdIM5UQmzsibf6lD0UN/2ztF03WeM5GqoEi71HtaNKeDRNqCXAssFhUd0l0=,iv:Ep5Xdpu48QriwOA1qmBPaNpcbiudNkpH+I2YiFpYCFY=,tag:4oJYhEMyUsIG8OJ+73wf1g==,type:str]
  #ENC[AES256_GCM,data:/pUAj7tHPkqci0vh/I5x5M6LebjodkftjOsXFCpQyW2D,iv:qTDtrQVblNVeUfAtBoUgO0rbqGzf4jQbjna0OQZdUf0=,tag:XZc9TX3zGZGbNz3CyYmKLw==,type:comment]
type: Opaque
sops:
  kms: []
  gcp_kms: []
  azure_kv: []
  hc_vault: []
  lastmodified: "2021-04-14T14:49:06Z"
  mac: ENC[AES256_GCM,data:CdXYSx72+JQMw4ZuCma8u0VTM5wNYNC0L2iBSBuLA0nr8YzMh59CAjc2S3ITpnusFQ3onisurrDoKj25GRJu0Dns4d1oluKGdsiIc8nfwSsRxxfRKb+iPa0B0lGsI2XvuvqBYcWLZ0S988NXfi8VCyaXIdoFMFjOPel9+KqPSio=,iv:8aq1YspzEiXqOIPHzZhAs930uwomdtKQtdKxSHjb90Y=,tag:sHikRF8/3+VDnVKtWEtcSA==,type:str]
  pgp:
    - created_at: "2021-04-14T14:49:06Z"
      enc: |
        -----BEGIN PGP MESSAGE-----

        hQGMA/JorPHm1g9XAQv/RCNYZMMGchIhqCt7S0jCFaGTqWvtydckIGQLZN3CCwmo
        xfMoaGf43yMKER21ilP3CY/EXQNzwz2di5M0/biofkaH5yiohcufECS6+rB9J/wI
        Ub5RsMuNdnZSNzsNTd/T3PgUbhuqNOiOBv3BM59SfbMa3z1w3StFdWk0h4zXfezc
        Vj/wtpV+1SonfCZ0QWqRB/crnAYSASoINS8kqU3I53VkoDM6pWoX4mjA7V+5x3aL
        5ZdqvUte42ANqNG9SLnnLQzhjKxEnb1K3R1VB2qmvCmWB3aY8hq9zKuK/x6WH9B4
        rtBiIB3BCtJeUC0rGRvBNlfxPDdegDWqae7y6JdQWRB4QaoYxVzKPNS0Msz7zjlH
        Rf75ZWWUJnKmHKzAQBHrgegUiR4GipEe5v63m0kInM3J8MHtolkJ22kCXeancYWl
        XnnZwWmyVz46BTR71EvdbApSmlDQjRCK3x/5FodtCZeWP1QEfC0lwRAlk2lyrPx7
        /L8KnFLK+NF9uR2Xylzf0l4BD+mNEAfIq7hvy4Gh8Ek50gpAmNGLq6zRNj0Sh6dz
        zbVyYHYIwEXCnvaN8UNumSqvTQ9e322bRXsYwVLLQXT58ZX/jbzvSwUkNalTJamx
        X6t5Qj8/5XOjupH0IoR0
        =8fGE
        -----END PGP MESSAGE-----
      fp: C8F8A49D04A1AB639F8EA21CDBA4B1DCB1FA5BDD
  encrypted_regex: ^(data|stringData)$
  version: 3.6.1
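secret.enc.yaml is encrypted with SOPS against the PGP key whose fingerprint appears above, and encrypted_regex limits encryption to the data/stringData fields. For Flux to apply it, the Kustomization reconciling this directory needs SOPS decryption enabled. A minimal sketch, assuming the private key lives in a Secret named sops-gpg (not shown in this commit) and that this is the same Kustomization as in the substitution sketch above:

---
apiVersion: kustomize.toolkit.fluxcd.io/v1beta1
kind: Kustomization
metadata:
  name: longhorn-system            # assumed name
  namespace: flux-system
spec:
  # ...interval, path and sourceRef as in the sketch after ingress.yaml...
  decryption:
    provider: sops
    secretRef:
      name: sops-gpg               # hypothetical Secret holding the PGP private key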
@@ -0,0 +1,17 @@
---
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
  name: longhorn-backups
provisioner: driver.longhorn.io
allowVolumeExpansion: true
reclaimPolicy: Retain
parameters:
  numberOfReplicas: "3"
  staleReplicaTimeout: "2880"
  fromBackup: ""
  diskSelector: "ssd,fast"
  nodeSelector: "storage,fast"
  recurringJobs:
    '[{"name":"backup", "task":"backup", "cron":"30 23 * * *", "retain":1,
    "labels": {"interval":"daily"}}]'
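The longhorn-backups class retains released volumes, keeps three replicas, restricts placement via Longhorn disk and node tags, and attaches a daily backup recurring job. A minimal sketch of a claim that would use it; the claim name, namespace and size are illustrative:

---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: example-data               # illustrative
  namespace: default               # illustrative
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: longhorn-backups
  resources:
    requests:
      storage: 10Gi                # illustrative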