feat: overhaul

This commit is contained in:
auricom
2025-01-04 00:00:04 +01:00
parent b14022014b
commit 0c9529c7a2
408 changed files with 3187 additions and 2380 deletions

View File

@@ -0,0 +1,75 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2.schema.json
# Flux HelmRelease that deploys Apprise through the bjw-s app-template chart.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
# The &app anchor is reused below for the service controller and the PVC name.
name: &app apprise
spec:
interval: 30m
chart:
spec:
chart: app-template
version: 3.6.0
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
maxHistory: 2
install:
createNamespace: true
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
strategy: rollback
retries: 3
uninstall:
keepHistory: false
# Depends on the rook-ceph-cluster release in the rook-ceph namespace.
dependsOn:
- name: rook-ceph-cluster
namespace: rook-ceph
values:
controllers:
apprise:
containers:
app:
image:
# https://hub.docker.com/r/caronc/apprise
repository: docker.io/caronc/apprise
# Tag is pinned together with its image digest.
tag: 1.1.7@sha256:7507518c63fc84044831c74b28aa3e3d2153c60b4726310c36818b5234b997c6
env:
# `:=` supplies Etc/UTC as the default when TIMEZONE is not substituted.
TZ: "${TIMEZONE:=Etc/UTC}"
probes:
liveness:
enabled: true
readiness:
enabled: true
service:
app:
controller: *app
ports:
http:
port: 8000
ingress:
app:
enabled: true
className: internal
annotations:
hajimari.io/icon: bell-cog
hosts:
# Host is built from the Helm release name plus SECRET_EXTERNAL_DOMAIN;
# the &host anchor is reused for the TLS host list.
- host: &host "{{ .Release.Name }}.${SECRET_EXTERNAL_DOMAIN}"
paths:
- path: /
service:
identifier: app
port: http
tls:
- hosts:
- *host
persistence:
config:
enabled: true
# Mounts an existing claim named after the app at /config.
existingClaim: *app
globalMounts:
- path: /config

View File

@@ -0,0 +1,8 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/SchemaStore/schemastore/master/src/schemas/json/kustomization.json
# Kustomize entry point for the apprise app directory.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./helmrelease.yaml
# Shared templates referenced from the templates directory four levels up.
- ../../../../templates/gatus/guarded
- ../../../../templates/volsync

View File

@@ -0,0 +1,28 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
# Flux Kustomization that reconciles the apprise app directory into the
# observability namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
# The &app anchor is reused for the common label and the APP substitution.
name: &app apprise
namespace: flux-system
spec:
targetNamespace: observability
commonMetadata:
labels:
app.kubernetes.io/name: *app
dependsOn:
- name: rook-ceph-cluster
- name: volsync
path: ./kubernetes/apps/observability/apprise/app
prune: true
sourceRef:
kind: GitRepository
name: home-ops-kubernetes
wait: false
interval: 30m
retryInterval: 1m
timeout: 5m
postBuild:
# Variables substituted into the rendered manifests; presumably consumed by
# the gatus/volsync templates listed in the app kustomization — confirm.
substitute:
APP: *app
VOLSYNC_CAPACITY: 1Gi

View File

@@ -0,0 +1,74 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/helm.toolkit.fluxcd.io/helmrelease_v2.json
# Flux HelmRelease that deploys the prometheus-community blackbox exporter.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
# The &app anchor is reused as fullnameOverride in the chart values.
name: &app blackbox-exporter
spec:
interval: 30m
chart:
spec:
chart: prometheus-blackbox-exporter
version: 9.1.0
sourceRef:
kind: HelmRepository
name: prometheus-community
namespace: flux-system
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
strategy: rollback
retries: 3
values:
fullnameOverride: *app
ingress:
enabled: true
className: internal
hosts:
- host: blackbox-exporter.${SECRET_EXTERNAL_DOMAIN}
paths:
- path: /
pathType: Prefix
securityContext:
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
capabilities:
# NOTE(review): NET_RAW is presumably needed by the icmp module — confirm.
add: [NET_RAW]
config:
# Probe modules; every module uses a 5s timeout and prefers IPv4.
modules:
http_2xx:
prober: http
timeout: 5s
http:
valid_http_versions: [HTTP/1.1, HTTP/2.0]
follow_redirects: true
preferred_ip_protocol: ipv4
icmp:
prober: icmp
timeout: 5s
icmp:
preferred_ip_protocol: ipv4
tcp_connect:
prober: tcp
timeout: 5s
tcp:
preferred_ip_protocol: ipv4
serviceMonitor:
enabled: true
defaults:
interval: 1m
scrapeTimeout: 10s
prometheusRule:
enabled: true
rules:
# Critical alert once probe_success has been 0 for 15 minutes.
- alert: BlackboxProbeFailed
expr: probe_success == 0
for: 15m
labels:
severity: critical
annotations:
summary: |-
The host {{ $labels.target }} is currently unreachable

View File

@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
# Kustomize entry point for the blackbox-exporter app: the Helm release plus
# the Probe definitions.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./helmrelease.yaml
- ./probes.yaml

View File

@@ -0,0 +1,14 @@
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/probe_v1.json
---
# Probe that checks the listed static targets with the `icmp` module.
kind: Probe
apiVersion: monitoring.coreos.com/v1
metadata:
name: devices
spec:
module: icmp
prober:
# Address of the blackbox exporter service that executes the probe.
url: blackbox-exporter.observability.svc.cluster.local:9115
targets:
staticConfig:
static:
- pikvm.${SECRET_INTERNAL_DOMAIN}

View File

@@ -0,0 +1,20 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
# Flux Kustomization that reconciles the blackbox-exporter app directory into
# the observability namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
# The &app anchor is reused for the common app.kubernetes.io/name label.
name: &app blackbox-exporter
namespace: flux-system
spec:
targetNamespace: observability
commonMetadata:
labels:
app.kubernetes.io/name: *app
path: ./kubernetes/apps/observability/blackbox-exporter/app
prune: true
sourceRef:
kind: GitRepository
name: home-ops-kubernetes
wait: false
interval: 30m
timeout: 15m

View File

@@ -0,0 +1,37 @@
---
web:
# Listen port comes from the WEB_PORT environment variable that the gatus
# HelmRelease sets on the app container (the same value feeds its probes and
# service port). The previous placeholder, CUSTOM_WEB_PORT, is not defined
# anywhere in the container environment.
port: ${WEB_PORT}
# Results are stored in a SQLite database under /config.
storage:
type: sqlite
path: /config/sqlite.db
caching: true
metrics: true
debug: false
ui:
title: Status | Gatus
header: Status
alerting:
pushover:
# Placeholder names match the keys templated into gatus-secret.
application-token: ${CUSTOM_PUSHOVER_APP_TOKEN}
user-key: ${CUSTOM_PUSHOVER_USER_KEY}
# Default alert settings for the pushover provider.
default-alert:
description: health-check failed
send-on-resolved: true
failure-threshold: 5
success-threshold: 2
connectivity:
checker:
target: 192.168.8.1:53
interval: 1m
endpoints:
# Checks the external status page every minute and alerts through pushover.
- name: status
group: external
url: https://status.${SECRET_EXTERNAL_DOMAIN}
interval: 1m
client:
# Resolves the hostname through 192.168.8.1 over TCP.
dns-resolver: tcp://192.168.8.1:53
insecure: true
conditions:
- "[STATUS] == 200"
alerts:
- type: pushover

View File

@@ -0,0 +1,21 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/external-secrets.io/externalsecret_v1beta1.json
# ExternalSecret that renders gatus-secret from the onepassword-connect store.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: gatus
spec:
secretStoreRef:
kind: ClusterSecretStore
name: onepassword-connect
target:
name: gatus-secret
template:
# Only these two keys are templated into the target secret; they are the
# pushover credentials exposed under CUSTOM_-prefixed names.
data:
CUSTOM_PUSHOVER_APP_TOKEN: '{{ .PUSHOVER_API_TOKEN }}'
CUSTOM_PUSHOVER_USER_KEY: '{{ .PUSHOVER_USER_KEY }}'
# Items whose fields are made available to the template above.
dataFrom:
- extract:
key: gatus
- extract:
key: pushover

View File

@@ -0,0 +1,148 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: &app gatus
spec:
interval: 30m
chart:
spec:
chart: app-template
version: 3.6.0
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
strategy: rollback
retries: 3
dependsOn:
- name: rook-ceph-cluster
namespace: rook-ceph
values:
controllers:
gatus:
annotations:
reloader.stakater.com/auto: "true"
# k8s-sidecar container that collects labelled resources into /config.
initContainers:
init-config:
image:
repository: ghcr.io/kiwigrid/k8s-sidecar
tag: 1.28.4@sha256:20caf4e241e1f9f9231527db5e75b735aa7b0da7bee3d262cbe369bb9b33469f
env:
# Target folder, selecting label, and scope: all namespaces, RESOURCE=both
# (the gatus ClusterRole grants read access to configmaps and secrets).
FOLDER: /config
LABEL: gatus.io/enabled
NAMESPACE: ALL
RESOURCE: both
UNIQUE_FILENAMES: true
METHOD: WATCH
# NOTE(review): restartPolicy Always on an init container presumably keeps it
# running next to the app container as a sidecar — confirm.
restartPolicy: Always
resources:
requests:
cpu: 10m
limits:
memory: 128Mi
containers:
app:
image:
repository: ghcr.io/twin/gatus
tag: v5.15.0@sha256:45686324db605e57dfa8b0931d8d57fe06298f52685f06aa9654a1f710d461bb
env:
GATUS_CONFIG_PATH: /config
GATUS_DELAY_START_SECONDS: 5
# The generated gatus-configmap is annotated to skip Flux substitution, so the
# ${SECRET_EXTERNAL_DOMAIN} placeholder inside config.yaml is never filled in
# at build time. Pass the value through the container environment instead;
# Flux substitutes it here in the HelmRelease.
SECRET_EXTERNAL_DOMAIN: "${SECRET_EXTERNAL_DOMAIN}"
# The &port anchor is reused by the probes and the service port.
WEB_PORT: &port 80
envFrom:
- secretRef:
name: gatus-secret
probes:
liveness: &probes
enabled: true
custom: true
spec:
httpGet:
path: /health
port: *port
initialDelaySeconds: 0
periodSeconds: 10
timeoutSeconds: 1
failureThreshold: 3
readiness: *probes
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
capabilities: { drop: [ALL] }
resources:
requests:
cpu: 100m
limits:
memory: 256Mi
defaultPodOptions:
dnsConfig:
options:
- { name: ndots, value: "1" }
securityContext:
runAsNonRoot: true
runAsUser: 1000
runAsGroup: 1000
fsGroup: 1000
fsGroupChangePolicy: OnRootMismatch
seccompProfile: { type: RuntimeDefault }
service:
app:
controller: gatus
ports:
http:
port: *port
serviceMonitor:
app:
serviceName: gatus
endpoints:
- port: http
scheme: http
path: /metrics
interval: 1m
scrapeTimeout: 10s
ingress:
app:
enabled: true
className: external
annotations:
hajimari.io/icon: mdi:list-status
gethomepage.dev/enabled: "true"
gethomepage.dev/name: Gatus
gethomepage.dev/description: Automated developer-oriented status page.
gethomepage.dev/group: Applications
gethomepage.dev/icon: gatus.png
gethomepage.dev/pod-selector: >-
app in (
gatus
)
hosts:
- host: &host "status.${SECRET_EXTERNAL_DOMAIN}"
paths:
- path: /
service:
identifier: app
port: http
tls:
- hosts:
- *host
serviceAccount:
create: true
name: *app
persistence:
config:
existingClaim: gatus
config-file:
type: configMap
name: gatus-configmap
globalMounts:
- path: /config/config.yaml
subPath: config.yaml
readOnly: true

View File

@@ -0,0 +1,17 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/SchemaStore/schemastore/master/src/schemas/json/kustomization.json
# Kustomize entry point for the gatus app: manifests plus a generated ConfigMap.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./externalsecret.yaml
- ./helmrelease.yaml
- ./pvc.yaml
- ./rbac.yaml
# Packages config/config.yaml into the gatus-configmap ConfigMap.
configMapGenerator:
- name: gatus-configmap
files:
- ./config/config.yaml
generatorOptions:
# Keep the fixed name "gatus-configmap" instead of appending a content hash.
disableNameSuffixHash: true
annotations:
# Opts the generated ConfigMap out of Flux post-build variable substitution.
kustomize.toolkit.fluxcd.io/substitute: disabled

View File

@@ -0,0 +1,11 @@
---
# 2Gi ReadWriteOnce claim on the rook-ceph-block storage class; the gatus
# HelmRelease references it as existingClaim "gatus".
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: gatus
spec:
accessModes: [ReadWriteOnce]
resources:
requests:
storage: 2Gi
storageClassName: rook-ceph-block

View File

@@ -0,0 +1,34 @@
---
# trunk-ignore(checkov/CKV_K8S_21)
# ServiceAccount for gatus.
# NOTE(review): the Helm managed-by label and release annotations are
# presumably there so the gatus Helm release (serviceAccount.create: true) can
# adopt this pre-existing object — confirm.
apiVersion: v1
kind: ServiceAccount
metadata:
name: gatus
labels:
app.kubernetes.io/managed-by: Helm
annotations:
meta.helm.sh/release-name: gatus
meta.helm.sh/release-namespace: observability
---
# Cluster-wide read-only access (get/watch/list) to ConfigMaps and Secrets.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: gatus
rules:
# trunk-ignore(trivy/KSV041)
- apiGroups: [""]
resources: [configmaps, secrets]
verbs: [get, watch, list]
---
# Grants the gatus ClusterRole to the gatus ServiceAccount in observability.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: gatus
roleRef:
kind: ClusterRole
name: gatus
apiGroup: rbac.authorization.k8s.io
subjects:
- kind: ServiceAccount
name: gatus
namespace: observability

View File

@@ -0,0 +1,26 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
# Flux Kustomization that reconciles the gatus app directory into the
# observability namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
# The &app anchor is reused for the common label and the APP substitution.
name: &app gatus
namespace: flux-system
spec:
targetNamespace: observability
commonMetadata:
labels:
app.kubernetes.io/name: *app
# The app directory contains an ExternalSecret backed by a ClusterSecretStore.
dependsOn:
- name: external-secrets-stores
path: ./kubernetes/apps/observability/gatus/app
prune: true
sourceRef:
kind: GitRepository
name: home-ops-kubernetes
wait: false
interval: 30m
retryInterval: 1m
timeout: 5m
postBuild:
substitute:
APP: *app

View File

@@ -0,0 +1,21 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.zinn.ca/external-secrets.io/externalsecret_v1beta1.json
# ExternalSecret that renders grafana-secret from the onepassword-connect
# store. Note the resource is named grafana-secrets while the Secret it
# produces is grafana-secret.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: grafana-secrets
spec:
secretStoreRef:
kind: ClusterSecretStore
name: onepassword-connect
target:
name: grafana-secret
creationPolicy: Owner
deletionPolicy: Delete
template:
engineVersion: v2
data:
# OAuth client secret, taken from the GRAFANA_OAUTH_CLIENT_SECRET field.
GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET: "{{ .GRAFANA_OAUTH_CLIENT_SECRET }}"
# Fields of the `authelia` item are made available to the template above.
dataFrom:
- extract:
key: authelia

View File

@@ -0,0 +1,353 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/helm.toolkit.fluxcd.io/helmrelease_v2.json
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: grafana
spec:
interval: 30m
chart:
spec:
chart: grafana
version: 8.8.2
sourceRef:
kind: HelmRepository
name: grafana
namespace: flux-system
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
retries: 3
uninstall:
keepHistory: false
values:
annotations:
reloader.stakater.com/auto: "true"
secret.reloader.stakater.com/reload: authelia-secret
rbac:
pspEnabled: false
# Grafana settings supplied as GF_<SECTION>_<KEY> environment variables.
env:
# OAuth endpoints of the identity provider at auth.${SECRET_EXTERNAL_DOMAIN}.
GF_AUTH_GENERIC_OAUTH_API_URL: https://auth.${SECRET_EXTERNAL_DOMAIN}/api/oidc/userinfo
GF_AUTH_GENERIC_OAUTH_AUTH_URL: https://auth.${SECRET_EXTERNAL_DOMAIN}/api/oidc/authorization
GF_AUTH_GENERIC_OAUTH_CLIENT_ID: grafana
GF_AUTH_GENERIC_OAUTH_TOKEN_URL: https://auth.${SECRET_EXTERNAL_DOMAIN}/api/oidc/token
GF_DATE_FORMATS_USE_BROWSER_LOCALE: true
GF_EXPLORE_ENABLED: true
GF_PANELS_DISABLE_SANITIZE_HTML: true
GF_LOG_FILTERS: rendering:debug
GF_PLUGINS_ALLOW_LOADING_UNSIGNED_PLUGINS: natel-discrete-panel,pr0ps-trackmap-panel,panodata-map-panel
GF_DATE_FORMATS_FULL_DATE: DD.MM.YYYY hh:mm:ss
GF_SECURITY_ALLOW_EMBEDDING: true
# cookie_samesite accepts lax, strict, none or disabled. The previous value
# "grafana" is not one of them; "lax" is Grafana's default.
GF_SECURITY_COOKIE_SAMESITE: lax
GF_SERVER_ROOT_URL: https://grafana.${SECRET_EXTERNAL_DOMAIN}
envFromSecrets:
- name: grafana-secret
grafana.ini:
analytics:
check_for_updates: false
check_for_plugin_updates: false
reporting_enabled: false
auth:
signout_redirect_url: "https://auth.${SECRET_EXTERNAL_DOMAIN}/logout"
oauth_auto_login: true
oauth_allow_insecure_email_lookup: true
auth.generic_oauth:
enabled: true
name: Authelia
icon: signin
scopes: openid profile email groups
empty_scopes: false
login_attribute_path: preferred_username
groups_attribute_path: groups
name_attribute_path: name
use_pkce: true
auth.generic_oauth.group_mapping:
org_id: 1
role_attribute_path: |
contains(groups[*], 'admins') && 'Admin' || contains(groups[*], 'people') && 'Viewer'
auth.basic:
enabled: false
auth.anonymous:
enabled: false
# org_id: 1
# org_role: Viewer
news:
news_feed_enabled: false
dashboardProviders:
dashboardproviders.yaml:
apiVersion: 1
providers:
- name: default
orgId: 1
folder: ""
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/default-folder
- name: ceph
orgId: 1
folder: Ceph
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/ceph-folder
- name: flux
orgId: 1
folder: Flux
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/flux-folder
- name: kubernetes
orgId: 1
folder: Kubernetes
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/kubernetes-folder
- name: nginx
orgId: 1
folder: Nginx
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/nginx-folder
- name: prometheus
orgId: 1
folder: Prometheus
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/prometheus-folder
- name: unifi
orgId: 1
folder: Unifi
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/unifi-folder
datasources:
datasources.yaml:
apiVersion: 1
deleteDatasources:
- { name: Alertmanager, orgId: 1 }
- { name: Loki, orgId: 1 }
- { name: Prometheus, orgId: 1 }
datasources:
- name: Prometheus
type: prometheus
uid: prometheus
access: proxy
url: http://prometheus-operated.observability.svc.cluster.local:9090
isDefault: true
# - name: Loki
# type: loki
# uid: loki
# access: proxy
# url: http://loki-gateway.observability.svc.cluster.local.:80
- name: Alertmanager
type: alertmanager
uid: alertmanager
access: proxy
url: http://alertmanager-operated.observability.svc.cluster.local:9093
jsonData:
implementation: prometheus
dashboards:
default:
home-assistant:
url: https://raw.githubusercontent.com/auricom/home-ops/main/kubernetes/apps/observability/grafana/dashboards/home-assistant.json
datasource: Prometheus
homelab-temperatures:
url: https://raw.githubusercontent.com/auricom/home-ops/main/kubernetes/apps/observability/grafana/dashboards/homelab-temperatures.json
datasource: Prometheus
external-dns:
# renovate: depName="External-dns"
gnetId: 15038
revision: 3
datasource: Prometheus
minio:
# renovate: depName="MinIO Dashboard"
gnetId: 13502
revision: 26
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
node-exporter-full:
# renovate: depName="Node Exporter Full"
gnetId: 1860
revision: 37
datasource: Prometheus
spegel:
# renovate: depName="Spegel"
gnetId: 18089
revision: 1
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
zfs:
# renovate: depName="ZFS"
gnetId: 7845
revision: 4
datasource: Prometheus
cert-manager:
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/cert-manager/dashboards/cert-manager.json
datasource: Prometheus
dragonfly:
url: https://raw.githubusercontent.com/dragonflydb/dragonfly/main/tools/local/monitoring/grafana/provisioning/dashboards/dashboard.json
datasource: Prometheus
external-secrets:
url: https://raw.githubusercontent.com/external-secrets/external-secrets/main/docs/snippets/dashboard.json
datasource: Prometheus
node-feature-discovery:
url: https://raw.githubusercontent.com/kubernetes-sigs/node-feature-discovery/master/examples/grafana-dashboard.json
datasource: Prometheus
zrepl:
url: https://raw.githubusercontent.com/zrepl/zrepl/master/dist/grafana/grafana-prometheus-zrepl.json
datasource: Prometheus
ceph:
ceph-cluster:
# renovate: depName="Ceph Cluster"
gnetId: 2842
revision: 18
datasource: Prometheus
ceph-osd:
# renovate: depName="Ceph - OSD (Single)"
gnetId: 5336
revision: 9
datasource: Prometheus
ceph-pools:
# renovate: depName="Ceph - Pools"
gnetId: 5342
revision: 9
datasource: Prometheus
flux:
flux-cluster:
url: https://raw.githubusercontent.com/fluxcd/flux2-monitoring-example/main/monitoring/configs/dashboards/cluster.json
datasource: Prometheus
flux-control-plane:
url: https://raw.githubusercontent.com/fluxcd/flux2-monitoring-example/main/monitoring/configs/dashboards/control-plane.json
datasource: Prometheus
kubernetes:
kubernetes-api-server:
# renovate: depName="Kubernetes / System / API Server"
gnetId: 15761
revision: 18
datasource: Prometheus
kubernetes-coredns:
# renovate: depName="Kubernetes / System / CoreDNS"
gnetId: 15762
revision: 19
datasource: Prometheus
kubernetes-global:
# renovate: depName="Kubernetes / Views / Global"
gnetId: 15757
revision: 42
datasource: Prometheus
kubernetes-namespaces:
# renovate: depName="Kubernetes / Views / Namespaces"
gnetId: 15758
revision: 41
datasource: Prometheus
kubernetes-nodes:
# renovate: depName="Kubernetes / Views / Nodes"
gnetId: 15759
revision: 32
datasource: Prometheus
# Dashboard pulled from grafana.com by ID and revision. The ID key must be
# spelled `gnetId`, as on every sibling entry.
kubernetes-pods:
# renovate: depName="Kubernetes / Views / Pods"
gnetId: 15760
revision: 21
datasource: Prometheus
kubernetes-volumes:
# renovate: depName="K8s / Storage / Volumes / Cluster"
gnetId: 11454
revision: 14
datasource: Prometheus
nginx:
nginx:
url: https://raw.githubusercontent.com/kubernetes/ingress-nginx/master/deploy/grafana/dashboards/nginx.json
datasource: Prometheus
nginx-request-handling-performance:
url: https://raw.githubusercontent.com/kubernetes/ingress-nginx/master/deploy/grafana/dashboards/request-handling-performance.json
datasource: Prometheus
prometheus:
prometheus:
# renovate: depName="Prometheus"
gnetId: 19105
revision: 6
datasource: Prometheus
unifi:
unifi-insights:
# renovate: depName="UniFi-Poller: Client Insights - Prometheus"
gnetId: 11315
revision: 9
datasource: Prometheus
unifi-network-sites:
# renovate: depName="UniFi-Poller: Network Sites - Prometheus"
gnetId: 11311
revision: 5
datasource: Prometheus
unifi-uap:
# renovate: depName="UniFi-Poller: UAP Insights - Prometheus"
gnetId: 11314
revision: 10
datasource: Prometheus
unifi-usw:
# renovate: depName="UniFi-Poller: USW Insights - Prometheus"
gnetId: 11312
revision: 9
datasource: Prometheus
sidecar:
dashboards:
enabled: true
searchNamespace: ALL
labelValue: ""
label: grafana_dashboard
folderAnnotation: grafana_folder
provider:
disableDelete: true
foldersFromFilesStructure: true
datasources:
enabled: true
searchNamespace: ALL
labelValue: ""
plugins:
- grafana-clock-panel
- grafana-piechart-panel
- grafana-worldmap-panel
- natel-discrete-panel
- pr0ps-trackmap-panel
- vonage-status-panel
serviceMonitor:
enabled: true
ingress:
enabled: true
ingressClassName: internal
annotations:
hajimari.io/icon: simple-icons:grafana
gethomepage.dev/enabled: "true"
gethomepage.dev/name: Grafana
gethomepage.dev/description: Observability and data visualization platform.
gethomepage.dev/group: Infrastructure
gethomepage.dev/icon: grafana.png
gethomepage.dev/pod-selector: >-
app in (
grafana
)
hosts:
- &host "grafana.${SECRET_EXTERNAL_DOMAIN}"
tls:
- hosts:
- *host
persistence:
enabled: false
testFramework:
enabled: false

View File

@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/SchemaStore/schemastore/master/src/schemas/json/kustomization.json
# Kustomize entry point for the grafana app: the ExternalSecret and the Helm
# release.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./externalsecret.yaml
- ./helmrelease.yaml

View File

@@ -0,0 +1,437 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 1,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "line+area"
}
},
"decimals": 0,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
},
{
"color": "red",
"value": 40
}
]
},
"unit": "celsius"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 0
},
"id": 6,
"options": {
"legend": {
"calcs": [
"mean",
"max",
"min"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.0.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"exemplar": true,
"expr": "avg(hass_sensor_temperature_celsius{entity=\"sensor.temperature_living_room_temperature\"})",
"instant": false,
"interval": "",
"legendFormat": "Living Room",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"exemplar": true,
"expr": "sum(hass_sensor_temperature_celsius{entity=~\"sensor.temperature_bedroom_temperature\"})",
"hide": false,
"interval": "",
"legendFormat": "Bedroom 1",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"exemplar": true,
"expr": "sum(hass_sensor_temperature_celsius{entity=~\"sensor.temperature_office_temperature\"})",
"hide": false,
"interval": "",
"legendFormat": "Office",
"range": true,
"refId": "C"
}
],
"title": "Home",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 0,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "watt"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 9
},
"id": 7,
"options": {
"legend": {
"calcs": [
"mean",
"max"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.0.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"exemplar": true,
"expr": "avg(hass_sensor_power_w{entity=\"sensor.servers_plug_switch_electricity_power\"})",
"interval": "",
"legendFormat": "Servers",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"exemplar": true,
"expr": "avg(hass_sensor_power_w{entity=\"sensor.bureau_plug_switch_electricity_power\"})",
"hide": false,
"interval": "",
"legendFormat": "claude-fixe",
"range": true,
"refId": "B"
}
],
"title": "Computers",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 0,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "watt"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 18
},
"id": 8,
"options": {
"legend": {
"calcs": [
"mean",
"max"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.0.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"exemplar": true,
"expr": "avg(hass_sensor_power_w{entity=~\"sensor.power_plug_avr_power\"})",
"interval": "",
"legendFormat": "AVR",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"exemplar": true,
"expr": "hass_sensor_power_w{entity=~\"sensor.power_plug_projector_power\"}",
"hide": false,
"interval": "",
"legendFormat": "Projector",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"exemplar": true,
"expr": "hass_sensor_power_w{entity=~\"sensor.power_plug_subwoofer_power\"}",
"hide": false,
"interval": "",
"legendFormat": "Subwoofer",
"range": true,
"refId": "C"
}
],
"title": "Home Cinema",
"type": "timeseries"
}
],
"schemaVersion": 37,
"style": "dark",
"tags": [
"home-assistant"
],
"templating": {
"list": []
},
"time": {
"from": "now-24h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
]
},
"timezone": "",
"title": "Home-Assistant",
"uid": "sn-bOoWMk",
"version": 1,
"weekStart": ""
}

View File

@@ -0,0 +1,395 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 2,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 0,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "celsius"
},
"overrides": [
{
"__systemRef": "hideSeriesFrom",
"matcher": {
"id": "byNames",
"options": {
"mode": "exclude",
"names": [
"Value"
],
"prefix": "All except:",
"readOnly": true
}
},
"properties": [
{
"id": "custom.hideFrom",
"value": {
"legend": false,
"tooltip": false,
"viz": true
}
}
]
}
]
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 0
},
"id": 5,
"options": {
"legend": {
"calcs": [
"mean",
"max",
"min"
],
"displayMode": "table",
"placement": "right",
"showLegend": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.0.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"exemplar": true,
"expr": "avg(node_cpu_temperature_celsius{app=\"opnsense\"})",
"interval": "",
"legendFormat": "Temperature",
"range": true,
"refId": "A"
}
],
"title": "OPNsense - CPU",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "line+area"
}
},
"decimals": 0,
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
},
{
"color": "orange",
"value": 80
},
{
"color": "red",
"value": 90
}
]
},
"unit": "celsius"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 9
},
"id": 2,
"options": {
"legend": {
"calcs": [
"mean",
"max",
"min"
],
"displayMode": "table",
"placement": "right",
"showLegend": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.0.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "avg (node_hwmon_temp_celsius{chip=\"platform_coretemp_0\"}) by (chip,kubernetes_node) * ignoring(chip_name) group_left(chip_name) avg (node_hwmon_chip_names) by (chip,chip_name,kubernetes_node)",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{chip_name}}",
"range": true,
"refId": "A"
}
],
"title": "k3s Nodes - CPU",
"transformations": [],
"type": "timeseries"
},
{
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "line+area"
}
},
"decimals": 0,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
},
{
"color": "red",
"value": 40
}
]
},
"unit": "celsius"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 18
},
"id": 6,
"options": {
"legend": {
"calcs": [
"mean",
"max",
"min"
],
"displayMode": "list",
"placement": "right",
"showLegend": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.0.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"exemplar": true,
"expr": "avg(hass_sensor_temperature_celsius{entity=~\"sensor.temperature_servers_temperature\"})",
"interval": "",
"legendFormat": "datacenter",
"range": true,
"refId": "A"
}
],
"title": "Datacenter",
"type": "timeseries"
}
],
"schemaVersion": 37,
"style": "dark",
"tags": [
"homelab"
],
"templating": {
"list": []
},
"time": {
"from": "now-24h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
]
},
"timezone": "",
"title": "Homelab / Temperatures",
"uid": "aEY0BVGnz",
"version": 1,
"weekStart": ""
}

View File

@@ -0,0 +1,942 @@
{
"__inputs": [
{
"name": "DS_PROMETHEUS",
"label": "Prometheus",
"description": "",
"type": "datasource",
"pluginId": "prometheus",
"pluginName": "Prometheus"
}
],
"__requires": [
{
"type": "panel",
"id": "gauge",
"name": "Gauge",
"version": ""
},
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "8.1.0"
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
},
{
"type": "panel",
"id": "stat",
"name": "Stat",
"version": ""
},
{
"type": "panel",
"id": "timeseries",
"name": "Time series",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"limit": 100,
"name": "Annotations & Alerts",
"showIn": 0,
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"id": null,
"iteration": 1630317513315,
"links": [],
"panels": [
{
"collapsed": false,
"datasource": "${DS_PROMETHEUS}",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 2,
"panels": [],
"repeat": null,
"title": "Summary",
"type": "row"
},
{
"datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "rgb(255, 255, 255)",
"value": 600
}
]
},
"unit": "dtdurations"
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 2,
"x": 0,
"y": 1
},
"id": 7,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"last"
],
"fields": "",
"values": false
},
"text": {},
"textMode": "value"
},
"pluginVersion": "8.1.0",
"targets": [
{
"exemplar": true,
"expr": "system_uptime{host=\"$host\"}",
"instant": true,
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "Uptime",
"type": "stat"
},
{
"datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"max": 100,
"min": 0,
"thresholds": {
"mode": "percentage",
"steps": [
{
"color": "semi-dark-green",
"value": null
},
{
"color": "semi-dark-yellow",
"value": 80
},
{
"color": "semi-dark-red",
"value": 90
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 4,
"x": 2,
"y": 1
},
"id": 4,
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true,
"text": {}
},
"pluginVersion": "8.1.0",
"targets": [
{
"exemplar": true,
"expr": "sum(100-cpu_usage_idle{host=\"$host\", cpu=\"cpu-total\"})",
"instant": true,
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "CPU used",
"type": "gauge"
},
{
"datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"max": 100,
"min": 0,
"thresholds": {
"mode": "percentage",
"steps": [
{
"color": "semi-dark-green",
"value": null
},
{
"color": "semi-dark-yellow",
"value": 95
},
{
"color": "semi-dark-red",
"value": 98
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 4,
"x": 6,
"y": 1
},
"id": 5,
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true,
"text": {}
},
"pluginVersion": "8.1.0",
"targets": [
{
"exemplar": true,
"expr": "mem_used_percent{host=\"$host\"}",
"instant": true,
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "Memory used",
"type": "gauge"
},
{
"datasource": "${DS_PROMETHEUS}",
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"max": 100,
"min": 0,
"thresholds": {
"mode": "percentage",
"steps": [
{
"color": "semi-dark-green",
"value": null
},
{
"color": "semi-dark-yellow",
"value": 70
},
{
"color": "semi-dark-red",
"value": 80
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 4,
"x": 10,
"y": 1
},
"id": 8,
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true,
"text": {
"titleSize": 1
}
},
"pluginVersion": "8.1.0",
"targets": [
{
"exemplar": true,
"expr": "zfs_pool_capacity{host=\"$host\", pool=~\"storage|vol1\"}",
"instant": true,
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "Pool capacity",
"type": "gauge"
},
{
"collapsed": false,
"datasource": "${DS_PROMETHEUS}",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 5
},
"id": 12,
"panels": [],
"title": "System",
"type": "row"
},
{
"datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"max": 100,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 12,
"x": 0,
"y": 6
},
"id": 10,
"options": {
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "8.0.5",
"targets": [
{
"exemplar": true,
"expr": "sum(100-cpu_usage_idle{host=\"$host\", cpu=\"cpu-total\"})",
"interval": "",
"legendFormat": "CPU usage",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "CPU Usage",
"type": "timeseries"
},
{
"datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "decbytes"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 12,
"x": 12,
"y": 6
},
"id": 13,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "8.0.5",
"targets": [
{
"exemplar": true,
"expr": "rate(net_bytes_recv{host=\"$host\", interface=~\"em0|igb0\"}[5m])",
"interval": "",
"legendFormat": "received by {{interface}}",
"refId": "A"
},
{
"exemplar": true,
"expr": "rate(net_bytes_sent{host=\"$host\", interface=~\"em0|igb0\"}[5m])",
"hide": false,
"interval": "",
"legendFormat": "sent by {{interface}}",
"refId": "B"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Network Usage",
"type": "timeseries"
},
{
  "datasource": "${DS_PROMETHEUS}",
  "fieldConfig": {
    "defaults": {
      "color": {
        "mode": "palette-classic"
      },
      "custom": {
        "axisLabel": "",
        "axisPlacement": "auto",
        "barAlignment": 0,
        "drawStyle": "line",
        "fillOpacity": 0,
        "gradientMode": "none",
        "hideFrom": {
          "legend": false,
          "tooltip": false,
          "viz": false
        },
        "lineInterpolation": "linear",
        "lineWidth": 1,
        "pointSize": 5,
        "scaleDistribution": {
          "type": "linear"
        },
        "showPoints": "never",
        "spanNulls": true,
        "stacking": {
          "group": "A",
          "mode": "none"
        },
        "thresholdsStyle": {
          "mode": "off"
        }
      },
      "mappings": [],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {
            "color": "green",
            "value": null
          },
          {
            "color": "red",
            "value": 80
          }
        ]
      },
      "unit": "celsius"
    },
    "overrides": []
  },
  "gridPos": {
    "h": 5,
    "w": 12,
    "x": 0,
    "y": 11
  },
  "id": 14,
  "options": {
    "legend": {
      "calcs": [],
      "displayMode": "hidden",
      "placement": "bottom"
    },
    "tooltip": {
      "mode": "single"
    }
  },
  "pluginVersion": "8.0.5",
  "targets": [
    {
      "exemplar": true,
      "expr": "avg(temp_temp{host=\"$host\"})",
      "interval": "",
      "legendFormat": "Temperature",
      "refId": "A"
    }
  ],
  "timeFrom": null,
  "timeShift": null,
  "title": "CPU temperature",
  "type": "timeseries"
},
{
"datasource": "${DS_PROMETHEUS}",
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "celsius"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 12,
"x": 12,
"y": 11
},
"id": 15,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "8.0.5",
"targets": [
{
"exemplar": false,
"expr": "sum(disktemp_temperature{host=\"$host\"}) by (disk)",
"hide": false,
"instant": false,
"interval": "",
"legendFormat": "{{disk}}",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Disks temperatures",
"type": "timeseries"
},
{
"collapsed": false,
"datasource": "${DS_PROMETHEUS}",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 16
},
"id": 17,
"panels": [],
"title": "Block devices",
"type": "row"
},
{
"datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 17
},
"id": 19,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "8.0.5",
"targets": [
{
"exemplar": true,
"expr": "sum(rate(diskio_read_bytes{host=\"$host\",name=~\"ada[0-9]|nvd[0-9]\"}[5m])) by (name)",
"interval": "",
"legendFormat": "{{name}}",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Disk IO Reads",
"type": "timeseries"
},
{
"datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 17
},
"id": 20,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "8.0.5",
"targets": [
{
"exemplar": true,
"expr": "sum(rate(diskio_write_bytes{host=\"$host\",name=~\"ada[0-9]|nvd[0-9]\"}[5m])) by (name)",
"interval": "",
"legendFormat": "{{name}}",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Disk IO Writes",
"type": "timeseries"
}
],
"schemaVersion": 30,
"style": "dark",
"tags": [
"truenas",
"telegraf"
],
"templating": {
"list": [
{
"allValue": null,
"current": {},
"datasource": "${DS_PROMETHEUS}",
"definition": "label_values(system_uptime, host)",
"description": null,
"error": null,
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "host",
"options": [],
"query": {
"query": "label_values(system_uptime, host)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "/(.*)/",
"skipUrlSync": false,
"sort": 2,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "TrueNAS",
"uid": "l65MB4M7z",
"version": 1
}

View File

@@ -0,0 +1,24 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
# Flux Kustomization that applies the manifests under grafana/app into the
# observability namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: &app grafana
  namespace: flux-system
spec:
  # Source of the manifests.
  sourceRef:
    kind: GitRepository
    name: home-ops-kubernetes
  path: ./kubernetes/apps/observability/grafana/app
  # How the manifests are applied.
  targetNamespace: observability
  commonMetadata:
    labels:
      app.kubernetes.io/name: *app
  prune: true
  wait: false
  # Reconciliation timing.
  interval: 30m
  retryInterval: 1m
  timeout: 5m
  # Makes ${APP} available for substitution in the built manifests.
  postBuild:
    substitute:
      APP: *app

View File

@@ -0,0 +1,24 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/external-secrets.io/externalsecret_v1beta1.json
# Builds kube-prometheus-stack-secret from the `minio` and `PiKVM` entries of
# the onepassword-connect ClusterSecretStore.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: kube-prometheus-stack
spec:
  secretStoreRef:
    name: onepassword-connect
    kind: ClusterSecretStore
  # Every field of the extracted entries is available to the template below.
  dataFrom:
    - extract:
        key: minio
    - extract:
        key: PiKVM
  target:
    name: kube-prometheus-stack-secret
    template:
      engineVersion: v2
      data:
        # App
        MINIO_PROM_TOKEN: '{{ .MINIO__PROMETHEUS_TOKEN }}'
        # NOTE(review): .username/.password presumably come from the PiKVM
        # entry - confirm against the secret store.
        PIKVM_USERNAME: '{{ .username }}'
        PIKVM_PASSWORD: '{{ .password }}'

View File

@@ -0,0 +1,257 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/helm.toolkit.fluxcd.io/helmrelease_v2.json
# Flux HelmRelease for the kube-prometheus-stack chart: Prometheus operator,
# Prometheus, Alertmanager, kube-state-metrics and node-exporter values.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: kube-prometheus-stack
spec:
  interval: 30m
  chart:
    spec:
      chart: kube-prometheus-stack
      version: 67.5.0
      sourceRef:
        kind: HelmRepository
        name: prometheus-community
        namespace: flux-system
      interval: 5m
  # CRD handling is skipped on install/upgrade and disabled in values; this
  # release depends on kube-prometheus-stack-crds instead (see dependsOn).
  install:
    crds: Skip
    remediation:
      retries: 3
  upgrade:
    cleanupOnFail: true
    crds: Skip
    remediation:
      strategy: rollback
      retries: 3
  dependsOn:
    - name: kube-prometheus-stack-crds
      namespace: observability
    - name: rook-ceph-cluster
      namespace: rook-ceph
  values:
    crds:
      enabled: false
    ###
    ### Component values
    ###
    kubeApiServer:
      enabled: true
      serviceMonitor:
        metricRelabelings:
          - action: replace
            sourceLabels:
              - node
            targetLabel: instance
    kubeProxy:
      enabled: false
    kubeControllerManager:
      enabled: false
    kubeEtcd:
      enabled: false
    kubeScheduler:
      enabled: false
    # Sub-chart values must live under the sub-chart's own name
    # (`kube-state-metrics`), as is done for `prometheus-node-exporter`
    # below. The parent chart's `kubeStateMetrics` key is only an enable
    # toggle, so settings placed there are ignored.
    kube-state-metrics:
      metricLabelsAllowlist:
        - "persistentvolumeclaims=[*]"
      prometheus:
        monitor:
          enabled: true
          # Copy the pod's node name into a `kubernetes_node` label.
          relabelings:
            - action: replace
              regex: (.*)
              replacement: $1
              sourceLabels:
                - __meta_kubernetes_pod_node_name
              targetLabel: kubernetes_node
    # Grafana itself is not deployed by this release; only the chart's
    # dashboards are.
    grafana:
      enabled: false
      forceDeployDashboards: true
    prometheus-node-exporter:
      resources:
        requests:
          cpu: 23m
          memory: 64M
        limits:
          memory: 64M
      prometheus:
        monitor:
          enabled: true
          relabelings:
            - action: replace
              regex: (.*)
              replacement: $1
              sourceLabels:
                - __meta_kubernetes_pod_node_name
              targetLabel: kubernetes_node
    ###
    ### Prometheus operator values
    ###
    prometheusOperator:
      prometheusConfigReloader:
        resources:
          requests:
            cpu: 100m
            memory: 50Mi
          limits:
            cpu: 300m
            memory: 100Mi
    ###
    ### Prometheus instance values
    ###
    prometheus:
      ingress:
        enabled: true
        pathType: Prefix
        ingressClassName: internal
        annotations:
          # Forward-auth through Authelia.
          nginx.ingress.kubernetes.io/auth-method: GET
          nginx.ingress.kubernetes.io/auth-url: http://authelia.default.svc.cluster.local.:8888/api/verify
          nginx.ingress.kubernetes.io/auth-signin: https://auth.${SECRET_EXTERNAL_DOMAIN}?rm=$request_method
          nginx.ingress.kubernetes.io/auth-response-headers: Remote-User,Remote-Name,Remote-Groups,Remote-Email
          nginx.ingress.kubernetes.io/auth-snippet: proxy_set_header X-Forwarded-Method $request_method;
          # Dashboard integrations.
          hajimari.io/appName: Prometheus
          hajimari.io/icon: simple-icons:prometheus
          gethomepage.dev/enabled: "true"
          gethomepage.dev/name: Prometheus
          gethomepage.dev/description: Systems and service monitoring system.
          gethomepage.dev/group: Infrastructure
          gethomepage.dev/icon: prometheus.png
          gethomepage.dev/pod-selector: >-
            app in (
            prometheus-kube-prometheus-stack-prometheus
            )
        hosts: ["prometheus.${SECRET_EXTERNAL_DOMAIN}"]
        tls:
          - hosts:
              - "prometheus.${SECRET_EXTERNAL_DOMAIN}"
      prometheusSpec:
        replicas: 2
        replicaExternalLabelName: replica
        scrapeInterval: 1m # Must match interval in Grafana Helm chart
        # One shared selector with matchLabels set to null, reused for every
        # monitor/rule kind.
        podMonitorSelector: &selector
          matchLabels: null
        probeSelector: *selector
        ruleSelector: *selector
        scrapeConfigSelector: *selector
        serviceMonitorSelector: *selector
        retention: 14d
        # Must stay below the 20Gi volume requested in storageSpec, with
        # headroom for the WAL; a larger value can never trigger before the
        # disk is full.
        retentionSize: 16GB
        enableAdminAPI: true
        walCompression: true
        storageSpec:
          volumeClaimTemplate:
            spec:
              storageClassName: rook-ceph-block
              resources:
                requests:
                  storage: 20Gi
    alertmanager:
      config:
        global:
          resolve_timeout: 5m
        receivers:
          - name: "null"
          - name: pushover
            pushover_configs:
              - user_key: ${SECRET_KUBE_PROMETHEUS_STACK_ALERTMANAGER_PUSHOVER_USER_KEY}
                token: ${SECRET_KUBE_PROMETHEUS_STACK_ALERTMANAGER_PUSHOVER_TOKEN}
                send_resolved: true
                html: true
                # Priority 1 while firing, 0 once resolved.
                priority: |-
                  {{ if eq .Status "firing" }}1{{ else }}0{{ end }}
                url_title: View in Alert Manager
                title: |-
                  [{{ .Status | toUpper -}}
                  {{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{- end -}}
                  ] {{ .CommonLabels.alertname }}
                message: |-
                  {{- range .Alerts }}
                  {{- if ne .Labels.severity "" }}
                  <b>Severity:</b> <i>{{ .Labels.severity }}</i>
                  {{- else }}
                  <b>Severity:</b> <i>N/A</i>
                  {{- end }}
                  {{- if ne .Annotations.description "" }}
                  <b>Description:</b> <i>{{ .Annotations.description }}</i>
                  {{- else if ne .Annotations.summary "" }}
                  <b>Summary:</b> <i>{{ .Annotations.summary }}</i>
                  {{- else if ne .Annotations.message "" }}
                  <b>Message:</b> <i>{{ .Annotations.message }}</i>
                  {{- else }}
                  <b>Description:</b> <i>N/A</i>
                  {{- end }}
                  {{- if gt (len .Labels.SortedPairs) 0 }}
                  <b>Details:</b>
                  {{- range .Labels.SortedPairs }}
                  • <b>{{ .Name }}:</b> <i>{{ .Value }}</i>
                  {{- end }}
                  {{- end }}
                  {{- end }}
        route:
          receiver: pushover
          routes:
            # Housekeeping alerts go to the empty "null" receiver.
            - receiver: "null"
              matchers:
                - alertname =~ "InfoInhibitor|Watchdog|RebootScheduled"
            - receiver: pushover
              matchers:
                - severity = "critical"
              continue: true
        # A critical alert mutes warnings with the same alertname/namespace.
        inhibit_rules:
          - source_matchers:
              - severity = "critical"
            target_matchers:
              - severity = "warning"
            equal: [alertname, namespace]
      alertmanagerSpec:
        replicas: 1
        podAntiAffinity: hard
        storage:
          volumeClaimTemplate:
            spec:
              storageClassName: rook-ceph-block
              resources:
                requests:
                  storage: 1Gi
      ingress:
        enabled: true
        pathType: Prefix
        ingressClassName: internal
        annotations:
          # Forward-auth through Authelia.
          nginx.ingress.kubernetes.io/auth-method: GET
          nginx.ingress.kubernetes.io/auth-url: http://authelia.default.svc.cluster.local.:8888/api/verify
          nginx.ingress.kubernetes.io/auth-signin: https://auth.${SECRET_EXTERNAL_DOMAIN}?rm=$request_method
          nginx.ingress.kubernetes.io/auth-response-headers: Remote-User,Remote-Name,Remote-Groups,Remote-Email
          nginx.ingress.kubernetes.io/auth-snippet: proxy_set_header X-Forwarded-Method $request_method;
          # Dashboard integrations.
          hajimari.io/appName: Alert Manager
          hajimari.io/icon: mdi:alert-decagram-outline
          gethomepage.dev/enabled: "true"
          gethomepage.dev/name: Alert-Manager
          gethomepage.dev/description: Handles alerts sent by Prometheus.
          gethomepage.dev/group: Infrastructure
          gethomepage.dev/icon: alertmanager.png
          gethomepage.dev/pod-selector: >-
            app in (
            alertmanager-kube-prometheus-stack-alertmanager
            )
        hosts: ["alert-manager.${SECRET_EXTERNAL_DOMAIN}"]
        tls:
          - hosts:
              - "alert-manager.${SECRET_EXTERNAL_DOMAIN}"
      # NOTE(review): nesting of this stanza is assumed. It mirrors the
      # sub-chart `prometheus.monitor` blocks above and may be a leftover
      # that the chart does not read here - confirm intent.
      prometheus:
        monitor:
          enabled: true
          relabelings:
            - action: replace
              regex: (.*)
              replacement: $1
              sourceLabels:
                - __meta_kubernetes_pod_node_name
              targetLabel: kubernetes_node

View File

@@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/SchemaStore/schemastore/master/src/schemas/json/kustomization.json
# Manifests that make up the kube-prometheus-stack app directory.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
resources:
  - ./externalsecret.yaml
  - ./helmrelease.yaml
  - ./prometheusrule.yaml
  - ./scrapeconfig.yaml

View File

@@ -0,0 +1,34 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/prometheusrule_v1.json
# Cluster-wide alerts that do not belong to a specific application.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: miscellaneous-rules
spec:
  groups:
    - name: dockerhub
      rules:
        # Fires when more than 100 containers seen in the last 30s run
        # docker.io images. The dot is escaped so that only the literal
        # registry host matches.
        - alert: BootstrapRateLimitRisk
          annotations:
            summary: Kubernetes cluster at risk of being rate limited by dockerhub on bootstrap
          expr: count(time() - container_last_seen{image=~"(docker\\.io).*",container!=""} < 30) > 100
          for: 15m
          labels:
            severity: critical
    - name: oom
      rules:
        # Restart count grew within 10 minutes and the last termination
        # reason was OOMKilled.
        - alert: OOMKilled
          annotations:
            summary: Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has been OOMKilled {{ $value }} times in the last 10 minutes.
          expr: >-
            (kube_pod_container_status_restarts_total
            - kube_pod_container_status_restarts_total offset 10m >= 1)
            and ignoring (reason)
            min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m]) == 1
          labels:
            severity: critical
    - name: zfs
      rules:
        # Any pool reporting a state other than "online" for 15 minutes.
        - alert: ZfsUnexpectedPoolState
          annotations:
            summary: ZFS pool {{$labels.zpool}} on {{$labels.instance}} is in an unexpected state {{$labels.state}}
          expr: node_zfs_zpool_state{state!="online"} > 0
          for: 15m
          labels:
            severity: critical

View File

@@ -0,0 +1,86 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/scrapeconfig_v1alpha1.json
# Static scrape targets outside the cluster, all reported under the
# `node-exporter` job.
apiVersion: monitoring.coreos.com/v1alpha1
kind: ScrapeConfig
metadata:
  name: &name node-exporter
spec:
  metricsPath: /metrics
  staticConfigs:
    - targets:
        - "pikvm.${SECRET_INTERNAL_DOMAIN}:9100"
        - "opnsense.${SECRET_INTERNAL_DOMAIN}:9273"
        - "storage.${SECRET_INTERNAL_DOMAIN}:9100"
  # Force the job label to the resource name.
  relabelings:
    - targetLabel: job
      replacement: *name
      action: replace
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/scrapeconfig_v1alpha1.json
# Scrapes port 9882 on the storage host under the `podman-exporter` job.
apiVersion: monitoring.coreos.com/v1alpha1
kind: ScrapeConfig
metadata:
  name: &name podman-exporter
spec:
  metricsPath: /metrics
  staticConfigs:
    - targets:
        - "storage.${SECRET_INTERNAL_DOMAIN}:9882"
  # Force the job label to the resource name.
  relabelings:
    - targetLabel: job
      replacement: *name
      action: replace
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/scrapeconfig_v1alpha1.json
# Scrapes the PiKVM Prometheus export endpoint over HTTPS with basic auth.
apiVersion: monitoring.coreos.com/v1alpha1
kind: ScrapeConfig
metadata:
  name: &name pikvm
spec:
  scheme: HTTPS
  metricsPath: /api/export/prometheus/metrics
  staticConfigs:
    - targets:
        - "pikvm.${SECRET_INTERNAL_DOMAIN}"
  # Credentials are read from kube-prometheus-stack-secret.
  basicAuth:
    username:
      name: kube-prometheus-stack-secret
      key: PIKVM_USERNAME
    password:
      name: kube-prometheus-stack-secret
      key: PIKVM_PASSWORD
  # Force the job label to the resource name.
  relabelings:
    - targetLabel: job
      replacement: *name
      action: replace
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/scrapeconfig_v1alpha1.json
# Scrapes port 9811 on the storage host under the `zrepl` job.
apiVersion: monitoring.coreos.com/v1alpha1
kind: ScrapeConfig
metadata:
  name: &name zrepl
spec:
  metricsPath: /metrics
  staticConfigs:
    - targets:
        - "storage.${SECRET_INTERNAL_DOMAIN}:9811"
  # Force the job label to the resource name.
  relabelings:
    - targetLabel: job
      replacement: *name
      action: replace
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/scrapeconfig_v1alpha1.json
# Scrapes the MinIO cluster metrics endpoint over HTTPS using a token.
apiVersion: monitoring.coreos.com/v1alpha1
kind: ScrapeConfig
metadata:
  name: &name minio
spec:
  scheme: HTTPS
  metricsPath: /minio/v2/metrics/cluster
  staticConfigs:
    - targets:
        - "s3.${SECRET_INTERNAL_DOMAIN}"
  # Token is read from kube-prometheus-stack-secret.
  authorization:
    credentials:
      name: kube-prometheus-stack-secret
      key: MINIO_PROM_TOKEN
  # Force the job label to the resource name.
  relabelings:
    - targetLabel: job
      replacement: *name
      action: replace

View File

@@ -0,0 +1,23 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/helm.toolkit.fluxcd.io/helmrelease_v2.json
# Installs the prometheus-operator-crds chart as its own release.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: kube-prometheus-stack-crds
spec:
  chart:
    spec:
      sourceRef:
        kind: HelmRepository
        namespace: flux-system
        name: prometheus-community
      chart: prometheus-operator-crds
      version: 17.0.2
  interval: 30m
  # Retry failed installs and upgrades up to three times.
  install:
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3
    cleanupOnFail: true

View File

@@ -0,0 +1,6 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
# The CRDs directory contains a single HelmRelease.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
resources:
  - ./helmrelease.yaml

View File

@@ -0,0 +1,72 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
# Flux Kustomization for the Prometheus operator CRDs release.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: &app kube-prometheus-stack-crds
  namespace: flux-system
spec:
  # Source of the manifests.
  sourceRef:
    kind: GitRepository
    name: home-ops-kubernetes
  path: ./kubernetes/apps/observability/kube-prometheus-stack/crds
  # How the manifests are applied.
  targetNamespace: observability
  commonMetadata:
    labels:
      app.kubernetes.io/name: *app
  prune: false # never should be deleted
  wait: false
  # Reconciliation timing.
  interval: 30m
  timeout: 5m
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
# Flux Kustomization for the kube-prometheus-stack app; applied after the
# rook-ceph-cluster and kube-prometheus-stack-crds Kustomizations.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: &app kube-prometheus-stack
  namespace: flux-system
spec:
  dependsOn:
    - name: rook-ceph-cluster
    - name: kube-prometheus-stack-crds
  # Source of the manifests.
  sourceRef:
    kind: GitRepository
    name: home-ops-kubernetes
  path: ./kubernetes/apps/observability/kube-prometheus-stack/app
  # How the manifests are applied.
  targetNamespace: observability
  commonMetadata:
    labels:
      app.kubernetes.io/name: *app
  prune: true
  wait: false
  # Reconciliation timing.
  interval: 30m
  retryInterval: 1m
  timeout: 5m
  # Variables available for substitution in the built manifests.
  postBuild:
    substitute:
      APP: *app
      # renovate: datasource=docker depName=quay.io/thanos/thanos
      THANOS_VERSION: v0.35.0
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
# Flux Kustomization for the extra rules directory; applied after
# kube-prometheus-stack.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: &app kube-prometheus-rules
  namespace: flux-system
spec:
  dependsOn:
    - name: kube-prometheus-stack
  # Source of the manifests.
  sourceRef:
    kind: GitRepository
    name: home-ops-kubernetes
  path: ./kubernetes/apps/observability/kube-prometheus-stack/rules
  # How the manifests are applied.
  targetNamespace: observability
  commonMetadata:
    labels:
      app.kubernetes.io/name: *app
  prune: true
  wait: false
  # Reconciliation timing.
  interval: 30m
  retryInterval: 1m
  timeout: 5m

View File

@@ -0,0 +1,6 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/SchemaStore/schemastore/master/src/schemas/json/kustomization.json
# The rules directory contains a single rule file.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
resources:
  - ./zfs.yaml

View File

@@ -0,0 +1,17 @@
---
# Warns when zrepl reports replication filesystem errors.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: zrepl-replication-errors
spec:
  groups:
    - name: zrepl.rules
      rules:
        - alert: ZreplReplicationFilesystemErrors
          annotations:
            summary: 'Zrepl replication filesystem errors detected'
            description: 'Zrepl job {{ $labels.zrepl_job }} has encountered filesystem errors during replication.'
          labels:
            severity: warning
          expr: zrepl_replication_filesystem_errors > 0
          # The condition has to hold for 15 minutes before the alert fires.
          for: 15m

View File

@@ -0,0 +1,15 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/SchemaStore/schemastore/master/src/schemas/json/kustomization.json
# Entry point for the observability apps: the namespace manifest first, then
# one Flux Kustomization file per app.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
resources:
  # Pre Flux-Kustomizations
  - ./namespace.yaml
  # Flux-Kustomizations
  - ./apprise/ks.yaml
  - ./blackbox-exporter/ks.yaml
  - ./gatus/ks.yaml
  - ./grafana/ks.yaml
  - ./kube-prometheus-stack/ks.yaml
  - ./mailrise/ks.yaml
  - ./scrutiny/ks.yaml

View File

@@ -0,0 +1,15 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/external-secrets.io/externalsecret_v1beta1.json
# Copies the `mailrise` entry of the onepassword-connect store into
# mailrise-secret.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: mailrise
spec:
  secretStoreRef:
    name: onepassword-connect
    kind: ClusterSecretStore
  dataFrom:
    - extract:
        key: mailrise
  target:
    name: mailrise-secret

View File

@@ -0,0 +1,93 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2.schema.json
# Flux HelmRelease that deploys mailrise through the bjw-s app-template
# chart.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: &app mailrise
spec:
  chart:
    spec:
      sourceRef:
        kind: HelmRepository
        namespace: flux-system
        name: bjw-s
      chart: app-template
      version: 3.6.0
  interval: 30m
  maxHistory: 2
  install:
    createNamespace: true
    remediation:
      retries: 3
  upgrade:
    cleanupOnFail: true
    remediation:
      retries: 3
      strategy: rollback
  uninstall:
    keepHistory: false
  values:
    controllers:
      mailrise:
        replicas: 2
        strategy: RollingUpdate
        annotations:
          reloader.stakater.com/auto: "true"
        containers:
          app:
            image:
              repository: docker.io/yoryan/mailrise
              tag: 1.4.0@sha256:66082168090b9a83f01cc71a9d7b1994840adbbbffbe4d2cf644272fbbc23a1a
            env:
              TZ: ${TIMEZONE}
            # Remaining environment comes from the ExternalSecret-managed
            # secret.
            envFrom:
              - secretRef:
                  name: mailrise-secret
            resources:
              limits:
                memory: 200Mi
              requests:
                cpu: 10m
                memory: 10Mi
    defaultPodOptions:
      # Keep the replicas on different nodes.
      topologySpreadConstraints:
        - labelSelector:
            matchLabels:
              app.kubernetes.io/name: *app
          topologyKey: kubernetes.io/hostname
          maxSkew: 1
          whenUnsatisfiable: DoNotSchedule
    service:
      app:
        controller: *app
        type: LoadBalancer
        loadBalancerIP: 192.168.169.112
        externalTrafficPolicy: Local
        ports:
          http:
            port: 8025
    ingress:
      app:
        enabled: true
        className: internal
        annotations:
          hajimari.io/enable: "false"
        hosts:
          - host: &host "{{ .Release.Name }}.${SECRET_EXTERNAL_DOMAIN}"
            paths:
              - path: /
                service:
                  identifier: app
                  port: http
        tls:
          - hosts:
              - *host
    persistence:
      # Mounts the mailrise.conf key of mailrise-configmap read-only at
      # /etc/mailrise.conf.
      config:
        enabled: true
        type: configMap
        name: mailrise-configmap
        globalMounts:
          - path: /etc/mailrise.conf
            subPath: mailrise.conf
            readOnly: true

View File

@@ -0,0 +1,15 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/SchemaStore/schemastore/master/src/schemas/json/kustomization.json
# Mailrise manifests plus a ConfigMap generated from mailrise.yaml.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
resources:
  - ./externalsecret.yaml
  - ./helmrelease.yaml
# Fixed ConfigMap name (no hash suffix), annotated so Flux variable
# substitution is disabled for it.
generatorOptions:
  disableNameSuffixHash: true
  annotations:
    kustomize.toolkit.fluxcd.io/substitute: disabled
configMapGenerator:
  - name: mailrise-configmap
    files:
      # Stored under the key mailrise.conf.
      - mailrise.conf=./mailrise.yaml

View File

@@ -0,0 +1,7 @@
---
# Mailrise configuration with one entry for kresus@mailrise.home.arpa.
configs:
  kresus@mailrise.home.arpa:
    # Destination URL is taken from the PUSHOVER_KRESUS environment variable.
    urls:
      - !env_var PUSHOVER_KRESUS
    mailrise:
      title_template: 'KRESUS'

View File

@@ -0,0 +1,26 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
# Flux Kustomization for mailrise; applied after external-secrets-stores.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: &app mailrise
  namespace: flux-system
spec:
  dependsOn:
    - name: external-secrets-stores
  # Source of the manifests.
  sourceRef:
    kind: GitRepository
    name: home-ops-kubernetes
  path: ./kubernetes/apps/observability/mailrise/app
  # How the manifests are applied.
  targetNamespace: observability
  commonMetadata:
    labels:
      app.kubernetes.io/name: *app
  prune: true
  wait: false
  # Reconciliation timing.
  interval: 30m
  retryInterval: 1m
  timeout: 5m
  # Makes ${APP} available for substitution in the built manifests.
  postBuild:
    substitute:
      APP: *app

View File

@@ -0,0 +1,37 @@
---
# Namespace for the observability apps.
kind: Namespace
apiVersion: v1
metadata:
  name: observability
  labels:
    # Excludes the namespace from Flux pruning.
    kustomize.toolkit.fluxcd.io/prune: disabled
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/notification.toolkit.fluxcd.io/provider_v1beta3.json
# Flux notification provider that posts to the Alertmanager v2 alerts API.
apiVersion: notification.toolkit.fluxcd.io/v1beta3
kind: Provider
metadata:
  namespace: observability
  name: alert-manager
spec:
  address: http://kube-prometheus-stack-alertmanager.observability:9093/api/v2/alerts/
  type: alertmanager
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/notification.toolkit.fluxcd.io/alert_v1beta3.json
# Sends error-severity events from every HelmRelease to the alert-manager
# provider.
apiVersion: notification.toolkit.fluxcd.io/v1beta3
kind: Alert
metadata:
  namespace: observability
  name: alert-manager
spec:
  suspend: false
  providerRef:
    name: alert-manager
  eventSeverity: error
  eventSources:
    - kind: HelmRelease
      name: '*'
  # Events matching any of these regular expressions are dropped.
  exclusionList:
    - 'error.*lookup github\.com'
    - 'error.*lookup raw\.githubusercontent\.com'
    - 'dial.*tcp.*timeout'
    - 'waiting.*socket'

View File

@@ -0,0 +1,21 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/external-secrets.io/externalsecret_v1beta1.json
# Builds the `scrutiny-secret` Secret from items in the
# `onepassword-connect` cluster secret store.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: scrutiny
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: onepassword-connect
  # Items whose fields are exposed to the template below.
  dataFrom:
    - extract:
        key: pushover
    - extract:
        key: scrutiny
  target:
    name: scrutiny-secret
    template:
      engineVersion: v2
      data:
        # Quoted so the template braces are always read as a plain string.
        # PUSHOVER_API_TOKEN / PUSHOVER_USER_KEY are presumably fields of
        # the extracted `pushover` item - verify against the store.
        SCRUTINY_NOTIFY_URLS: "pushover://shoutrrr:{{ .PUSHOVER_API_TOKEN }}@{{ .PUSHOVER_USER_KEY }}"

View File

@@ -0,0 +1,128 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2.schema.json
# Scrutiny web UI/API, deployed with the bjw-s app-template chart.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  # `&app` is reused for the service's controller reference and the PVC name.
  name: &app scrutiny
spec:
  interval: 30m
  chart:
    spec:
      chart: app-template
      version: 3.6.0
      sourceRef:
        kind: HelmRepository
        name: bjw-s
        namespace: flux-system
  maxHistory: 2
  install:
    remediation:
      retries: 3
  upgrade:
    cleanupOnFail: true
    remediation:
      strategy: rollback
      retries: 3
  uninstall:
    keepHistory: false
  values:
    defaultPodOptions:
      automountServiceAccountToken: false
    controllers:
      scrutiny:
        annotations:
          # Restart the workload when a referenced Secret/ConfigMap changes.
          reloader.stakater.com/auto: "true"
        containers:
          app:
            image:
              repository: ghcr.io/analogj/scrutiny
              tag: master-web@sha256:8dfb9fdba2203eac1eba1143d54afdb74458ddd97326714d373369fce636ae28
            env:
              TZ: ${TIMEZONE}
              SCRUTINY_WEB_INFLUXDB_HOST: influx.database.svc.cluster.local
            envFrom:
              - secretRef:
                  name: scrutiny-secret
            # Probes are a per-container setting in app-template 3.x (as in
            # the apprise release in this repository). They were previously
            # declared next to `service`/`ingress`, outside the container,
            # so the custom /api/health checks were not applied to it.
            probes:
              liveness:
                enabled: true
                custom: true
                spec:
                  initialDelaySeconds: 30
                  periodSeconds: 10
                  timeoutSeconds: 1
                  failureThreshold: 3
                  httpGet:
                    path: /api/health
                    # Anchor defined at first use; aliased by the other
                    # probes and by the service port below.
                    port: &port 8080
              readiness:
                enabled: true
                custom: true
                spec:
                  initialDelaySeconds: 30
                  periodSeconds: 10
                  timeoutSeconds: 1
                  failureThreshold: 3
                  httpGet:
                    path: /api/health
                    port: *port
              startup:
                enabled: true
                custom: true
                spec:
                  # Allows up to 30 x 5s for the first successful check.
                  initialDelaySeconds: 0
                  timeoutSeconds: 1
                  periodSeconds: 5
                  failureThreshold: 30
                  httpGet:
                    path: /api/health
                    port: *port
            resources:
              requests:
                cpu: 100m
                memory: 128Mi
              limits:
                memory: 128Mi
    service:
      app:
        controller: *app
        ports:
          http:
            port: *port
    ingress:
      app:
        enabled: true
        className: internal
        annotations:
          hajimari.io/icon: mdi:harddiskstatus
          gethomepage.dev/enabled: "true"
          gethomepage.dev/name: Scrutiny
          gethomepage.dev/description: Hard Drive S.M.A.R.T Monitoring.
          gethomepage.dev/group: Infrastructure
          gethomepage.dev/icon: scrutiny.png
          gethomepage.dev/pod-selector: >-
            app in (
              scrutiny
            )
        hosts:
          - host: &host "{{ .Release.Name }}.${SECRET_EXTERNAL_DOMAIN}"
            paths:
              - path: /
                service:
                  identifier: app
                  port: http
        tls:
          - hosts:
              - *host
    persistence:
      config:
        enabled: true
        # PVC named after the release (`scrutiny`).
        existingClaim: *app
        globalMounts:
          - path: /opt/scrutiny/config
      udev:
        enabled: true
        type: hostPath
        hostPath: /run/udev
        globalMounts:
          - path: /run/udev

View File

@@ -0,0 +1,6 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/SchemaStore/schemastore/master/src/schemas/json/kustomization.json
# Resources that make up the scrutiny web application.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - ./externalsecret.yaml
  - ./helmrelease.yaml
  - ../../../../templates/volsync

View File

@@ -0,0 +1,85 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2.schema.json
# Scrutiny collector, run as a DaemonSet and reporting to the scrutiny
# web service.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: &app scrutiny-collector
spec:
  interval: 30m
  chart:
    spec:
      chart: app-template
      version: 3.6.0
      sourceRef:
        kind: HelmRepository
        name: bjw-s
        namespace: flux-system
  maxHistory: 2
  install:
    remediation:
      retries: 3
  upgrade:
    cleanupOnFail: true
    remediation:
      strategy: rollback
      retries: 3
  uninstall:
    keepHistory: false
  # The collector posts to the scrutiny API, so that release goes first.
  dependsOn:
    - name: scrutiny
  values:
    defaultPodOptions:
      automountServiceAccountToken: false
      # capabilities:
      #   add: ["SYS_RAWIO"] # allow access to smartctl
    controllers:
      scrutiny-collector:
        type: daemonset
        containers:
          app:
            image:
              repository: ghcr.io/analogj/scrutiny
              tag: master-collector@sha256:c98f3ee3ce30239b166717e94ebcc856fddd907fc105af6cb7345eea54584ff1
            env:
              COLLECTOR_API_ENDPOINT: http://scrutiny.observability.svc.cluster.local:8080
              # Each pod identifies itself by the node it runs on.
              COLLECTOR_HOST_ID:
                valueFrom:
                  fieldRef:
                    fieldPath: spec.nodeName
              TZ: ${TIMEZONE}
            # Runs privileged; the host devices are mounted below.
            securityContext:
              privileged: true
            resources:
              requests:
                cpu: 100m
                memory: 128Mi
              limits:
                memory: 128Mi
    # No Service is created for the collector.
    service:
      app:
        controller: *app
        enabled: false
    persistence:
      config:
        enabled: true
        type: emptyDir
        globalMounts:
          - path: /opt/scrutiny/config
      udev:
        enabled: true
        type: hostPath
        hostPath: /run/udev
        globalMounts:
          - path: /run/udev
      # Host block devices exposed to the collector, mounted at the same
      # path inside the container.
      nvme0n1:
        enabled: true
        type: hostPath
        hostPath: /dev/nvme0n1
        globalMounts:
          - path: /dev/nvme0n1
      sda:
        enabled: true
        type: hostPath
        hostPath: /dev/sda
        globalMounts:
          - path: /dev/sda

View File

@@ -0,0 +1,4 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/SchemaStore/schemastore/master/src/schemas/json/kustomization.json
# Resources that make up the scrutiny collector.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - ./helmrelease.yaml

View File

@@ -0,0 +1,53 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
# Flux Kustomization for the scrutiny web application.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  # `&app` is reused below for the common label and the APP substitution.
  name: &app scrutiny
  namespace: flux-system
spec:
  sourceRef:
    kind: GitRepository
    name: home-ops-kubernetes
  path: ./kubernetes/apps/observability/scrutiny/app
  targetNamespace: observability
  commonMetadata:
    labels:
      app.kubernetes.io/name: *app
  # Secret stores, storage and volsync must be ready first.
  dependsOn:
    - name: external-secrets-stores
    - name: rook-ceph-cluster
    - name: volsync
  prune: true
  wait: false
  interval: 30m
  retryInterval: 1m
  timeout: 5m
  postBuild:
    # Variables substituted into the rendered manifests.
    substitute:
      APP: *app
      VOLSYNC_CAPACITY: 2Gi
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
# Flux Kustomization for the scrutiny collector.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  # `&app` is reused below for the common label and the APP substitution.
  name: &app scrutiny-collector
  namespace: flux-system
spec:
  sourceRef:
    kind: GitRepository
    name: home-ops-kubernetes
  path: ./kubernetes/apps/observability/scrutiny/collector
  targetNamespace: observability
  commonMetadata:
    labels:
      app.kubernetes.io/name: *app
  prune: true
  wait: false
  interval: 30m
  retryInterval: 1m
  timeout: 5m
  postBuild:
    substitute:
      APP: *app