♻️ flux kustomizations

This commit is contained in:
auricom
2022-12-26 15:24:33 +01:00
parent b4572bf19a
commit ca31e11491
730 changed files with 6825 additions and 3766 deletions

View File

@@ -0,0 +1,165 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.devbu.io/helmrelease_v2beta1.json
#
# Flux HelmRelease that installs the `grafana` chart (pinned to 6.48.0) from the
# `grafana` HelmRepository in `flux-system` into the `monitoring` namespace.
# `${SECRET_...}` placeholders are not defined in this file — presumably they
# are filled in by Flux post-build variable substitution; confirm.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: grafana
  namespace: monitoring
spec:
  interval: 15m
  chart:
    spec:
      chart: grafana
      version: 6.48.0
      sourceRef:
        kind: HelmRepository
        name: grafana
        namespace: flux-system
      interval: 15m
  install:
    createNamespace: true
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3
  values:
    rbac:
      pspEnabled: false
    # Environment variables are strings; boolean-looking values are quoted so
    # the YAML parser does not retype them before they reach the chart.
    env:
      GF_EXPLORE_ENABLED: "true"
      GF_PANELS_DISABLE_SANITIZE_HTML: "true"
      GF_LOG_FILTERS: rendering:debug
      GF_DATE_FORMATS_FULL_DATE: "DD.MM.YYYY hh:mm:ss"
      GF_SECURITY_ALLOW_EMBEDDING: "true"
      # Grafana's [security] cookie_samesite accepts lax, strict, none or
      # disabled; `lax` is the documented default.
      # NOTE(review): embedding is allowed above — if dashboards are embedded
      # from another site, `none` may be what is actually wanted; confirm.
      GF_SECURITY_COOKIE_SAMESITE: lax
    # Admin credentials are read from an existing Secret instead of chart values.
    admin:
      existingSecret: grafana-admin-creds
    grafana.ini:
      auth:
        signout_redirect_url: "https://auth.${SECRET_CLUSTER_DOMAIN}/logout"
        oauth_auto_login: false
      # Generic OAuth/OIDC login; every endpoint points at the `auth.` host.
      auth.generic_oauth:
        enabled: true
        name: Authelia
        client_id: grafana
        client_secret: "${SECRET_GRAFANA_OAUTH_CLIENT_SECRET}"
        scopes: "openid profile email groups"
        empty_scopes: false
        auth_url: "https://auth.${SECRET_CLUSTER_DOMAIN}/api/oidc/authorization"
        token_url: "https://auth.${SECRET_CLUSTER_DOMAIN}/api/oidc/token"
        api_url: "https://auth.${SECRET_CLUSTER_DOMAIN}/api/oidc/userinfo"
        login_attribute_path: preferred_username
        groups_attribute_path: groups
        name_attribute_path: name
        use_pkce: true
      # NOTE(review): Grafana documents `role_attribute_path` under
      # [auth.generic_oauth]; confirm that this separate section is actually
      # read, otherwise the admins/people role mapping below has no effect.
      auth.generic_oauth.group_mapping:
        role_attribute_path: |
          contains(groups[*], 'admins') && 'Admin' || contains(groups[*], 'people') && 'Viewer'
        org_id: 1
      # NOTE(review): Grafana documents `disable_login_form` under [auth], not
      # [auth.basic]; confirm the intended section.
      auth.basic:
        disable_login_form: false
      # Unauthenticated visitors are admitted as Viewer in org 1.
      auth.anonymous:
        enabled: true
        org_name: HomeOps
        org_id: 1
        org_role: Viewer
      server:
        root_url: "https://grafana.${SECRET_CLUSTER_DOMAIN}"
      paths:
        data: /var/lib/grafana/data
        logs: /var/log/grafana
        plugins: /var/lib/grafana/plugins
        provisioning: /etc/grafana/provisioning
      analytics:
        check_for_updates: false
      log:
        mode: console
      grafana_net:
        url: https://grafana.net
    # File-based dashboard provider; its path matches the `default` group
    # declared under `dashboards` below.
    dashboardProviders:
      dashboardproviders.yaml:
        apiVersion: 1
        providers:
          - name: "default"
            orgId: 1
            folder: ""
            type: file
            disableDeletion: false
            editable: true
            options:
              path: /var/lib/grafana/dashboards/default
    datasources:
      datasources.yaml:
        apiVersion: 1
        # list of datasources that should be deleted from the database
        deleteDatasources:
          - name: Loki
            orgId: 1
        datasources:
          - name: Prometheus
            type: prometheus
            access: proxy
            url: http://thanos-query-frontend.monitoring.svc.cluster.local.:9090
            isDefault: true
          # - name: Loki
          #   type: loki
          #   access: proxy
          #   url: http://loki-gateway.monitoring.svc.cluster.local.:80
    # Dashboards downloaded by URL, each bound to the `Prometheus` datasource.
    dashboards:
      default:
        home-assistant:
          url: https://raw.githubusercontent.com/auricom/home-ops/main/kubernetes/apps/monitoring/grafana/dashboards/home-assistant.json
          datasource: Prometheus
        homelab-temperatures:
          url: https://raw.githubusercontent.com/auricom/home-ops/main/kubernetes/apps/monitoring/grafana/dashboards/homelab-temperatures.json
          datasource: Prometheus
        truenas:
          url: https://raw.githubusercontent.com/auricom/home-ops/main/kubernetes/apps/monitoring/grafana/dashboards/truenas.json
          datasource: Prometheus
    # Sidecars search every namespace for dashboards and datasources.
    sidecar:
      dashboards:
        enabled: true
        searchNamespace: ALL
        # folderAnnotation: grafana_folder
        # provider:
        #   foldersFromFilesStructure: true
      datasources:
        enabled: true
        searchNamespace: ALL
    plugins:
      - natel-discrete-panel
      - pr0ps-trackmap-panel
      - grafana-piechart-panel
      - vonage-status-panel
      - grafana-worldmap-panel
      - grafana-clock-panel
    serviceMonitor:
      enabled: true
    ingress:
      enabled: true
      ingressClassName: "nginx"
      annotations:
        # Only the private IPv4 ranges listed here may reach the ingress.
        nginx.ingress.kubernetes.io/whitelist-source-range: "10.0.0.0/8,172.16.0.0/12,192.168.0.0/16"
        # Cookie-based session affinity; the cookie lifetime is 172800 seconds.
        nginx.ingress.kubernetes.io/affinity: "cookie"
        nginx.ingress.kubernetes.io/session-cookie-name: "grafana"
        nginx.ingress.kubernetes.io/session-cookie-expires: "172800"
        nginx.ingress.kubernetes.io/session-cookie-max-age: "172800"
      hosts:
        # Anchored so that the TLS entry below reuses the same host name.
        - &host "grafana.${SECRET_CLUSTER_DOMAIN}"
      tls:
        - hosts:
            - *host
    persistence:
      enabled: false
    # Soft preference for spreading pods labelled `grafana` across different nodes.
    affinity:
      podAntiAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 100
            podAffinityTerm:
              labelSelector:
                matchExpressions:
                  - key: app.kubernetes.io/name
                    operator: In
                    values: ["grafana"]
              topologyKey: kubernetes.io/hostname

View File

@@ -0,0 +1,8 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
# Kustomize entry point for this directory: assigns the `monitoring` namespace
# and bundles the two manifests listed under `resources`.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: monitoring
resources:
# SOPS-encrypted manifest (presumably the Grafana admin credentials — confirm).
- ./secrets.sops.yaml
# Presumably the Grafana HelmRelease — confirm against the file in this directory.
- ./helmrelease.yaml

View File

@@ -0,0 +1,29 @@
# yamllint disable
apiVersion: v1
kind: Secret
metadata:
name: grafana-admin-creds
namespace: monitoring
stringData:
admin-user: ENC[AES256_GCM,data:NrH2m8c=,iv:uO1V1XHpx5q72uiZ7ZZ07oagTou64bY2cmA+O+sjbQs=,tag:0kMdvkMr3W83rmwOwmv//w==,type:str]
admin-password: ENC[AES256_GCM,data:/UlQnEL9N3pr/XIYKIY=,iv:AtUad/V1y3UG9TGUZnaT7G7lykhzm3Yx7gzaLE/0tlA=,tag:qQ9nok5b1uH+az0gmTKHEw==,type:str]
sops:
kms: []
gcp_kms: []
azure_kv: []
hc_vault: []
age:
- recipient: age1hhurqwmfvl9m3vh3hk8urulfzcdsrep2ax2neazqt435yhpamu3qj20asg
enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBHQmtZeUVvaWtSNzZBWHBx
VWxYMjY0MlFSVEN0cjhvQUFxVWNHbFB2cndzCkZURTNGQXBXSm8yT0hvWVR0aDVC
NmVhRDNaUFh4eWYyUTFqRTZIQ2o5QkUKLS0tIHhuM3lFREZyYnhlZ3JKQUJwVEdX
Z3d6U0dVUWhPTDBZcXY4cFNsRGM3cFUKdIPaiHrS/B4zNHpNaxi9zYrOv+HrZ/oP
NVkIbemYIYGKhcqSjRy53EQhIimu0q4oCxal6KkXahVB0edysD9JBQ==
-----END AGE ENCRYPTED FILE-----
lastmodified: "2022-07-08T08:38:39Z"
mac: ENC[AES256_GCM,data:y/XhXzy4Q3CQOpJFbMtMlDAOfoE3AoewrqL2LD7k3uaGtN5qcZRvZrshtlFc6aLu0Xz0Tquhk2knaRVx4iHBPosHchBQkBnOKydpI7vnqJTpTk9l6rbB08Xy4hwTZToiIonvYclceXeVbt/HKtdasq1LGJVBogNeGEQrn50kVUY=,iv:jDdz7nEw8h3J6Py9MWAnj5mTXY5jxhYvxHB53riiP/M=,tag:znmJxs869qluZNSnk8QmGg==,type:str]
pgp: []
encrypted_regex: ^(data|stringData)$
version: 3.7.3

View File

@@ -0,0 +1,437 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 1,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "line+area"
}
},
"decimals": 0,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
},
{
"color": "red",
"value": 40
}
]
},
"unit": "celsius"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 0
},
"id": 6,
"options": {
"legend": {
"calcs": [
"mean",
"max",
"min"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.0.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"exemplar": true,
"expr": "avg(hass_sensor_temperature_celsius{entity=\"sensor.temperature_living_room_temperature\"})",
"instant": false,
"interval": "",
"legendFormat": "Living Room",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"exemplar": true,
"expr": "sum(hass_sensor_temperature_celsius{entity=~\"sensor.temperature_bedroom_temperature\"})",
"hide": false,
"interval": "",
"legendFormat": "Bedroom 1",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"exemplar": true,
"expr": "sum(hass_sensor_temperature_celsius{entity=~\"sensor.temperature_office_temperature\"})",
"hide": false,
"interval": "",
"legendFormat": "Office",
"range": true,
"refId": "C"
}
],
"title": "Home",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 0,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "watt"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 9
},
"id": 7,
"options": {
"legend": {
"calcs": [
"mean",
"max"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.0.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"exemplar": true,
"expr": "avg(hass_sensor_power_w{entity=\"sensor.servers_plug_switch_electricity_power\"})",
"interval": "",
"legendFormat": "Servers",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"exemplar": true,
"expr": "avg(hass_sensor_power_w{entity=\"sensor.bureau_plug_switch_electricity_power\"})",
"hide": false,
"interval": "",
"legendFormat": "claude-fixe",
"range": true,
"refId": "B"
}
],
"title": "Computers",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 0,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "watt"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 18
},
"id": 8,
"options": {
"legend": {
"calcs": [
"mean",
"max"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.0.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"exemplar": true,
"expr": "avg(hass_sensor_power_w{entity=~\"sensor.power_plug_avr_power\"})",
"interval": "",
"legendFormat": "AVR",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"exemplar": true,
"expr": "hass_sensor_power_w{entity=~\"sensor.power_plug_projector_power\"}",
"hide": false,
"interval": "",
"legendFormat": "Projector",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"exemplar": true,
"expr": "hass_sensor_power_w{entity=~\"sensor.power_plug_subwoofer_power\"}",
"hide": false,
"interval": "",
"legendFormat": "Subwoofer",
"range": true,
"refId": "C"
}
],
"title": "Home Cinema",
"type": "timeseries"
}
],
"schemaVersion": 37,
"style": "dark",
"tags": [
"home-assistant"
],
"templating": {
"list": []
},
"time": {
"from": "now-24h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
]
},
"timezone": "",
"title": "Home-Assistant",
"uid": "sn-bOoWMk",
"version": 1,
"weekStart": ""
}

View File

@@ -0,0 +1,395 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 2,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 0,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "celsius"
},
"overrides": [
{
"__systemRef": "hideSeriesFrom",
"matcher": {
"id": "byNames",
"options": {
"mode": "exclude",
"names": [
"Value"
],
"prefix": "All except:",
"readOnly": true
}
},
"properties": [
{
"id": "custom.hideFrom",
"value": {
"legend": false,
"tooltip": false,
"viz": true
}
}
]
}
]
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 0
},
"id": 5,
"options": {
"legend": {
"calcs": [
"mean",
"max",
"min"
],
"displayMode": "table",
"placement": "right",
"showLegend": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.0.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"exemplar": true,
"expr": "avg(node_cpu_temperature_celsius{app=\"opnsense\"})",
"interval": "",
"legendFormat": "Temperature",
"range": true,
"refId": "A"
}
],
"title": "OPNsense - CPU",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "line+area"
}
},
"decimals": 0,
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
},
{
"color": "orange",
"value": 80
},
{
"color": "red",
"value": 90
}
]
},
"unit": "celsius"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 9
},
"id": 2,
"options": {
"legend": {
"calcs": [
"mean",
"max",
"min"
],
"displayMode": "table",
"placement": "right",
"showLegend": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.0.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "avg (node_hwmon_temp_celsius{chip=\"platform_coretemp_0\"}) by (chip,kubernetes_node) * ignoring(chip_name) group_left(chip_name) avg (node_hwmon_chip_names) by (chip,chip_name,kubernetes_node)",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{chip_name}}",
"range": true,
"refId": "A"
}
],
"title": "k3s Nodes - CPU",
"transformations": [],
"type": "timeseries"
},
{
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "line+area"
}
},
"decimals": 0,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
},
{
"color": "red",
"value": 40
}
]
},
"unit": "celsius"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 18
},
"id": 6,
"options": {
"legend": {
"calcs": [
"mean",
"max",
"min"
],
"displayMode": "list",
"placement": "right",
"showLegend": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.0.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"exemplar": true,
"expr": "avg(hass_sensor_temperature_celsius{entity=~\"sensor.temperature_servers_temperature\"})",
"interval": "",
"legendFormat": "datacenter",
"range": true,
"refId": "A"
}
],
"title": "Datacenter",
"type": "timeseries"
}
],
"schemaVersion": 37,
"style": "dark",
"tags": [
"homelab"
],
"templating": {
"list": []
},
"time": {
"from": "now-24h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
]
},
"timezone": "",
"title": "Homelab / Temperatures",
"uid": "aEY0BVGnz",
"version": 1,
"weekStart": ""
}

View File

@@ -0,0 +1,942 @@
{
"__inputs": [
{
"name": "DS_PROMETHEUS",
"label": "Prometheus",
"description": "",
"type": "datasource",
"pluginId": "prometheus",
"pluginName": "Prometheus"
}
],
"__requires": [
{
"type": "panel",
"id": "gauge",
"name": "Gauge",
"version": ""
},
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "8.1.0"
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
},
{
"type": "panel",
"id": "stat",
"name": "Stat",
"version": ""
},
{
"type": "panel",
"id": "timeseries",
"name": "Time series",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"limit": 100,
"name": "Annotations & Alerts",
"showIn": 0,
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"id": null,
"iteration": 1630317513315,
"links": [],
"panels": [
{
"collapsed": false,
"datasource": "${DS_PROMETHEUS}",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 2,
"panels": [],
"repeat": null,
"title": "Summary",
"type": "row"
},
{
"datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "rgb(255, 255, 255)",
"value": 600
}
]
},
"unit": "dtdurations"
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 2,
"x": 0,
"y": 1
},
"id": 7,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"last"
],
"fields": "",
"values": false
},
"text": {},
"textMode": "value"
},
"pluginVersion": "8.1.0",
"targets": [
{
"exemplar": true,
"expr": "system_uptime{host=\"$host\"}",
"instant": true,
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "Uptime",
"type": "stat"
},
{
"datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"max": 100,
"min": 0,
"thresholds": {
"mode": "percentage",
"steps": [
{
"color": "semi-dark-green",
"value": null
},
{
"color": "semi-dark-yellow",
"value": 80
},
{
"color": "semi-dark-red",
"value": 90
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 4,
"x": 2,
"y": 1
},
"id": 4,
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true,
"text": {}
},
"pluginVersion": "8.1.0",
"targets": [
{
"exemplar": true,
"expr": "sum(100-cpu_usage_idle{host=\"$host\", cpu=\"cpu-total\"})",
"instant": true,
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "CPU used",
"type": "gauge"
},
{
"datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"max": 100,
"min": 0,
"thresholds": {
"mode": "percentage",
"steps": [
{
"color": "semi-dark-green",
"value": null
},
{
"color": "semi-dark-yellow",
"value": 95
},
{
"color": "semi-dark-red",
"value": 98
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 4,
"x": 6,
"y": 1
},
"id": 5,
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true,
"text": {}
},
"pluginVersion": "8.1.0",
"targets": [
{
"exemplar": true,
"expr": "mem_used_percent{host=\"$host\"}",
"instant": true,
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "Memory used",
"type": "gauge"
},
{
"datasource": "${DS_PROMETHEUS}",
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"max": 100,
"min": 0,
"thresholds": {
"mode": "percentage",
"steps": [
{
"color": "semi-dark-green",
"value": null
},
{
"color": "semi-dark-yellow",
"value": 70
},
{
"color": "semi-dark-red",
"value": 80
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 4,
"x": 10,
"y": 1
},
"id": 8,
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true,
"text": {
"titleSize": 1
}
},
"pluginVersion": "8.1.0",
"targets": [
{
"exemplar": true,
"expr": "zfs_pool_capacity{host=\"$host\", pool=~\"storage|vol1\"}",
"instant": true,
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "Pool capacity",
"type": "gauge"
},
{
"collapsed": false,
"datasource": "${DS_PROMETHEUS}",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 5
},
"id": 12,
"panels": [],
"title": "System",
"type": "row"
},
{
"datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"max": 100,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 12,
"x": 0,
"y": 6
},
"id": 10,
"options": {
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "8.0.5",
"targets": [
{
"exemplar": true,
"expr": "sum(100-cpu_usage_idle{host=\"$host\", cpu=\"cpu-total\"})",
"interval": "",
"legendFormat": "CPU usage",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "CPU Usage",
"type": "timeseries"
},
{
"datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "decbytes"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 12,
"x": 12,
"y": 6
},
"id": 13,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "8.0.5",
"targets": [
{
"exemplar": true,
"expr": "rate(net_bytes_recv{host=\"$host\", interface=~\"em0|igb0\"}[5m])",
"interval": "",
"legendFormat": "received by {{interface}}",
"refId": "A"
},
{
"exemplar": true,
"expr": "rate(net_bytes_sent{host=\"$host\", interface=~\"em0|igb0\"}[5m])",
"hide": false,
"interval": "",
"legendFormat": "sent by {{interface}}",
"refId": "B"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Network Usage",
"type": "timeseries"
},
{
"datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "celsius"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 12,
"x": 0,
"y": 11
},
"id": 14,
"options": {
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "8.0.5",
"targets": [
{
"exemplar": true,
"expr": "sum(temp_temp{host=\"$host\"}) / 4",
"interval": "",
"legendFormat": "Temperature",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "CPU temperature",
"type": "timeseries"
},
{
"datasource": "${DS_PROMETHEUS}",
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "celsius"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 12,
"x": 12,
"y": 11
},
"id": 15,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "8.0.5",
"targets": [
{
"exemplar": false,
"expr": "sum(disktemp_temperature{host=\"$host\"}) by (disk)",
"hide": false,
"instant": false,
"interval": "",
"legendFormat": "{{disk}}",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Disks temperatures",
"type": "timeseries"
},
{
"collapsed": false,
"datasource": "${DS_PROMETHEUS}",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 16
},
"id": 17,
"panels": [],
"title": "Block devices",
"type": "row"
},
{
"datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 17
},
"id": 19,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "8.0.5",
"targets": [
{
"exemplar": true,
"expr": "sum(rate(diskio_read_bytes{host=\"$host\",name=~\"ada[0-9]|nvd[0-9]\"}[5m])) by (name)",
"interval": "",
"legendFormat": "{{name}}",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Disk IO Reads",
"type": "timeseries"
},
{
"datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 17
},
"id": 20,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "8.0.5",
"targets": [
{
"exemplar": true,
"expr": "sum(rate(diskio_write_bytes{host=\"$host\",name=~\"ada[0-9]|nvd[0-9]\"}[5m])) by (name)",
"interval": "",
"legendFormat": "{{name}}",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Disk IO Writes",
"type": "timeseries"
}
],
"schemaVersion": 30,
"style": "dark",
"tags": [
"truenas",
"telegraf"
],
"templating": {
"list": [
{
"allValue": null,
"current": {},
"datasource": "${DS_PROMETHEUS}",
"definition": "label_values(system_uptime, host)",
"description": null,
"error": null,
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "host",
"options": [],
"query": {
"query": "label_values(system_uptime, host)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "/(.*)/",
"skipUrlSync": false,
"sort": 2,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "TrueNAS",
"uid": "l65MB4M7z",
"version": 1
}

View File

@@ -0,0 +1,25 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.devbu.io/kustomization_v1beta2.json
#
# Flux Kustomization that reconciles the Grafana app directory from the
# `home-ops-kubernetes` GitRepository and reports healthy only once the
# `grafana` HelmRelease in `monitoring` is ready.
apiVersion: kustomize.toolkit.fluxcd.io/v1beta2
kind: Kustomization
metadata:
  name: cluster-apps-grafana
  namespace: flux-system
  labels:
    # Presumably matched by a repo-level patch that turns on variable
    # substitution for this Kustomization — confirm where the label is consumed.
    substitution.flux.home.arpa/enabled: "true"
spec:
  # NOTE(review): the dependency on cloudnative-pg looks unrelated to Grafana
  # unless Grafana is backed by a Postgres database — confirm it is needed.
  dependsOn:
    - name: cluster-apps-cloudnative-pg-app
  path: ./kubernetes/apps/monitoring/grafana/app
  # Objects removed from the path are deleted from the cluster.
  prune: true
  sourceRef:
    kind: GitRepository
    name: home-ops-kubernetes
  healthChecks:
    - apiVersion: helm.toolkit.fluxcd.io/v2beta1
      kind: HelmRelease
      name: grafana
      namespace: monitoring
  # Reconcile every 30 minutes, retry failures after 1 minute, and give each
  # apply/health-check cycle 3 minutes.
  interval: 30m
  retryInterval: 1m
  timeout: 3m

View File

@@ -0,0 +1,267 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.devbu.io/helmrelease_v2beta1.json
# HelmRelease for kube-prometheus-stack: Prometheus operator, a single
# Prometheus instance with a Thanos sidecar, and Alertmanager routed to
# Pushover. Grafana is disabled here (only its dashboards are deployed).
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: kube-prometheus-stack
  namespace: monitoring
spec:
  interval: 15m
  chart:
    spec:
      chart: kube-prometheus-stack
      version: 43.2.0
      sourceRef:
        kind: HelmRepository
        name: prometheus-community
        namespace: flux-system
      interval: 5m
  install:
    createNamespace: true
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3
  values:
    ###
    ### Component values
    ###
    kubeApiServer:
      enabled: true
    kubeControllerManager:
      enabled: false
    kubeEtcd:
      enabled: false
    kubelet:
      enabled: true
      serviceMonitor:
        metricRelabelings:
          # Copy the `node` label into `instance`.
          - action: replace
            sourceLabels:
              - node
            targetLabel: instance
    kubeProxy:
      enabled: false
    kubeScheduler:
      enabled: false
    # `kubeStateMetrics` only toggles the sub-chart; the sub-chart's own
    # values live under `kube-state-metrics` (same pattern as
    # nodeExporter / prometheus-node-exporter below).
    kubeStateMetrics:
      enabled: true
    kube-state-metrics:
      metricLabelsAllowlist:
        - "persistentvolumeclaims=[*]"
      prometheus:
        monitor:
          enabled: true
          relabelings:
            # Expose the pod's node name as `kubernetes_node`.
            - action: replace
              regex: (.*)
              replacement: $1
              sourceLabels:
                - __meta_kubernetes_pod_node_name
              targetLabel: kubernetes_node
    grafana:
      enabled: false
      forceDeployDashboards: true
    nodeExporter:
      enabled: true
    prometheus-node-exporter:
      resources:
        requests:
          cpu: 23m
          memory: 64M
        limits:
          memory: 64M
      prometheus:
        monitor:
          enabled: true
          relabelings:
            - action: replace
              regex: (.*)
              replacement: $1
              sourceLabels:
                - __meta_kubernetes_pod_node_name
              targetLabel: kubernetes_node
    ###
    ### Prometheus operator values
    ###
    prometheusOperator:
      prometheusConfigReloader:
        resources:
          requests:
            cpu: 100m
            memory: 50Mi
          limits:
            cpu: 300m
            memory: 100Mi
    ###
    ### Prometheus instance values
    ###
    prometheus:
      ingress:
        enabled: true
        pathType: Prefix
        ingressClassName: "nginx"
        annotations:
          auth.home.arpa/enabled: "true"
        hosts: ["prometheus.${SECRET_CLUSTER_DOMAIN}"]
        tls:
          - hosts:
              - "prometheus.${SECRET_CLUSTER_DOMAIN}"
      prometheusSpec:
        replicas: 1
        replicaExternalLabelName: "replica"
        # Select rules/monitors/probes cluster-wide rather than only those
        # labelled by this Helm release.
        ruleSelectorNilUsesHelmValues: false
        serviceMonitorSelectorNilUsesHelmValues: false
        podMonitorSelectorNilUsesHelmValues: false
        probeSelectorNilUsesHelmValues: false
        retention: 6h
        enableAdminAPI: true
        walCompression: true
        storageSpec:
          volumeClaimTemplate:
            spec:
              storageClassName: "rook-ceph-block"
              resources:
                requests:
                  storage: 20Gi
        thanos:
          image: quay.io/thanos/thanos:v0.29.0
          # renovate: datasource=docker depName=quay.io/thanos/thanos
          version: "v0.29.0"
          objectStorageConfig:
            name: thanos-objstore-secret
            key: objstore.yml
        # Static scrape targets outside the cluster (port 9273 on each host).
        additionalScrapeConfigs:
          - job_name: "opnsense"
            scrape_interval: 60s
            metrics_path: "/metrics"
            static_configs:
              - targets: ["${LOCAL_LAN_OPNSENSE}:9273"]
                labels:
                  app: "opnsense"
          - job_name: "truenas"
            scrape_interval: 60s
            metrics_path: "/metrics"
            static_configs:
              - targets: ["${LOCAL_LAN_TRUENAS}:9273"]
                labels:
                  app: "truenas"
          - job_name: "truenas-remote"
            scrape_interval: 60s
            metrics_path: "/metrics"
            static_configs:
              - targets: ["${LOCAL_LAN_TRUENAS_REMOTE}:9273"]
                labels:
                  app: "truenas-remote"
      thanosService:
        enabled: true
      thanosServiceMonitor:
        enabled: true
      thanosIngress:
        enabled: true
        pathType: Prefix
        ingressClassName: "nginx"
        annotations:
          auth.home.arpa/enabled: "true"
        # NOTE(review): this host uses SECRET_DOMAIN while the other
        # ingresses in this file use SECRET_CLUSTER_DOMAIN - confirm intended.
        hosts:
          - &thanosHost "thanos-sidecar.${SECRET_DOMAIN}"
        tls:
          - hosts:
              - *thanosHost
    alertmanager:
      config:
        global:
          resolve_timeout: 5m
        receivers:
          - name: "null"
          - name: "pushover"
            pushover_configs:
              # Quoted so the substituted values are always parsed as strings,
              # matching the other ${...} placeholders in this file.
              - user_key: "${SECRET_KUBE_PROMETHEUS_STACK_ALERTMANAGER_PUSHOVER_USER_KEY}"
                token: "${SECRET_KUBE_PROMETHEUS_STACK_ALERTMANAGER_PUSHOVER_TOKEN}"
                send_resolved: true
                html: true
                # Priority 1 while firing, 0 once resolved.
                priority: |-
                  {{ if eq .Status "firing" }}1{{ else }}0{{ end }}
                url_title: View in Alert Manager
                title: |-
                  [{{ .Status | toUpper -}}
                  {{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{- end -}}
                  ] {{ .CommonLabels.alertname }}
                message: |-
                  {{- range .Alerts }}
                  {{- if ne .Labels.severity "" }}
                  <b>Severity:</b> <i>{{ .Labels.severity }}</i>
                  {{- else }}
                  <b>Severity:</b> <i>N/A</i>
                  {{- end }}
                  {{- if ne .Annotations.description "" }}
                  <b>Description:</b> <i>{{ .Annotations.description }}</i>
                  {{- else if ne .Annotations.summary "" }}
                  <b>Summary:</b> <i>{{ .Annotations.summary }}</i>
                  {{- else if ne .Annotations.message "" }}
                  <b>Message:</b> <i>{{ .Annotations.message }}</i>
                  {{- else }}
                  <b>Description:</b> <i>N/A</i>
                  {{- end }}
                  {{- if gt (len .Labels.SortedPairs) 0 }}
                  <b>Details:</b>
                  {{- range .Labels.SortedPairs }}
                  • <b>{{ .Name }}:</b> <i>{{ .Value }}</i>
                  {{- end }}
                  {{- end }}
                  {{- end }}
        route:
          receiver: "pushover"
          routes:
            # Housekeeping alerts are dropped.
            - receiver: "null"
              matchers:
                - alertname =~ "InfoInhibitor|Watchdog|RebootScheduled"
            - receiver: "pushover"
              matchers:
                - severity = "critical"
              continue: true
        inhibit_rules:
          # A critical alert mutes the matching warning for the same
          # alertname/namespace.
          - source_matchers:
              - severity = "critical"
            target_matchers:
              - severity = "warning"
            equal: ["alertname", "namespace"]
      alertmanagerSpec:
        replicas: 1
        podAntiAffinity: hard
        storage:
          volumeClaimTemplate:
            spec:
              storageClassName: rook-ceph-block
              resources:
                requests:
                  storage: 1Gi
      ingress:
        enabled: true
        pathType: Prefix
        ingressClassName: "nginx"
        annotations:
          auth.home.arpa/enabled: "true"
        hosts: ["alert-manager.${SECRET_CLUSTER_DOMAIN}"]
        tls:
          - hosts:
              - "alert-manager.${SECRET_CLUSTER_DOMAIN}"
      # NOTE(review): the nesting of this stanza is reconstructed; it cannot be
      # a top-level `prometheus` key (that would duplicate the one above).
      # `alertmanager.prometheus.monitor` does not look like a value the chart
      # documents - confirm whether it is a leftover that can be removed.
      prometheus:
        monitor:
          enabled: true
          relabelings:
            - action: replace
              regex: (.*)
              replacement: $1
              sourceLabels:
                - __meta_kubernetes_pod_node_name
              targetLabel: kubernetes_node

View File

@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
# Kustomize entry point for the kube-prometheus-stack app; every listed
# resource is placed in the `monitoring` namespace.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
namespace: monitoring
resources:
  - ./helmrelease.yaml

View File

@@ -0,0 +1,25 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.devbu.io/kustomization_v1beta2.json
# Flux Kustomization that reconciles the kube-prometheus-stack app directory.
kind: Kustomization
apiVersion: kustomize.toolkit.fluxcd.io/v1beta2
metadata:
  namespace: flux-system
  name: cluster-apps-kube-prometheus-stack-app
  labels:
    # NOTE(review): presumably opts this object into a shared postBuild
    # substitution patch defined elsewhere in the repo - confirm.
    substitution.flux.home.arpa/enabled: "true"
spec:
  # Where the manifests come from.
  sourceRef:
    kind: GitRepository
    name: home-ops-kubernetes
  path: ./kubernetes/apps/monitoring/kube-prometheus-stack/app
  prune: true
  # Reconciled only after the Rook-Ceph cluster Kustomization is ready
  # (the release requests rook-ceph-block volumes).
  dependsOn:
    - name: cluster-apps-rook-ceph-cluster
  # Readiness is gated on the HelmRelease.
  healthChecks:
    - kind: HelmRelease
      apiVersion: helm.toolkit.fluxcd.io/v2beta1
      namespace: monitoring
      name: kube-prometheus-stack
  # Timing.
  interval: "30m"
  retryInterval: "1m"
  timeout: "5m"

View File

@@ -0,0 +1,14 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
# Top-level index for the monitoring namespace: the Namespace itself first,
# then one Flux Kustomization (ks.yaml) per application.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
resources:
  # Pre Flux-Kustomizations
  - ./namespace.yaml
  # Flux-Kustomizations
  - ./grafana/ks.yaml
  - ./kube-prometheus-stack/ks.yaml
  - ./loki/ks.yaml
  - ./smartctl-exporter/ks.yaml
  - ./thanos/ks.yaml
  - ./vector/ks.yaml

View File

@@ -0,0 +1,130 @@
---
# ConfigMap holding the Loki ruler alerting rules as a single rule file.
# Every rule follows the same shape: count matching log lines over a 2m
# window, fire when the count is > 0 for 2m, severity critical, category logs.
#
# NOTE(review): the Vector aggregator config in this commit labels Kubernetes
# log streams with `k8s_app` (not `app`); confirm the `app=` selectors below
# match a label that actually exists on the ingested streams.
apiVersion: v1
kind: ConfigMap
metadata:
  name: loki-alerting-rules
  namespace: monitoring
data:
  loki-alerting-rules.yaml: |-
    groups:
      #
      # SMART Failures
      #
      - name: smart-failure
        rules:
          - alert: SmartFailures
            expr: |
              sum by (hostname) (count_over_time({hostname=~".+"} | json | _SYSTEMD_UNIT = "smartmontools.service" !~ "(?i)previous self-test completed without error" !~ "(?i)Prefailure" |~ "(?i)(error|fail)"[2m])) > 0
            for: 2m
            labels:
              severity: critical
              category: logs
            annotations:
              summary: "SMART has reported failures on host {{ $labels.hostname }}"
      #
      # zigbee2mqtt
      #
      - name: zigbee2mqtt
        rules:
          - alert: ZigbeeUnableToReachMQTT
            expr: |
              sum(count_over_time({app="zigbee2mqtt"} |~ "(?i)not connected to mqtt server"[2m])) > 0
            for: 2m
            labels:
              severity: critical
              category: logs
            annotations:
              summary: "Zigbee2mqtt is unable to reach MQTT"
      #
      # zwavejs2mqtt
      #
      - name: zwavejs2mqtt
        rules:
          - alert: ZwaveUnableToReachMQTT
            expr: |
              sum(count_over_time({app="zwavejs2mqtt"} |~ "(?i)error while connecting mqtt"[2m])) > 0
            for: 2m
            labels:
              severity: critical
              category: logs
            annotations:
              summary: "Zwavejs2mqtt is unable to reach MQTT"
      #
      # frigate
      #
      - name: frigate
        rules:
          - alert: FrigateUnableToReachMQTT
            expr: |
              sum(count_over_time({app="frigate"} |~ "(?i)unable to connect to mqtt server"[2m])) > 0
            for: 2m
            labels:
              severity: critical
              category: logs
            annotations:
              summary: "Frigate is unable to reach MQTT"
      #
      # *arr
      #
      - name: arr
        rules:
          - alert: ArrDatabaseIsLocked
            expr: |
              sum by (app) (count_over_time({app=~".*arr"} |~ "(?i)database is locked"[2m])) > 0
            for: 2m
            labels:
              severity: critical
              category: logs
            annotations:
              summary: "{{ $labels.app }} is experiencing locked database issues"
          - alert: ArrDatabaseIsMalformed
            expr: |
              sum by (app) (count_over_time({app=~".*arr"} |~ "(?i)database disk image is malformed"[2m])) > 0
            for: 2m
            labels:
              severity: critical
              category: logs
            annotations:
              summary: "{{ $labels.app }} is experiencing malformed database disk image issues"
      #
      # home-assistant
      #
      - name: home-assistant
        rules:
          - alert: HomeAssistantUnableToReachPostgresql
            expr: |
              sum by (app) (count_over_time({app="home-assistant"} |~ "(?i)error in database connectivity"[2m])) > 0
            for: 2m
            labels:
              severity: critical
              category: logs
            annotations:
              summary: "Home Assistant is unable to connect to postgresql"
      #
      # valetudo
      #
      - name: valetudo
        rules:
          - alert: ValetudoUnableToReachMQTT
            expr: |
              sum by (hostname) (count_over_time({hostname="valetudo"} |~ "(?i).*error.*mqtt.*"[2m])) > 0
            for: 2m
            labels:
              severity: critical
              category: logs
            annotations:
              summary: "Valetudo is unable to connect to mqtt"
      #
      # node-red
      #
      - name: node-red
        rules:
          - alert: NodeRedUnableToReachHomeAssistant
            expr: |
              sum by (app) (count_over_time({app="node-red"} |~ "(?i)home assistant.*connecting to undefined"[2m])) > 0
            for: 2m
            labels:
              severity: critical
              category: logs
            annotations:
              summary: "Node-Red is unable to connect to Home Assistant"

View File

@@ -0,0 +1,189 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.devbu.io/helmrelease_v2beta1.json
# HelmRelease for Loki (gateway / write / read, 3 replicas each) with chunks
# in an S3 bucket whose name, endpoint and credentials are injected through
# `valuesFrom` at the bottom of this file.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: loki
  namespace: monitoring
spec:
  interval: 15m
  chart:
    spec:
      chart: loki
      version: 3.8.0
      sourceRef:
        kind: HelmRepository
        name: grafana
        namespace: flux-system
  install:
    createNamespace: true
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3
  values:
    loki:
      structuredConfig:
        auth_enabled: false
        server:
          log_level: info
          http_listen_port: 3100
          grpc_listen_port: 9095
        memberlist:
          join_members: ["loki-memberlist"]
        limits_config:
          retention_period: 14d
          enforce_metric_name: false
          reject_old_samples: true
          reject_old_samples_max_age: 168h
          max_cache_freshness_per_query: 10m
          split_queries_by_interval: 15m
          ingestion_rate_mb: 8
          ingestion_burst_size_mb: 16
        schema_config:
          configs:
            - from: "2021-08-01"
              store: boltdb-shipper
              object_store: s3
              schema: v11
              index:
                prefix: loki_index_
                period: 24h
        common:
          path_prefix: /var/loki
          replication_factor: 3
          storage:
            s3:
              # bucketnames, endpoint and credentials come from valuesFrom.
              s3: null
              insecure: true
              s3forcepathstyle: true
          ring:
            kvstore:
              store: memberlist
        ruler:
          enable_api: true
          enable_alertmanager_v2: true
          alertmanager_url: http://kube-prometheus-stack-alertmanager:9093
          # Rules are read from the local directory /rules; the read pods
          # mount the rule files below it (see read.extraVolumes).
          storage:
            type: local
            local:
              directory: /rules
          rule_path: /tmp/scratch
          ring:
            kvstore:
              store: memberlist
        distributor:
          ring:
            kvstore:
              store: memberlist
        compactor:
          working_directory: /var/loki/boltdb-shipper-compactor
          shared_store: s3
          compaction_interval: 10m
          retention_enabled: true
          retention_delete_delay: 2h
          retention_delete_worker_count: 150
        ingester:
          max_chunk_age: 1h
          lifecycler:
            ring:
              kvstore:
                store: memberlist
        analytics:
          reporting_enabled: false
    gateway:
      enabled: true
      replicas: 3
      # Prefer spreading gateway pods across nodes.
      affinity: |
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 1
              podAffinityTerm:
                labelSelector:
                  matchLabels:
                    {{- include "loki.gatewaySelectorLabels" . | nindent 12 }}
                topologyKey: kubernetes.io/hostname
      ingress:
        enabled: true
        ingressClassName: "nginx"
        hosts:
          - host: &host "loki.${SECRET_CLUSTER_DOMAIN}"
            paths:
              - path: /
                pathType: Prefix
        tls:
          - hosts:
              - *host
    write:
      replicas: 3
      affinity: |
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 1
              podAffinityTerm:
                labelSelector:
                  matchLabels:
                    {{- include "loki.writeSelectorLabels" . | nindent 12 }}
                topologyKey: kubernetes.io/hostname
      persistence:
        size: 10Gi
        storageClass: rook-ceph-block
    read:
      replicas: 3
      affinity: |
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 1
              podAffinityTerm:
                labelSelector:
                  matchLabels:
                    {{- include "loki.readSelectorLabels" . | nindent 12 }}
                topologyKey: kubernetes.io/hostname
      extraVolumeMounts:
        - name: loki-rules
          mountPath: /rules/fake
        - name: loki-rules-tmp
          mountPath: /tmp/scratch
        - name: loki-tmp
          mountPath: /tmp/loki-tmp
      extraVolumes:
        # Mount the alerting rules shipped by ./config-map.yaml. This was an
        # emptyDir, which left the ruler's rule directory empty and the
        # `loki-alerting-rules` ConfigMap unused.
        - name: loki-rules
          configMap:
            name: loki-alerting-rules
        - name: loki-rules-tmp
          emptyDir: {}
        - name: loki-tmp
          emptyDir: {}
      persistence:
        size: 10Gi
        storageClass: rook-ceph-block
    # NOTE(review): nesting of the monitoring sub-keys is reconstructed from
    # the chart's usual layout - confirm against the chart's values.yaml.
    monitoring:
      serviceMonitor:
        enabled: false
        metricsInstance:
          enabled: false
      selfMonitoring:
        enabled: false
        grafanaAgent:
          installOperator: false
      lokiCanary:
        enabled: false
    test:
      enabled: false
  # Bucket coordinates and credentials, read from the ConfigMap and Secret
  # named `loki-chunks-bucket`.
  valuesFrom:
    - kind: ConfigMap
      name: loki-chunks-bucket
      valuesKey: BUCKET_NAME
      targetPath: loki.structuredConfig.common.storage.s3.bucketnames
    - kind: ConfigMap
      name: loki-chunks-bucket
      valuesKey: BUCKET_HOST
      targetPath: loki.structuredConfig.common.storage.s3.endpoint
    - kind: Secret
      name: loki-chunks-bucket
      valuesKey: AWS_ACCESS_KEY_ID
      targetPath: loki.structuredConfig.common.storage.s3.access_key_id
    - kind: Secret
      name: loki-chunks-bucket
      valuesKey: AWS_SECRET_ACCESS_KEY
      targetPath: loki.structuredConfig.common.storage.s3.secret_access_key

View File

@@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
# Kustomize entry point for the Loki app: bucket claim, alerting-rules
# ConfigMap and the HelmRelease, all placed in `monitoring`.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
namespace: monitoring
resources:
  - ./object-bucket-claim.yaml
  - ./config-map.yaml
  - ./helmrelease.yaml

View File

@@ -0,0 +1,11 @@
---
# Requests an object-storage bucket named `loki-chunks` (capped at 50G) from
# the `rook-ceph-bucket` storage class.
kind: ObjectBucketClaim
apiVersion: objectbucket.io/v1alpha1
metadata:
  namespace: monitoring
  name: loki-chunks-bucket
spec:
  storageClassName: rook-ceph-bucket
  bucketName: loki-chunks
  additionalConfig:
    maxSize: "50G"

View File

@@ -0,0 +1,25 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.devbu.io/kustomization_v1beta2.json
# Flux Kustomization that reconciles the Loki app directory.
kind: Kustomization
apiVersion: kustomize.toolkit.fluxcd.io/v1beta2
metadata:
  namespace: flux-system
  name: cluster-apps-loki-app
  labels:
    # NOTE(review): presumably opts this object into a shared postBuild
    # substitution patch defined elsewhere in the repo - confirm.
    substitution.flux.home.arpa/enabled: "true"
spec:
  # Where the manifests come from.
  sourceRef:
    kind: GitRepository
    name: home-ops-kubernetes
  path: ./kubernetes/apps/monitoring/loki/app
  prune: true
  # Reconciled only after the Rook-Ceph cluster Kustomization is ready
  # (Loki needs rook-ceph-block volumes and a rook-ceph-bucket).
  dependsOn:
    - name: cluster-apps-rook-ceph-cluster
  # Readiness is gated on the HelmRelease.
  healthChecks:
    - kind: HelmRelease
      apiVersion: helm.toolkit.fluxcd.io/v2beta1
      namespace: monitoring
      name: loki
  # Timing.
  interval: "30m"
  retryInterval: "1m"
  timeout: "5m"

View File

@@ -0,0 +1,7 @@
---
# The namespace shared by every monitoring application.
kind: Namespace
apiVersion: v1
metadata:
  name: monitoring
  labels:
    # Keeps Flux garbage collection from deleting the namespace.
    kustomize.toolkit.fluxcd.io/prune: "disabled"

View File

@@ -0,0 +1,34 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.devbu.io/helmrelease_v2beta1.json
# HelmRelease for the Prometheus smartctl exporter, watching /dev/sda and
# /dev/nvme0n1 and exposing a ServiceMonitor.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: &app smartctl-exporter
  # Was `default`; the app kustomization sets `namespace: monitoring` and the
  # Flux health check looks for this release in `monitoring`, so the manifest
  # now states the namespace it actually lands in.
  namespace: monitoring
spec:
  interval: 15m
  chart:
    spec:
      chart: prometheus-smartctl-exporter
      version: 0.3.1
      sourceRef:
        kind: HelmRepository
        name: prometheus-community
        namespace: flux-system
  install:
    createNamespace: true
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3
  values:
    fullnameOverride: *app
    config:
      devices:
        - /dev/sda
        - /dev/nvme0n1
    serviceMonitor:
      enabled: true
    prometheusRules:
      enabled: false

View File

@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
# Kustomize entry point for the smartctl-exporter app; every listed resource
# is placed in the `monitoring` namespace.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
namespace: monitoring
resources:
  - ./helmrelease.yaml

View File

@@ -0,0 +1,23 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.devbu.io/kustomization_v1beta2.json
# Flux Kustomization that reconciles the smartctl-exporter app directory.
# It declares no dependencies on other Kustomizations.
kind: Kustomization
apiVersion: kustomize.toolkit.fluxcd.io/v1beta2
metadata:
  namespace: flux-system
  name: cluster-apps-smartctl-exporter
  labels:
    # NOTE(review): presumably opts this object into a shared postBuild
    # substitution patch defined elsewhere in the repo - confirm.
    substitution.flux.home.arpa/enabled: "true"
spec:
  # Where the manifests come from.
  sourceRef:
    kind: GitRepository
    name: home-ops-kubernetes
  path: ./kubernetes/apps/monitoring/smartctl-exporter/app
  prune: true
  # Readiness is gated on the HelmRelease.
  healthChecks:
    - kind: HelmRelease
      apiVersion: helm.toolkit.fluxcd.io/v2beta1
      namespace: monitoring
      name: smartctl-exporter
  # Timing.
  interval: "30m"
  retryInterval: "1m"
  timeout: "3m"

View File

@@ -0,0 +1,123 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.devbu.io/helmrelease_v2beta1.json
# HelmRelease for Thanos (query, query-frontend, bucketweb, compactor and
# store gateway) backed by an S3 object store whose coordinates are injected
# from the `thanos` Secret through `valuesFrom`.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: thanos
  namespace: monitoring
spec:
  interval: 15m
  chart:
    spec:
      chart: thanos
      version: 11.6.5
      sourceRef:
        kind: HelmRepository
        name: bitnami
        namespace: flux-system
  install:
    createNamespace: true
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3
  values:
    image:
      registry: quay.io
      repository: thanos/thanos
      tag: v0.29.0
    objstoreConfig:
      # bucket, endpoint, region and keys come from valuesFrom.
      type: s3
    query:
      enabled: true
      replicaCount: 2
      podAntiAffinityPreset: hard
      replicaLabels:
        - replica
      dnsDiscovery:
        sidecarsService: kube-prometheus-stack-thanos-discovery
        sidecarsNamespace: monitoring
      stores:
        - "dnssrv+_grpc._tcp.kube-prometheus-stack-thanos-discovery"
        - "thanos-store.${SECRET_DOMAIN}:443"
      ingress:
        enabled: true
        # Each ingress gets its own anchor; the same anchor name was
        # previously defined twice in this document.
        hostname: &queryHost "thanos-query.${SECRET_CLUSTER_DOMAIN}"
        annotations:
          auth.home.arpa/enabled: "true"
        ingressClassName: "nginx"
        tls: true
        extraTls:
          - hosts:
              - *queryHost
      resources:
        requests:
          cpu: 15m
          memory: 64M
        limits:
          memory: 99M
    queryFrontend:
      enabled: true
    bucketweb:
      enabled: true
      refresh: "10m"
    compactor:
      enabled: true
      extraFlags:
        - "--compact.concurrency"
        - "4"
      retentionResolutionRaw: 14d
      retentionResolution5m: 14d
      retentionResolution1h: 30d
      ingress:
        enabled: true
        hostname: &compactorHost "thanos-compactor.${SECRET_CLUSTER_DOMAIN}"
        ingressClassName: "nginx"
        tls: true
        extraTls:
          - hosts:
              - *compactorHost
      persistence:
        enabled: true
        storageClass: "rook-ceph-block"
        size: 15Gi
    storegateway:
      enabled: true
      resources:
        requests:
          cpu: 23m
          memory: 204M
        limits:
          memory: 226M
      persistence:
        enabled: true
        storageClass: "rook-ceph-block"
        size: 512Mi
    ruler:
      enabled: false
    metrics:
      enabled: true
      serviceMonitor:
        enabled: true
  # Object-store coordinates and credentials from the `thanos` Secret.
  valuesFrom:
    - kind: Secret
      name: thanos
      valuesKey: S3_BUCKET_NAME
      targetPath: objstoreConfig.config.bucket
    - kind: Secret
      name: thanos
      valuesKey: S3_BUCKET_HOST
      targetPath: objstoreConfig.config.endpoint
    - kind: Secret
      name: thanos
      valuesKey: S3_BUCKET_REGION
      targetPath: objstoreConfig.config.region
    - kind: Secret
      name: thanos
      valuesKey: S3_ACCESS_KEY
      targetPath: objstoreConfig.config.access_key
    - kind: Secret
      name: thanos
      valuesKey: S3_SECRET_KEY
      targetPath: objstoreConfig.config.secret_key

View File

@@ -0,0 +1,8 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
# Kustomize entry point for the Thanos app: the SOPS-encrypted Secret and the
# HelmRelease, both placed in `monitoring`.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
namespace: monitoring
resources:
  - ./secret.sops.yaml
  - ./helmrelease.yaml

View File

@@ -0,0 +1,67 @@
# Development
## thanos
### S3 Configuration
1. Create `~/.mc/config.json`
```json
{
"version": "10",
"aliases": {
"minio": {
"url": "https://s3.<domain>",
"accessKey": "<access-key>",
"secretKey": "<secret-key>",
"api": "S3v4",
"path": "auto"
}
}
}
```
2. Create the thanos user and password
```sh
mc admin user add minio thanos <super-secret-password>
```
3. Create the thanos bucket
```sh
mc mb minio/thanos
```
4. Create `thanos-user-policy.json`
```json
{
"Version": "2012-10-17",
"Statement": [
{
"Action": [
"s3:ListBucket",
"s3:PutObject",
"s3:GetObject",
"s3:DeleteObject"
],
"Effect": "Allow",
"Resource": ["arn:aws:s3:::thanos/*", "arn:aws:s3:::thanos"],
"Sid": ""
}
]
}
```
5. Apply the bucket policies
```sh
mc admin policy add minio thanos-private thanos-user-policy.json
```
6. Associate the private policy with the user
```sh
mc admin policy set minio thanos-private user=thanos
```

View File

@@ -0,0 +1,32 @@
apiVersion: v1
kind: Secret
type: Opaque
metadata:
name: thanos
namespace: monitoring
stringData:
S3_BUCKET_NAME: ENC[AES256_GCM,data:0q5tjzGN,iv:RYjlKFAJpR6NSjimSAf8JrS2t1mUGSCAjusrYhTyiuw=,tag:AAIwBbmYoflm5M1EVbHM4A==,type:str]
S3_BUCKET_HOST: ENC[AES256_GCM,data:/9U/cHXmbGnbDCNm37zy0PzRbt5RI2LN7g==,iv:LLCrwkc6k3mXbJVWa2FivgEsbQKa9OyJWpe47BwExB8=,tag:qji0SWdaSgp8tNANSSB9Hg==,type:str]
S3_BUCKET_REGION: ""
S3_ACCESS_KEY: ENC[AES256_GCM,data:zTvAiBiukR1RP5eACMfgBsoTbwI=,iv:IIMUgN5SO+0i9/8w8QHpRgiTzQsOELqgMZAsARvcZJQ=,tag:lIvDTJ8i5UiOkZRMLrgV7g==,type:str]
S3_SECRET_KEY: ENC[AES256_GCM,data:mUHk2N4tcbh3si26uZx3J/gkXWH4gqk4/vJfJ3J03mreNsD8VlNePw==,iv:+wS4yLwKrFALFF51BLxXFpP0ROlR7qdBTVpFCJ/tizM=,tag:VJr9s444GB5GPft/8897mw==,type:str]
sops:
kms: []
gcp_kms: []
azure_kv: []
hc_vault: []
age:
- recipient: age1hhurqwmfvl9m3vh3hk8urulfzcdsrep2ax2neazqt435yhpamu3qj20asg
enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSAxSHQ5b3RRYjdGd3JYQkxh
cnRBTlJuMm9NTU96TFRpSEg0K2UrdnJ1V1VjCkZpRmwvSmZ3ZHJNaGNNS21mUytt
VXRMVzhSemx4NGZYSUtCS3g3Q281dXcKLS0tIC94NCtGVWF2U055NEZJTmtpenVM
L3c2WElEOU4rS0hrU1NPQ1NPZitDVDgKaN3P5xK1O1i9lTSAGJU+GIxbIoTb5OMO
if3medB2nPLEt5BUY2datTbswXiT3E9rFyka/Maq6afZjFiixK5mFQ==
-----END AGE ENCRYPTED FILE-----
lastmodified: "2022-11-22T22:26:04Z"
mac: ENC[AES256_GCM,data:ANDShRftczGroCYNFKa/WdF22PgZ9yA6xhxdfe7/HHs0vQU48Q8nOrOT66P+8HDRV63I5ddodOurVtztFyGc8I0YdU2Bg1P2rnEmStfJsGGidTIqNloopCArsAH2UJj/fxwUA3dxswFURvgIagpjfdWHYGT2vzma44CORrk5vpU=,iv:KiFlpjLy+hj6V2dUoZeBdr3eq22So4G2oAA2QutF3UU=,tag:fkpjbQFU0Habj3d+6mNZLQ==,type:str]
pgp: []
encrypted_regex: ^(data|stringData)$
version: 3.7.3

View File

@@ -0,0 +1,25 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.devbu.io/kustomization_v1beta2.json
# Flux Kustomization that reconciles the Thanos app directory.
kind: Kustomization
apiVersion: kustomize.toolkit.fluxcd.io/v1beta2
metadata:
  namespace: flux-system
  name: cluster-apps-thanos-app
  labels:
    # NOTE(review): presumably opts this object into a shared postBuild
    # substitution patch defined elsewhere in the repo - confirm.
    substitution.flux.home.arpa/enabled: "true"
spec:
  # Where the manifests come from.
  sourceRef:
    kind: GitRepository
    name: home-ops-kubernetes
  path: ./kubernetes/apps/monitoring/thanos/app
  prune: true
  # Reconciled only after the kube-prometheus-stack Kustomization is ready.
  dependsOn:
    - name: cluster-apps-kube-prometheus-stack-app
  # Readiness is gated on the HelmRelease.
  healthChecks:
    - kind: HelmRelease
      apiVersion: helm.toolkit.fluxcd.io/v2beta1
      namespace: monitoring
      name: thanos
  # Timing.
  interval: "30m"
  retryInterval: "1m"
  timeout: "5m"

View File

@@ -0,0 +1,85 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.devbu.io/helmrelease_v2beta1.json
# HelmRelease for the Vector agent: collects Kubernetes pod logs plus two
# local UDP feeds (Talos kernel / service logs) and forwards each stream to
# the vector-aggregator service.
kind: HelmRelease
apiVersion: helm.toolkit.fluxcd.io/v2beta1
metadata:
  namespace: monitoring
  name: &app vector-agent
spec:
  interval: "15m"
  # Installed only after Loki and the aggregator releases are ready.
  dependsOn:
    - namespace: monitoring
      name: loki
    - namespace: monitoring
      name: vector-aggregator
  chart:
    spec:
      chart: vector
      version: "0.18.0"
      sourceRef:
        kind: HelmRepository
        namespace: flux-system
        name: vector
  install:
    createNamespace: true
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3
  values:
    role: "Agent"
    image:
      repository: timberio/vector
      tag: "0.26.0-debian"
    podAnnotations:
      configmap.reloader.stakater.com/reload: vector-agent
    service:
      enabled: false
    podMonitor:
      enabled: true
    resources:
      requests:
        cpu: 23m
        memory: 249M
      limits:
        memory: 918M
    # Also schedule onto control-plane nodes.
    tolerations:
      - effect: NoSchedule
        key: node-role.kubernetes.io/control-plane
    customConfig:
      data_dir: /vector-data-dir
      api:
        enabled: false
      # Sources
      sources:
        kubernetes_logs:
          type: kubernetes_logs
        talos_kernel_logs:
          type: socket
          mode: udp
          address: "127.0.0.1:12000"
        talos_service_logs:
          type: socket
          mode: udp
          address: "127.0.0.1:12001"
      # Sinks
      # NOTE(review): the two Talos sinks speak the `vector` protocol to
      # ports 6050/6051, while the aggregator config in this commit declares
      # those ports as UDP `socket` sources - confirm the two sides match.
      sinks:
        kubernetes_sink:
          type: vector
          version: "2"
          address: "vector-aggregator.monitoring:6000"
          inputs: [kubernetes_logs]
        talos_kernel_sink:
          type: vector
          version: "2"
          address: "vector-aggregator.monitoring:6050"
          inputs: [talos_kernel_logs]
        talos_service_sink:
          type: vector
          version: "2"
          address: "vector-aggregator.monitoring:6051"
          inputs: [talos_service_logs]

View File

@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
# Kustomize entry point for the Vector agent; every listed resource is
# placed in the `monitoring` namespace.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
namespace: monitoring
resources:
  - ./helmrelease.yaml

View File

@@ -0,0 +1,160 @@
# Vector aggregator configuration: receives log streams from the agents and
# other hosts, normalises a few fields, and ships everything to Loki through
# the loki-gateway service.
data_dir: /vector-data-dir
api:
  enabled: true
  address: 0.0.0.0:8686
enrichment_tables:
  geoip_table:
    type: geoip
    path: /usr/share/GeoIP/GeoLite2-City.mmdb
# Sources
sources:
  kubernetes_source:
    address: 0.0.0.0:6000
    type: vector
    version: "2"
  opnsense_logs:
    address: 0.0.0.0:6001
    type: vector
    version: "2"
  journald_source:
    type: vector
    address: 0.0.0.0:6002
    version: "2"
  vector_metrics:
    type: internal_metrics
  # NOTE(review): the agent HelmRelease in this commit forwards Talos logs to
  # 6050/6051 with `type: vector` sinks, whereas these are UDP `socket`
  # sources - confirm which side is intended.
  talos_kernel_logs:
    address: 0.0.0.0:6050
    type: socket
    mode: udp
    max_length: 102400
    decoding:
      codec: json
    host_key: __host
  talos_service_logs:
    address: 0.0.0.0:6051
    type: socket
    mode: udp
    max_length: 102400
    decoding:
      codec: json
    host_key: __host
# Transformations
transforms:
  # Replace node IPs in `__host` with readable node names.
  talos_kernel_logs_xform:
    type: remap
    inputs:
      - talos_kernel_logs
    source: |-
      .__host = replace!(.__host, "192.168.8.101", "talos-node-1")
      .__host = replace(.__host, "192.168.8.102", "talos-node-2")
      .__host = replace(.__host, "192.168.8.103", "talos-node-3")
      .__host = replace(.__host, "192.168.8.104", "talos-node-4")
  talos_service_logs_xform:
    type: remap
    inputs:
      - talos_service_logs
    source: |-
      .__host = replace!(.__host, "192.168.8.101", "talos-node-1")
      .__host = replace(.__host, "192.168.8.102", "talos-node-2")
      .__host = replace(.__host, "192.168.8.103", "talos-node-3")
      .__host = replace(.__host, "192.168.8.104", "talos-node-4")
  # Derive one application name from whichever pod label is present. The
  # labels are read from `.kubernetes.pod_labels`, the same path the Loki sink
  # templates below use; the bare `.pod_labels` path used before would always
  # have fallen through to "unknown".
  kubernetes_remap:
    type: remap
    inputs:
      - kubernetes_source
    source: |
      # Standardize 'app' index
      .custom_app_name = .kubernetes.pod_labels."app.kubernetes.io/name" || .kubernetes.pod_labels.app || .kubernetes.pod_labels."k8s-app" || "unknown"
# Sinks
sinks:
  loki_kubernetes:
    type: loki
    # Consume the remapped stream: `custom_app_name` (used for the k8s_app
    # label) only exists after kubernetes_remap has run. Reading
    # kubernetes_source directly left the transform without any consumer.
    inputs:
      - kubernetes_remap
    endpoint: http://loki-gateway.monitoring.svc.cluster.local:80
    encoding:
      codec: json
    batch:
      max_bytes: 2049000
    out_of_order_action: rewrite_timestamp
    remove_label_fields: true
    remove_timestamp: true
    labels:
      k8s_app: '{{ custom_app_name }}'
      k8s_container: '{{ kubernetes.container_name }}'
      k8s_filename: '{{ kubernetes.file }}'
      k8s_instance: '{{ kubernetes.pod_labels."app.kubernetes.io/instance" }}'
      k8s_namespace: '{{ kubernetes.pod_namespace }}'
      k8s_node: '{{ kubernetes.pod_node_name }}'
      k8s_pod: '{{ kubernetes.pod_name }}'
  loki_opnsense:
    type: loki
    inputs:
      - opnsense_logs
    endpoint: http://loki-gateway.monitoring.svc.cluster.local:80
    encoding:
      codec: json
    batch:
      max_bytes: 400000
    out_of_order_action: rewrite_timestamp
    labels:
      hostname: '{{ host }}'
      syslog_identifier: '{{SYSLOG_IDENTIFIER }}'
  loki_journal:
    type: loki
    inputs:
      - journald_source
    endpoint: http://loki-gateway.monitoring.svc.cluster.local:80
    encoding:
      codec: json
    batch:
      max_bytes: 2049000
    out_of_order_action: accept
    remove_label_fields: true
    remove_timestamp: true
    labels:
      hostname: '{{ host }}'
  talos_kernel:
    type: loki
    inputs:
      - talos_kernel_logs_xform
    endpoint: http://loki-gateway.monitoring.svc.cluster.local:80
    encoding:
      codec: json
      # `__host` is sent as the hostname label, not in the log body.
      except_fields:
        - __host
    batch:
      max_bytes: 1048576
    out_of_order_action: rewrite_timestamp
    labels:
      hostname: '{{ __host }}'
      service: '{{ facility }}'
  talos_service:
    type: loki
    inputs:
      - talos_service_logs_xform
    endpoint: http://loki-gateway.monitoring.svc.cluster.local:80
    encoding:
      codec: json
      except_fields:
        - __host
    batch:
      max_bytes: 524288
    out_of_order_action: rewrite_timestamp
    labels:
      hostname: '{{ __host }}'
      service: "talos-service"
      namespace: "talos:service"

View File

@@ -0,0 +1,74 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.devbu.io/helmrelease_v2beta1.json
# HelmRelease for the Vector aggregator, deployed through the generic
# app-template chart and exposed on a LoadBalancer IP with one port per
# incoming log stream.
kind: HelmRelease
apiVersion: helm.toolkit.fluxcd.io/v2beta1
metadata:
  namespace: monitoring
  name: &app vector-aggregator
spec:
  interval: "15m"
  chart:
    spec:
      chart: app-template
      version: "1.2.0"
      sourceRef:
        kind: HelmRepository
        namespace: flux-system
        name: bjw-s
  install:
    createNamespace: true
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3
  values:
    image:
      repository: docker.io/timberio/vector
      tag: "0.26.0-debian"
    args:
      - "--config"
      - "/etc/vector/vector.yaml"
    controller:
      strategy: RollingUpdate
      annotations:
        reloader.stakater.com/auto: "true"
    service:
      main:
        type: LoadBalancer
        loadBalancerIP: "${CLUSTER_LB_VECTOR}"
        externalTrafficPolicy: Local
        ports:
          # Vector API
          http:
            port: 8686
          # Log streams
          kubernetes-logs:
            port: 6000
            enabled: true
          opnsense-logs:
            port: 6001
            enabled: true
          journald-logs:
            port: 6002
            enabled: true
          # Talos feeds arrive over UDP.
          talos-kernel:
            port: 6050
            protocol: UDP
            enabled: true
          talos-service:
            port: 6051
            protocol: UDP
            enabled: true
    persistence:
      # vector.yaml from the generated ConfigMap, mounted as a single file.
      config:
        enabled: true
        type: configMap
        name: vector-aggregator-configmap
        mountPath: /etc/vector/vector.yaml
        subPath: vector.yaml
        readOnly: true
      # Scratch space matching `data_dir` in vector.yaml.
      data:
        enabled: true
        type: emptyDir
        mountPath: /vector-data-dir
      # Volume holding the GeoIP database path referenced by vector.yaml.
      geoip:
        enabled: true
        type: emptyDir
        mountPath: /usr/share/GeoIP

View File

@@ -0,0 +1,16 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
# Kustomize entry point for the Vector aggregator: Secret + HelmRelease, a
# strategic-merge patch adding the GeoIP init container, and a ConfigMap
# generated from ./config/vector.yaml.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
namespace: monitoring
resources:
  - ./secret.sops.yaml
  - ./helmrelease.yaml
patchesStrategicMerge:
  - ./patches/geoip.yaml
# The name has no hash suffix, so the HelmRelease can reference the
# ConfigMap by its fixed name.
generatorOptions:
  disableNameSuffixHash: true
configMapGenerator:
  - name: vector-aggregator-configmap
    files:
      - vector.yaml=./config/vector.yaml

View File

@@ -0,0 +1,25 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.devbu.io/helmrelease_v2beta1.json
# Patch for the vector-aggregator HelmRelease: adds an init container that
# runs geoipupdate for the GeoLite2-City edition into the shared `geoip`
# volume, with credentials taken from `vector-aggregator-secret`.
kind: HelmRelease
apiVersion: helm.toolkit.fluxcd.io/v2beta1
metadata:
  namespace: monitoring
  name: vector-aggregator
spec:
  values:
    initContainers:
      init-geoip:
        image: docker.io/maxmindinc/geoipupdate:v4.10
        envFrom:
          - secretRef:
              name: vector-aggregator-secret
        env:
          - name: GEOIPUPDATE_EDITION_IDS
            value: "GeoLite2-City"
          - name: GEOIPUPDATE_FREQUENCY
            value: "0"
          - name: GEOIPUPDATE_VERBOSE
            value: "true"
        volumeMounts:
          - name: geoip
            mountPath: /usr/share/GeoIP

View File

@@ -0,0 +1,30 @@
# yamllint disable
apiVersion: v1
kind: Secret
metadata:
name: vector-aggregator-secret
namespace: monitoring
type: Opaque
stringData:
GEOIPUPDATE_ACCOUNT_ID: ENC[AES256_GCM,data:vBU+Iwuv,iv:cK005QUa8iKK+2M2OsKvCXJAkUyhUgReDw8hBBhcNLQ=,tag:k3vrqqyMkp8cnGWfeLbu0A==,type:str]
GEOIPUPDATE_LICENSE_KEY: ENC[AES256_GCM,data:XuCipRddaBHI2umUb1+SPA==,iv:gwbTaK5KCmTF+8mQNjkmLkTdSqz2uFAINo6rJ6F2R4U=,tag:cvevnXWf7xFcdMkwKRF4pQ==,type:str]
sops:
kms: []
gcp_kms: []
azure_kv: []
hc_vault: []
age:
- recipient: age1hhurqwmfvl9m3vh3hk8urulfzcdsrep2ax2neazqt435yhpamu3qj20asg
enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBlUHFQcWJaRTlGT2RLK3R3
YlJDVTMvRThTR1dXdGN5a1RQd2FxTy84SFdNCnFEWEVpU1o3Y2hISkJrNzBMZFYr
emZyeW9ySnZEYnlvMWFQeXpYeHMzeUkKLS0tIEtPTm9JM0o0ZVBKN05oa0JSbHBL
b2pLSXUyS2lCbmZYYmk0WnVpRU9xRUUKAMUoEprOuR/xgtHZDBmDNTrLEyD9vbeb
dvQZ/7KrgRKVq4Eq3wI254CvajnNs3mACp175DhTsLyX0hBO77FZ2A==
-----END AGE ENCRYPTED FILE-----
lastmodified: "2022-09-15T03:04:22Z"
mac: ENC[AES256_GCM,data:rDDMbtb8xSULRF6RUSNl+Pw4KIiCXJZ5kQ70U5Ap3oB3Ci6miw0EXAVCZC699iJ2YS8cqhUe6VwRCdVn+1bYxz4Dbjm1/dAvkXNbBruhe6KhwSpF/sx6viVH2238ReG+jHr7l/AXVDYyWCxH7hzHWn2f2hTqncpuvr1uyyhU0kg=,iv:JN6F4XDLypDyw9UX9WnhJu+UZzR/A9IW+8NtP4QXnWU=,tag:s+F3V/DNNlvTjFWgjxefoA==,type:str]
pgp: []
encrypted_regex: ^(data|stringData)$
version: 3.7.3

View File

@@ -0,0 +1,50 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.devbu.io/kustomization_v1beta2.json
# Flux Kustomization that reconciles the Vector aggregator directory.
kind: Kustomization
apiVersion: kustomize.toolkit.fluxcd.io/v1beta2
metadata:
  namespace: flux-system
  name: cluster-apps-vector-aggregator
  labels:
    # NOTE(review): presumably opts this object into a shared postBuild
    # substitution patch defined elsewhere in the repo - confirm.
    substitution.flux.home.arpa/enabled: "true"
spec:
  # Where the manifests come from.
  sourceRef:
    kind: GitRepository
    name: home-ops-kubernetes
  path: ./kubernetes/apps/monitoring/vector/aggregator
  prune: true
  # Reconciled only after the Loki Kustomization is ready.
  dependsOn:
    - name: cluster-apps-loki-app
  # Readiness is gated on the HelmRelease.
  healthChecks:
    - kind: HelmRelease
      apiVersion: helm.toolkit.fluxcd.io/v2beta1
      namespace: monitoring
      name: vector-aggregator
  # Timing.
  interval: "30m"
  retryInterval: "1m"
  timeout: "3m"
---
# yaml-language-server: $schema=https://kubernetes-schemas.devbu.io/kustomization_v1beta2.json
# Flux Kustomization that reconciles the Vector agent directory.
kind: Kustomization
apiVersion: kustomize.toolkit.fluxcd.io/v1beta2
metadata:
  namespace: flux-system
  name: cluster-apps-vector-agent
  labels:
    # NOTE(review): presumably opts this object into a shared postBuild
    # substitution patch defined elsewhere in the repo - confirm.
    substitution.flux.home.arpa/enabled: "true"
spec:
  # Where the manifests come from.
  sourceRef:
    kind: GitRepository
    name: home-ops-kubernetes
  path: ./kubernetes/apps/monitoring/vector/agent
  prune: true
  # Reconciled only after the aggregator Kustomization is ready.
  dependsOn:
    - name: cluster-apps-vector-aggregator
  # Readiness is gated on the HelmRelease.
  healthChecks:
    - kind: HelmRelease
      apiVersion: helm.toolkit.fluxcd.io/v2beta1
      namespace: monitoring
      name: vector-agent
  # Timing.
  interval: "30m"
  retryInterval: "1m"
  timeout: "3m"