mirror of
https://github.com/auricom/home-cluster.git
synced 2025-09-17 18:24:14 +02:00
feat: kube scrape stuff
This commit is contained in:
@@ -153,28 +153,6 @@ spec:
|
|||||||
existingSecret:
|
existingSecret:
|
||||||
name: thanos-objstore-secret
|
name: thanos-objstore-secret
|
||||||
key: objstore.yml
|
key: objstore.yml
|
||||||
additionalScrapeConfigs:
|
|
||||||
- job_name: "opnsense"
|
|
||||||
scrape_interval: 60s
|
|
||||||
metrics_path: "/metrics"
|
|
||||||
static_configs:
|
|
||||||
- targets: ["${LOCAL_LAN_OPNSENSE}:9273"]
|
|
||||||
labels:
|
|
||||||
app: "opnsense"
|
|
||||||
- job_name: "truenas"
|
|
||||||
scrape_interval: 60s
|
|
||||||
metrics_path: "/metrics"
|
|
||||||
static_configs:
|
|
||||||
- targets: ["192.168.9.10:9273"]
|
|
||||||
labels:
|
|
||||||
app: "truenas"
|
|
||||||
- job_name: "truenas-remote"
|
|
||||||
scrape_interval: 60s
|
|
||||||
metrics_path: "/metrics"
|
|
||||||
static_configs:
|
|
||||||
- targets: ["${LOCAL_LAN_TRUENAS_REMOTE}:9273"]
|
|
||||||
labels:
|
|
||||||
app: "truenas-remote"
|
|
||||||
thanosService:
|
thanosService:
|
||||||
enabled: true
|
enabled: true
|
||||||
thanosServiceMonitor:
|
thanosServiceMonitor:
|
||||||
|
@@ -5,3 +5,5 @@ kind: Kustomization
|
|||||||
namespace: monitoring
|
namespace: monitoring
|
||||||
resources:
|
resources:
|
||||||
- ./helmrelease.yaml
|
- ./helmrelease.yaml
|
||||||
|
- ./prometheusrule.yaml
|
||||||
|
- ./scrapeconfig.yaml
|
||||||
|
@@ -0,0 +1,34 @@
|
|||||||
|
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/prometheusrule_v1.json
# Miscellaneous alerting rules: Docker Hub rate-limit risk, OOM-killed
# containers, and ZFS pools that are not online.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: miscellaneous-rules
spec:
  groups:
    - name: dockerhub
      rules:
        # Counts container series (non-empty `container` label) whose image
        # matches the docker.io pattern and that were last seen under 30 s
        # ago; alerts when that count stays above 100 for 15 minutes.
        # NOTE(review): the `.` in `docker.io` is an unescaped regex wildcard,
        # so it matches any character in that position, not only a dot.
        - alert: BootstrapRateLimitRisk
          annotations:
            summary: Kubernetes cluster at risk of being rate limited by dockerhub on bootstrap
          expr: count(time() - container_last_seen{image=~"(docker.io).*",container!=""} < 30) > 100
          for: 15m
          labels:
            severity: critical
    - name: oom
      rules:
        # Matches containers whose restart counter grew by at least 1 compared
        # with 10 minutes ago AND whose last-terminated reason was OOMKilled
        # for the whole 10-minute window (min_over_time == 1).
        # No `for:` hold period is configured on this rule.
        # The folded scalars below (`>-`) join their lines with single spaces,
        # yielding the same one-line strings as before.
        - alert: OOMKilled
          annotations:
            summary: >-
              Container {{ $labels.container }} in pod
              {{ $labels.namespace }}/{{ $labels.pod }} has been OOMKilled
              {{ $value }} times in the last 10 minutes.
          expr: >-
            (kube_pod_container_status_restarts_total -
            kube_pod_container_status_restarts_total offset 10m >= 1)
            and ignoring (reason)
            min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m]) == 1
          labels:
            severity: critical
    - name: zfs
      rules:
        # Alerts when any zpool state series other than "online" has a value
        # above 0 for 15 minutes.
        - alert: ZfsUnexpectedPoolState
          annotations:
            summary: ZFS pool {{$labels.zpool}} on {{$labels.instance}} is in an unexpected state {{$labels.state}}
          expr: node_zfs_zpool_state{state!="online"} > 0
          for: 15m
          labels:
            severity: critical
|
@@ -0,0 +1,59 @@
|
|||||||
|
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/scrapeconfig_v1alpha1.json
# Static scrape targets polled at /metrics. The relabeling rule sets the
# `job` label to this resource's name.
apiVersion: monitoring.coreos.com/v1alpha1
kind: ScrapeConfig
metadata:
  name: &job_name node-exporter
spec:
  metricsPath: /metrics
  staticConfigs:
    - targets:
        - "pikvm.${SECRET_INTERNAL_DOMAIN}:9100"
        - "opnsense.${SECRET_INTERNAL_DOMAIN}:9273"
        - "storage.${SECRET_INTERNAL_DOMAIN}:9100"
  relabelings:
    # Replace the job label with the value anchored at metadata.name.
    - targetLabel: job
      action: replace
      replacement: *job_name
|
||||||
|
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/scrapeconfig_v1alpha1.json
# Scrapes a single static target on port 9882 at /metrics. The relabeling
# rule sets the `job` label to this resource's name.
apiVersion: monitoring.coreos.com/v1alpha1
kind: ScrapeConfig
metadata:
  name: &job_name podman-exporter
spec:
  metricsPath: /metrics
  staticConfigs:
    - targets:
        - "storage.${SECRET_INTERNAL_DOMAIN}:9882"
  relabelings:
    # Replace the job label with the value anchored at metadata.name.
    - targetLabel: job
      action: replace
      replacement: *job_name
|
||||||
|
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/scrapeconfig_v1alpha1.json
# Scrapes the PiKVM host at /api/export/prometheus/metrics. The relabeling
# rule sets the `job` label to this resource's name.
apiVersion: monitoring.coreos.com/v1alpha1
kind: ScrapeConfig
metadata:
  name: &job_name pikvm
spec:
  metricsPath: /api/export/prometheus/metrics
  staticConfigs:
    - targets:
        # NOTE(review): no port is given for this target; confirm that the
        # scheme/port Prometheus defaults to is what the device serves.
        - "pikvm.${SECRET_INTERNAL_DOMAIN}"
  relabelings:
    # Replace the job label with the value anchored at metadata.name.
    - targetLabel: job
      action: replace
      replacement: *job_name
|
||||||
|
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/scrapeconfig_v1alpha1.json
# Scrapes a single static target on port 9811 at /metrics. The relabeling
# rule sets the `job` label to this resource's name.
apiVersion: monitoring.coreos.com/v1alpha1
kind: ScrapeConfig
metadata:
  name: &job_name zrepl
spec:
  metricsPath: /metrics
  staticConfigs:
    - targets:
        - "storage.${SECRET_INTERNAL_DOMAIN}:9811"
  relabelings:
    # Replace the job label with the value anchored at metadata.name.
    - targetLabel: job
      action: replace
      replacement: *job_name
|
Reference in New Issue
Block a user