mirror of
https://github.com/auricom/home-cluster.git
synced 2025-09-17 18:24:14 +02:00
feat: kube scrape stuff
This commit is contained in:
@@ -153,28 +153,6 @@ spec:
|
||||
existingSecret:
|
||||
name: thanos-objstore-secret
|
||||
key: objstore.yml
|
||||
additionalScrapeConfigs:
|
||||
- job_name: "opnsense"
|
||||
scrape_interval: 60s
|
||||
metrics_path: "/metrics"
|
||||
static_configs:
|
||||
- targets: ["${LOCAL_LAN_OPNSENSE}:9273"]
|
||||
labels:
|
||||
app: "opnsense"
|
||||
- job_name: "truenas"
|
||||
scrape_interval: 60s
|
||||
metrics_path: "/metrics"
|
||||
static_configs:
|
||||
- targets: ["192.168.9.10:9273"]
|
||||
labels:
|
||||
app: "truenas"
|
||||
- job_name: "truenas-remote"
|
||||
scrape_interval: 60s
|
||||
metrics_path: "/metrics"
|
||||
static_configs:
|
||||
- targets: ["${LOCAL_LAN_TRUENAS_REMOTE}:9273"]
|
||||
labels:
|
||||
app: "truenas-remote"
|
||||
thanosService:
|
||||
enabled: true
|
||||
thanosServiceMonitor:
|
||||
|
@@ -5,3 +5,5 @@ kind: Kustomization
|
||||
namespace: monitoring
|
||||
resources:
|
||||
- ./helmrelease.yaml
|
||||
- ./prometheusrule.yaml
|
||||
- ./scrapeconfig.yaml
|
||||
|
@@ -0,0 +1,34 @@
|
||||
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/prometheusrule_v1.json
# Miscellaneous alerting rules: Docker Hub pull pressure, OOM-killed
# containers, and ZFS pools that are not online.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: miscellaneous-rules
spec:
  groups:
    - name: dockerhub
      rules:
        # Fires when more than 100 containers running docker.io images were
        # last seen within the past 30 seconds, sustained for 15 minutes.
        - alert: BootstrapRateLimitRisk
          annotations:
            summary: Kubernetes cluster at risk of being rate limited by dockerhub on bootstrap
          # The dot is escaped so the matcher only accepts a literal
          # "docker.io" prefix. PromQL double-quoted strings need the
          # backslash doubled; YAML plain scalars pass backslashes through.
          expr: count(time() - container_last_seen{image=~"docker\\.io.*",container!=""} < 30) > 100
          for: 15m
          labels:
            severity: critical
    - name: oom
      rules:
        # Fires when a container's restart counter grew by at least 1 over the
        # last 10 minutes and its last termination reason was OOMKilled for the
        # whole window. No `for:` is set on this rule.
        - alert: OOMKilled
          annotations:
            summary: >-
              Container {{ $labels.container }} in pod
              {{ $labels.namespace }}/{{ $labels.pod }} has been OOMKilled
              {{ $value }} times in the last 10 minutes.
          expr: >-
            (kube_pod_container_status_restarts_total
            - kube_pod_container_status_restarts_total offset 10m >= 1)
            and ignoring (reason)
            min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m]) == 1
          labels:
            severity: critical
    - name: zfs
      rules:
        # Fires when any zpool state series other than "online" has a value
        # above 0 for 15 minutes.
        - alert: ZfsUnexpectedPoolState
          annotations:
            summary: ZFS pool {{$labels.zpool}} on {{$labels.instance}} is in an unexpected state {{$labels.state}}
          expr: node_zfs_zpool_state{state!="online"} > 0
          for: 15m
          labels:
            severity: critical
|
@@ -0,0 +1,59 @@
|
||||
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/scrapeconfig_v1alpha1.json
# Static scrape targets whose metrics are all labelled job="node-exporter".
# NOTE(review): ${SECRET_INTERNAL_DOMAIN} is presumably substituted before
# this manifest is applied — confirm against the deployment tooling.
apiVersion: monitoring.coreos.com/v1alpha1
kind: ScrapeConfig
metadata:
  name: node-exporter
spec:
  metricsPath: /metrics
  staticConfigs:
    - targets:
        - "pikvm.${SECRET_INTERNAL_DOMAIN}:9100"
        # NOTE(review): port 9273 differs from the 9100 used by the other
        # hosts in this job — confirm this endpoint belongs here.
        - "opnsense.${SECRET_INTERNAL_DOMAIN}:9273"
        - "storage.${SECRET_INTERNAL_DOMAIN}:9100"
  relabelings:
    # Overwrite the job label with this resource's name.
    - action: replace
      targetLabel: job
      replacement: node-exporter
|
||||
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/scrapeconfig_v1alpha1.json
# Single static scrape target on the storage host, labelled
# job="podman-exporter".
apiVersion: monitoring.coreos.com/v1alpha1
kind: ScrapeConfig
metadata:
  name: podman-exporter
spec:
  metricsPath: /metrics
  staticConfigs:
    - targets:
        - "storage.${SECRET_INTERNAL_DOMAIN}:9882"
  relabelings:
    # Overwrite the job label with this resource's name.
    - action: replace
      targetLabel: job
      replacement: podman-exporter
|
||||
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/scrapeconfig_v1alpha1.json
# Single static scrape target for the pikvm host, labelled job="pikvm".
apiVersion: monitoring.coreos.com/v1alpha1
kind: ScrapeConfig
metadata:
  name: pikvm
spec:
  metricsPath: /api/export/prometheus/metrics
  staticConfigs:
    - targets:
        # NOTE(review): no port is given here, unlike the other scrape
        # configs — confirm the default port/scheme reaches this endpoint.
        - "pikvm.${SECRET_INTERNAL_DOMAIN}"
  relabelings:
    # Overwrite the job label with this resource's name.
    - action: replace
      targetLabel: job
      replacement: pikvm
|
||||
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/scrapeconfig_v1alpha1.json
# Single static scrape target on the storage host, labelled job="zrepl".
apiVersion: monitoring.coreos.com/v1alpha1
kind: ScrapeConfig
metadata:
  name: zrepl
spec:
  metricsPath: /metrics
  staticConfigs:
    - targets:
        - "storage.${SECRET_INTERNAL_DOMAIN}:9811"
  relabelings:
    # Overwrite the job label with this resource's name.
    - action: replace
      targetLabel: job
      replacement: zrepl
|
Reference in New Issue
Block a user