diff --git a/cluster/apps/authentication/authelia/config/configuration.yml b/cluster/apps/authentication/authelia/config/configuration.yml index cf345bbf3..4a479b3ff 100644 --- a/cluster/apps/authentication/authelia/config/configuration.yml +++ b/cluster/apps/authentication/authelia/config/configuration.yml @@ -32,12 +32,15 @@ access_control: subject: ["group:admins", "group:users"] networks: - private + # Deny public resources + - domain: ["navidrome.${SECRET_CLUSTER_DOMAIN}"] + resources: ["^/metrics.*$"] + policy: deny # Two factors auth for WAN - domain: - "*.${SECRET_CLUSTER_DOMAIN}" subject: ["group:admins", "group:users"] policy: two_factor - identity_providers: oidc: cors: diff --git a/cluster/apps/authentication/authelia/helm-release.yaml b/cluster/apps/authentication/authelia/helm-release.yaml index 8d860ed97..cba996092 100644 --- a/cluster/apps/authentication/authelia/helm-release.yaml +++ b/cluster/apps/authentication/authelia/helm-release.yaml @@ -47,6 +47,14 @@ spec: metrics: enabled: true port: 8080 + monitor: + enabled: true + endpoints: + - port: metrics + scheme: http + path: /metrics + interval: 1m + scrapeTimeout: 10s ingress: main: enabled: true diff --git a/cluster/apps/authentication/authelia/kustomization.yaml b/cluster/apps/authentication/authelia/kustomization.yaml index 4ae05b305..891e18284 100644 --- a/cluster/apps/authentication/authelia/kustomization.yaml +++ b/cluster/apps/authentication/authelia/kustomization.yaml @@ -5,7 +5,6 @@ namespace: default resources: - secret.sops.yaml - helm-release.yaml - - service-monitor.yaml patchesStrategicMerge: - patches/env.yaml - patches/postgres.yaml diff --git a/cluster/apps/authentication/authelia/service-monitor.yaml b/cluster/apps/authentication/authelia/service-monitor.yaml deleted file mode 100644 index 174542950..000000000 --- a/cluster/apps/authentication/authelia/service-monitor.yaml +++ /dev/null @@ -1,19 +0,0 @@ ---- -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: &app authelia - namespace: default - labels: &labels - app.kubernetes.io/instance: *app - app.kubernetes.io/name: *app -spec: - selector: - matchLabels: - <<: *labels - endpoints: - - port: metrics - scheme: http - path: /metrics - interval: 1m - scrapeTimeout: 10s diff --git a/cluster/apps/home-automation/zigbee2mqtt-exporter/helm-release.yaml b/cluster/apps/home-automation/zigbee2mqtt-exporter/helm-release.yaml index e03abcbc3..9ba4eacfa 100644 --- a/cluster/apps/home-automation/zigbee2mqtt-exporter/helm-release.yaml +++ b/cluster/apps/home-automation/zigbee2mqtt-exporter/helm-release.yaml @@ -46,6 +46,8 @@ spec: ports: http: port: *port + monitor: + enabled: true podSecurityContext: runAsUser: 1000 runAsGroup: 1000 diff --git a/cluster/apps/home-automation/zigbee2mqtt-exporter/kustomization.yaml b/cluster/apps/home-automation/zigbee2mqtt-exporter/kustomization.yaml index 841fd0062..d1c0a463d 100644 --- a/cluster/apps/home-automation/zigbee2mqtt-exporter/kustomization.yaml +++ b/cluster/apps/home-automation/zigbee2mqtt-exporter/kustomization.yaml @@ -3,4 +3,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - helm-release.yaml - - prometheus.yaml + - prometheus-rule.yaml diff --git a/cluster/apps/home-automation/zigbee2mqtt-exporter/prometheus.yaml b/cluster/apps/home-automation/zigbee2mqtt-exporter/prometheus-rule.yaml similarity index 73% rename from cluster/apps/home-automation/zigbee2mqtt-exporter/prometheus.yaml rename to cluster/apps/home-automation/zigbee2mqtt-exporter/prometheus-rule.yaml index 1e3697f2b..f7d49dee9 100644 --- a/cluster/apps/home-automation/zigbee2mqtt-exporter/prometheus.yaml +++ b/cluster/apps/home-automation/zigbee2mqtt-exporter/prometheus-rule.yaml @@ -1,24 +1,5 @@ --- apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: &app zigbee2mqtt-exporter - namespace: default - labels: &labels - app.kubernetes.io/instance: *app - app.kubernetes.io/name: *app -spec: - selector: - matchLabels: - <<: *labels - endpoints: - - port: http - scheme: http - path: /metrics - interval: 1m - scrapeTimeout: 10s ---- -apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: zigbee2mqtt-exporter diff --git a/cluster/apps/home-automation/zwavejs2mqtt/helm-release.yaml b/cluster/apps/home-automation/zwavejs2mqtt/helm-release.yaml index 12cde53ec..10ac7012f 100644 --- a/cluster/apps/home-automation/zwavejs2mqtt/helm-release.yaml +++ b/cluster/apps/home-automation/zwavejs2mqtt/helm-release.yaml @@ -40,6 +40,8 @@ spec: websocket: enabled: true port: 3000 + monitor: + enabled: true probes: liveness: &probes enabled: true diff --git a/cluster/apps/kube-tools/intel-gpu-exporter/helm-release.yaml b/cluster/apps/kube-tools/intel-gpu-exporter/helm-release.yaml index 8bd41bb3b..1e2ce6dda 100644 --- a/cluster/apps/kube-tools/intel-gpu-exporter/helm-release.yaml +++ b/cluster/apps/kube-tools/intel-gpu-exporter/helm-release.yaml @@ -35,6 +35,17 @@ spec: ports: http: port: 8080 + monitor: + enabled: true + endpoints: + - port: http + scheme: http + path: /metrics + interval: 1m + scrapeTimeout: 10s + relabelings: + - sourceLabels: [__meta_kubernetes_pod_node_name] + targetLabel: node securityContext: privileged: true affinity: diff --git a/cluster/apps/kube-tools/intel-gpu-exporter/kustomization.yaml b/cluster/apps/kube-tools/intel-gpu-exporter/kustomization.yaml index d4eef3d12..2fa2de20c 100644 --- a/cluster/apps/kube-tools/intel-gpu-exporter/kustomization.yaml +++ b/cluster/apps/kube-tools/intel-gpu-exporter/kustomization.yaml @@ -3,4 +3,3 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - helm-release.yaml - - service-monitor.yaml diff --git a/cluster/apps/kube-tools/intel-gpu-exporter/service-monitor.yaml b/cluster/apps/kube-tools/intel-gpu-exporter/service-monitor.yaml deleted file mode 100644 index 3c75fdac4..000000000 --- a/cluster/apps/kube-tools/intel-gpu-exporter/service-monitor.yaml +++ /dev/null @@ -1,21 +0,0 @@ ---- -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: &app intel-gpu-exporter - namespace: default - labels: &labels - app.kubernetes.io/instance: *app - app.kubernetes.io/name: *app -spec: - selector: - matchLabels: - <<: *labels - endpoints: - - port: http - interval: 1m - scrapeTimeout: 10s - path: /metrics - relabelings: - - sourceLabels: [__meta_kubernetes_pod_node_name] - targetLabel: node diff --git a/cluster/apps/media-servers/navidrome/helm-release.yaml b/cluster/apps/media-servers/navidrome/helm-release.yaml index f820718e0..3c0960c96 100644 --- a/cluster/apps/media-servers/navidrome/helm-release.yaml +++ b/cluster/apps/media-servers/navidrome/helm-release.yaml @@ -32,6 +32,7 @@ spec: ND_LOGLEVEL: info ND_MUSICFOLDER: /mnt/storage/music/Artistes ND_PORT: &port 80 + ND_PROMETHEUS_ENABLED: "true" ND_REVERSEPROXYUSERHEADER: "Remote-User" ND_REVERSEPROXYWHITELIST: "${NET_POD_CIDR}" ND_SCANSCHEDULE: "@every 1h" @@ -41,6 +42,8 @@ spec: ports: http: port: *port + monitor: + enabled: true ingress: main: enabled: true diff --git a/cluster/apps/networking/smtp-relay/helm-release.yaml b/cluster/apps/networking/smtp-relay/helm-release.yaml index b5f8148d3..cb030847e 100644 --- a/cluster/apps/networking/smtp-relay/helm-release.yaml +++ b/cluster/apps/networking/smtp-relay/helm-release.yaml @@ -48,6 +48,14 @@ spec: metrics: enabled: true port: 9749 + monitor: + enabled: true + endpoints: + - port: metrics + scheme: http + path: /metrics + interval: 1m + scrapeTimeout: 10s persistence: config: enabled: true diff --git a/cluster/apps/networking/smtp-relay/kustomization.yaml b/cluster/apps/networking/smtp-relay/kustomization.yaml index 5e9dd15ca..917d59d93 100644 --- a/cluster/apps/networking/smtp-relay/kustomization.yaml +++ b/cluster/apps/networking/smtp-relay/kustomization.yaml @@ -4,7 +4,6 @@ kind: Kustomization resources: - helm-release.yaml - secret.sops.yaml - - service-monitor.yaml namespace: default configMapGenerator: - name: smtp-relay diff --git a/cluster/apps/networking/smtp-relay/service-monitor.yaml b/cluster/apps/networking/smtp-relay/service-monitor.yaml deleted file mode 100644 index d50ab19b8..000000000 --- a/cluster/apps/networking/smtp-relay/service-monitor.yaml +++ /dev/null @@ -1,19 +0,0 @@ ---- -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: &app smtp-relay - namespace: default - labels: &labels - app.kubernetes.io/instance: *app - app.kubernetes.io/name: *app -spec: - selector: - matchLabels: - <<: *labels - endpoints: - - port: metrics - scheme: http - path: /metrics - interval: 1m - scrapeTimeout: 10s diff --git a/cluster/core/flux-system/kustomization.yaml b/cluster/core/flux-system/kustomization.yaml index 1c7549edc..f69a08e4a 100644 --- a/cluster/core/flux-system/kustomization.yaml +++ b/cluster/core/flux-system/kustomization.yaml @@ -3,3 +3,5 @@ kind: Kustomization resources: - notifications - webhook + - pod-monitor.yaml + - prometheus-rule.yaml diff --git a/cluster/core/flux-system/pod-monitor.yaml b/cluster/core/flux-system/pod-monitor.yaml new file mode 100644 index 000000000..dc556dc08 --- /dev/null +++ b/cluster/core/flux-system/pod-monitor.yaml @@ -0,0 +1,19 @@ +--- +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: flux-system + namespace: flux-system + labels: + app.kubernetes.io/part-of: flux +spec: + namespaceSelector: + matchNames: + - flux-system + selector: + matchExpressions: + - key: app + operator: Exists + podMetricsEndpoints: + - port: http-prom + honorLabels: true diff --git a/cluster/core/flux-system/prometheus-rule.yaml b/cluster/core/flux-system/prometheus-rule.yaml new file mode 100644 index 000000000..ee27fe288 --- /dev/null +++ b/cluster/core/flux-system/prometheus-rule.yaml @@ -0,0 +1,18 @@ +--- +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: flux + namespace: flux-system +spec: + groups: + - name: flux.rules + rules: + - alert: FluxComponentAbsent + annotations: + summary: Flux component has disappeared from Prometheus target discovery. + expr: | + absent(up{job=~".*flux-system.*"} == 1) + for: 15m + labels: + severity: critical