new talos cluster

This commit is contained in:
auricom
2022-11-19 04:47:32 +01:00
parent 42346bd99b
commit 4ac38f95e9
548 changed files with 1642 additions and 2331 deletions

View File

@@ -0,0 +1,84 @@
---
# Flux HelmRelease that installs the `vector` chart with role "Agent" into the
# `monitoring` namespace. The agent collects Kubernetes pod logs plus two UDP
# log streams and forwards all of them to the vector-aggregator service.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: vector-agent
  namespace: monitoring
spec:
  interval: 30m
  chart:
    spec:
      chart: vector
      version: 0.17.0
      sourceRef:
        kind: HelmRepository
        name: vector
        namespace: flux-system
  install:
    createNamespace: true
    remediation:
      retries: 5
  upgrade:
    remediation:
      retries: 5
  # Reconcile only after the releases this agent ships data to are ready.
  dependsOn:
    - name: loki
      namespace: monitoring
    - name: vector-aggregator
      namespace: monitoring
  values:
    image:
      repository: timberio/vector
      tag: 0.25.1-debian
    role: "Agent"
    podAnnotations:
      # Restart the pods when the vector-agent ConfigMap changes.
      configmap.reloader.stakater.com/reload: vector-agent
    customConfig:
      data_dir: /vector-data-dir
      api:
        enabled: false
      # Sources
      sources:
        kubernetes_logs:
          type: kubernetes_logs
        # NOTE(review): both UDP listeners bind to loopback inside the pod;
        # confirm the pod shares the node's network namespace if the node
        # itself is expected to send to these ports.
        talos_kernel_logs:
          type: socket
          mode: udp
          address: 127.0.0.1:12000
        talos_service_logs:
          type: socket
          mode: udp
          address: 127.0.0.1:12001
      # Sinks
      sinks:
        kubernetes_sink:
          type: vector
          inputs:
            - kubernetes_logs
          address: "vector-aggregator.monitoring:6000"
          version: "2"
        # The aggregator listens on 6050/6051 with `type: socket`, `mode: udp`
        # and JSON decoding, so these two sinks must speak plain UDP as well.
        # A `type: vector` sink cannot talk to a UDP socket source. The `text`
        # codec forwards the original `message` payload unchanged so the
        # aggregator can decode it as JSON.
        # NOTE(review): the aggregator fills its host field from the UDP peer,
        # which for relayed events is presumably this agent rather than the
        # originating node — confirm this is acceptable.
        talos_kernel_sink:
          type: socket
          mode: udp
          inputs:
            - talos_kernel_logs
          address: "vector-aggregator.monitoring:6050"
          encoding:
            codec: text
        talos_service_sink:
          type: socket
          mode: udp
          inputs:
            - talos_service_logs
          address: "vector-aggregator.monitoring:6051"
          encoding:
            codec: text
    # NOTE(review): no metrics exporter sink is defined in customConfig above;
    # verify that the PodMonitor has an endpoint to scrape.
    podMonitor:
      enabled: true
    resources:
      requests:
        cpu: 23m
        memory: 249M
      limits:
        memory: 918M
    service:
      enabled: false
    # Tolerate the control-plane NoSchedule taint so agents can run there too.
    tolerations:
      - key: node-role.kubernetes.io/control-plane
        effect: NoSchedule

View File

@@ -0,0 +1,5 @@
---
# Kustomize entry point for this directory: it pulls in the single
# helm-release.yaml manifest that sits next to this file.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - helm-release.yaml

View File

@@ -0,0 +1,20 @@
#
# IPv4: TCP
# Regex: ^(?P<message>(?P<rule>[^,]*),(?P<sub_rule>[^,]*),(?P<anchor>[^,]*),(?P<tracker>[^,]*),(?P<interface>[^,]*),(?P<reason>[^,]*),(?P<action>[^,]*),(?P<direction>[^,]*),(?P<ip_version>[^,]*),(?P<tos>[^,]*),(?P<ecn>[^,]*),(?P<ttl>[^,]*),(?P<id>[^,]*),(?P<offset>[^,]*),(?P<flags>[^,]*),(?P<protocol_id>[^,]*),(?P<protocol>tcp),(?P<length>[^,]*),(?P<source_ip>[^,]*),(?P<destination_ip>[^,]*),(?P<source_port>[^,]*),(?P<destination_port>[^,]*),(?P<data_length>[^,]*),(?P<tcp_flags>[^,]*),(?P<sequence_number>[^,]*),(?P<ack_number>[^,]*),(?P<tcp_window>[^,]*),(?P<urg>[^,]*),(?P<tcp_options>[^,]*))$
# Example: 94,,,ef794793b2e3764b938bd04cba88e8a3,igb0,match,pass,out,4,0x0,,62,16800,0,DF,6,tcp,60,xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx,11715,443,0,S,3876953207,,64240,,mss;sackOK;TS;nop;wscale
#
# IPv6: TCP
# Regex: ?
# Example: ?
#
# IPv4 / IPv6: UDP
# Regex: ^(?P<message>(?P<rule>[^,]*),(?P<sub_rule>[^,]*),(?P<anchor>[^,]*),(?P<tracker>[^,]*),(?P<interface>[^,]*),(?P<reason>[^,]*),(?P<action>[^,]*),(?P<direction>[^,]*),(?P<ip_version>[^,]*),(?P<tos>[^,]*),(?P<ecn>[^,]*),(?P<ttl>[^,]*),(?P<id>[^,]*),(?P<offset>[^,]*),(?P<flags>[^,]*),(?P<protocol_id>[^,]*),(?P<protocol>udp),(?P<length>[^,]*),(?P<source_ip>[^,]*),(?P<destination_ip>[^,]*),(?P<source_port>[^,]*),(?P<destination_port>[^,]*),(?P<data_length>[^,]*))$
# Example: 90,,,91e2443ae2e8caf012f9a6e5a8a455c8,lo0,match,pass,in,4,0x4,,255,4660,0,none,17,udp,914,xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx,5353,5353,894
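# NOTE: the IPv6 example below is NOT matched by the regex above: it has 20 fields with `udp` in position 13, whereas the regex expects 23 fields with `udp` in position 17. A separate IPv6 pattern is still needed.
# Example: 15,,,91515c100a3692cb94121964974ce513,igb1_vlan150,match,block,in,6,0x00,0x00000,255,udp,17,391,xxxx::xxxx:xxxx:xxxx:xxxx,xxxx::xx,5353,5353,391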
#
# IPv4: ICMP / IGMP / GRE
# Regex: ^(?P<message>(?P<rule>[^,]*),(?P<sub_rule>[^,]*),(?P<anchor>[^,]*),(?P<tracker>[^,]*),(?P<interface>[^,]*),(?P<reason>[^,]*),(?P<action>[^,]*),(?P<direction>[^,]*),(?P<ip_version>[^,]*),(?P<tos>[^,]*),(?P<ecn>[^,]*),(?P<ttl>[^,]*),(?P<id>[^,]*),(?P<offset>[^,]*),(?P<flags>[^,]*),(?P<protocol_id>[^,]*),(?P<protocol>icmp|igmp|gre),(?P<length>[^,]*),(?P<source_ip>[^,]*),(?P<destination_ip>[^,]*),(?P<data>[^,]*))$
# Example: 94,,,ef794793b2e3764b938bd04cba88e8a3,igb0,match,pass,out,4,0x0,,63,44871,0,DF,1,icmp,84,xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx,datalength=64
# Example: 16,,,02f4bab031b57d1e30553ce08e0ec131,igb1_vlan150,match,block,in,4,0xc0,,1,15472,0,none,2,igmp,32,xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx,datalength=8
# Example: 16,,,02f4bab031b57d1e30553ce08e0ec131,igb0,match,block,in,4,0x0,,57,20354,0,DF,47,gre,564,xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx,datalength=544
#

View File

@@ -0,0 +1,218 @@
---
# Flux HelmRelease that installs the `vector` chart with role
# "Stateless-Aggregator" into the `monitoring` namespace. It receives events
# from Vector peers (ports 6000-6002) and JSON-over-UDP streams (6050/6051),
# then ships everything to Loki through the loki-gateway service.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: vector-aggregator
  namespace: monitoring
spec:
  interval: 15m
  chart:
    spec:
      chart: vector
      version: 0.17.0
      sourceRef:
        kind: HelmRepository
        name: vector
        namespace: flux-system
  install:
    createNamespace: true
    remediation:
      retries: 5
  upgrade:
    remediation:
      retries: 5
  # Loki must be reconciled first; every log sink below targets it.
  dependsOn:
    - name: loki
      namespace: monitoring
  values:
    image:
      repository: timberio/vector
      tag: 0.25.1-debian
    role: "Stateless-Aggregator"
    podAnnotations:
      # Restart the pods when the vector-aggregator ConfigMap changes.
      configmap.reloader.stakater.com/reload: vector-aggregator
    customConfig:
      data_dir: /vector-data-dir
      api:
        enabled: false
      # Sources
      sources:
        # Vector-protocol (v2) listeners, one port per upstream stream.
        kubernetes_logs:
          address: 0.0.0.0:6000
          type: vector
          version: "2"
        opnsense_logs:
          address: 0.0.0.0:6001
          type: vector
          version: "2"
        journal_logs:
          type: vector
          address: 0.0.0.0:6002
          version: "2"
        # Vector's own internal metrics; consumed by prometheus_sink below.
        vector_metrics:
          type: internal_metrics
        # Raw UDP listeners carrying JSON payloads. The sender address is
        # stored under `__host` so the transforms below can rewrite it.
        talos_kernel_logs:
          address: 0.0.0.0:6050
          type: socket
          mode: udp
          max_length: 102400
          decoding:
            codec: json
          host_key: __host
        talos_service_logs:
          address: 0.0.0.0:6051
          type: socket
          mode: udp
          max_length: 102400
          decoding:
            codec: json
          host_key: __host
      # Transformations
      transforms:
        # Both remaps substitute known node IPs in `__host` with node names.
        talos_kernel_logs_xform:
          type: remap
          inputs:
            - talos_kernel_logs
          source: |-
            .__host = replace!(.__host, "192.168.9.101", "talos-node-1")
            .__host = replace(.__host, "192.168.9.102", "talos-node-2")
            .__host = replace(.__host, "192.168.9.103", "talos-node-3")
            .__host = replace(.__host, "192.168.9.104", "talos-node-4")
        talos_service_logs_xform:
          type: remap
          inputs:
            - talos_service_logs
          source: |-
            .__host = replace!(.__host, "192.168.9.101", "talos-node-1")
            .__host = replace(.__host, "192.168.9.102", "talos-node-2")
            .__host = replace(.__host, "192.168.9.103", "talos-node-3")
            .__host = replace(.__host, "192.168.9.104", "talos-node-4")
      # Sinks
      # Label values are wrapped in {{` ... `}} so Helm emits the inner
      # {{ ... }} verbatim and Vector evaluates it as its own template.
      sinks:
        loki_kubernetes:
          type: loki
          inputs:
            - kubernetes_logs
          endpoint: http://loki-gateway.monitoring:80
          encoding:
            codec: json
          batch:
            max_bytes: 2049000
          out_of_order_action: rewrite_timestamp
          remove_label_fields: true
          remove_timestamp: true
          labels:
            k8s_app: >-
              {{`{{ "kubernetes.pod_labels.app\.kubernetes\.io/name" }}`}}
            k8s_container: >-
              {{`{{ "kubernetes.container_name" }}`}}
            k8s_filename: >-
              {{`{{ "kubernetes.file" }}`}}
            k8s_instance: >-
              {{`{{ "kubernetes.pod_labels.app\.kubernetes\.io/instance" }}`}}
            k8s_namespace: >-
              {{`{{ "kubernetes.pod_namespace" }}`}}
            k8s_node: >-
              {{`{{ "kubernetes.pod_node_name" }}`}}
            k8s_pod: >-
              {{`{{ "kubernetes.pod_name" }}`}}
        loki_opnsense:
          type: loki
          inputs:
            - opnsense_logs
          endpoint: http://loki-gateway.monitoring:80
          encoding:
            codec: json
          batch:
            max_bytes: 400000
          out_of_order_action: rewrite_timestamp
          labels:
            hostname: >-
              {{`{{ host }}`}}
            syslog_identifier: >-
              {{`{{ SYSLOG_IDENTIFIER }}`}}
        loki_journal:
          type: loki
          inputs:
            - journal_logs
          endpoint: http://loki-gateway.monitoring:80
          encoding:
            codec: json
          batch:
            max_bytes: 2049000
          out_of_order_action: accept
          remove_label_fields: true
          remove_timestamp: true
          labels:
            hostname: >-
              {{`{{ host }}`}}
        talos_kernel:
          type: loki
          inputs:
            - talos_kernel_logs_xform
          endpoint: http://loki-gateway.monitoring:80
          encoding:
            codec: json
            # `__host` is only needed for the hostname label, not the body.
            except_fields:
              - __host
          batch:
            max_bytes: 1048576
          out_of_order_action: rewrite_timestamp
          labels:
            hostname: >-
              {{`{{ __host }}`}}
            service: >-
              {{`{{ facility }}`}}
        talos_service:
          type: loki
          inputs:
            - talos_service_logs_xform
          endpoint: http://loki-gateway.monitoring:80
          encoding:
            codec: json
            except_fields:
              - __host
          batch:
            max_bytes: 524288
          out_of_order_action: rewrite_timestamp
          labels:
            hostname: >-
              {{`{{ __host }}`}}
            service: >-
              {{`{{ "talos-service" }}`}}
            namespace: "talos:service"
        # Exposes the `vector_metrics` source for scraping. Without this sink
        # the source has no consumer and `podMonitor.port: prometheus-sink`
        # below has no matching container port. The chart is expected to
        # derive the port name from this sink's key (prometheus_sink ->
        # prometheus-sink) — verify against the rendered manifest.
        prometheus_sink:
          type: prometheus_exporter
          inputs:
            - vector_metrics
          address: 0.0.0.0:9090
    # Mount the PVC named vector-geoipupdate-config at /geoip.
    extraVolumeMounts:
      - name: geoip
        mountPath: /geoip
    extraVolumes:
      - name: geoip
        persistentVolumeClaim:
          claimName: vector-geoipupdate-config
    podMonitor:
      enabled: true
      jobLabel: vector-aggregator
      port: prometheus-sink
    resources:
      requests:
        cpu: 35m
        memory: 381M
      limits:
        memory: 726M
    service:
      enabled: true
      type: LoadBalancer
      annotations:
        coredns.io/hostname: "vector.${SECRET_CLUSTER_DOMAIN}"
  # Patch the rendered Service: pin the load-balancer IP and switch
  # externalTrafficPolicy to Local.
  postRenderers:
    - kustomize:
        patchesJson6902:
          - target:
              kind: Service
              name: vector-aggregator
            patch:
              - op: add
                path: /spec/loadBalancerIP
                value: ${CLUSTER_LB_VECTOR}
              - op: replace
                path: /spec/externalTrafficPolicy
                value: Local

View File

@@ -0,0 +1,5 @@
---
# Kustomize manifest list for this directory; the only resource is the
# neighbouring helm-release.yaml.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - helm-release.yaml

View File

@@ -0,0 +1,46 @@
---
# Daily CronJob that runs the MaxMind geoipupdate image once per invocation
# and writes into the vector-geoipupdate-config PVC mounted at
# /usr/share/GeoIP.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: vector-geoipupdate
  namespace: monitoring
spec:
  schedule: "@daily"
  # Never start a new run while a previous one is still active.
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 2
  jobTemplate:
    spec:
      # Finished Jobs are removed after 24 hours.
      ttlSecondsAfterFinished: 86400
      template:
        spec:
          automountServiceAccountToken: false
          restartPolicy: Never
          containers:
            - name: vector-geoipupdate
              image: docker.io/maxmindinc/geoipupdate:v4.10
              imagePullPolicy: IfNotPresent
              lifecycle:
                # Remove the lock file when the container is asked to stop.
                preStop:
                  exec:
                    command:
                      - /bin/sh
                      - -c
                      - rm -rf /usr/share/GeoIP/.geoipupdate.lock
              env:
                - name: GEOIPUPDATE_EDITION_IDS
                  value: GeoLite2-City
                - name: GEOIPUPDATE_FREQUENCY
                  value: "0"
                - name: GEOIPUPDATE_VERBOSE
                  value: "true"
              # Remaining settings come from the vector-geoipupdate Secret.
              envFrom:
                - secretRef:
                    name: vector-geoipupdate
              volumeMounts:
                - name: vector-geoipupdate
                  mountPath: /usr/share/GeoIP
          volumes:
            - name: vector-geoipupdate
              persistentVolumeClaim:
                claimName: vector-geoipupdate-config

View File

@@ -0,0 +1,7 @@
---
# Kustomize manifest list for this directory: the CronJob, its volume
# manifest, and the SOPS-encrypted Secret.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - cron-job.yaml
  - volume.yaml
  - secret.sops.yaml

View File

@@ -0,0 +1,30 @@
# yamllint disable
# SOPS-managed Secret: the stringData values are ciphertext (ENC[...]) and the
# `sops` section carries the age recipient, a MAC and the encryption settings
# (`encrypted_regex` restricts encryption to data/stringData). Change values
# through the sops tool rather than by hand so the metadata stays consistent.
apiVersion: v1
kind: Secret
metadata:
name: vector-geoipupdate
namespace: monitoring
type: Opaque
stringData:
GEOIPUPDATE_ACCOUNT_ID: ENC[AES256_GCM,data:vBU+Iwuv,iv:cK005QUa8iKK+2M2OsKvCXJAkUyhUgReDw8hBBhcNLQ=,tag:k3vrqqyMkp8cnGWfeLbu0A==,type:str]
GEOIPUPDATE_LICENSE_KEY: ENC[AES256_GCM,data:XuCipRddaBHI2umUb1+SPA==,iv:gwbTaK5KCmTF+8mQNjkmLkTdSqz2uFAINo6rJ6F2R4U=,tag:cvevnXWf7xFcdMkwKRF4pQ==,type:str]
sops:
kms: []
gcp_kms: []
azure_kv: []
hc_vault: []
age:
- recipient: age1hhurqwmfvl9m3vh3hk8urulfzcdsrep2ax2neazqt435yhpamu3qj20asg
enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBlUHFQcWJaRTlGT2RLK3R3
YlJDVTMvRThTR1dXdGN5a1RQd2FxTy84SFdNCnFEWEVpU1o3Y2hISkJrNzBMZFYr
emZyeW9ySnZEYnlvMWFQeXpYeHMzeUkKLS0tIEtPTm9JM0o0ZVBKN05oa0JSbHBL
b2pLSXUyS2lCbmZYYmk0WnVpRU9xRUUKAMUoEprOuR/xgtHZDBmDNTrLEyD9vbeb
dvQZ/7KrgRKVq4Eq3wI254CvajnNs3mACp175DhTsLyX0hBO77FZ2A==
-----END AGE ENCRYPTED FILE-----
lastmodified: "2022-09-15T03:04:22Z"
mac: ENC[AES256_GCM,data:rDDMbtb8xSULRF6RUSNl+Pw4KIiCXJZ5kQ70U5Ap3oB3Ci6miw0EXAVCZC699iJ2YS8cqhUe6VwRCdVn+1bYxz4Dbjm1/dAvkXNbBruhe6KhwSpF/sx6viVH2238ReG+jHr7l/AXVDYyWCxH7hzHWn2f2hTqncpuvr1uyyhU0kg=,iv:JN6F4XDLypDyw9UX9WnhJu+UZzR/A9IW+8NtP4QXnWU=,tag:s+F3V/DNNlvTjFWgjxefoA==,type:str]
pgp: []
encrypted_regex: ^(data|stringData)$
version: 3.7.3

View File

@@ -0,0 +1,15 @@
---
# Shared 1Gi claim on the rook-ceph-filesystem storage class. ReadWriteMany
# lets more than one pod mount it at the same time.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: vector-geoipupdate-config
  namespace: monitoring
  labels:
    # Quoted so the label value stays a string rather than a boolean.
    excluded_from_alerts: "true"
spec:
  storageClassName: rook-ceph-filesystem
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 1Gi

View File

@@ -0,0 +1,7 @@
---
# Top-level Kustomize manifest list: pulls in the geoipupdate, agent and
# aggregator sub-directories, each of which is expected to provide its own
# kustomization.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - geoipupdate
  - agent
  - aggregator