diff --git a/.archive/kubernetes/cloudnative-pg/cluster/backups/external-backup.yaml b/.archive/kubernetes/cloudnative-pg/cluster/backups/external-backup.yaml deleted file mode 100644 index 45ef89b7e..000000000 --- a/.archive/kubernetes/cloudnative-pg/cluster/backups/external-backup.yaml +++ /dev/null @@ -1,53 +0,0 @@ ---- -apiVersion: batch/v1 -kind: CronJob -metadata: - name: &app cloudnative-pg-external-backup - namespace: default -spec: - schedule: "@daily" - jobTemplate: - spec: - ttlSecondsAfterFinished: 86400 - template: - spec: - automountServiceAccountToken: false - restartPolicy: OnFailure - containers: - - name: *app - image: prodrigestivill/postgres-backup-local:15-alpine@sha256:30473b9a3f2e884b7a17d6da1287e962933ab9d69ea5ba604c2e2afb89d16bb9 - env: - - name: POSTGRES_HOST - value: ${POSTGRES_HOST} - - name: POSTGRES_DB - value: "atuin,authelia,bazarr,freshrss,gatus,ghostfolio,lidarr_main,immich,invidious,joplin,kresus,linkding,lldap,lychee,outline,paperless,prowlarr_main,pushover-notifier,radarr_main,sharry,tandoor,vaultwarden,vikunja,wallabag" - - name: POSTGRES_USER - valueFrom: - secretKeyRef: - name: cloudnative-pg-secret - key: username - - name: POSTGRES_PASSWORD - valueFrom: - secretKeyRef: - name: cloudnative-pg-secret - key: password - - name: POSTGRES_EXTRA_OPTS - value: "-Z9 --schema=public --blobs" - - name: BACKUP_KEEP_DAYS - value: "7" - - name: BACKUP_KEEP_WEEKS - value: "4" - - name: BACKUP_KEEP_MONTHS - value: "3" - - name: HEALTHCHECK_PORT - value: "8080" - command: - - "/backup.sh" - volumeMounts: - - name: backups - mountPath: /backups - volumes: - - name: backups - nfs: - server: "${LOCAL_LAN_TRUENAS}" - path: /mnt/storage/backups/postgresql diff --git a/.archive/kubernetes/cloudnative-pg/cluster/backups/kustomization.yaml b/.archive/kubernetes/cloudnative-pg/cluster/backups/kustomization.yaml deleted file mode 100644 index 01314da39..000000000 --- a/.archive/kubernetes/cloudnative-pg/cluster/backups/kustomization.yaml +++ 
/dev/null @@ -1,7 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/SchemaStore/schemastore/master/src/schemas/json/kustomization.json -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -namespace: default -resources: - - ./external-backup.yaml diff --git a/.archive/kubernetes/cloudnative-pg/app/externalsecret.yaml b/kubernetes/apps/database/cloudnative-pg/app/externalsecret.yaml similarity index 97% rename from .archive/kubernetes/cloudnative-pg/app/externalsecret.yaml rename to kubernetes/apps/database/cloudnative-pg/app/externalsecret.yaml index 18bdb84e6..2b05de566 100644 --- a/.archive/kubernetes/cloudnative-pg/app/externalsecret.yaml +++ b/kubernetes/apps/database/cloudnative-pg/app/externalsecret.yaml @@ -4,7 +4,6 @@ apiVersion: external-secrets.io/v1beta1 kind: ExternalSecret metadata: name: cloudnative-pg - namespace: default spec: secretStoreRef: kind: ClusterSecretStore diff --git a/.archive/kubernetes/cloudnative-pg/app/helmrelease.yaml b/kubernetes/apps/database/cloudnative-pg/app/helmrelease.yaml similarity index 80% rename from .archive/kubernetes/cloudnative-pg/app/helmrelease.yaml rename to kubernetes/apps/database/cloudnative-pg/app/helmrelease.yaml index 18adfa106..bdec28bf5 100644 --- a/.archive/kubernetes/cloudnative-pg/app/helmrelease.yaml +++ b/kubernetes/apps/database/cloudnative-pg/app/helmrelease.yaml @@ -4,7 +4,6 @@ apiVersion: helm.toolkit.fluxcd.io/v2beta2 kind: HelmRelease metadata: name: cloudnative-pg - namespace: default spec: interval: 30m chart: @@ -15,19 +14,21 @@ spec: kind: HelmRepository name: cloudnative-pg namespace: flux-system - maxHistory: 2 install: - createNamespace: true - crds: CreateReplace remediation: retries: 3 upgrade: cleanupOnFail: true - crds: CreateReplace remediation: retries: 3 uninstall: keepHistory: false + dependsOn: + - name: local-path-provisioner + namespace: kube-system values: crds: create: true + config: + data: + INHERITED_ANNOTATIONS: kyverno.io/ignore 
diff --git a/.archive/kubernetes/cloudnative-pg/app/kustomization.yaml b/kubernetes/apps/database/cloudnative-pg/app/kustomization.yaml similarity index 100% rename from .archive/kubernetes/cloudnative-pg/app/kustomization.yaml rename to kubernetes/apps/database/cloudnative-pg/app/kustomization.yaml diff --git a/.archive/kubernetes/cloudnative-pg/cluster/cluster.yaml b/kubernetes/apps/database/cloudnative-pg/cluster/cluster16.yaml similarity index 54% rename from .archive/kubernetes/cloudnative-pg/cluster/cluster.yaml rename to kubernetes/apps/database/cloudnative-pg/cluster/cluster16.yaml index 8040bf104..2643090e7 100644 --- a/.archive/kubernetes/cloudnative-pg/cluster/cluster.yaml +++ b/kubernetes/apps/database/cloudnative-pg/cluster/cluster16.yaml @@ -2,29 +2,46 @@ apiVersion: postgresql.cnpg.io/v1 kind: Cluster metadata: - name: postgres - namespace: default + name: postgres16 spec: instances: 3 - imageName: ghcr.io/bo0tzz/cnpgvecto.rs:15.5@sha256:a02838b9531af52dea5f8978bc0961640eea7ff2ad1b090978c561e6c52f1394 + imageName: ghcr.io/bo0tzz/cnpgvecto.rs:16.1 primaryUpdateStrategy: unsupervised - enableSuperuserAccess: true storage: size: 50Gi - storageClass: rook-ceph-block + storageClass: local-hostpath superuserSecret: name: cloudnative-pg-secret + enableSuperuserAccess: true + resources: + requests: + cpu: 500m + limits: + memory: 4Gi + postgresql: + parameters: + max_connections: "600" + max_slot_wal_keep_size: 10GB + shared_buffers: 512MB monitoring: enablePodMonitor: true + # Ref: https://github.com/cloudnative-pg/cloudnative-pg/issues/2501 + podMonitorMetricRelabelings: + - { sourceLabels: ["cluster"], targetLabel: cnpg_cluster, action: replace } + - { regex: cluster, action: labeldrop } backup: - retentionPolicy: 7d + retentionPolicy: 30d barmanObjectStore: + data: + compression: bzip2 wal: compression: bzip2 maxParallel: 8 destinationPath: s3://postgresql/ - endpointURL: https://.${SECRET_DOMAIN}:9000 - serverName: postgres-v8 + endpointURL: 
https://minio.${SECRET_DOMAIN}:9000 + # Note: serverName version needs to be incremented + # when recovering from an existing cnpg cluster + serverName: postgres16-v1 s3Credentials: accessKeyId: name: cloudnative-pg-secret @@ -32,6 +49,8 @@ spec: secretAccessKey: name: cloudnative-pg-secret key: aws-secret-access-key + # # Note: previousCluster needs to be set to the name of the previous + # # cluster when recovering from an existing cnpg cluster # bootstrap: # recovery: # source: postgres-v6 diff --git a/.archive/kubernetes/cloudnative-pg/cluster/kustomization.yaml b/kubernetes/apps/database/cloudnative-pg/cluster/kustomization.yaml similarity index 84% rename from .archive/kubernetes/cloudnative-pg/cluster/kustomization.yaml rename to kubernetes/apps/database/cloudnative-pg/cluster/kustomization.yaml index b07b473d1..8d5193f11 100644 --- a/.archive/kubernetes/cloudnative-pg/cluster/kustomization.yaml +++ b/kubernetes/apps/database/cloudnative-pg/cluster/kustomization.yaml @@ -4,6 +4,6 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization namespace: default resources: - - backups - - ./cluster.yaml + - ./cluster16.yaml + - ./prometheusrule.yaml - ./scheduledbackup.yaml diff --git a/kubernetes/apps/database/cloudnative-pg/cluster/prometheusrule.yaml b/kubernetes/apps/database/cloudnative-pg/cluster/prometheusrule.yaml new file mode 100644 index 000000000..9c1d6a8db --- /dev/null +++ b/kubernetes/apps/database/cloudnative-pg/cluster/prometheusrule.yaml @@ -0,0 +1,67 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/prometheusrule_v1.json +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: cloudnative-pg-rules + labels: + prometheus: k8s + role: alert-rules +spec: + groups: + - name: cloudnative-pg.rules + rules: + - alert: LongRunningTransaction + annotations: + description: Pod {{ $labels.pod }} is taking more than 5 minutes (300 seconds) for a query. 
+ summary: A query is taking longer than 5 minutes. + expr: |- + cnpg_backends_max_tx_duration_seconds > 300 + for: 1m + labels: + severity: warning + - alert: BackendsWaiting + annotations: + description: Pod {{ $labels.pod }} has been waiting for longer than 5 minutes + summary: If a backend is waiting for longer than 5 minutes + expr: |- + cnpg_backends_waiting_total > 300 + for: 1m + labels: + severity: warning + - alert: PGDatabase + annotations: + description: Over 150,000,000 transactions from frozen xid on pod {{ $labels.pod }} + summary: Number of transactions from the frozen XID to the current one + expr: |- + cnpg_pg_database_xid_age > 150000000 + for: 1m + labels: + severity: warning + - alert: PGReplication + annotations: + description: Standby is lagging behind by over 300 seconds (5 minutes) + summary: The standby is lagging behind the primary + expr: |- + cnpg_pg_replication_lag > 300 + for: 1m + labels: + severity: warning + - alert: LastFailedArchiveTime + annotations: + description: Archiving failed for {{ $labels.pod }} + summary: Checks the last time archiving failed. Will be < 0 when it has not failed. 
+ expr: |- + (cnpg_pg_stat_archiver_last_failed_time - cnpg_pg_stat_archiver_last_archived_time) > 1 + for: 1m + labels: + severity: warning + - alert: DatabaseDeadlockConflicts + annotations: + description: There are over 10 deadlock conflicts in {{ $labels.pod }} + summary: Checks the number of database conflicts + expr: |- + cnpg_pg_stat_database_deadlocks > 10 + for: 1m + labels: + severity: warning diff --git a/.archive/kubernetes/cloudnative-pg/cluster/scheduledbackup.yaml b/kubernetes/apps/database/cloudnative-pg/cluster/scheduledbackup.yaml similarity index 81% rename from .archive/kubernetes/cloudnative-pg/cluster/scheduledbackup.yaml rename to kubernetes/apps/database/cloudnative-pg/cluster/scheduledbackup.yaml index 835fbfb68..369c15d45 100644 --- a/.archive/kubernetes/cloudnative-pg/cluster/scheduledbackup.yaml +++ b/kubernetes/apps/database/cloudnative-pg/cluster/scheduledbackup.yaml @@ -3,10 +3,9 @@ apiVersion: postgresql.cnpg.io/v1 kind: ScheduledBackup metadata: name: postgres - namespace: default spec: schedule: "@daily" immediate: true backupOwnerReference: self cluster: - name: postgres + name: postgres16 diff --git a/.archive/kubernetes/cloudnative-pg/ks.yaml b/kubernetes/apps/database/cloudnative-pg/ks.yaml similarity index 64% rename from .archive/kubernetes/cloudnative-pg/ks.yaml rename to kubernetes/apps/database/cloudnative-pg/ks.yaml index 18722795b..30d2734c8 100644 --- a/.archive/kubernetes/cloudnative-pg/ks.yaml +++ b/kubernetes/apps/database/cloudnative-pg/ks.yaml @@ -3,30 +3,16 @@ apiVersion: kustomize.toolkit.fluxcd.io/v1 kind: Kustomization metadata: - name: cluster-apps-cloudnative-pg-app + name: &app cloudnative-pg namespace: flux-system spec: + targetNamespace: database + commonMetadata: + labels: + app.kubernetes.io/name: *app dependsOn: - name: external-secrets-stores - path: ./kubernetes/apps/default/cloudnative-pg/app - prune: true - sourceRef: - kind: GitRepository - name: home-ops-kubernetes - interval: 30m - retryInterval: 
1m - timeout: 3m ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/kustomization-kustomize-v1.json -apiVersion: kustomize.toolkit.fluxcd.io/v1 -kind: Kustomization -metadata: - name: cluster-apps-cloudnative-pg-cluster - namespace: flux-system -spec: - dependsOn: - - name: cluster-apps-cloudnative-pg-app - path: ./kubernetes/apps/default/cloudnative-pg/cluster + path: ./kubernetes/apps/database/cloudnative-pg/app prune: true sourceRef: kind: GitRepository @@ -34,4 +20,27 @@ spec: wait: true interval: 30m retryInterval: 1m - timeout: 3m + timeout: 5m +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/kustomization-kustomize-v1.json +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: cloudnative-pg-cluster + namespace: flux-system +spec: + targetNamespace: database + commonMetadata: + labels: + app.kubernetes.io/name: cloudnative-pg + dependsOn: + - name: cloudnative-pg + path: ./kubernetes/apps/database/cloudnative-pg/cluster + prune: true + sourceRef: + kind: GitRepository + name: home-ops-kubernetes + wait: true + interval: 30m + retryInterval: 1m + timeout: 5m diff --git a/.archive/kubernetes/cloudnative-pg/readme.md b/kubernetes/apps/database/cloudnative-pg/readme.md similarity index 100% rename from .archive/kubernetes/cloudnative-pg/readme.md rename to kubernetes/apps/database/cloudnative-pg/readme.md diff --git a/kubernetes/apps/database/kustomization.yaml b/kubernetes/apps/database/kustomization.yaml new file mode 100644 index 000000000..273b509f9 --- /dev/null +++ b/kubernetes/apps/database/kustomization.yaml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://json.schemastore.org/kustomization +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + # Pre Flux-Kustomizations + - ./namespace.yaml + # Flux-Kustomizations + - ./cloudnative-pg/ks.yaml diff --git 
a/kubernetes/apps/database/namespace.yaml b/kubernetes/apps/database/namespace.yaml new file mode 100644 index 000000000..5cad28606 --- /dev/null +++ b/kubernetes/apps/database/namespace.yaml @@ -0,0 +1,7 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: database + labels: + kustomize.toolkit.fluxcd.io/prune: disabled diff --git a/kubernetes/apps/kustomization.yaml b/kubernetes/apps/kustomization.yaml index 0cf7fdcce..213527cb1 100644 --- a/kubernetes/apps/kustomization.yaml +++ b/kubernetes/apps/kustomization.yaml @@ -5,6 +5,7 @@ kind: Kustomization resources: - ./actions-runner-system - ./cert-manager + - ./database - ./default - ./flux-system - ./kube-system