diff --git a/kubernetes/apps/monitoring/kube-prometheus-stack/app/helmrelease.yaml b/kubernetes/apps/monitoring/kube-prometheus-stack/app/helmrelease.yaml index a1c955793..93d3e00a4 100644 --- a/kubernetes/apps/monitoring/kube-prometheus-stack/app/helmrelease.yaml +++ b/kubernetes/apps/monitoring/kube-prometheus-stack/app/helmrelease.yaml @@ -29,6 +29,11 @@ spec: retries: 3 uninstall: keepHistory: false + dependsOn: + - name: openebs + namespace: openebs-system + - name: thanos + namespace: monitoring values: ### ### Component values @@ -129,6 +134,9 @@ spec: - hosts: - "prometheus.${SECRET_CLUSTER_DOMAIN}" prometheusSpec: + podMetadata: + annotations: + secret.reloader.stakater.com/reload: &secret thanos-objstore-config replicas: 2 replicaExternalLabelName: replica scrapeInterval: 1m # Must match interval in Grafana Helm chart @@ -138,6 +146,7 @@ spec: probeSelectorNilUsesHelmValues: false scrapeConfigSelectorNilUsesHelmValues: false retention: 2d + retentionSize: 15GB enableAdminAPI: true walCompression: true storageSpec: @@ -148,30 +157,16 @@ spec: requests: storage: 20Gi thanos: - image: quay.io/thanos/thanos:v0.35.0@sha256:fa1d28718df00b68d6ad85d7c7d4703bd9f59e5cd8be8da6540ea398cf701a1f - # renovate: datasource=docker depName=quay.io/thanos/thanos - version: "v0.35.0" + image: quay.io/thanos/thanos:${THANOS_VERSION} + version: "${THANOS_VERSION#v}" objectStorageConfig: existingSecret: - name: thanos-objstore-secret - key: objstore.yml + name: *secret + key: config thanosService: enabled: true thanosServiceMonitor: enabled: true - thanosIngress: - enabled: true - pathType: Prefix - ingressClassName: "nginx" - annotations: - nginx.ingress.kubernetes.io/ssl-redirect: "true" - nginx.ingress.kubernetes.io/backend-protocol: "GRPC" - hajimari.io/enable: "false" - hosts: - - &thanosHost "thanos-sidecar.${SECRET_DOMAIN}" - tls: - - hosts: - - *thanosHost alertmanager: config: global: diff --git a/kubernetes/apps/monitoring/kube-prometheus-stack/ks.yaml b/kubernetes/apps/monitoring/kube-prometheus-stack/ks.yaml index f8f366b7f..f090b834f 100644 --- a/kubernetes/apps/monitoring/kube-prometheus-stack/ks.yaml +++ b/kubernetes/apps/monitoring/kube-prometheus-stack/ks.yaml @@ -25,3 +25,5 @@ spec: postBuild: substitute: APP: *app + # renovate: datasource=docker depName=quay.io/thanos/thanos + THANOS_VERSION: v0.35.0 diff --git a/kubernetes/apps/monitoring/thanos/app/externalsecret.yaml b/kubernetes/apps/monitoring/thanos/app/externalsecret.yaml deleted file mode 100644 index 21dff53de..000000000 --- a/kubernetes/apps/monitoring/thanos/app/externalsecret.yaml +++ /dev/null @@ -1,21 +0,0 @@ ---- -# yaml-language-server: $schema=https://kubernetes-schemas.devbu.io/external-secrets.io/externalsecret_v1beta1.json -apiVersion: external-secrets.io/v1beta1 -kind: ExternalSecret -metadata: - name: thanos - namespace: flux-system -spec: - secretStoreRef: - kind: ClusterSecretStore - name: onepassword-connect - target: - name: thanos-secret - template: - engineVersion: v2 - data: - S3_ACCESS_KEY: "{{ .THANOS_S3_ACCESS_KEY }}" - S3_SECRET_KEY: "{{ .THANOS_S3_SECRET_KEY }}" - dataFrom: - - extract: - key: thanos diff --git a/kubernetes/apps/monitoring/thanos/app/helmrelease.yaml b/kubernetes/apps/monitoring/thanos/app/helmrelease.yaml index 218bd7453..7f5541a5b 100644 --- a/kubernetes/apps/monitoring/thanos/app/helmrelease.yaml +++ b/kubernetes/apps/monitoring/thanos/app/helmrelease.yaml @@ -7,122 +7,113 @@ metadata: namespace: monitoring spec: interval: 30m + timeout: 15m chart: spec: chart: thanos - version: 15.4.4 + version: 1.17.0 sourceRef: kind: HelmRepository - name: bitnami + name: stevehipwell namespace: flux-system - maxHistory: 2 install: - createNamespace: true remediation: retries: 3 upgrade: cleanupOnFail: true remediation: + strategy: rollback retries: 3 - uninstall: - keepHistory: false + dependsOn: + - name: openebs + namespace: openebs-system + - name: rook-ceph-cluster + namespace: rook-ceph + valuesFrom: + - targetPath: objstoreConfig.value.config.bucket + kind: ConfigMap + name: thanos-bucket + valuesKey: BUCKET_NAME + - targetPath: objstoreConfig.value.config.endpoint + kind: ConfigMap + name: thanos-bucket + valuesKey: BUCKET_HOST + - targetPath: objstoreConfig.value.config.region + kind: ConfigMap + name: thanos-bucket + valuesKey: BUCKET_REGION + - targetPath: objstoreConfig.value.config.access_key + kind: Secret + name: thanos-bucket + valuesKey: AWS_ACCESS_KEY_ID + - targetPath: objstoreConfig.value.config.secret_key + kind: Secret + name: thanos-bucket + valuesKey: AWS_SECRET_ACCESS_KEY values: - image: - registry: quay.io - repository: thanos/thanos - tag: v0.35.0@sha256:fa1d28718df00b68d6ad85d7c7d4703bd9f59e5cd8be8da6540ea398cf701a1f objstoreConfig: - type: s3 - config: - bucket: thanos - endpoint: "s3.${SECRET_INTERNAL_DOMAIN}" - region: "" - # insecure: true - query: + value: + type: s3 + config: + insecure: true + additionalEndpoints: + - dnssrv+_grpc._tcp.kube-prometheus-stack-thanos-discovery.monitoring.svc.cluster.local + additionalReplicaLabels: ["__replica__"] + serviceMonitor: enabled: true - replicaCount: 2 - podAntiAffinityPreset: hard - replicaLabels: - - replica - dnsDiscovery: - sidecarsService: kube-prometheus-stack-thanos-discovery - sidecarsNamespace: monitoring - stores: - - "dnssrv+_grpc._tcp.kube-prometheus-stack-thanos-discovery" - - "thanos-store.${SECRET_DOMAIN}:443" - ingress: + compact: + enabled: true + extraArgs: + - --compact.concurrency=4 + - --delete-delay=30m + - --retention.resolution-raw=14d + - --retention.resolution-5m=30d + - --retention.resolution-1h=60d + persistence: &persistence enabled: true - hostname: &host "thanos-query.${SECRET_CLUSTER_DOMAIN}" - annotations: - nginx.ingress.kubernetes.io/auth-method: GET - nginx.ingress.kubernetes.io/auth-url: http://authelia.default.svc.cluster.local.:8888/api/verify - nginx.ingress.kubernetes.io/auth-signin: https://auth.${SECRET_CLUSTER_DOMAIN}?rm=$request_method - nginx.ingress.kubernetes.io/auth-response-headers: Remote-User,Remote-Name,Remote-Groups,Remote-Email - nginx.ingress.kubernetes.io/auth-snippet: proxy_set_header X-Forwarded-Method $request_method; - hajimari.io/enable: "false" - ingressClassName: "nginx" - tls: true - extraTls: - - hosts: - - *host - resources: - requests: - cpu: 15m - memory: 64M - limits: - memory: 99M + storageClass: openebs-hostpath + size: 10Gi + query: + replicas: 3 + extraArgs: ["--alert.query-url=https://thanos.${SECRET_CLUSTER_DOMAIN}"] + additionalStores: ["thanos.turbo.ac:10901"] queryFrontend: enabled: true - bucketweb: - enabled: true - refresh: "10m" - compactor: - enabled: true - extraFlags: - - "--compact.concurrency" - - "4" - retentionResolutionRaw: 14d - retentionResolution5m: 14d - retentionResolution1h: 30d + replicas: 3 + extraEnv: &extraEnv + - name: THANOS_CACHE_CONFIG + valueFrom: + configMapKeyRef: + name: &configMap thanos-cache-configmap + key: cache.yaml + extraArgs: ["--query-range.response-cache-config=$(THANOS_CACHE_CONFIG)"] ingress: enabled: true - hostname: &host "thanos-compactor.${SECRET_CLUSTER_DOMAIN}" - ingressClassName: "nginx" - annotations: - hajimari.io/enable: "false" - tls: true - extraTls: - - hosts: - - *host - persistence: - enabled: true - storageClass: "rook-ceph-block" - size: 15Gi - resourcesPreset: small - storegateway: + ingressClassName: internal + hosts: + - thanos.devbu.io + podAnnotations: &podAnnotations + configmap.reloader.stakater.com/reload: *configMap + rule: enabled: true - resources: - requests: - cpu: 23m - memory: 204M - limits: - memory: 226M - persistence: - enabled: true - storageClass: "rook-ceph-block" - size: 4Gi - ruler: - enabled: false - metrics: - enabled: true - serviceMonitor: - enabled: true - valuesFrom: - - kind: Secret - name: thanos-secret - valuesKey: S3_ACCESS_KEY - targetPath: objstoreConfig.config.access_key - - kind: Secret - name: thanos-secret - valuesKey: S3_SECRET_KEY - targetPath: objstoreConfig.config.secret_key + replicas: 3 + extraArgs: ["--web.prefix-header=X-Forwarded-Prefix"] + alertmanagersConfig: + value: |- + alertmanagers: + - api_version: v2 + static_configs: + - dnssrv+_http-web._tcp.alertmanager-operated.monitoring.svc.cluster.local + rules: + value: |- + groups: + - name: PrometheusWatcher + rules: + - alert: PrometheusDown + annotations: + summary: A Prometheus has disappeared from Prometheus target discovery + expr: absent(up{job="kube-prometheus-stack-prometheus"}) + for: 5m + labels: + severity: critical + persistence: *persistence diff --git a/kubernetes/apps/monitoring/thanos/app/kustomization.yaml b/kubernetes/apps/monitoring/thanos/app/kustomization.yaml index 5358f1cd5..f27c08755 100644 --- a/kubernetes/apps/monitoring/thanos/app/kustomization.yaml +++ b/kubernetes/apps/monitoring/thanos/app/kustomization.yaml @@ -4,5 +4,12 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization namespace: monitoring resources: - - ./externalsecret.yaml + - ./objectbucketclaim.yaml - ./helmrelease.yaml + - ./pushsecret.yaml +configMapGenerator: + - name: thanos-cache-configmap + files: + - cache.yaml=./resources/cache.yaml +generatorOptions: + disableNameSuffixHash: true diff --git a/kubernetes/apps/monitoring/thanos/app/objectbucketclaim.yaml b/kubernetes/apps/monitoring/thanos/app/objectbucketclaim.yaml new file mode 100644 index 000000000..cc9b090d9 --- /dev/null +++ b/kubernetes/apps/monitoring/thanos/app/objectbucketclaim.yaml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/objectbucket.io/objectbucketclaim_v1alpha1.json +apiVersion: objectbucket.io/v1alpha1 +kind: ObjectBucketClaim +metadata: + name: thanos-bucket +spec: + bucketName: thanos + storageClassName: rook-ceph-bucket diff --git a/kubernetes/apps/monitoring/thanos/app/pushsecret.yaml b/kubernetes/apps/monitoring/thanos/app/pushsecret.yaml new file mode 100644 index 000000000..7761503ed --- /dev/null +++ b/kubernetes/apps/monitoring/thanos/app/pushsecret.yaml @@ -0,0 +1,25 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/external-secrets.io/pushsecret_v1alpha1.json +apiVersion: external-secrets.io/v1alpha1 +kind: PushSecret +metadata: + name: thanos +spec: + refreshInterval: 1h + secretStoreRefs: + - name: onepassword-connect + kind: ClusterSecretStore + selector: + secret: + name: thanos-bucket + data: + - match: + secretKey: &key AWS_ACCESS_KEY_ID + remoteRef: + remoteKey: thanos + property: *key + - match: + secretKey: &key AWS_SECRET_ACCESS_KEY + remoteRef: + remoteKey: thanos + property: *key diff --git a/kubernetes/apps/monitoring/thanos/app/resources/cache.yaml b/kubernetes/apps/monitoring/thanos/app/resources/cache.yaml new file mode 100644 index 000000000..df31f345e --- /dev/null +++ b/kubernetes/apps/monitoring/thanos/app/resources/cache.yaml @@ -0,0 +1,5 @@ +--- +type: REDIS +config: + addr: dragonfly.database.svc.cluster.local:6379 + db: 1 diff --git a/kubernetes/flux/repositories/helm/bitnami.yaml b/kubernetes/flux/repositories/helm/bitnami.yaml deleted file mode 100644 index 2010fa83b..000000000 --- a/kubernetes/flux/repositories/helm/bitnami.yaml +++ /dev/null @@ -1,11 +0,0 @@ ---- -# yaml-language-server: $schema=https://kubernetes-schemas.devbu.io/helmrepository_v1beta2.json -apiVersion: source.toolkit.fluxcd.io/v1beta2 -kind: HelmRepository -metadata: - name: bitnami - namespace: flux-system -spec: - interval: 2h - url: https://charts.bitnami.com/bitnami - timeout: 3m diff --git a/kubernetes/flux/repositories/helm/kustomization.yaml b/kubernetes/flux/repositories/helm/kustomization.yaml index 112e95e59..b0c62e96c 100644 --- a/kubernetes/flux/repositories/helm/kustomization.yaml +++ b/kubernetes/flux/repositories/helm/kustomization.yaml @@ -6,7 +6,6 @@ resources: - ./actions-runner-controller.yaml - ./aqua.yaml - ./backube.yaml - - ./bitnami.yaml - ./bjw-s.yaml - ./cert-manager-webhook-ovh.yaml - ./cilium.yaml @@ -33,6 +32,7 @@ resources: - ./prometheus-community.yaml - ./rook-ceph.yaml - ./stakater.yaml + - ./stevehipwell.yaml - ./vector.yaml - ./weaveworks.yaml - ./xenitab.yaml diff --git a/kubernetes/flux/repositories/helm/stevehipwell.yaml b/kubernetes/flux/repositories/helm/stevehipwell.yaml new file mode 100644 index 000000000..c27dc6958 --- /dev/null +++ b/kubernetes/flux/repositories/helm/stevehipwell.yaml @@ -0,0 +1,11 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/source.toolkit.fluxcd.io/helmrepository_v1.json +apiVersion: source.toolkit.fluxcd.io/v1beta2 +kind: HelmRepository +metadata: + name: stevehipwell + namespace: flux-system +spec: + type: oci + interval: 5m + url: oci://ghcr.io/stevehipwell/helm-charts