Files
auricom-home-cluster/.taskfiles/volsync/Taskfile.yaml
2025-01-12 02:00:10 +01:00

194 lines
7.7 KiB
YAML

---
# yaml-language-server: $schema=https://taskfile.dev/schema.json
version: "3"
# This taskfile manages certain VolSync tasks for a given application. It has the following limitations:
# 1. The Flux Kustomization, HelmRelease, PVC, and ReplicationSource all have the same name (e.g. plex)
# 2. The ReplicationSource and ReplicationDestination both use a Restic repository
# 3. Applications are deployed as either a Kubernetes Deployment or StatefulSet
# 4. Each application has only one PVC that is being replicated
x-env: &env
  # Shared variable map, anchored as `env`. Tasks reuse it through `*env`,
  # both as their `env:` and as the `vars:` handed to internal sub-tasks.
  # Each entry is a template that resolves to the task variable of the same name.
  app: "{{.app}}"
  controller: "{{.controller}}"
  claim: "{{.claim}}"
  puid: "{{.puid}}"
  pgid: "{{.pgid}}"
  ns: "{{.ns}}"
  previous: "{{.previous}}"
  ts: "{{.ts}}"
vars:
  # Locations of the helper scripts and manifest templates used by the tasks.
  scriptsDir: "{{.ROOT_DIR}}/.taskfiles/volsync/scripts"
  templatesDir: "{{.ROOT_DIR}}/.taskfiles/volsync/templates"
  # Current time rendered with the Go layout "150405" (HHMMSS); the tasks
  # append it to Job names such as list-<app>-<ts>.
  ts: '{{now | date "150405"}}'
tasks:
list:
  desc: List snapshots for an application
  summary: |
    Args:
      ns: Namespace the PVC is in (default: default)
      app: Application to list snapshots for (required)
  silent: true
  requires:
    vars:
      - app
  vars:
    # Fall back to the "default" namespace when ns is not supplied.
    ns: '{{.ns | default "default"}}'
  env: *env
  # Fail early when the helper script or the list template is missing.
  preconditions:
    - msg: Wait script not found
      sh: test -f {{.scriptsDir}}/wait.sh
    - msg: List template not found
      sh: test -f {{.templatesDir}}/list.tmpl.yaml
  cmds:
    # Substitute environment variables into the list template and apply the result.
    - envsubst < <(cat {{.templatesDir}}/list.tmpl.yaml) | kubectl apply -f -
    # wait.sh is given the Job name and the namespace; presumably it blocks
    # until the Job's pod is up (confirm against the script).
    - bash {{.scriptsDir}}/wait.sh list-{{.app}}-{{.ts}} {{.ns}}
    # Allow the Job up to one minute to complete.
    - kubectl -n {{.ns}} wait job/list-{{.app}}-{{.ts}} --for condition=complete --timeout=1m
    # Print the output of the "list" container, then remove the Job.
    - kubectl -n {{.ns}} logs job/list-{{.app}}-{{.ts}} --container list
    - kubectl -n {{.ns}} delete job list-{{.app}}-{{.ts}}
unlock:
  desc: Unlock a Restic repository for an application
  summary: |
    Args:
      ns: Namespace the PVC is in (default: default)
      app: Application to unlock (required)
  cmds:
    # Substitute environment variables into the unlock template and apply the result.
    - envsubst < <(cat {{.templatesDir}}/unlock.tmpl.yaml) | kubectl apply -f -
    # wait.sh is given the Job name and the namespace; presumably it blocks
    # until the Job's pod is up (confirm against the script).
    - bash {{.scriptsDir}}/wait.sh unlock-{{.app}}-{{.ts}} {{.ns}}
    - kubectl -n {{.ns}} wait job/unlock-{{.app}}-{{.ts}} --for condition=complete --timeout=1m
    # Read the logs from the same Job that is waited on above and deleted below.
    # The Job name must be unlock-<app>-<ts> in every command of this task.
    - kubectl -n {{.ns}} logs job/unlock-{{.app}}-{{.ts}} --container unlock-minio
    - kubectl -n {{.ns}} logs job/unlock-{{.app}}-{{.ts}} --container unlock-r2
    - kubectl -n {{.ns}} delete job unlock-{{.app}}-{{.ts}}
  env: *env
  requires:
    vars: ["app"]
  vars:
    # Fall back to the "default" namespace when ns is not supplied.
    ns: '{{.ns | default "default"}}'
  # Fail early when the helper script or the unlock template is missing.
  preconditions:
    - { msg: "Wait script not found", sh: "test -f {{.scriptsDir}}/wait.sh" }
    - { msg: "Unlock template not found", sh: "test -f {{.templatesDir}}/unlock.tmpl.yaml" }
  silent: true
# To run backup jobs in parallel for all replicationsources:
# - kubectl get replicationsources --all-namespaces --no-headers | awk '{print $2, $1}' | xargs --max-procs=4 -l bash -c 'task volsync:snapshot app=$0 ns=$1'
snapshot:
  desc: Snapshot a PVC for an application
  summary: |
    Args:
      ns: Namespace the PVC is in (default: default)
      app: Application to snapshot (required)
  cmds:
    # TODO: Only suspend when an arg is passed to the task
    # Ref: https://github.com/go-task/task/issues/608
    # - task: .suspend
    #   vars: *env
    # Set spec.trigger.manual on the ReplicationSource to the current timestamp;
    # VolSync starts a sync when the manual trigger value changes.
    - kubectl -n {{.ns}} patch replicationsources {{.app}} --type merge -p '{"spec":{"trigger":{"manual":"{{.ts}}"}}}'
    # wait.sh is given the Job name and the namespace; presumably it blocks
    # until the Job's pod is up (confirm against the script).
    - bash {{.scriptsDir}}/wait.sh volsync-src-{{.app}} {{.ns}}
    # Backups can be slow, so allow up to two hours for the mover Job.
    - kubectl -n {{.ns}} wait job/volsync-src-{{.app}} --for condition=complete --timeout=120m
  env: *env
  requires:
    vars: ["app"]
  vars:
    # Fall back to the "default" namespace when ns is not supplied.
    ns: '{{.ns | default "default"}}'
    # Output of controller.sh for this app/namespace. It is exported through
    # `env` and would be consumed by the commented-out .suspend step.
    # NOTE(review): the leading `true &&` looks like a deliberate workaround; keep it unless confirmed unnecessary.
    controller:
      sh: true && {{.scriptsDir}}/controller.sh {{.app}} {{.ns}}
  preconditions:
    - { msg: "Controller script not found", sh: "test -f {{.scriptsDir}}/controller.sh" }
    - { msg: "Wait script not found", sh: "test -f {{.scriptsDir}}/wait.sh" }
    # The resource checked here is a ReplicationSource, so the message names it as such.
    - { msg: "ReplicationSource not found", sh: "kubectl -n {{.ns}} get replicationsources {{.app}}" }
# To run restore jobs in parallel for all replicationdestinations:
# - kubectl get replicationsources --all-namespaces --no-headers | awk '{print $2, $1}' | xargs --max-procs=4 -l bash -c 'task volsync:restore app=$0 ns=$1'
restore:
  desc: Restore a PVC for an application
  summary: |
    Args:
      ns: Namespace the PVC is in (default: default)
      app: Application to restore (required)
      previous: Previous number of snapshots to restore (default: 2)
  # The steps run in order: stop the app, empty the PVC, restore the data,
  # then hand control back to Flux. Each sub-task receives the shared
  # variable map so it sees the values resolved below.
  cmds:
    - task: .suspend
      vars: *env
    - task: .wipe
      vars: *env
    - task: .restore
      vars: *env
    - task: .resume
      vars: *env
  env: *env
  requires:
    vars: ["app"]
  vars:
    # Defaults applied when the caller does not pass ns / previous.
    ns: '{{.ns | default "default"}}'
    previous: '{{.previous | default 2}}'
    # Output of controller.sh for this app/namespace; .suspend passes it to `kubectl scale`.
    controller:
      sh: "{{.scriptsDir}}/controller.sh {{.app}} {{.ns}}"
    # PVC name and mover UID/GID are read from the app's ReplicationSource.
    claim:
      sh: kubectl -n {{.ns}} get replicationsources/{{.app}} -o jsonpath="{.spec.sourcePVC}"
    puid:
      sh: kubectl -n {{.ns}} get replicationsources/{{.app}} -o jsonpath="{.spec.restic.moverSecurityContext.runAsUser}"
    pgid:
      sh: kubectl -n {{.ns}} get replicationsources/{{.app}} -o jsonpath="{.spec.restic.moverSecurityContext.runAsGroup}"
  # Fail early when a helper script or manifest template is missing.
  preconditions:
    - { msg: "Controller script not found", sh: "test -f {{.scriptsDir}}/controller.sh" }
    - { msg: "Wait script not found", sh: "test -f {{.scriptsDir}}/wait.sh" }
    # This check is for a template file, so the message says "template" rather than "script".
    - { msg: "ReplicationDestination template not found", sh: "test -f {{.templatesDir}}/replicationdestination.tmpl.yaml" }
    - { msg: "Wipe template not found", sh: "test -f {{.templatesDir}}/wipe.tmpl.yaml" }
cleanup:
  desc: Delete volume populator PVCs in all namespaces
  env: *env
  vars:
    # Each variable holds "<namespace>/<pvc>" entries, one per PVC whose
    # listing line contains the given marker.
    dest:
      sh: kubectl get pvc --all-namespaces --no-headers | grep "dst-dest" | awk '{print $1 "/" $2}'
    cache:
      sh: kubectl get pvc --all-namespaces --no-headers | grep "dst-cache" | awk '{print $1 "/" $2}'
  cmds:
    # Split every "<namespace>/<pvc>" entry and delete that PVC.
    - for:
        var: dest
      cmd: |
        {{- $items := (split "/" .ITEM) }}
        kubectl delete pvc -n {{ $items._0 }} {{ $items._1 }}
    - for:
        var: cache
      cmd: |
        {{- $items := (split "/" .ITEM) }}
        kubectl delete pvc -n {{ $items._0 }} {{ $items._1 }}
# Suspend the Flux ks and hr
.suspend:
  internal: true
  env: *env
  cmds:
    # Pause Flux reconciliation of the Kustomization and the HelmRelease.
    - flux -n flux-system suspend kustomization {{.app}}
    - flux -n {{.ns}} suspend helmrelease {{.app}}
    # Scale the workload to zero and wait up to two minutes for its pods,
    # selected by the app.kubernetes.io/name label, to be deleted.
    - kubectl -n {{.ns}} scale {{.controller}} --replicas 0
    - kubectl -n {{.ns}} wait pod --for delete --selector="app.kubernetes.io/name={{.app}}" --timeout=2m
# Wipe the PVC of all data
.wipe:
  internal: true
  env: *env
  cmds:
    # Substitute environment variables into the wipe template and apply the result.
    - envsubst < <(cat {{.templatesDir}}/wipe.tmpl.yaml) | kubectl apply -f -
    # wait.sh is given the Job name and the namespace; presumably it blocks
    # until the Job's pod is up (confirm against the script).
    - bash {{.scriptsDir}}/wait.sh wipe-{{.app}}-{{.ts}} {{.ns}}
    # Allow up to two hours for the Job to complete.
    - kubectl -n {{.ns}} wait job/wipe-{{.app}}-{{.ts}} --for condition=complete --timeout=120m
    # Print the output of the "wipe" container, then remove the Job.
    - kubectl -n {{.ns}} logs job/wipe-{{.app}}-{{.ts}} --container wipe
    - kubectl -n {{.ns}} delete job wipe-{{.app}}-{{.ts}}
# Create VolSync replicationdestination CR to restore data
.restore:
  internal: true
  env: *env
  cmds:
    # Substitute environment variables into the ReplicationDestination template and apply the result.
    - envsubst < <(cat {{.templatesDir}}/replicationdestination.tmpl.yaml) | kubectl apply -f -
    # wait.sh is given the Job name and the namespace; presumably it blocks
    # until the Job's pod is up (confirm against the script).
    - bash {{.scriptsDir}}/wait.sh volsync-dst-{{.app}}-{{.ts}} {{.ns}}
    # Allow up to two hours for the restore Job to complete.
    - kubectl -n {{.ns}} wait job/volsync-dst-{{.app}}-{{.ts}} --for condition=complete --timeout=120m
    # Remove the ReplicationDestination once the Job has completed.
    - kubectl -n {{.ns}} delete replicationdestination {{.app}}-{{.ts}}
# Resume Flux ks and hr
.resume:
  internal: true
  env: *env
  cmds:
    # Resume in the reverse order of .suspend: HelmRelease first, then the Kustomization.
    - flux -n {{.ns}} resume helmrelease {{.app}}
    - flux -n flux-system resume kustomization {{.app}}