feat: just

auricom committed 2025-08-29 20:28:48 +02:00
parent 72341e3fb3
commit b9ba0124aa
28 changed files with 604 additions and 0 deletions

89
.just/bootstrap.just Normal file

@@ -0,0 +1,89 @@
set quiet
set shell := ['bash', '-eu', '-o', 'pipefail', '-c']

bootstrap_dir := justfile_dir() + '/bootstrap'
kubernetes_dir := justfile_dir() + '/kubernetes'

controller := `talosctl config info -o yaml | yq -e '.endpoints[0]'`
nodes := `talosctl config info -o yaml | yq -e '.nodes | join(" ")'`

[doc('Bootstrap Cluster')]
default: talos kubernetes kubeconfig wait namespaces resources crds apps

[doc('Install Talos')]
talos: (with-banner "Installing Talos")
    for node in {{nodes}}; do \
        if ! output=$(just talos apply-node "${node}" --insecure 2>&1); then \
            if [[ "${output}" == *"certificate required"* ]]; then \
                just log info "Talos node is already configured, skipping apply of config" "node" "${node}"; \
                continue; \
            fi; \
            just log fatal "Failed to apply Talos node configuration" "node" "${node}" "output" "${output}"; \
        fi; \
    done

[doc('Install Kubernetes')]
kubernetes: (with-banner "Installing Kubernetes")
    until output=$(talosctl -n "{{controller}}" bootstrap 2>&1 || true) && [[ "${output}" == *"AlreadyExists"* ]]; do \
        just log info "Talos bootstrap in progress, waiting 5 seconds..."; \
        sleep 5; \
    done

[doc('Fetch Kubeconfig')]
kubeconfig: (with-banner "Fetching kubeconfig")
    if ! talosctl -n "{{controller}}" kubeconfig -f --force-context-name main "{{justfile_dir()}}" &>/dev/null; then \
        just log fatal "Failed to fetch kubeconfig"; \
    fi

[doc('Wait for nodes to be not-ready')]
wait: (with-banner "Waiting for nodes to be not-ready")
    if ! kubectl wait nodes --for=condition=Ready=True --all --timeout=10s &>/dev/null; then \
        until kubectl wait nodes --for=condition=Ready=False --all --timeout=10s &>/dev/null; do \
            just log info "Nodes are not available, waiting for nodes to be available. Retrying in 5 seconds..."; \
            sleep 5; \
        done; \
    fi

[doc('Apply Namespaces')]
namespaces: (with-banner "Applying Namespaces")
    find "{{kubernetes_dir}}/apps" -mindepth 1 -maxdepth 1 -type d -printf "%f\n" | while IFS= read -r namespace; do \
        if kubectl get namespace "${namespace}" &>/dev/null; then \
            continue; \
        fi; \
        if ! kubectl create namespace "${namespace}" --dry-run=client -o yaml | kubectl apply --server-side -f - &>/dev/null; then \
            just log error "Failed to apply namespace" "namespace" "${namespace}"; \
        fi; \
        just log info "Namespace applied successfully" "namespace" "${namespace}"; \
    done

[doc('Apply Resources')]
resources: (with-banner "Applying Resources")
    if ! resources=$(op inject -i "{{bootstrap_dir}}/resources.yaml") || [[ -z "${resources}" ]]; then \
        just log fatal "Failed to render resources"; \
    fi; \
    if ! echo "${resources}" | kubectl diff -f - &>/dev/null; then \
        if ! echo "${resources}" | kubectl apply --server-side -f - &>/dev/null; then \
            just log fatal "Failed to apply resources"; \
        fi; \
    fi

[doc('Apply CRDs')]
crds: (with-banner "Applying CRDs")
    if ! crds=$(helmfile --file "{{bootstrap_dir}}/helmfile.d/00-crds.yaml" template -q | yq ea -e 'select(.kind == "CustomResourceDefinition")') || [[ -z "${crds}" ]]; then \
        just log fatal "Failed to render CRDs from Helmfile"; \
    fi; \
    if ! echo "${crds}" | kubectl diff --filename - &>/dev/null; then \
        if ! echo "${crds}" | kubectl apply --server-side --filename - &>/dev/null; then \
            just log fatal "Failed to apply CRDs from Helmfile"; \
        fi; \
    fi

[doc('Apply Apps')]
apps: (with-banner "Applying Apps")
    if ! helmfile --file "{{bootstrap_dir}}/helmfile.d/01-apps.yaml" sync --hide-notes; then \
        just log fatal "Failed to apply apps from Helmfile"; \
    fi

[private]
with-banner msg:
    just log info "{{msg}}"

16
.just/kube.just Normal file

@@ -0,0 +1,16 @@
set quiet
set shell := ['bash', '-eu', '-o', 'pipefail', '-c']

[private]
default:
    just --list kube

[doc('Spawn a shell on a node')]
node-shell node:
    kubectl node-shell -n kube-system -x {{node}}

[doc('Prune all unused Pods')]
prune-pods:
    for phase in Failed Pending Succeeded; do \
        kubectl delete pods -A --field-selector status.phase="${phase}" --ignore-not-found=true; \
    done

36
.just/sync.just Normal file

@@ -0,0 +1,36 @@
set quiet
set shell := ['bash', '-eu', '-o', 'pipefail', '-c']

[private]
default:
    just --list sync

[doc('Sync ExternalSecrets')]
es:
    kubectl get es -A --no-headers | while read -r ns name _; do \
        kubectl -n "${ns}" annotate --field-manager flux-client-side-apply --overwrite es "${name}" force-sync="$(date +%s)"; \
    done

[doc('Sync GitRepositories')]
git:
    kubectl get gitrepo -A --no-headers | while read -r ns name _; do \
        kubectl -n "${ns}" annotate --field-manager flux-client-side-apply --overwrite gitrepo "${name}" reconcile.fluxcd.io/requestedAt="$(date +%s)"; \
    done

[doc('Sync HelmReleases')]
hr:
    kubectl get hr -A --no-headers | while read -r ns name _; do \
        kubectl -n "${ns}" annotate --field-manager flux-client-side-apply --overwrite hr "${name}" reconcile.fluxcd.io/requestedAt="$(date +%s)" reconcile.fluxcd.io/forceAt="$(date +%s)"; \
    done

[doc('Sync Kustomizations')]
ks:
    kubectl get ks -A --no-headers | while read -r ns name _; do \
        kubectl -n "${ns}" annotate --field-manager flux-client-side-apply --overwrite ks "${name}" reconcile.fluxcd.io/requestedAt="$(date +%s)"; \
    done

[doc('Sync OCIRepositories')]
oci:
    kubectl get ocirepo -A --no-headers | while read -r ns name _; do \
        kubectl -n "${ns}" annotate --field-manager flux-client-side-apply --overwrite ocirepo "${name}" reconcile.fluxcd.io/requestedAt="$(date +%s)"; \
    done

70
.just/talos.just Normal file

@@ -0,0 +1,70 @@
set quiet
set shell := ['bash', '-eu', '-o', 'pipefail', '-c']

scripts_dir := justfile_dir() + '/scripts'
talos_dir := justfile_dir() + '/talos'
temp_dir := `mktemp -d`

controller := `talosctl config info -o yaml | yq -e '.endpoints[0]'`

[private]
default:
    just --list talos

[doc('Apply Node')]
apply-node node *args:
    just talos render-config "{{node}}" | talosctl -n "{{node}}" apply-config -f /dev/stdin {{args}}

[doc('Download Image')]
download-image version schematic:
    gum spin -s line --title "Downloading Talos {{version}} ..." -- \
        curl -sfL --remove-on-error --retry 5 --retry-delay 5 --retry-all-errors \
        -o "{{talos_dir}}/talos-{{version}}-{{replace_regex(schematic, '^(.{8}).*', '$1')}}.iso" \
        "https://factory.talos.dev/image/{{schematic}}/{{version}}/metal-amd64.iso"
    just log info "Downloaded Talos" version "{{version}}" schematic "{{schematic}}"

[doc('Generate Kubeconfig')]
generate-kubeconfig:
    talosctl kubeconfig -n "{{controller}}" -f --force-context-name main "{{justfile_dir()}}"

[doc('Generate Schematic Id')]
generate-schematic-id:
    curl -sX POST --data-binary "@{{talos_dir}}/schematic.yaml" \
        "https://factory.talos.dev/schematics" | jq -r '.id'

[doc('Reboot Node')]
reboot-node node:
    talosctl -n "{{node}}" reboot -m powercycle

[doc('Render Config')]
render-config node:
    minijinja-cli -D "machinetype=$(just talos machine-type {{node}})" \
        "{{talos_dir}}/machineconfig.yaml.j2" | op inject >"{{temp_dir}}/base.yaml"
    minijinja-cli -D "machinetype=$(just talos machine-type {{node}})" \
        "{{talos_dir}}/nodes/{{node}}.yaml.j2" | op inject >"{{temp_dir}}/patch.yaml"
    talosctl machineconfig patch "{{temp_dir}}/base.yaml" -p "{{temp_dir}}/patch.yaml"

[doc('Reset Node')]
[confirm]
reset-node node:
    talosctl -n "{{node}}" reset --graceful=false

[doc('Shutdown Node')]
shutdown-node node:
    talosctl -n "{{node}}" shutdown --force

[doc('Upgrade Kubernetes')]
upgrade-k8s version:
    talosctl -n "{{controller}}" upgrade-k8s --to "{{version}}"

[doc('Upgrade Node')]
upgrade-node node:
    talosctl -n "{{node}}" upgrade -i "$(just talos machine-image)" -m powercycle --timeout=10m

[private]
machine-type node:
    minijinja-cli "{{talos_dir}}/nodes/{{node}}.yaml.j2" | yq -e '.machine.type // ""'

[private]
machine-image:
    minijinja-cli "{{talos_dir}}/machineconfig.yaml.j2" | yq -e '.machine.install.image // ""'

231
.just/volsync.just Normal file

@@ -0,0 +1,231 @@
set quiet
set shell := ['bash', '-eu', '-o', 'pipefail', '-c']

# This justfile manages certain VolSync tasks for a given application; its limitations are described below.
# 1. The Flux Kustomization, HelmRelease, PVC, and ReplicationSource all share the same name (e.g. plex)
# 2. The ReplicationSource and ReplicationDestination use a Restic repository
# 3. Applications are deployed as either a Kubernetes Deployment or StatefulSet
# 4. Each application only has one PVC that is being replicated
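#
# A hypothetical example, assuming an application named `plex` in the `media`
# namespace that follows the conventions above:
#   just volsync snapshot plex media    # trigger a manual backup of the plex PVC via its ReplicationSource
#   just volsync restore plex media 2   # suspend Flux, wipe the PVC, and restore it from an older snapshot (previous=2)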
scripts_dir := justfile_directory() + '/.just/volsync/scripts'
templates_dir := justfile_directory() + '/.just/volsync/templates'

[private]
default:
    just --list volsync

[doc('List snapshots for an application')]
list app ns="default":
    #!/usr/bin/env bash
    set -euo pipefail
    # Check preconditions
    if [[ ! -f "{{scripts_dir}}/wait.sh" ]]; then
        echo "Wait script not found"
        exit 1
    fi
    if [[ ! -f "{{templates_dir}}/list.tmpl.yaml" ]]; then
        echo "List template not found"
        exit 1
    fi
    ts=$(date +%H%M%S)
    export app="{{app}}"
    export controller=""
    export claim=""
    export puid=""
    export pgid=""
    export ns="{{ns}}"
    export previous=""
    export ts="${ts}"
    envsubst < <(cat {{templates_dir}}/list.tmpl.yaml) | kubectl apply -f -
    bash {{scripts_dir}}/wait.sh list-{{app}}-${ts} {{ns}}
    kubectl -n {{ns}} wait job/list-{{app}}-${ts} --for condition=complete --timeout=1m
    kubectl -n {{ns}} logs job/list-{{app}}-${ts} --container list
    kubectl -n {{ns}} delete job list-{{app}}-${ts}
[doc('Unlock a Restic repository for an application')]
unlock app ns="default":
    #!/usr/bin/env bash
    set -euo pipefail
    # Check preconditions
    if [[ ! -f "{{scripts_dir}}/wait.sh" ]]; then
        echo "Wait script not found"
        exit 1
    fi
    if [[ ! -f "{{templates_dir}}/unlock.tmpl.yaml" ]]; then
        echo "Unlock template not found"
        exit 1
    fi
    ts=$(date +%H%M%S)
    export app="{{app}}"
    export controller=""
    export claim=""
    export puid=""
    export pgid=""
    export ns="{{ns}}"
    export previous=""
    export ts="${ts}"
    envsubst < <(cat {{templates_dir}}/unlock.tmpl.yaml) | kubectl apply -f -
    bash {{scripts_dir}}/wait.sh unlock-{{app}}-${ts} {{ns}}
    kubectl -n {{ns}} wait job/unlock-{{app}}-${ts} --for condition=complete --timeout=1m
    kubectl -n {{ns}} logs job/unlock-{{app}}-${ts} --container unlock-minio
    kubectl -n {{ns}} logs job/unlock-{{app}}-${ts} --container unlock-r2
    kubectl -n {{ns}} delete job unlock-{{app}}-${ts}
[doc('Snapshot a PVC for an application')]
snapshot app ns="default":
    #!/usr/bin/env bash
    set -euo pipefail
    # Check preconditions
    if [[ ! -f "{{scripts_dir}}/controller.sh" ]]; then
        echo "Controller script not found"
        exit 1
    fi
    if [[ ! -f "{{scripts_dir}}/wait.sh" ]]; then
        echo "Wait script not found"
        exit 1
    fi
    if ! kubectl -n {{ns}} get replicationsources {{app}} &>/dev/null; then
        echo "ReplicationSource not found"
        exit 1
    fi
    ts=$(date +%H%M%S)
    controller=$({{scripts_dir}}/controller.sh {{app}} {{ns}})
    export app="{{app}}"
    export controller="${controller}"
    export claim=""
    export puid=""
    export pgid=""
    export ns="{{ns}}"
    export previous=""
    export ts="${ts}"
    kubectl -n {{ns}} patch replicationsources {{app}} --type merge -p '{"spec":{"trigger":{"manual":"'${ts}'"}}}'
    bash {{scripts_dir}}/wait.sh volsync-src-{{app}} {{ns}}
    kubectl -n {{ns}} wait job/volsync-src-{{app}} --for condition=complete --timeout=120m
[doc('Restore a PVC for an application')]
restore app ns="default" previous="2":
    #!/usr/bin/env bash
    set -euo pipefail
    # Check preconditions
    if [[ ! -f "{{scripts_dir}}/controller.sh" ]]; then
        echo "Controller script not found"
        exit 1
    fi
    if [[ ! -f "{{scripts_dir}}/wait.sh" ]]; then
        echo "Wait script not found"
        exit 1
    fi
    if [[ ! -f "{{templates_dir}}/replicationdestination.tmpl.yaml" ]]; then
        echo "ReplicationDestination template not found"
        exit 1
    fi
    if [[ ! -f "{{templates_dir}}/wipe.tmpl.yaml" ]]; then
        echo "Wipe template not found"
        exit 1
    fi
    ts=$(date +%H%M%S)
    controller=$({{scripts_dir}}/controller.sh {{app}} {{ns}})
    claim=$(kubectl -n {{ns}} get replicationsources/{{app}} -o jsonpath="{.spec.sourcePVC}")
    puid=$(kubectl -n {{ns}} get replicationsources/{{app}} -o jsonpath="{.spec.restic.moverSecurityContext.runAsUser}")
    pgid=$(kubectl -n {{ns}} get replicationsources/{{app}} -o jsonpath="{.spec.restic.moverSecurityContext.runAsGroup}")
    export app="{{app}}"
    export controller="${controller}"
    export claim="${claim}"
    export puid="${puid}"
    export pgid="${pgid}"
    export ns="{{ns}}"
    export previous="{{previous}}"
    export ts="${ts}"
    just volsync suspend {{app}} {{ns}} "${controller}"
    just volsync wipe {{app}} {{ns}} "${claim}" "${puid}" "${pgid}" "${ts}"
    just volsync restore-internal {{app}} {{ns}} "${claim}" "${puid}" "${pgid}" "${ts}"
    just volsync resume {{app}} {{ns}}
[doc('Delete volume populator PVCs in all namespaces')]
cleanup:
    #!/usr/bin/env bash
    set -euo pipefail
    # Delete destination PVCs
    kubectl get pvc --all-namespaces --no-headers | grep "dst-dest" | awk '{print $1 "/" $2}' | while IFS='/' read -r namespace pvc; do
        kubectl delete pvc -n "${namespace}" "${pvc}"
    done
    # Delete cache PVCs
    kubectl get pvc --all-namespaces --no-headers | grep "dst-cache" | awk '{print $1 "/" $2}' | while IFS='/' read -r namespace pvc; do
        kubectl delete pvc -n "${namespace}" "${pvc}"
    done

[private]
[doc('Suspend the Flux ks and hr')]
suspend app ns controller:
    #!/usr/bin/env bash
    set -euo pipefail
    flux -n {{ns}} suspend kustomization {{app}}
    flux -n {{ns}} suspend helmrelease {{app}}
    kubectl -n {{ns}} scale {{controller}} --replicas 0
    kubectl -n {{ns}} wait pod --for delete --selector="app.kubernetes.io/name={{app}}" --timeout=2m

[private]
[doc('Wipe the PVC of all data')]
wipe app ns claim puid pgid ts:
    #!/usr/bin/env bash
    set -euo pipefail
    export app="{{app}}"
    export controller=""
    export claim="{{claim}}"
    export puid="{{puid}}"
    export pgid="{{pgid}}"
    export ns="{{ns}}"
    export previous=""
    export ts="{{ts}}"
    envsubst < <(cat {{templates_dir}}/wipe.tmpl.yaml) | kubectl apply -f -
    bash {{scripts_dir}}/wait.sh wipe-{{app}}-{{ts}} {{ns}}
    kubectl -n {{ns}} wait job/wipe-{{app}}-{{ts}} --for condition=complete --timeout=120m
    kubectl -n {{ns}} logs job/wipe-{{app}}-{{ts}} --container wipe
    kubectl -n {{ns}} delete job wipe-{{app}}-{{ts}}

[private]
[doc('Create VolSync ReplicationDestination CR to restore data')]
restore-internal app ns claim puid pgid ts:
    #!/usr/bin/env bash
    set -euo pipefail
    export app="{{app}}"
    export controller=""
    export claim="{{claim}}"
    export puid="{{puid}}"
    export pgid="{{pgid}}"
    export ns="{{ns}}"
    # Inherit the `previous` value exported by the restore recipe instead of clobbering it with an empty string
    export previous="${previous:-}"
    export ts="{{ts}}"
    envsubst < <(cat {{templates_dir}}/replicationdestination.tmpl.yaml) | kubectl apply -f -
    bash {{scripts_dir}}/wait.sh volsync-dst-{{app}}-{{ts}} {{ns}}
    kubectl -n {{ns}} wait job/volsync-dst-{{app}}-{{ts}} --for condition=complete --timeout=120m
    kubectl -n {{ns}} delete replicationdestination {{app}}-{{ts}}

[private]
[doc('Resume Flux ks and hr')]
resume app ns:
    #!/usr/bin/env bash
    set -euo pipefail
    flux -n {{ns}} resume helmrelease {{app}}
    flux -n {{ns}} resume kustomization {{app}}

21
.just/volsync/scripts/controller.sh Executable file

@@ -0,0 +1,21 @@
#!/usr/bin/env bash
# Print the controller resource (Deployment or StatefulSet) backing the given app.
APP=$1
NAMESPACE="${2:-default}"

is_deployment() {
    kubectl -n "${NAMESPACE}" get deployment "${APP}" >/dev/null 2>&1
}

is_statefulset() {
    kubectl -n "${NAMESPACE}" get statefulset "${APP}" >/dev/null 2>&1
}

if is_deployment; then
    echo "deployment.apps/${APP}"
elif is_statefulset; then
    echo "statefulset.apps/${APP}"
else
    echo "No deployment or statefulset found for ${APP}"
    exit 1
fi

14
.just/volsync/scripts/wait.sh Executable file

@@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Block until the pod created by the given Job exists (phase Pending), so that
# follow-up `kubectl wait`/`logs` calls have a pod to target.
JOB=$1
NAMESPACE="${2:-default}"
CLUSTER="${3:-main}"

[[ -z "${JOB}" ]] && echo "Job name not specified" && exit 1

while true; do
    STATUS="$(kubectl -n "${NAMESPACE}" get pod -l job-name="${JOB}" -o jsonpath='{.items[*].status.phase}')"
    if [ "${STATUS}" == "Pending" ]; then
        break
    fi
    sleep 1
done

19
.just/volsync/templates/list.tmpl.yaml Normal file

@@ -0,0 +1,19 @@
---
apiVersion: batch/v1
kind: Job
metadata:
  name: "list-${app}-${ts}"
  namespace: "${ns}"
spec:
  ttlSecondsAfterFinished: 3600
  template:
    spec:
      automountServiceAccountToken: false
      restartPolicy: OnFailure
      containers:
        - name: list
          image: docker.io/restic/restic:0.18.0@sha256:4cf4a61ef9786f4de53e9de8c8f5c040f33830eb0a10bf3d614410ee2fcb6120
          args: ["snapshots"]
          envFrom:
            - secretRef:
                name: "${app}-volsync-secret"

31
.just/volsync/templates/replicationdestination.tmpl.yaml Normal file

@@ -0,0 +1,31 @@
---
apiVersion: volsync.backube/v1alpha1
kind: ReplicationDestination
metadata:
  name: "${app}-${ts}"
  namespace: "${ns}"
spec:
  trigger:
    manual: restore-once
  restic:
    repository: "${app}-volsync-secret"
    destinationPVC: "${claim}"
    copyMethod: Direct
    storageClassName: rook-ceph-block
    # storageClassName: ceph-filesystem
    # accessModes: ["ReadWriteMany"]
    # IMPORTANT NOTE:
    #   Set to the last X number of snapshots to restore from
    previous: ${previous}
    # OR;
    # IMPORTANT NOTE:
    #   On bootstrap set `restoreAsOf` to the time the old cluster was destroyed.
    #   This will essentially prevent volsync from trying to restore a backup
    #   from an application that started with default data in the PVC.
    #   Do not restore snapshots made after the following RFC3339 Timestamp.
    #   date --rfc-3339=seconds (--utc)
    # restoreAsOf: "2022-12-10T16:00:00-05:00"
    moverSecurityContext:
      runAsUser: ${puid}
      runAsGroup: ${pgid}
      fsGroup: ${pgid}

25
.just/volsync/templates/unlock.tmpl.yaml Normal file

@@ -0,0 +1,25 @@
---
apiVersion: batch/v1
kind: Job
metadata:
  name: "unlock-${app}-${ts}"
  namespace: "${ns}"
spec:
  ttlSecondsAfterFinished: 3600
  template:
    spec:
      automountServiceAccountToken: false
      restartPolicy: OnFailure
      containers:
        - name: unlock-minio
          image: docker.io/restic/restic:0.18.0@sha256:4cf4a61ef9786f4de53e9de8c8f5c040f33830eb0a10bf3d614410ee2fcb6120
          args: ["unlock", "--remove-all"]
          envFrom:
            - secretRef:
                name: "${app}-volsync-secret"
        - name: unlock-r2
          image: docker.io/restic/restic:0.18.0@sha256:4cf4a61ef9786f4de53e9de8c8f5c040f33830eb0a10bf3d614410ee2fcb6120
          args: ["unlock", "--remove-all"]
          envFrom:
            - secretRef:
                name: "${app}-volsync-r2-secret"

25
.just/volsync/templates/wipe.tmpl.yaml Normal file

@@ -0,0 +1,25 @@
---
apiVersion: batch/v1
kind: Job
metadata:
  name: "wipe-${app}-${ts}"
  namespace: "${ns}"
spec:
  ttlSecondsAfterFinished: 3600
  template:
    spec:
      automountServiceAccountToken: false
      restartPolicy: OnFailure
      containers:
        - name: wipe
          image: docker.io/library/busybox:latest
          command: ["/bin/sh", "-c", "cd /config; find . -delete"]
          volumeMounts:
            - name: config
              mountPath: /config
          securityContext:
            privileged: true
      volumes:
        - name: config
          persistentVolumeClaim:
            claimName: "${claim}"

27
.justfile Normal file

@@ -0,0 +1,27 @@
#!/usr/bin/env -S just --justfile

set quiet
set shell := ['bash', '-eu', '-o', 'pipefail', '-c']

[doc('Bootstrap Recipes')]
mod bootstrap '.just/bootstrap.just'

[doc('Kubernetes Recipes')]
mod kube '.just/kube.just'

[doc('Sync Recipes')]
mod sync '.just/sync.just'

[doc('Talos Recipes')]
mod talos '.just/talos.just'

[doc('Volsync Recipes')]
mod volsync '.just/volsync.just'

[private]
default:
    just --list

[positional-arguments, private]
log lvl msg *args:
    gum log -t rfc3339 -s -l "{{lvl}}" "{{msg}}" {{args}}