From 15a5c493246d3d2ae1b5baf7a1ab0639c13eae19 Mon Sep 17 00:00:00 2001
From: Jennie Robinson Faber
Date: Sun, 24 May 2026 15:28:45 +0100
Subject: [PATCH] Add bind-mount liveness guard to cron container

DokPloy rm-rf's the host code dir on every redeploy, orphaning the
long-running cron container's ./content and ./.git bind mounts (pinned
to the now-deleted inode). The wiki export then spins forever in
fs.mkdir against the dead directory and never commits.

mount-guard.sh detects a stale mount (directory hard-link count of 0 =
a deleted inode) and re-binds it via docker restart, run every 15 min
from cron and once at the top of export-content-cron.sh as a fast-fail.
---
 cron/crontab                   |  4 ++++
 scripts/export-content-cron.sh |  6 ++++++
 scripts/mount-guard.sh         | 38 ++++++++++++++++++++++++++++++++++
 3 files changed, 48 insertions(+)
 create mode 100755 scripts/mount-guard.sh

diff --git a/cron/crontab b/cron/crontab
index 758c67d..0e00210 100644
--- a/cron/crontab
+++ b/cron/crontab
@@ -4,3 +4,7 @@
 # Wiki content export to git — daily at 4 AM UTC
 0 4 * * * . /etc/environment.sh; /app/scripts/export-content-cron.sh >> /var/log/wiki-export.log 2>&1
 
+# Bind-mount liveness guard — re-binds ./content + ./.git if a DokPloy redeploy
+# orphaned them (every 15 min, offset off :00 to avoid racing the jobs above).
+7,22,37,52 * * * * . /etc/environment.sh; /app/scripts/mount-guard.sh >> /var/log/mount-guard.log 2>&1
+
diff --git a/scripts/export-content-cron.sh b/scripts/export-content-cron.sh
index 74491d7..44e4d38 100755
--- a/scripts/export-content-cron.sh
+++ b/scripts/export-content-cron.sh
@@ -7,6 +7,12 @@ set -euo pipefail
 REPO_DIR="$(cd "$(dirname "$0")/.." && pwd)"
 cd "$REPO_DIR"
 
+# Bail out (and self-heal) if a DokPloy redeploy left our bind mounts pinned to
+# a deleted inode — otherwise the fs.mkdir in export-content.js spins forever.
+# mount-guard.sh restarts the container to re-bind; this run is then aborted and
+# the next scheduled export runs against fresh mounts.
+"$REPO_DIR/scripts/mount-guard.sh" # not dirname "$0": a relative $0 breaks after cd
+
 # Source env vars from outline.env if it exists (for host-based cron),
 # otherwise rely on env vars from docker-compose env_file
 if [[ -f outline.env ]]; then
diff --git a/scripts/mount-guard.sh b/scripts/mount-guard.sh
new file mode 100755
index 0000000..3b7ca65
--- /dev/null
+++ b/scripts/mount-guard.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# =============================================================================
+# Bind-mount liveness guard for the cron container.
+#
+# DokPloy rm-rf's the host code dir on every redeploy, replacing the inode
+# behind ./content and ./.git. This long-running container keeps its bind mount
+# pinned to the now-deleted inode (mountinfo shows ".../content//deleted"), and
+# operations on it break — notably fs.mkdir(recursive) in the export spins
+# forever on the orphaned directory, never reaching the commit/push.
+#
+# Detection: a deleted/unlinked directory inode reports 0 hard links; a live one
+# is NOT guaranteed >= 2 (btrfs always reports 1), so only 0 counts as stale.
+# Re-binding requires a container restart, which re-resolves the bind source to
+# the current host inode (verified); remounting from inside is not possible.
+#
+# Run periodically from cron, and once at the top of export-content-cron.sh.
+# =============================================================================
+
+# is_stale DIR: true when DIR has 0 hard links or cannot be stat'ed.
+is_stale() {
+  local dir="$1" links
+  links=$(stat -c %h "$dir" 2>/dev/null || echo 0)
+  [[ "$links" -eq 0 ]]
+}
+
+for dir in /app/content /app/.git; do
+  if is_stale "$dir"; then
+    echo "$(date -u +%FT%TZ) STALE mount at $dir (hard-link count 0) — restarting container to re-bind"
+    # Inside `if`: a failed restart must not trip set -e and skip the ERROR.
+    if docker restart "$(cat /etc/hostname)"; then
+      sleep 10 # restart accepted: wait here to be killed, don't fall through
+    fi
+    echo "$(date -u +%FT%TZ) ERROR: restart did not take effect for $dir" >&2
+    exit 1
+  fi
+done