Add bind-mount liveness guard to cron container

DokPloy rm-rf's the host code dir on every redeploy, orphaning the
long-running cron container's ./content and ./.git bind mounts (pinned to
the now-deleted inode). The wiki export then spins forever in fs.mkdir
against the dead directory and never commits.

mount-guard.sh detects a stale mount (directory hard-link count < 2 = a
deleted inode) and re-binds it via docker restart, run every 15 min from
cron and once at the top of export-content-cron.sh as a fast-fail.
This commit is contained in:
Jennie Robinson Faber 2026-05-24 15:28:45 +01:00
parent 9ed1518d83
commit 15a5c49324
3 changed files with 48 additions and 0 deletions

View file

@ -4,3 +4,7 @@
# Wiki content export to git — daily at 4 AM UTC
0 4 * * * . /etc/environment.sh; /app/scripts/export-content-cron.sh >> /var/log/wiki-export.log 2>&1
# Bind-mount liveness guard — re-binds ./content + ./.git if a DokPloy redeploy
# orphaned them (every 15 min, offset off :00 to avoid racing the jobs above).
7,22,37,52 * * * * . /etc/environment.sh; /app/scripts/mount-guard.sh >> /var/log/mount-guard.log 2>&1

View file

@ -7,6 +7,12 @@ set -euo pipefail
REPO_DIR="$(cd "$(dirname "$0")/.." && pwd)"
cd "$REPO_DIR"
# Bail out (and self-heal) if a DokPloy redeploy left our bind mounts pinned to
# a deleted inode — otherwise the fs.mkdir in export-content.js spins forever.
# mount-guard.sh restarts the container to re-bind; this run is then aborted and
# the next scheduled export runs against fresh mounts.
"$(dirname "$0")/mount-guard.sh"
# Source env vars from outline.env if it exists (for host-based cron),
# otherwise rely on env vars from docker-compose env_file
if [[ -f outline.env ]]; then

38
scripts/mount-guard.sh Executable file
View file

@ -0,0 +1,38 @@
#!/usr/bin/env bash
set -euo pipefail
# =============================================================================
# Bind-mount liveness guard for the cron container.
#
# DokPloy rm-rf's the host code dir on every redeploy, replacing the inode
# behind ./content and ./.git. This long-running container keeps its bind mount
# pinned to the now-deleted inode (mountinfo shows ".../content//deleted"), and
# operations on it break — notably fs.mkdir(recursive) in the export spins
# forever on the orphaned directory, never reaching the commit/push.
#
# Detection: a deleted/unlinked directory inode reports a hard-link count of 0.
# A live directory reports at least 1 — usually >= 2 (its entry in the parent
# plus its own "."), but some filesystems (e.g. btrfs) report 1 for every
# directory. So only a count of 0 marks the mount as stale; testing for "< 2"
# would flag healthy mounts on such filesystems and restart the container on
# every run. Re-binding requires a container restart — restart re-resolves the
# bind source to the current host inode (verified); a plain remount from inside
# the namespace is not possible.
#
# Run periodically from cron, and once at the top of export-content-cron.sh.
# =============================================================================

#######################################
# Report whether a directory sits on a deleted (orphaned) inode.
# Arguments: $1 - path of the directory to probe
# Returns:   0 if the hard-link count is 0 or stat cannot read the path
#            (stale), non-zero otherwise
#######################################
is_stale() {
  local dir="$1" links
  # stat errors are silenced on purpose: a path that cannot be stat'ed is
  # treated the same as a deleted inode (0 links).
  links=$(stat -c %h "$dir" 2>/dev/null || echo 0)
  [ "$links" -lt 1 ]
}
# Probe each bind-mounted directory. On the first stale one, restart this
# container so the bind mount is re-resolved, then exit non-zero so a calling
# script running under errexit aborts instead of working on a dead directory.
for dir in /app/content /app/.git; do
  if is_stale "$dir"; then
    echo "$(date -u +%FT%TZ) STALE mount at $dir (hard-link count < 2) — restarting container to re-bind"
    # Run the restart inside `if !` so that, with errexit enabled, a failing
    # docker call (e.g. socket error) is logged instead of silently ending the
    # script. /etc/hostname is presumed to hold a name or ID that docker
    # resolves to this container — NOTE(review): confirm if `hostname:` is set
    # in the compose file.
    if ! docker restart "$(cat /etc/hostname)"; then
      echo "$(date -u +%FT%TZ) ERROR: docker restart failed for $dir" >&2
      exit 1
    fi
    # docker reported success but we are still alive. Sleep so a restart that
    # is still in progress kills us here rather than falling through.
    sleep 10
    echo "$(date -u +%FT%TZ) ERROR: restart did not take effect for $dir" >&2
    exit 1
  fi
done