DokPloy rm-rf's the host code dir on every redeploy, orphaning the long-running cron container's ./content and ./.git bind mounts (pinned to the now-deleted inode). The wiki export then spins forever in fs.mkdir against the dead directory and never commits. mount-guard.sh detects a stale mount (directory hard-link count < 2 = a deleted inode) and re-binds it via docker restart, run every 15 min from cron and once at the top of export-content-cron.sh as a fast-fail.
38 lines
1.6 KiB
Bash
Executable file
38 lines
1.6 KiB
Bash
Executable file
#!/usr/bin/env bash
|
|
set -euo pipefail
|
|
|
|
# =============================================================================
# Bind-mount liveness guard for the cron container.
#
# DokPloy rm-rf's the host code dir on every redeploy, replacing the inode
# behind ./content and ./.git. This long-running container keeps its bind mount
# pinned to the now-deleted inode (mountinfo shows ".../content//deleted"), and
# operations on it break — notably fs.mkdir(recursive) in the export spins
# forever on the orphaned directory, never reaching the commit/push.
#
# Detection: a live directory normally has >= 2 hard links (its entry in the
# parent directory plus its own "." entry); a deleted/unlinked inode reports 0.
# So a link count < 2 means the mount is stale.
# NOTE: this relies on the filesystem maintaining directory link counts
# (ext4, XFS, tmpfs do). Btrfs reports 1 for every directory, which this check
# would treat as permanently stale.
#
# Recovery: re-binding requires a container restart — restart re-resolves the
# bind source to the current host inode (verified); a plain remount from inside
# the namespace is not possible.
#
# Run periodically from cron, and once at the top of export-content-cron.sh.
# =============================================================================
|
|
|
|
#######################################
# Report whether a bind-mounted directory points at a deleted inode.
# A directory whose hard-link count is below 2 is considered stale; a path
# that cannot be stat'ed at all is treated as link count 0 (also stale).
# Arguments: $1 - directory path to probe
# Returns:   0 if the mount is stale, non-zero if it looks alive
#######################################
is_stale() {
  local dir="$1"
  local links

  # stat errors are expected for a dead mount, so silence them and fall back
  # to 0 links rather than letting the failure propagate.
  if ! links=$(stat -c %h "$dir" 2>/dev/null); then
    links=0
  fi

  ((links < 2))
}
|
|
|
|
#######################################
# Restart this container so Docker re-resolves its bind mounts, then fail.
# A successful restart kills this process during the sleep, so any code after
# the sleep only runs when the restart did not take effect.
# Arguments: $1 - the stale mount path (used in log messages only)
# Outputs:   STALE notice on stdout, ERROR notice on stderr
# Returns:   never returns; exits 1 if the process is still alive afterwards
#######################################
restart_for_stale_mount() {
  local dir="$1"
  local rc=0

  echo "$(date -u +%FT%TZ) STALE mount at $dir (hard-link count < 2) — restarting container to re-bind"

  # Capture the status instead of letting `set -e` abort here: otherwise a
  # failed restart (e.g. docker socket error) would exit silently and the
  # ERROR line below would never be logged.
  docker restart "$(cat /etc/hostname)" || rc=$?

  # Sleep so a successful restart kills us here rather than falling through.
  sleep 10

  echo "$(date -u +%FT%TZ) ERROR: restart did not take effect for $dir (docker restart exit status $rc)" >&2
  exit 1
}

# Probe each bind mount; the first stale one triggers a restart attempt.
for dir in /app/content /app/.git; do
  if is_stale "$dir"; then
    restart_for_stale_mount "$dir"
  fi
done
|