Add bind-mount liveness guard to cron container
DokPloy rm-rf's the host code dir on every redeploy, orphaning the long-running cron container's ./content and ./.git bind mounts (pinned to the now-deleted inode). The wiki export then spins forever in fs.mkdir against the dead directory and never commits. mount-guard.sh detects a stale mount (directory hard-link count < 2 = a deleted inode) and re-binds it via docker restart, run every 15 min from cron and once at the top of export-content-cron.sh as a fast-fail.
This commit is contained in:
parent
9ed1518d83
commit
15a5c49324
3 changed files with 48 additions and 0 deletions
|
|
@ -4,3 +4,7 @@
|
||||||
# Wiki content export to git — daily at 4 AM UTC
|
# Wiki content export to git — daily at 4 AM UTC
|
||||||
0 4 * * * . /etc/environment.sh; /app/scripts/export-content-cron.sh >> /var/log/wiki-export.log 2>&1
|
0 4 * * * . /etc/environment.sh; /app/scripts/export-content-cron.sh >> /var/log/wiki-export.log 2>&1
|
||||||
|
|
||||||
|
# Bind-mount liveness guard — re-binds ./content + ./.git if a DokPloy redeploy
|
||||||
|
# orphaned them (every 15 min, offset off :00 to avoid racing the jobs above).
|
||||||
|
7,22,37,52 * * * * . /etc/environment.sh; /app/scripts/mount-guard.sh >> /var/log/mount-guard.log 2>&1
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,12 @@ set -euo pipefail
|
||||||
REPO_DIR="$(cd "$(dirname "$0")/.." && pwd)"
|
REPO_DIR="$(cd "$(dirname "$0")/.." && pwd)"
|
||||||
cd "$REPO_DIR"
|
cd "$REPO_DIR"
|
||||||
|
|
||||||
|
# Bail out (and self-heal) if a DokPloy redeploy left our bind mounts pinned to
# a deleted inode — otherwise the fs.mkdir in export-content.js spins forever.
# mount-guard.sh restarts the container to re-bind; this run is then aborted and
# the next scheduled export runs against fresh mounts.
#
# The guard is addressed through the absolute $REPO_DIR rather than
# "$(dirname "$0")": we have already cd'ed into $REPO_DIR, so a relative $0
# (e.g. ./export-content-cron.sh) would now resolve against the wrong directory.
"$REPO_DIR/scripts/mount-guard.sh"
|
||||||
|
|
||||||
# Source env vars from outline.env if it exists (for host-based cron),
|
# Source env vars from outline.env if it exists (for host-based cron),
|
||||||
# otherwise rely on env vars from docker-compose env_file
|
# otherwise rely on env vars from docker-compose env_file
|
||||||
if [[ -f outline.env ]]; then
|
if [[ -f outline.env ]]; then
|
||||||
|
|
|
||||||
38
scripts/mount-guard.sh
Executable file
38
scripts/mount-guard.sh
Executable file
|
|
@ -0,0 +1,38 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Bind-mount liveness guard for the cron container.
|
||||||
|
#
|
||||||
|
# DokPloy rm-rf's the host code dir on every redeploy, replacing the inode
|
||||||
|
# behind ./content and ./.git. This long-running container keeps its bind mount
|
||||||
|
# pinned to the now-deleted inode (mountinfo shows ".../content//deleted"), and
|
||||||
|
# operations on it break — notably fs.mkdir(recursive) in the export spins
|
||||||
|
# forever on the orphaned directory, never reaching the commit/push.
|
||||||
|
#
|
||||||
|
# Detection: a deleted/unlinked directory inode reports a hard-link count of 0,
# and a mount point that cannot be stat'ed at all is just as unusable. A live
# directory usually has >= 2 hard links (itself + its parent), but that is not
# guaranteed: some filesystems (e.g. btrfs) report 1 for every live directory,
# so a "< 2" test would restart the container on every run there. Only a count
# of 0 (or a failed stat) is therefore treated as stale. Re-binding requires a
# container restart — restart re-resolves the bind source to the current host
# inode (verified); a plain remount from inside the namespace is not possible.
#
# Run periodically from cron, and once at the top of export-content-cron.sh.
# =============================================================================

#######################################
# Report whether a directory is pinned to a dead (deleted) inode.
# Arguments: $1 - directory to probe
# Returns:   0 (stale) if stat fails, prints something that is not a number,
#            or reports a hard-link count of 0; non-zero (live) otherwise.
#######################################
is_stale() {
  local dir="$1" links
  # A path that cannot be stat'ed is as fatal for the export as a deleted inode.
  links=$(stat -c %h "$dir" 2>/dev/null) || return 0
  # Defensive: output that is not a plain number cannot be trusted as "live".
  [[ "$links" =~ ^[0-9]+$ ]] || return 0
  # 0 links = the inode has been unlinked; anything >= 1 is a live directory.
  [ "$links" -lt 1 ]
}
|
||||||
|
|
||||||
|
# Probe each bind mount. On the first stale one, ask Docker to restart this
# container (identified by the contents of /etc/hostname) so the mounts are
# re-bound, then exit non-zero if this process is still alive afterwards.
for dir in /app/content /app/.git; do
  if is_stale "$dir"; then
    echo "$(date -u +%FT%TZ) STALE mount at $dir (hard-link count < 2) — restarting container to re-bind"
    # Capture the status instead of letting `set -e` abort the script here:
    # otherwise a failing docker call (e.g. socket error) would exit silently
    # and the ERROR line below would never reach the log.
    restart_rc=0
    docker restart "$(cat /etc/hostname)" || restart_rc=$?
    if (( restart_rc != 0 )); then
      echo "$(date -u +%FT%TZ) ERROR: docker restart exited with status $restart_rc" >&2
    else
      # docker reported success: sleep so the restart kills us here rather
      # than letting us fall through and report a bogus failure.
      sleep 10
    fi
    # Reached only if the restart did not take effect.
    echo "$(date -u +%FT%TZ) ERROR: restart did not take effect for $dir" >&2
    exit 1
  fi
done
|
||||||
Loading…
Add table
Add a link
Reference in a new issue