feat(vpn): add startup connectivity checks and PersistentKeepalive

Add check_vpn_connectivity() that runs once after wg0 comes up:
- Waits for a handshake (up to 15s); if none occurs, prints this container's public key so it can be verified on the server
- Measures RX bytes before/after curl to detect server-side routing issues
- Tests DNS resolution and dumps resolv.conf on failure
- On failure prints the exact server-side commands to run for diagnosis (sysctl, iptables, wg)

Add PersistentKeepalive=25 to wg0.conf to keep NAT mappings alive.
This commit is contained in:
2026-05-17 18:40:24 +02:00
parent be87f2e230
commit 68c4335348
2 changed files with 89 additions and 1 deletions

View File

@@ -135,6 +135,11 @@ start_vpn() {
# We remove the auto-created default route afterwards and set our own.
wg setconf "$INTERFACE" <(grep -v -i '^\(Address\|DNS\|MTU\|PreUp\|PostUp\|PreDown\|PostDown\|SaveConfig\)' "$CONFIG_FILE")
# Log public key so it can be verified against the server's peer list
local PUBKEY
PUBKEY=$(wg show "$INTERFACE" public-key 2>/dev/null || echo "unknown")
echo "[vpn] Public key: ${PUBKEY}"
# Assign the address
ip -4 address add "$VPN_ADDRESS" dev "$INTERFACE"
@@ -196,6 +201,10 @@ start_vpn() {
fi
echo "[vpn] WireGuard interface ${INTERFACE} is up."
echo "[vpn] Routes:"
ip route show | sed 's/^/[vpn] /'
echo "[vpn] WireGuard status:"
wg show "$INTERFACE" 2>/dev/null | sed 's/^/[vpn] /'
}
# ──────────────────────────────────────────────
@@ -244,7 +253,12 @@ health_loop() {
fi
else
failures=$((failures + 1))
echo "[health] Ping failed ($failures/$max_failures)"
echo "[health] Check failed ($failures/$max_failures) — curl http://${CHECK_HOST} timed out"
# Dump WireGuard stats to show if handshake is stale and how much data flows
echo "[health] wg stats:"
wg show "$INTERFACE" 2>/dev/null | grep -E 'latest handshake|transfer|endpoint' | sed 's/^/[health] /' || echo "[health] wg0 not found"
echo "[health] routes:"
ip route show | grep -E 'wg0|default' | sed 's/^/[health] /'
if [ "$failures" -ge "$max_failures" ]; then
echo "[health] VPN appears down. Restarting WireGuard..."
@@ -273,8 +287,81 @@ cleanup() {
# Invoke the cleanup handler when the script receives SIGTERM or SIGINT.
trap cleanup SIGTERM SIGINT
# ──────────────────────────────────────────────
# Startup connectivity checks — diagnose issues early
# ──────────────────────────────────────────────
#######################################
# One-shot startup diagnostics for the WireGuard tunnel.
# Runs three checks and prints human-readable findings prefixed "[check]":
#   1. waits for a WireGuard handshake on the first listed peer,
#   2. fetches http://$CHECK_HOST and compares the peer's RX byte counter
#      before/after to tell "nothing comes back" from "host-specific issue",
#   3. tests DNS resolution and dumps resolv.conf on failure.
# The function is informational only: it never aborts the caller and does
# not report failures through its exit status.
# Globals (read): INTERFACE, CHECK_HOST, VPN_ADDRESS, VPN_ENDPOINT
# Arguments:      none
# Outputs:        diagnostic lines on stdout
#######################################
check_vpn_connectivity() {
  local -r handshake_timeout=15
  local -r uint_re='^[0-9]+$'
  local elapsed=0
  local handshake_ts=""
  local handshake_ok=0
  local age
  local rx_before=""
  local rx_after=""
  local transfer=""

  echo "[check] ── Startup connectivity checks ──"

  # 1. Wait for WireGuard handshake (up to ${handshake_timeout}s)
  echo "[check] Waiting for WireGuard handshake (up to ${handshake_timeout}s)..."
  while [ "$elapsed" -lt "$handshake_timeout" ]; do
    # `wg show IF latest-handshakes` prints "<peer-key> <epoch>" per peer;
    # an epoch of 0 means no handshake yet. Only the first peer is inspected.
    # The fallback keeps a failing `wg` from aborting under errexit/pipefail.
    handshake_ts=$(wg show "$INTERFACE" latest-handshakes 2>/dev/null \
      | awk 'NR == 1 {print $2}') || handshake_ts=""
    # Require a real number before doing arithmetic on it.
    if [[ "$handshake_ts" =~ $uint_re ]] && [ "$handshake_ts" -ne 0 ]; then
      age=$(( $(date +%s) - handshake_ts ))
      echo "[check] OK Handshake established ${age}s ago"
      handshake_ok=1
      break
    fi
    sleep 1
    elapsed=$((elapsed + 1))
  done

  if [ "$handshake_ok" -eq 0 ]; then
    echo "[check] FAIL No WireGuard handshake after ${handshake_timeout}s — tunnel is not up"
    echo "[check] This container's public key (must be on the server):"
    echo "[check] PublicKey = $(wg show "$INTERFACE" public-key 2>/dev/null || echo 'unknown')"
    echo "[check] AllowedIPs = ${VPN_ADDRESS}"
    echo "[check] Verify on server: wg show"
  fi

  # 2. Check whether traffic actually flows through the tunnel
  echo "[check] Testing traffic through tunnel (http://${CHECK_HOST})..."
  # `wg show IF transfer` prints "<peer-key> <rx-bytes> <tx-bytes>" per peer.
  rx_before=$(wg show "$INTERFACE" transfer 2>/dev/null \
    | awk 'NR == 1 {print $2}') || rx_before=""
  if curl -sf --max-time 8 "http://${CHECK_HOST}" > /dev/null 2>&1; then
    echo "[check] OK Traffic flows — tunnel is fully working"
  else
    rx_after=$(wg show "$INTERFACE" transfer 2>/dev/null \
      | awk 'NR == 1 {print $2}') || rx_after=""
    echo "[check] FAIL http://${CHECK_HOST} unreachable through tunnel"
    # Compare counters only when both are valid integers; otherwise the
    # numeric test below would itself error out and muddy the diagnostics.
    if [[ "$rx_before" =~ $uint_re && "$rx_after" =~ $uint_re ]]; then
      if [ "$rx_after" -le "$rx_before" ]; then
        echo "[check] RX bytes unchanged (${rx_before} -> ${rx_after})"
        echo "[check] Server receives packets but does NOT route them back"
        echo "[check] Fix on VPN server (${VPN_ENDPOINT}):"
        echo "[check] sysctl net.ipv4.ip_forward # must output 1"
        echo "[check] iptables -t nat -L POSTROUTING -v -n # must have MASQUERADE"
        echo "[check] wg show # check peer + AllowedIPs"
      else
        echo "[check] RX increased (${rx_before} -> ${rx_after}) — tunnel passes data"
        echo "[check] Issue may be specific to ${CHECK_HOST} or DNS"
      fi
    fi
    # Summarise counters for every peer, separated so multiple peers stay legible.
    transfer=$(wg show "$INTERFACE" transfer 2>/dev/null \
      | awk '{printf "%srx=%s tx=%s", (NR > 1 ? "; " : ""), $2, $3}') || transfer=""
    echo "[check] wg transfer: ${transfer}"
  fi

  # 3. DNS check
  echo "[check] Testing DNS resolution..."
  if nslookup 1.1.1.1 > /dev/null 2>&1 || nslookup google.com > /dev/null 2>&1; then
    echo "[check] OK DNS resolves"
  else
    echo "[check] FAIL DNS resolution failed"
    echo "[check] resolv.conf: $(tr '\n' ' ' < /etc/resolv.conf)"
    echo "[check] Check that DNS servers are reachable through wg0"
  fi

  echo "[check] ── End of checks ──"
}
# ── Main ──
# Startup sequence; the calls run strictly in the order listed.
# NOTE(review): enable_forwarding and setup_killswitch are defined outside
# this view — presumably IP forwarding and firewall setup; confirm.
enable_forwarding
setup_killswitch
# Bring up the WireGuard interface (logs the public key, routes and wg status).
start_vpn
# One-shot diagnostics (handshake, traffic through the tunnel, DNS); its
# result is not checked here, so startup continues regardless of the outcome.
check_vpn_connectivity
# Ongoing health monitoring; restarts WireGuard after repeated check failures.
health_loop

View File

@@ -13,4 +13,5 @@ PostDown = ip route del 192.168.178.0/24 via 192.168.178.1 dev wlp4s0f0
PublicKey = KgTUh3KLijVluDvNpzDCJJfrJ7EyLzYLmdHCksG4sRg=
AllowedIPs = 0.0.0.0/0
Endpoint = 91.148.236.64:51820
PersistentKeepalive = 25