feat(vpn): add startup connectivity checks and PersistentKeepalive
Add check_vpn_connectivity(), which runs once after wg0 comes up:
- Waits for the WireGuard handshake (up to 15s) and, if none occurs, prints this container's public key so it can be checked against the server's peer list
- Measures RX bytes before and after a curl request to detect server-side routing issues
- Tests DNS resolution and dumps resolv.conf on failure
- On failure, prints the exact server-side commands to run (sysctl, iptables, wg)

Add PersistentKeepalive = 25 to wg0.conf to keep NAT mappings alive.
This commit is contained in:
@@ -135,6 +135,11 @@ start_vpn() {
|
||||
# We remove the auto-created default route afterwards and set our own.
|
||||
wg setconf "$INTERFACE" <(grep -v -i '^\(Address\|DNS\|MTU\|PreUp\|PostUp\|PreDown\|PostDown\|SaveConfig\)' "$CONFIG_FILE")
|
||||
|
||||
# Log public key so it can be verified against the server's peer list
|
||||
local PUBKEY
|
||||
PUBKEY=$(wg show "$INTERFACE" public-key 2>/dev/null || echo "unknown")
|
||||
echo "[vpn] Public key: ${PUBKEY}"
|
||||
|
||||
# Assign the address
|
||||
ip -4 address add "$VPN_ADDRESS" dev "$INTERFACE"
|
||||
|
||||
@@ -196,6 +201,10 @@ start_vpn() {
|
||||
fi
|
||||
|
||||
echo "[vpn] WireGuard interface ${INTERFACE} is up."
|
||||
echo "[vpn] Routes:"
|
||||
ip route show | sed 's/^/[vpn] /'
|
||||
echo "[vpn] WireGuard status:"
|
||||
wg show "$INTERFACE" 2>/dev/null | sed 's/^/[vpn] /'
|
||||
}
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
@@ -244,7 +253,12 @@ health_loop() {
|
||||
fi
|
||||
else
|
||||
failures=$((failures + 1))
|
||||
echo "[health] Ping failed ($failures/$max_failures)"
|
||||
echo "[health] Check failed ($failures/$max_failures) — curl http://${CHECK_HOST} timed out"
|
||||
# Dump WireGuard stats to show if handshake is stale and how much data flows
|
||||
echo "[health] wg stats:"
|
||||
wg show "$INTERFACE" 2>/dev/null | grep -E 'latest handshake|transfer|endpoint' | sed 's/^/[health] /' || echo "[health] wg0 not found"
|
||||
echo "[health] routes:"
|
||||
ip route show | grep -E 'wg0|default' | sed 's/^/[health] /'
|
||||
|
||||
if [ "$failures" -ge "$max_failures" ]; then
|
||||
echo "[health] VPN appears down. Restarting WireGuard..."
|
||||
@@ -273,8 +287,81 @@ cleanup() {
|
||||
|
||||
trap cleanup SIGTERM SIGINT
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
# Startup connectivity checks — diagnose issues early
|
||||
# ──────────────────────────────────────────────
|
||||
# One-shot startup diagnostics for the WireGuard tunnel.
#
# Runs three checks and prints "[check] ..." lines describing the result:
#   1. waits for a WireGuard handshake on $INTERFACE,
#   2. fetches http://$CHECK_HOST and compares the RX byte counter before and
#      after the request to tell "nothing comes back" from "tunnel passes data",
#   3. tests DNS resolution with nslookup.
#
# Globals (read): INTERFACE, VPN_ADDRESS, VPN_ENDPOINT, CHECK_HOST
# Arguments:      none
# Outputs:        diagnostics on stdout
# Returns:        always 0 — the function only reports, it never aborts startup.
check_vpn_connectivity() {
    local -r handshake_timeout=15   # seconds to wait for the first handshake
    local elapsed=0
    local handshake_ts=0
    local handshake_ok=0
    local age

    echo "[check] ── Startup connectivity checks ──"

    # 1. Wait for WireGuard handshake (polls once per second)
    echo "[check] Waiting for WireGuard handshake (up to ${handshake_timeout}s)..."
    while [ "$elapsed" -lt "$handshake_timeout" ]; do
        # `wg show <if> latest-handshakes` prints "<peer-key> <unix-ts>" per
        # peer; only the first peer is inspected. `|| true` keeps a failing
        # wg from killing the script under `set -e -o pipefail`.
        handshake_ts=$(wg show "$INTERFACE" latest-handshakes 2>/dev/null | awk 'NR == 1 { print $2 }') || true
        # Anything that is not a plain non-negative integer counts as "no handshake".
        case "$handshake_ts" in
            '' | *[!0-9]*) handshake_ts=0 ;;
        esac
        if [ "$handshake_ts" -gt 0 ]; then
            age=$(( $(date +%s) - handshake_ts ))
            echo "[check] OK Handshake established ${age}s ago"
            handshake_ok=1
            break
        fi
        sleep 1
        elapsed=$((elapsed + 1))
    done

    if [ "$handshake_ok" -ne 1 ]; then
        echo "[check] FAIL No WireGuard handshake after ${handshake_timeout}s — tunnel is not up"
        echo "[check] This container's public key (must be on the server):"
        echo "[check] PublicKey = $(wg show "$INTERFACE" public-key 2>/dev/null || echo 'unknown')"
        echo "[check] AllowedIPs = ${VPN_ADDRESS}"
        echo "[check] Verify on server: wg show"
    fi

    # 2. Check whether traffic actually flows through the tunnel
    echo "[check] Testing traffic through tunnel (http://${CHECK_HOST})..."
    local rx_before rx_after
    rx_before=$(wg show "$INTERFACE" transfer 2>/dev/null | awk 'NR == 1 { print $2 }') || true

    if curl -sf --max-time 8 "http://${CHECK_HOST}" > /dev/null 2>&1; then
        echo "[check] OK Traffic flows — tunnel is fully working"
    else
        rx_after=$(wg show "$INTERFACE" transfer 2>/dev/null | awk 'NR == 1 { print $2 }') || true
        echo "[check] FAIL http://${CHECK_HOST} unreachable through tunnel"

        # Only compare counters when both are plain integers; otherwise the
        # numeric test below would itself fail with an error.
        local rx_valid=1 counter
        for counter in "$rx_before" "$rx_after"; do
            case "$counter" in
                '' | *[!0-9]*) rx_valid=0 ;;
            esac
        done

        if [ "$handshake_ok" -ne 1 ]; then
            # Without a handshake RX is trivially unchanged, so the routing
            # hints below would point at the wrong problem.
            echo "[check] No handshake was established, so no traffic can flow — fix the handshake first"
        elif [ "$rx_valid" -eq 1 ]; then
            if [ "$rx_after" -le "$rx_before" ]; then
                echo "[check] RX bytes unchanged (${rx_before} → ${rx_after})"
                echo "[check] Server receives packets but does NOT route them back"
                echo "[check] Fix on VPN server (${VPN_ENDPOINT}):"
                echo "[check] sysctl net.ipv4.ip_forward # must output 1"
                echo "[check] iptables -t nat -L POSTROUTING -v -n # must have MASQUERADE"
                echo "[check] wg show # check peer + AllowedIPs"
            else
                echo "[check] RX increased (${rx_before} → ${rx_after}) — tunnel passes data"
                echo "[check] Issue may be specific to ${CHECK_HOST} or DNS"
            fi
        fi

        # One "rx=… tx=…" entry per peer, separated by "; " so that several
        # peers do not run together on the line.
        local transfer
        transfer=$(wg show "$INTERFACE" transfer 2>/dev/null \
            | awk '{ printf "%srx=%s tx=%s", (NR > 1 ? "; " : ""), $2, $3 }') || true
        echo "[check] wg transfer: ${transfer}"
    fi

    # 3. DNS check
    echo "[check] Testing DNS resolution..."
    if nslookup 1.1.1.1 > /dev/null 2>&1 || nslookup google.com > /dev/null 2>&1; then
        echo "[check] OK DNS resolves"
    else
        echo "[check] FAIL DNS resolution failed"
        if [ -r /etc/resolv.conf ]; then
            echo "[check] resolv.conf: $(tr '\n' ' ' < /etc/resolv.conf)"
        else
            echo "[check] resolv.conf: (missing or unreadable)"
        fi
        echo "[check] Check that DNS servers are reachable through wg0"
    fi

    echo "[check] ── End of checks ──"
}
|
||||
|
||||
# ── Main ──
# Startup sequence. The calls run strictly in this order; in particular
# check_vpn_connectivity runs once, after start_vpn and before health_loop.
|
||||
enable_forwarding
|
||||
setup_killswitch
|
||||
start_vpn
|
||||
check_vpn_connectivity
|
||||
health_loop
|
||||
|
||||
@@ -13,4 +13,5 @@ PostDown = ip route del 192.168.178.0/24 via 192.168.178.1 dev wlp4s0f0
|
||||
PublicKey = KgTUh3KLijVluDvNpzDCJJfrJ7EyLzYLmdHCksG4sRg=
|
||||
AllowedIPs = 0.0.0.0/0
|
||||
Endpoint = 91.148.236.64:51820
|
||||
PersistentKeepalive = 25
|
||||
|
||||
|
||||
Reference in New Issue
Block a user