From 681d1cebb9a05d3ff0b61cb6110569502aa9076c Mon Sep 17 00:00:00 2001 From: Nicolas Cantu Date: Tue, 6 Jan 2026 14:40:31 +0100 Subject: [PATCH] Detect and handle ControlSocket errors during SSH execution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit **Motivations:** - ControlSocket errors still occurring during command execution - Socket can become invalid DURING execution of long commands - Need to detect socket errors in command output and clean up immediately **Root causes:** - Socket validation before command execution cannot detect if socket dies during execution - Long commands (npm install, build) can cause connection to die mid-execution - Next command finds invalid socket and SSH disables multiplexing but leaves socket - Previous cleanup only happened before execution, not after detecting errors **Correctifs:** - Capture SSH command output to detect 'ControlSocket already exists' errors - If a socket error is detected, immediately clean up and retry once - This is a specific retry for socket errors only, not a general retry mechanism - Ensures dead sockets are cleaned up even if they die during command execution **Evolutions:** - Better handling of socket invalidation during long-running commands - Automatic recovery from socket errors detected during execution **Pages affectées:** - deploy.sh: Enhanced ssh_exec() to detect and handle socket errors in output --- deploy.sh | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/deploy.sh b/deploy.sh index 944d9a4..ec4a601 100644 --- a/deploy.sh +++ b/deploy.sh @@ -45,6 +45,8 @@ check_ssh_connection() { ssh_exec() { # Toujours vérifier et nettoyer le socket avant chaque commande # pour éviter les sockets morts qui causent "ControlSocket already exists, disabling multiplexing" + # Le socket peut devenir invalide pendant l'exécution d'une commande précédente, + # donc on vérifie systématiquement avant chaque nouvelle commande if [ 
-S "${SSH_CONTROL_PATH}" ]; then if ! check_ssh_connection; then # Connexion morte, nettoyer avant d'exécuter @@ -53,15 +55,34 @@ ssh_exec() { fi # Exécuter la commande SSH (une seule tentative, pas de retry) - # ControlMaster=auto va créer une nouvelle connexion si le socket n'existe pas, - # ou réutiliser s'il est valide - ssh -o ControlMaster=auto \ + # Capture stderr pour détecter les erreurs de socket + local ssh_output + ssh_output=$(ssh -o ControlMaster=auto \ -o ControlPath="${SSH_CONTROL_PATH}" \ -o ControlPersist=300 \ -o ConnectTimeout=10 \ -o ServerAliveInterval=60 \ -o ServerAliveCountMax=3 \ - ${SERVER} "$@" 2>&1 + ${SERVER} "$@" 2>&1) + local ssh_exit_code=$? + + # Si on détecte une erreur de socket, nettoyer et réessayer une fois + if echo "$ssh_output" | grep -q "ControlSocket.*already exists"; then + cleanup_dead_ssh + # Réessayer une fois après nettoyage + ssh -o ControlMaster=auto \ + -o ControlPath="${SSH_CONTROL_PATH}" \ + -o ControlPersist=300 \ + -o ConnectTimeout=10 \ + -o ServerAliveInterval=60 \ + -o ServerAliveCountMax=3 \ + ${SERVER} "$@" 2>&1 + return $? + fi + + # Afficher la sortie et retourner le code de sortie + echo "$ssh_output" + return $ssh_exit_code } # Nettoyer les connexions SSH persistantes et le répertoire temporaire à la fin