#!/bin/bash
# LeCoffre Node - production health-check script.
# Checks the overall state of the system and writes a detailed Markdown report.
# Used for production deployments and maintenance.

# Abort on the first unhandled command failure.
set -e

# ANSI colour codes for console output (interpreted by `echo -e` / `printf %b`).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color

# Configuration. Paths are relative to the directory the script is run from.
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
REPORT_DIR="./reports"
REPORT_FILE="$REPORT_DIR/production-health-report-${TIMESTAMP}.md"

# Print a console message prefixed with the current time (HH:MM:SS).
#
# Globals:   BLUE, NC (read) - colour escape sequences
# Arguments: $1 - message text; backslash escapes in it are interpreted,
#                 as they were with `echo -e`
# Outputs:   one line on stdout
print_message() {
  local now
  now=$(date '+%H:%M:%S')
  printf '%b[%s]%b %b\n' "$BLUE" "$now" "$NC" "$1"
}

# Create the report directory and start a fresh report file with its header.
#
# Globals:   REPORT_DIR, REPORT_FILE, TIMESTAMP (read)
# Outputs:   overwrites $REPORT_FILE with the title, date, timestamp,
#            environment, host name and the executive-summary heading
init_report() {
  mkdir -p "$REPORT_DIR"

  # Unquoted delimiter: $(...) and $TIMESTAMP are expanded when the file is written.
  cat > "$REPORT_FILE" << EOF
# Rapport de Santé Production - LeCoffre Node

**Date**: $(date '+%Y-%m-%d %H:%M:%S')
**Timestamp**: $TIMESTAMP
**Environnement**: Production
**Serveur**: $(hostname)

---

## Résumé Exécutif

EOF
}

# Append a level-2 Markdown heading, surrounded by blank lines, to the report.
#
# Globals:   REPORT_FILE (appended to)
# Arguments: $1 - section title
add_section() {
  local section_title="$1"
  printf '\n## %s\n\n' "$section_title" >> "$REPORT_FILE"
}

# Append one line of content to the report.
#
# Globals:   REPORT_FILE (appended to)
# Arguments: $1 - text to append; an empty string appends a blank line
add_content() {
  # printf rather than echo: echo would treat a value such as "-n" or "-e"
  # as an option and write nothing.
  printf '%s\n' "$1" >> "$REPORT_FILE"
}

# Print the first integer that follows LABEL on the first line of FILE that
# contains it; prints 0 when the label or the number is missing.
#
# Arguments: $1 - literal label to look for, $2 - log file to read
_url_metric() {
  local label="$1" file="$2" line
  line=$(grep -F -m1 -- "$label" "$file" 2>/dev/null) || line=""
  # Keep only what comes after the label.
  line=${line#*"$label"}
  # Drop ANSI colour sequences so their digits are not taken for the value.
  line=$(printf '%s' "$line" | sed $'s/\x1b\\[[0-9;]*m//g')
  if [[ "$line" =~ [0-9]+ ]]; then
    # 10# forces base 10 so a value like "08" is not parsed as octal.
    printf '%d\n' "$((10#${BASH_REMATCH[0]}))"
  else
    printf '0\n'
  fi
}

# Run ./scripts/url-health-check.sh and record its results in the report.
#
# Globals:   REPORT_DIR, TIMESTAMP (read); the report is written via add_content
# Arguments: $1 - label for this run, used in the heading and the log file name
# Outputs:   the checker's stdout and stderr are captured in
#            $REPORT_DIR/url-tests-<label>-$TIMESTAMP.log
# Returns:   the checker's exit status, or 1 when the checker script is missing
run_url_tests() {
  local test_type="$1"
  local output_file="$REPORT_DIR/url-tests-${test_type}-${TIMESTAMP}.log"
  local checker="./scripts/url-health-check.sh"

  print_message "Exécution des tests d'URLs ($test_type)..."

  if [ ! -f "$checker" ]; then
    add_content "### Tests d'URLs ($test_type)"
    add_content ""
    add_content "❌ **Erreur**: Script url-health-check.sh non trouvé"
    add_content ""
    return 1
  fi

  # Capture the status with `||` so that a failing check does not abort the
  # whole script under `set -e` before the results are written to the report.
  local exit_code=0
  "$checker" > "$output_file" 2>&1 || exit_code=$?

  # Extract the statistics from the log.
  local total_urls accessible_urls failed_urls
  total_urls=$(_url_metric "Total URLs testées:" "$output_file")
  accessible_urls=$(_url_metric "URLs accessibles:" "$output_file")
  failed_urls=$(_url_metric "URLs échouées:" "$output_file")

  add_content "### Tests d'URLs ($test_type)"
  add_content ""
  add_content "| Métrique | Valeur |"
  add_content "|----------|--------|"
  add_content "| Total URLs testées | $total_urls |"
  add_content "| URLs accessibles | $accessible_urls |"
  add_content "| URLs échouées | $failed_urls |"
  # The divisor falls back to 1 so an empty run does not divide by zero.
  add_content "| Taux de réussite | $(( accessible_urls * 100 / (total_urls > 0 ? total_urls : 1) ))% |"
  add_content ""

  case "$exit_code" in
    0) add_content "✅ **Statut**: Toutes les URLs sont accessibles" ;;
    1) add_content "⚠️ **Statut**: Certaines URLs ne sont pas accessibles" ;;
    *) add_content "❌ **Statut**: Trop d'URLs ne sont pas accessibles" ;;
  esac

  add_content ""
  add_content "**Log détaillé**: \`$output_file\`"
  add_content ""

  return "$exit_code"
}

# Add a table with the state of each expected Docker container to the report.
#
# For every entry in the service list, one row is written with the display
# name, the container name, the running state, the health status and the
# container's StartedAt timestamp (shown in the "Uptime" column).
#
# Globals:   none directly; the report is written via add_section/add_content
# Requires:  the `docker` CLI
check_docker_services() {
  add_section "État des Services Docker"

  print_message "Vérification des services Docker..."

  # Services to check, as "container-name:Display Name".
  local services=(
    "tor-proxy:Tor Proxy"
    "bitcoin-signet:Bitcoin Signet"
    "blindbit-oracle:BlindBit Oracle"
    "sdk_storage:SDK Storage"
    "sdk_relay:SDK Relay"
    "lecoffre-front:LeCoffre Frontend"
    "ihm_client:IHM Client"
    "grafana:Grafana"
    "loki:Loki"
    "promtail:Promtail"
    "status-api:Status API"
    "watchtower:Watchtower"
  )

  add_content "| Service | Container | Statut | Santé | Uptime |"
  add_content "|---------|-----------|--------|-------|--------|"

  # List containers once. `-a` includes stopped containers; without it they
  # never reach the "Stopped" row below and are reported as "Not Found".
  local known_containers
  known_containers=$(docker ps -a --format '{{.Names}}') || known_containers=""

  local service_entry service_name display_name running health uptime
  for service_entry in "${services[@]}"; do
    service_name="${service_entry%%:*}"
    display_name="${service_entry##*:}"

    # Exact, literal, whole-line match on the container name.
    if ! grep -Fxq -- "$service_name" <<<"$known_containers"; then
      add_content "| $display_name | $service_name | ❌ Not Found | ❌ N/A | N/A |"
      continue
    fi

    running=$(docker inspect --format='{{.State.Running}}' "$service_name" 2>/dev/null) || running="false"
    # The inspect call fails when the template cannot be evaluated; the
    # fallback also covers an empty result.
    health=$(docker inspect --format='{{.State.Health.Status}}' "$service_name" 2>/dev/null) || health=""
    health="${health:-no-healthcheck}"
    uptime=$(docker inspect --format='{{.State.StartedAt}}' "$service_name" 2>/dev/null) || uptime="unknown"

    if [ "$running" != "true" ]; then
      add_content "| $display_name | $service_name | 🔴 Stopped | ❌ Unhealthy | $uptime |"
    elif [ "$health" = "healthy" ]; then
      add_content "| $display_name | $service_name | 🟢 Running | ✅ Healthy | $uptime |"
    else
      add_content "| $display_name | $service_name | 🟢 Running | ⚠️ $health | $uptime |"
    fi
  done

  add_content ""
}

# Add a table of file-system usage, one row per `df` entry, to the report.
#
# Each row is flagged by usage: above 90% red, above 80% yellow, otherwise
# green; a usage value that is not a number is flagged white.
#
# Globals:   none directly; the report is written via add_section/add_content
check_disk_space() {
  add_section "Espace Disque"

  print_message "Vérification de l'espace disque..."

  add_content "| Partition | Taille | Utilisé | Disponible | Usage |"
  add_content "|-----------|--------|---------|------------|-------|"

  local partition size used available usage usage_num status
  # -P (POSIX format) keeps every file system on a single line, so long
  # device names cannot shift the columns. tail drops the header line.
  # The trailing `_` absorbs the mount point.
  df -hP | tail -n +2 | while read -r partition size used available usage _; do
    usage_num="${usage%\%}"

    if [[ ! "$usage_num" =~ ^[0-9]+$ ]]; then
      # No percentage available (e.g. "-"): do not claim the volume is fine.
      status="⚪"
    elif (( usage_num > 90 )); then
      status="🔴"
    elif (( usage_num > 80 )); then
      status="🟡"
    else
      status="🟢"
    fi

    add_content "| $partition | $size | $used | $available | $status $usage |"
  done

  add_content ""
}

# Add a table with total, used, free and available system memory to the report.
#
# Values are taken from the "Mem:" row of `free -h` (columns 2, 3, 4 and 7);
# a column that is missing is reported as N/A.
#
# Globals:   none directly; the report is written via add_section/add_content
check_memory() {
  add_section "Mémoire Système"

  print_message "Vérification de la mémoire..."

  # LC_ALL=C pins the row label to "Mem:" regardless of the system locale.
  local mem_fields
  mem_fields=$(LC_ALL=C free -h | awk '$1 == "Mem:" { print $2, $3, $4, $7 }') || mem_fields=""

  local total used free_mem available
  # A here-string always ends with a newline, so read succeeds even when
  # mem_fields is empty (which matters under `set -e`).
  read -r total used free_mem available <<<"$mem_fields" || true

  add_content "| Type | Taille |"
  add_content "|------|--------|"
  add_content "| Total | ${total:-N/A} |"
  add_content "| Utilisé | ${used:-N/A} |"
  add_content "| Libre | ${free_mem:-N/A} |"
  add_content "| Disponible | ${available:-N/A} |"
  add_content ""
}

# Add a table of the project's Docker volumes (name, driver, size) to the report.
#
# Only volumes whose name contains "4nk_node" are listed. The size is read
# from the third column of the matching row of `docker system df -v`, and is
# N/A when no such row exists.
#
# Globals:   none directly; the report is written via add_section/add_content
# Requires:  the `docker` CLI
check_docker_volumes() {
  add_section "Volumes Docker"

  print_message "Vérification des volumes Docker..."

  add_content "| Volume | Driver | Taille |"
  add_content "|--------|--------|--------|"

  # Query disk usage once instead of once per volume.
  local usage_report
  usage_report=$(docker system df -v 2>/dev/null) || usage_report=""

  local volume_name driver size
  docker volume ls --format "table {{.Name}}\t{{.Driver}}" | tail -n +2 | while read -r volume_name driver; do
    if [[ "$volume_name" == *"4nk_node"* ]]; then
      # Exact match on the first column: a substring match would also pick up
      # other volumes that share this name as a prefix.
      size=$(awk -v name="$volume_name" '$1 == name { print $3; exit }' <<<"$usage_report")
      add_content "| $volume_name | $driver | ${size:-N/A} |"
    fi
  done

  add_content ""
}

# Append the closing "summary and recommendations" section to the report.
#
# The text is static apart from the run timestamp in the footer.
#
# Globals:   TIMESTAMP (read); the report is written via add_section/add_content
generate_summary() {
  add_section "Résumé et Recommandations"

  # One array element per report line. Sub-items are indented three spaces so
  # that Markdown nests them under their numbered item.
  local summary_lines=(
    "### Points d'Attention"
    ""
    "- Vérifiez les URLs échouées dans les logs détaillés"
    "- Surveillez l'espace disque des partitions critiques"
    "- Vérifiez l'état de santé des services Docker"
    "- Consultez les logs des services pour les erreurs"
    ""
    "### Actions Recommandées"
    ""
    "1. **Maintenance Préventive**"
    "   - Nettoyer les logs anciens"
    "   - Vérifier les certificats SSL"
    "   - Mettre à jour les images Docker"
    ""
    "2. **Surveillance Continue**"
    "   - Monitorer les métriques Grafana"
    "   - Surveiller les alertes Loki"
    "   - Vérifier les backups automatiques"
    ""
    "3. **Sécurité**"
    "   - Vérifier les accès SSH"
    "   - Contrôler les certificats SSL"
    "   - Auditer les logs de sécurité"
    ""
    "---"
    ""
    "*Rapport généré automatiquement par LeCoffre Node Production Health Check*"
    "*Timestamp: $TIMESTAMP*"
  )

  local summary_line
  for summary_line in "${summary_lines[@]}"; do
    add_content "$summary_line"
  done
}

# Entry point: run every check in order, build the report and print where
# the generated files are.
#
# Globals:   BLUE, GREEN, YELLOW, NC, REPORT_FILE, REPORT_DIR, TIMESTAMP (read)
# Returns:   the status of the URL tests (0 when they all passed), so the
#            script's exit code still reflects URL health
main() {
  echo -e "${BLUE}========================================${NC}"
  echo -e "${BLUE} LeCoffre Node - Production Health Check${NC}"
  echo -e "${BLUE}========================================${NC}"
  echo

  print_message "Initialisation du rapport..."
  init_report

  print_message "Vérification des services Docker..."
  check_docker_services

  print_message "Vérification de l'espace disque..."
  check_disk_space

  print_message "Vérification de la mémoire..."
  check_memory

  print_message "Vérification des volumes Docker..."
  check_docker_volumes

  print_message "Exécution des tests d'URLs..."
  # Capture the status instead of letting `set -e` abort here: the summary
  # must still be written when some URLs fail.
  local url_status=0
  run_url_tests "production" || url_status=$?

  print_message "Génération du résumé..."
  generate_summary

  echo
  echo -e "${GREEN}✅ Rapport de santé généré avec succès !${NC}"
  echo -e "${GREEN}Rapport: $REPORT_FILE${NC}"
  if [ "$url_status" -ne 0 ]; then
    echo -e "${YELLOW}⚠️ Tests d'URLs terminés avec le code $url_status${NC}"
  fi
  echo
  echo -e "${BLUE}Fichiers générés:${NC}"
  echo -e "${YELLOW} - $REPORT_FILE (rapport principal)${NC}"
  echo -e "${YELLOW} - $REPORT_DIR/url-tests-production-${TIMESTAMP}.log (tests détaillés)${NC}"
  echo
  echo -e "${BLUE}Pour consulter le rapport:${NC}"
  echo -e "${YELLOW} cat $REPORT_FILE${NC}"
  echo -e "${YELLOW} less $REPORT_FILE${NC}"
  echo

  return "$url_status"
}

# Run the script, forwarding all command-line arguments to main.
main "$@"