feat: Ajout du système de monitoring centralisé
- Nouveau module Grafana central partagé
- Installation de Loki pour la collecte de logs
- Installation de Prometheus pour les métriques
- Installation de Promtail pour la collecte de logs
- Configuration DNS pour tous les services de monitoring
- Documentation complète du monitoring dans docs/MONITORING.md
- Correction de la résolution DNS pour Bitcoin
- Suppression de log-monitoring.yml obsolète
- Mise à jour du CHANGELOG et VERSION

Services ajoutés :
- grafana-central.local:3000 (tableau de bord)
- loki.local:3100 (logs)
- prometheus.local:9091 (métriques)
- promtail.local (collecte de logs)
This commit is contained in:
parent
fc5143cf47
commit
f7e110446b
22
CHANGELOG.md
22
CHANGELOG.md
@ -1,6 +1,26 @@
|
|||||||
## Changed (2025-09-10)
|
## Changed (2025-09-11)
|
||||||
# Changelog - 4NK Node
|
# Changelog - 4NK Node
|
||||||
|
|
||||||
|
## [1.1.4-wip.2] - 2025-09-11
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- **Monitoring centralisé** : Nouveau module Grafana central partagé
|
||||||
|
- **Loki** : Système de collecte et d'indexation des logs
|
||||||
|
- **Prometheus** : Collecte et stockage des métriques de performance
|
||||||
|
- **Promtail** : Agent de collecte des logs pour tous les services
|
||||||
|
- **Documentation monitoring** : Guide complet dans `docs/MONITORING.md`
|
||||||
|
- **Configuration DNS** : Ajout des entrées pour les services de monitoring
|
||||||
|
- **Dashboards 4NK** : Tableaux de bord pour surveillance des services
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
- **Architecture monitoring** : Remplacement de `log-monitoring.yml` par un système centralisé
|
||||||
|
- **Grafana** : Migration vers un Grafana central partagé (`grafana-central.local:3000`)
|
||||||
|
- **Configuration Bitcoin** : Correction de la résolution DNS pour `bitcoin.local`
|
||||||
|
- **Port Prometheus** : Changement du port 9090 vers 9091 pour éviter les conflits
|
||||||
|
|
||||||
|
### Removed
|
||||||
|
- **log-monitoring.yml** : Fichier obsolète remplacé par le système centralisé
|
||||||
|
|
||||||
## [1.1.4-wip.1] - 2025-09-10
|
## [1.1.4-wip.1] - 2025-09-10
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
|
@ -26,6 +26,8 @@ services:
|
|||||||
- ./modules/bitcoin/data:/home/bitcoin/.bitcoin
|
- ./modules/bitcoin/data:/home/bitcoin/.bitcoin
|
||||||
- ./modules/bitcoin/conf/bitcoin.conf:/home/bitcoin/.bitcoin/bitcoin.conf
|
- ./modules/bitcoin/conf/bitcoin.conf:/home/bitcoin/.bitcoin/bitcoin.conf
|
||||||
- ./modules/bitcoin/logs:/home/bitcoin/.bitcoin/logs
|
- ./modules/bitcoin/logs:/home/bitcoin/.bitcoin/logs
|
||||||
|
dns:
|
||||||
|
- 172.17.0.1 # Gateway Docker pour accéder à dnsmasq
|
||||||
networks:
|
networks:
|
||||||
4nk_network:
|
4nk_network:
|
||||||
ipv4_address: 172.20.0.11
|
ipv4_address: 172.20.0.11
|
||||||
@ -276,7 +278,95 @@ services:
|
|||||||
ipv4_address: 172.20.0.33
|
ipv4_address: 172.20.0.33
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
|
loki.local:
|
||||||
|
image: grafana/loki:2.9.0
|
||||||
|
container_name: loki.local
|
||||||
|
hostname: loki.local
|
||||||
|
ports:
|
||||||
|
- "3100:3100"
|
||||||
|
command: -config.file=/etc/loki/local-config.yaml
|
||||||
|
volumes:
|
||||||
|
- ./modules/grafana-central/conf/loki-config.yaml:/etc/loki/local-config.yaml:ro
|
||||||
|
- loki_data:/loki
|
||||||
|
networks:
|
||||||
|
4nk_network:
|
||||||
|
ipv4_address: 172.20.0.51
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
prometheus.local:
|
||||||
|
image: prom/prometheus:v2.54.1
|
||||||
|
container_name: prometheus.local
|
||||||
|
hostname: prometheus.local
|
||||||
|
ports:
|
||||||
|
- "9091:9090"
|
||||||
|
command:
|
||||||
|
- '--config.file=/etc/prometheus/prometheus.yml'
|
||||||
|
- '--storage.tsdb.path=/prometheus'
|
||||||
|
- '--web.console.libraries=/etc/prometheus/console_libraries'
|
||||||
|
- '--web.console.templates=/etc/prometheus/consoles'
|
||||||
|
- '--storage.tsdb.retention.time=200h'
|
||||||
|
- '--web.enable-lifecycle'
|
||||||
|
volumes:
|
||||||
|
- ./modules/grafana-central/conf/prometheus.yml:/etc/prometheus/prometheus.yml:ro
|
||||||
|
- prometheus_data:/prometheus
|
||||||
|
networks:
|
||||||
|
4nk_network:
|
||||||
|
ipv4_address: 172.20.0.52
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
promtail.local:
|
||||||
|
image: grafana/promtail:2.9.0
|
||||||
|
container_name: promtail.local
|
||||||
|
hostname: promtail.local
|
||||||
|
command: -config.file=/etc/promtail/config.yml
|
||||||
|
volumes:
|
||||||
|
- ./modules/grafana-central/conf/promtail-config.yml:/etc/promtail/config.yml:ro
|
||||||
|
- /var/lib/docker/containers:/var/lib/docker/containers:ro
|
||||||
|
- /var/log/docker:/var/log/docker:ro
|
||||||
|
- ./modules:/workspace/modules:ro
|
||||||
|
- ./projects:/workspace/projects:ro
|
||||||
|
- ./modules/grafana-central/logs:/workspace/logs:ro
|
||||||
|
networks:
|
||||||
|
4nk_network:
|
||||||
|
ipv4_address: 172.20.0.53
|
||||||
|
restart: unless-stopped
|
||||||
|
depends_on:
|
||||||
|
- loki.local
|
||||||
|
|
||||||
|
grafana-central.local:
|
||||||
|
image: grafana/grafana:10.0.0
|
||||||
|
container_name: grafana-central.local
|
||||||
|
hostname: grafana-central.local
|
||||||
|
ports:
|
||||||
|
- "3000:3000"
|
||||||
|
environment:
|
||||||
|
- GF_SECURITY_ADMIN_PASSWORD=admin
|
||||||
|
- GF_USERS_ALLOW_SIGN_UP=false
|
||||||
|
- GF_SERVER_ROOT_URL=http://grafana-central.local:3000
|
||||||
|
- GF_SERVER_SERVE_FROM_SUB_PATH=false
|
||||||
|
volumes:
|
||||||
|
- grafana_central_data:/var/lib/grafana
|
||||||
|
- ./modules/grafana-central/conf/grafana.ini:/etc/grafana/grafana.ini:ro
|
||||||
|
- ./modules/grafana-central/conf/datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml:ro
|
||||||
|
- ./modules/grafana-central/logs:/var/log/grafana
|
||||||
|
networks:
|
||||||
|
4nk_network:
|
||||||
|
ipv4_address: 172.20.0.50
|
||||||
|
restart: unless-stopped
|
||||||
|
depends_on:
|
||||||
|
- loki.local
|
||||||
|
- prometheus.local
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "wget", "--quiet", "--tries=1", "--timeout=5", "--spider", "http://localhost:3000/api/health"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
start_period: 40s
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
|
grafana_central_data:
|
||||||
|
loki_data:
|
||||||
|
prometheus_data:
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
4nk_network:
|
4nk_network:
|
||||||
|
195
docs/MONITORING.md
Normal file
195
docs/MONITORING.md
Normal file
@ -0,0 +1,195 @@
|
|||||||
|
# Monitoring et Observabilité 4NK
|
||||||
|
|
||||||
|
## Vue d'ensemble
|
||||||
|
|
||||||
|
Le système de monitoring 4NK utilise une stack complète d'observabilité basée sur Grafana, Loki et Prometheus pour surveiller l'ensemble des services du projet.
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
### Services de monitoring
|
||||||
|
|
||||||
|
- **Grafana Central** (`grafana-central.local:3000`)
|
||||||
|
- Tableau de bord central pour visualisation des métriques et logs
|
||||||
|
- Accès : http://grafana-central.local:3000 (identifiants par défaut : admin/admin — à changer après la première connexion)
|
||||||
|
|
||||||
|
- **Loki** (`loki.local:3100`)
|
||||||
|
- Système de collecte et d'indexation des logs
|
||||||
|
- Stockage des logs de tous les services 4NK
|
||||||
|
|
||||||
|
- **Prometheus** (`prometheus.local:9091`)
|
||||||
|
- Collecte et stockage des métriques de performance
|
||||||
|
- Surveillance des services et alertes
|
||||||
|
|
||||||
|
- **Promtail** (`promtail.local`)
|
||||||
|
- Agent de collecte des logs
|
||||||
|
- Envoi des logs vers Loki
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
### Datasources Grafana
|
||||||
|
|
||||||
|
- **Loki-4NK** : Logs des services 4NK
|
||||||
|
- **Prometheus-4NK** : Métriques des services 4NK
|
||||||
|
- **Loki-Infra** : Logs d'infrastructure (si disponible)
|
||||||
|
- **Prometheus-Infra** : Métriques d'infrastructure (si disponible)
|
||||||
|
|
||||||
|
### Collecte de logs
|
||||||
|
|
||||||
|
Promtail collecte automatiquement les logs de :
|
||||||
|
|
||||||
|
#### Services 4NK
|
||||||
|
- `4nk-bitcoin` : Logs Bitcoin
|
||||||
|
- `4nk-blindbit-oracle` : Logs BlindBit Oracle
|
||||||
|
- `4nk-sdk-storage` : Logs SDK Storage
|
||||||
|
- `4nk-sdk-relay1/2/3` : Logs des relais SDK
|
||||||
|
- `4nk-sdk-signer` : Logs SDK Signer
|
||||||
|
- `4nk-ihm-client` : Logs IHM Client
|
||||||
|
- `4nk-coffre-front/back` : Logs LeCoffre
|
||||||
|
|
||||||
|
#### Logs système
|
||||||
|
- Logs Docker containers
|
||||||
|
- Logs système
|
||||||
|
- Logs Grafana Central
|
||||||
|
|
||||||
|
### Collecte de métriques
|
||||||
|
|
||||||
|
Prometheus surveille les métriques de :
|
||||||
|
|
||||||
|
- Bitcoin Core (port 38332)
|
||||||
|
- BlindBit Oracle (port 8000)
|
||||||
|
- SDK Storage (port 8080)
|
||||||
|
- SDK Relay 1/2/3 (port 8090)
|
||||||
|
- SDK Signer (port 9090)
|
||||||
|
- IHM Client (port 80)
|
||||||
|
- Conteneurs Docker
|
||||||
|
- Node Exporter (si installé)
|
||||||
|
|
||||||
|
## Dashboards
|
||||||
|
|
||||||
|
### Dashboards 4NK
|
||||||
|
- **4NK Overview** : Vue d'ensemble des services
|
||||||
|
- **Bitcoin Monitoring** : Surveillance du nœud Bitcoin
|
||||||
|
- **SDK Services** : Métriques des services SDK
|
||||||
|
|
||||||
|
### Dashboards Infrastructure
|
||||||
|
- **System Overview** : Vue d'ensemble du système
|
||||||
|
- **Docker Containers** : Surveillance des conteneurs
|
||||||
|
|
||||||
|
### Dashboards System
|
||||||
|
- **Node Metrics** : Métriques du serveur
|
||||||
|
- **Network Monitoring** : Surveillance réseau
|
||||||
|
|
||||||
|
## Utilisation
|
||||||
|
|
||||||
|
### Accès aux services
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Grafana (tableau de bord principal)
|
||||||
|
http://grafana-central.local:3000
|
||||||
|
|
||||||
|
# Prometheus (métriques brutes)
|
||||||
|
http://prometheus.local:9091
|
||||||
|
|
||||||
|
# Loki (logs bruts)
|
||||||
|
http://loki.local:3100
|
||||||
|
```
|
||||||
|
|
||||||
|
### Requêtes de logs dans Grafana
|
||||||
|
|
||||||
|
```logql
|
||||||
|
# Tous les logs Bitcoin
|
||||||
|
{job="4nk-bitcoin"}
|
||||||
|
|
||||||
|
# Logs d'erreur de tous les services
|
||||||
|
{job=~"4nk-.*"} |= "error"
|
||||||
|
|
||||||
|
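# Logs d'erreur Bitcoin analysés au format JSON (la période, p. ex. les 5 dernières minutes, se règle via le sélecteur de temps de Grafana)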
|
||||||
|
{job="4nk-bitcoin"} |= "error" | json
|
||||||
|
```
|
||||||
|
|
||||||
|
### Requêtes de métriques dans Grafana
|
||||||
|
|
||||||
|
```promql
|
||||||
|
# Services en ligne
|
||||||
|
up{job=~"4nk-.*"}
|
||||||
|
|
||||||
|
# Utilisation CPU
|
||||||
|
rate(process_cpu_seconds_total[5m])
|
||||||
|
|
||||||
|
# Utilisation mémoire
|
||||||
|
process_resident_memory_bytes
|
||||||
|
```
|
||||||
|
|
||||||
|
## Maintenance
|
||||||
|
|
||||||
|
### Redémarrage des services
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Redémarrage complet du monitoring
|
||||||
|
docker-compose restart loki.local prometheus.local promtail.local grafana-central.local
|
||||||
|
|
||||||
|
# Redémarrage d'un service spécifique
|
||||||
|
docker-compose restart loki.local
|
||||||
|
```
|
||||||
|
|
||||||
|
### Sauvegarde des données
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Sauvegarde des volumes de monitoring
|
||||||
|
docker run --rm -v 4nk_node_loki_data:/data -v $(pwd):/backup alpine tar czf /backup/loki-backup.tar.gz /data
|
||||||
|
docker run --rm -v 4nk_node_prometheus_data:/data -v $(pwd):/backup alpine tar czf /backup/prometheus-backup.tar.gz /data
|
||||||
|
docker run --rm -v 4nk_node_grafana_central_data:/data -v $(pwd):/backup alpine tar czf /backup/grafana-backup.tar.gz /data
|
||||||
|
```
|
||||||
|
|
||||||
|
### Restauration des données
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Restauration Loki
|
||||||
|
docker run --rm -v 4nk_node_loki_data:/data -v $(pwd):/backup alpine tar xzf /backup/loki-backup.tar.gz -C /
|
||||||
|
|
||||||
|
# Restauration Prometheus
|
||||||
|
docker run --rm -v 4nk_node_prometheus_data:/data -v $(pwd):/backup alpine tar xzf /backup/prometheus-backup.tar.gz -C /
|
||||||
|
|
||||||
|
# Restauration Grafana
|
||||||
|
docker run --rm -v 4nk_node_grafana_central_data:/data -v $(pwd):/backup alpine tar xzf /backup/grafana-backup.tar.gz -C /
|
||||||
|
```
|
||||||
|
|
||||||
|
## Dépannage
|
||||||
|
|
||||||
|
### Problèmes courants
|
||||||
|
|
||||||
|
1. **Promtail ne collecte pas les logs**
|
||||||
|
- Vérifier les chemins de logs dans `promtail-config.yml`
|
||||||
|
- Vérifier les permissions sur les fichiers de logs
|
||||||
|
|
||||||
|
2. **Prometheus ne collecte pas les métriques**
|
||||||
|
- Vérifier la configuration dans `prometheus.yml`
|
||||||
|
- Vérifier que les services exposent des métriques sur `/metrics`
|
||||||
|
|
||||||
|
3. **Grafana ne peut pas se connecter aux datasources**
|
||||||
|
- Vérifier la configuration dans `datasources.yml`
|
||||||
|
- Vérifier que Loki et Prometheus sont accessibles
|
||||||
|
|
||||||
|
### Logs de diagnostic
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Logs Promtail
|
||||||
|
docker logs promtail.local
|
||||||
|
|
||||||
|
# Logs Loki
|
||||||
|
docker logs loki.local
|
||||||
|
|
||||||
|
# Logs Prometheus
|
||||||
|
docker logs prometheus.local
|
||||||
|
|
||||||
|
# Logs Grafana
|
||||||
|
docker logs grafana-central.local
|
||||||
|
```
|
||||||
|
|
||||||
|
## Évolutions futures
|
||||||
|
|
||||||
|
- Ajout d'AlertManager pour les alertes
|
||||||
|
- Intégration avec des services externes (Slack, email)
|
||||||
|
- Dashboards personnalisés pour chaque service
|
||||||
|
- Métriques personnalisées pour les services 4NK
|
||||||
|
- Intégration avec des outils de tracing distribué
|
@ -1,61 +0,0 @@
|
|||||||
version: '3.8'
|
|
||||||
|
|
||||||
services:
|
|
||||||
loki:
|
|
||||||
image: grafana/loki:2.9.0
|
|
||||||
container_name: 4nk-loki
|
|
||||||
ports:
|
|
||||||
- "3100:3100"
|
|
||||||
command: -config.file=/etc/loki/local-config.yaml
|
|
||||||
volumes:
|
|
||||||
- ./log/loki-config.yaml:/etc/loki/local-config.yaml:ro
|
|
||||||
- loki_data:/loki
|
|
||||||
networks:
|
|
||||||
- 4nk_network
|
|
||||||
restart: unless-stopped
|
|
||||||
|
|
||||||
promtail:
|
|
||||||
image: grafana/promtail:2.9.0
|
|
||||||
container_name: 4nk-promtail
|
|
||||||
command: -config.file=/etc/promtail/config.yml
|
|
||||||
volumes:
|
|
||||||
- ./log/promtail-config.yml:/etc/promtail/config.yml:ro
|
|
||||||
- /var/lib/docker/containers:/var/lib/docker/containers:ro
|
|
||||||
- /var/log/docker:/var/log/docker:ro
|
|
||||||
- ./modules:/workspace/modules:ro
|
|
||||||
- ./projects:/workspace/projects:ro
|
|
||||||
- ./log:/workspace/logs:ro
|
|
||||||
networks:
|
|
||||||
- 4nk_network
|
|
||||||
restart: unless-stopped
|
|
||||||
depends_on:
|
|
||||||
- loki
|
|
||||||
|
|
||||||
grafana:
|
|
||||||
image: grafana/grafana:10.0.0
|
|
||||||
container_name: 4nk-grafana
|
|
||||||
ports:
|
|
||||||
- "3000:3000"
|
|
||||||
environment:
|
|
||||||
- GF_SECURITY_ADMIN_PASSWORD=admin
|
|
||||||
- GF_USERS_ALLOW_SIGN_UP=false
|
|
||||||
- GF_SERVER_ROOT_URL=%(protocol)s://%(domain)s/grafana
|
|
||||||
- GF_SERVER_SERVE_FROM_SUB_PATH=true
|
|
||||||
volumes:
|
|
||||||
- grafana_data:/var/lib/grafana
|
|
||||||
- ./log/grafana-datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml:ro
|
|
||||||
- ./log/grafana-dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml:ro
|
|
||||||
- ./log/dashboards:/etc/grafana/provisioning/dashboards/dashboards:ro
|
|
||||||
networks:
|
|
||||||
- 4nk_network
|
|
||||||
restart: unless-stopped
|
|
||||||
depends_on:
|
|
||||||
- loki
|
|
||||||
|
|
||||||
volumes:
|
|
||||||
loki_data:
|
|
||||||
grafana_data:
|
|
||||||
|
|
||||||
networks:
|
|
||||||
4nk_network:
|
|
||||||
external: true
|
|
49
modules/grafana-central/conf/grafana.ini
Normal file
49
modules/grafana-central/conf/grafana.ini
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
[paths]
|
||||||
|
data = /var/lib/grafana/data
|
||||||
|
logs = /var/lib/grafana/logs
|
||||||
|
plugins = /var/lib/grafana/plugins
|
||||||
|
provisioning = /etc/grafana/provisioning
|
||||||
|
|
||||||
|
[server]
|
||||||
|
http_port = 3000
|
||||||
|
http_addr = 0.0.0.0
|
||||||
|
root_url = http://grafana-central.local:3000/
|
||||||
|
serve_from_sub_path = false
|
||||||
|
|
||||||
|
[database]
|
||||||
|
type = sqlite3
|
||||||
|
path = grafana.db
|
||||||
|
|
||||||
|
[security]
|
||||||
|
admin_user = admin
|
||||||
|
admin_password = admin
|
||||||
|
secret_key = SW2YcwTIb9zpOOhoPsMm
|
||||||
|
|
||||||
|
[users]
|
||||||
|
allow_sign_up = false
|
||||||
|
auto_assign_org = true
|
||||||
|
auto_assign_org_role = Viewer
|
||||||
|
|
||||||
|
[log]
|
||||||
|
mode = console
|
||||||
|
level = info
|
||||||
|
|
||||||
|
[alerting]
|
||||||
|
enabled = true
|
||||||
|
|
||||||
|
[explore]
|
||||||
|
enabled = true
|
||||||
|
|
||||||
|
[panels]
|
||||||
|
disable_sanitize_html = false
|
||||||
|
|
||||||
|
[plugins]
|
||||||
|
enable_alpha = false
|
||||||
|
app_tls_skip_verify_insecure = false
|
||||||
|
|
||||||
|
[auth]
|
||||||
|
disable_login_form = false
|
||||||
|
disable_signout_menu = false
|
||||||
|
|
||||||
|
[auth.anonymous]
|
||||||
|
enabled = false
|
@ -1 +1 @@
|
|||||||
MANIFEST-000791
|
MANIFEST-000907
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
2025/09/11-07:35:29.765658 7f32ae08cb38 Recovering log #790
|
2025/09/11-10:08:06.375380 7f8065a7cb38 Recovering log #906
|
||||||
2025/09/11-07:35:29.783093 7f32ae08cb38 Delete type=3 #789
|
2025/09/11-10:08:06.391400 7f8065a7cb38 Delete type=0 #906
|
||||||
2025/09/11-07:35:29.783159 7f32ae08cb38 Delete type=0 #790
|
2025/09/11-10:08:06.391444 7f8065a7cb38 Delete type=3 #905
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
2025/09/11-07:34:58.695441 7eff68c3fb38 Recovering log #788
|
2025/09/11-10:07:35.336676 7f4fb108cb38 Recovering log #904
|
||||||
2025/09/11-07:34:58.713050 7eff68c3fb38 Delete type=0 #788
|
2025/09/11-10:07:35.363529 7f4fb108cb38 Delete type=3 #903
|
||||||
2025/09/11-07:34:58.713099 7eff68c3fb38 Delete type=3 #787
|
2025/09/11-10:07:35.363589 7f4fb108cb38 Delete type=0 #904
|
||||||
|
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user