diff --git a/README.md b/README.md new file mode 100644 index 0000000..9eaeac6 --- /dev/null +++ b/README.md @@ -0,0 +1,252 @@ +# Network Monitor + +A robust network monitoring service written in Rust that tracks network traffic from multiple sources and provides real-time data via UDP broadcasting. This service is designed for high availability with automatic retry mechanisms and comprehensive health monitoring. + +## Features + +### 🔄 Dual Network Monitoring + +- **Clash Proxy Monitoring**: Connects to Clash proxy via WebSocket to monitor proxy traffic statistics +- **WAN Interface Monitoring**: Polls OpenWRT/LuCI router interfaces for WAN traffic data + +### 🚀 High Availability + +- **Infinite Retry Mechanism**: Automatically recovers from network failures and service interruptions +- **Health Monitoring**: Comprehensive health tracking with detailed statistics and alerting +- **Exponential Backoff**: Smart retry strategy with configurable delays and jitter + +### 📡 Real-time Data Broadcasting + +- **UDP Server**: Broadcasts network statistics to connected clients +- **Client Management**: Automatic client discovery and connection management +- **Data Formats**: Structured binary data for efficient transmission + +### 🛡️ Robust Error Handling + +- **Connection Timeouts**: Configurable timeouts for all network operations +- **Graceful Degradation**: Continues operation even when one monitoring source fails +- **Detailed Logging**: Comprehensive logging for debugging and monitoring + +## Architecture + +``` +┌─────────────────┐ WebSocket ┌─────────────────┐ +│ Clash Proxy │◄───────────────►│ │ +└─────────────────┘ │ │ + │ Network │ UDP +┌─────────────────┐ HTTP/LuCI │ Monitor │◄──────────┐ +│ OpenWRT Router │◄───────────────►│ Service │ │ 
+└─────────────────┘ │ │ │ + └─────────────────┘ │ + │ +┌─────────────────┐ UDP Data ┌─────────────────┐ │ +│ Client 1 │◄───────────────►│ UDP Server │◄──────────┘ +└─────────────────┘ └─────────────────┘ +┌─────────────────┐ +│ Client 2 │◄───────────────► +└─────────────────┘ +``` + +## Installation + +### Prerequisites + +- Rust 1.70+ (for building from source) +- Docker (for containerized deployment) + +### Building from Source + +```bash +# Clone the repository +git clone <repository-url> +cd network-monitor + +# Build the project +cargo build --release + +# Run tests +cargo test + +# Run the service +cargo run +``` + +### Docker Deployment + +```bash +# Build the Docker image +docker build -t network-monitor . 
+ +# Run the container +docker run -d \ + --name network-monitor \ + -p 17890:17890/udp \ + -e CLASH_URL="ws://192.168.1.1:9090/connections?token=your-token" \ + -e LUCI_URL="http://192.168.1.1/cgi-bin/luci" \ + -e LUCI_USERNAME="root" \ + -e LUCI_PASSWORD="your-password" \ + network-monitor +``` + +## Configuration + +The service can be configured via command-line arguments or environment variables: + +| Parameter | Environment Variable | Default Value | Description | +|-----------|---------------------|---------------|-------------| +| `-c, --clash-url` | `CLASH_URL` | `ws://192.168.1.1:9090/connections?token=123456` | Clash WebSocket URL | +| `-p, --listen-port` | `LISTEN_PORT` | `17890` | UDP server listen port | +| `-l, --luci-url` | `LUCI_URL` | `http://192.168.1.1/cgi-bin/luci` | OpenWRT LuCI base URL | +| `-u, --luci-username` | `LUCI_USERNAME` | `root` | LuCI authentication username | +| `-P, --luci-password` | `LUCI_PASSWORD` | `123456` | LuCI authentication password | + +### Environment File + +Create a `.env` file in the project root: + +```env +CLASH_URL=ws://192.168.1.1:9090/connections?token=your-clash-token +LISTEN_PORT=17890 +LUCI_URL=http://192.168.1.1/cgi-bin/luci +LUCI_USERNAME=root +LUCI_PASSWORD=your-router-password +``` + +## Data Formats + +### Clash Traffic Data (32 bytes) + +``` +Bytes 0-7: Direct upload speed (u64, little-endian) +Bytes 8-15: Direct download speed (u64, little-endian) +Bytes 16-23: Proxy upload speed (u64, little-endian) +Bytes 24-31: Proxy download speed (u64, little-endian) +``` + +### WAN Traffic Data (16 bytes) + +``` +Bytes 0-7: WAN upload speed (u64, little-endian) +Bytes 8-15: WAN download speed (u64, little-endian) +``` + +## Health Monitoring + +The service includes comprehensive health monitoring with the following metrics: + +- **Connection Status**: Real-time health status for each service +- **Uptime Percentage**: Success rate over time +- **Failure Tracking**: Consecutive failure counts and timestamps +- 
**Performance Metrics**: Total attempts, successes, and failures + +Health reports are logged every minute with detailed statistics. + +## Retry Strategy + +The service implements a sophisticated retry mechanism: + +- **Infinite Retries**: Critical services never give up +- **Exponential Backoff**: Delays increase exponentially with failures +- **Jitter**: Random delays prevent thundering herd effects +- **Configurable Limits**: Maximum delays and retry counts can be customized + +### Retry Configurations + +- **Fast Retry**: For lightweight operations (5 attempts, 100ms-5s delays) +- **Default Retry**: Balanced approach (10 attempts, 500ms-30s delays) +- **Slow Retry**: For heavyweight operations (15 attempts, 1s-60s delays) +- **Infinite Retry**: For critical services (unlimited attempts) + +## Logging + +The service uses structured logging with multiple levels: + +- **INFO**: Normal operation events and health reports +- **WARN**: Service health issues and recoverable errors +- **ERROR**: Critical failures and persistent issues +- **DEBUG**: Detailed operation information + +Set the `RUST_LOG` environment variable to control log levels: + +```bash +export RUST_LOG=info # or debug, warn, error +``` + +## Development + +### Project Structure + +``` +src/ +├── main.rs # Application entry point +├── lib.rs # Library exports +├── clash_conn_msg.rs # Clash message structures +├── health_monitor.rs # Health monitoring system +├── retry.rs # Retry mechanism implementation +├── statistics.rs # Traffic statistics processing +├── udp_server.rs # UDP broadcasting server +└── wan.rs # WAN traffic polling +tests/ +└── integration_test.rs # Integration tests +``` + +### Running Tests + +```bash +# Run all tests +cargo test + +# Run with output +cargo test -- --nocapture + +# Run specific test +cargo test test_network_failure_recovery +``` + +### Contributing + +1. Fork the repository +2. Create a feature branch +3. 
Make your changes +4. Add tests for new functionality +5. Ensure all tests pass +6. Submit a pull request + +## License + +This project is licensed under the MIT License - see the LICENSE file for details. + +## Troubleshooting + +### Common Issues + +1. **WebSocket Connection Failures** + - Verify Clash is running and accessible + - Check the WebSocket URL and authentication token + - Ensure network connectivity to the Clash instance + +2. **LuCI Authentication Failures** + - Verify router credentials + - Check if the router is accessible + - Ensure the LuCI interface is enabled + +3. **UDP Client Connection Issues** + - Verify the UDP port is not blocked by firewall + - Check if the service is binding to the correct interface + - Ensure clients are connecting to the correct port + +### Debug Mode + +Enable debug logging for detailed troubleshooting: + +```bash +RUST_LOG=debug cargo run +``` + +This will provide detailed information about: + +- Connection attempts and failures +- Retry mechanisms in action +- Health monitoring decisions +- UDP client management +- Data processing and broadcasting diff --git a/src/health_monitor.rs b/src/health_monitor.rs index 1d87729..f40c143 100644 --- a/src/health_monitor.rs +++ b/src/health_monitor.rs @@ -115,7 +115,7 @@ impl HealthMonitor { .get_or_init(|| async { let monitor = HealthMonitor::new(); - // 启动健康状态报告任务 + // Start health status reporting task let monitor_clone = monitor.clone(); tokio::spawn(async move { monitor_clone.start_health_reporting().await; @@ -192,7 +192,7 @@ impl HealthMonitor { self.log_service_health("WebSocket", &websocket_health); self.log_service_health("WAN Polling", &wan_health); - // 如果有服务不健康,发出警告 + // Warn if any service is unhealthy if !websocket_health.is_healthy { warn!("WebSocket service is unhealthy! Consecutive failures: {}", websocket_health.consecutive_failures); } @@ -200,7 +200,7 @@ impl HealthMonitor { warn!("WAN Polling service is unhealthy! 
Consecutive failures: {}", wan_health.consecutive_failures); } - // 如果连续失败次数过多,发出错误警报 + // Alert if consecutive failures are too many if websocket_health.consecutive_failures > 10 { error!("WebSocket service has {} consecutive failures!", websocket_health.consecutive_failures); } diff --git a/src/lib.rs b/src/lib.rs index 683cf89..27d2a95 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,6 @@ pub mod retry; pub mod health_monitor; -// 重新导出常用的类型和函数 +// Re-export commonly used types and functions pub use retry::{RetryConfig, Retrier, retry_with_config, retry, retry_forever}; pub use health_monitor::{HealthMonitor, ServiceType, ConnectionHealth}; diff --git a/tests/integration_test.rs b/tests/integration_test.rs index fd1d1d9..314de9d 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -4,10 +4,10 @@ use std::time::Duration; use tokio::time::sleep; use network_monitor::retry::{RetryConfig, retry_with_config}; -/// 模拟网络故障的测试 +/// Test simulating network failure recovery #[tokio::test] async fn test_network_failure_recovery() { - // 模拟一个会失败几次然后成功的操作 + // Simulate an operation that fails a few times then succeeds let attempt_count = Arc::new(AtomicU32::new(0)); let max_failures = 3; @@ -16,7 +16,7 @@ async fn test_network_failure_recovery() { initial_delay: Duration::from_millis(10), max_delay: Duration::from_millis(100), backoff_multiplier: 1.5, - jitter: false, // 关闭抖动以便测试更可预测 + jitter: false, // Disable jitter for more predictable testing }; let attempt_count_clone = attempt_count.clone(); @@ -26,10 +26,10 @@ async fn test_network_failure_recovery() { let current_attempt = attempt_count.fetch_add(1, Ordering::SeqCst) + 1; if current_attempt <= max_failures { - // 模拟网络错误 + // Simulate network error Err(format!("Network error on attempt {}", current_attempt)) } else { - // 模拟恢复成功 + // Simulate successful 
recovery Ok(format!("Success on attempt {}", current_attempt)) } } @@ -40,7 +40,7 @@ async fn test_network_failure_recovery() { assert_eq!(attempt_count.load(Ordering::SeqCst), 4); } -/// 测试连接超时场景 +/// Test connection timeout scenario #[tokio::test] async fn test_connection_timeout_scenario() { let config = RetryConfig { @@ -59,17 +59,17 @@ async fn test_connection_timeout_scenario() { async move { attempt_count.fetch_add(1, Ordering::SeqCst); - // 模拟连接超时 + // Simulate connection timeout sleep(Duration::from_millis(1)).await; Err("Connection timeout") } }).await; assert!(result.is_err()); - assert_eq!(attempt_count.load(Ordering::SeqCst), 3); // 应该尝试了3次 + assert_eq!(attempt_count.load(Ordering::SeqCst), 3); // Should have attempted 3 times } -/// 测试快速恢复场景 +/// Test fast recovery scenario #[tokio::test] async fn test_fast_recovery() { let config = RetryConfig::fast(); @@ -95,7 +95,7 @@ async fn test_fast_recovery() { assert_eq!(attempt_count.load(Ordering::SeqCst), 2); } -/// 测试慢速重试场景 +/// Test slow retry scenario #[tokio::test] async fn test_slow_retry_scenario() { let config = RetryConfig::slow(); @@ -121,7 +121,7 @@ async fn test_slow_retry_scenario() { assert_eq!(attempt_count.load(Ordering::SeqCst), 3); } -/// 测试最大重试次数限制 +/// Test maximum retry limit #[tokio::test] async fn test_max_retry_limit() { let config = RetryConfig {