Add comprehensive logging with tracing, file rotation, and systemd integration

- Add tracing-appender and tracing-journald for production logging
- Add LoggingConfig with trace_sample_rate, json_output, journald options
- Expand init_logging() with file rotation, journald, and stderr layers
- Add sanitize_path() helper for PII protection in logs
- Instrument FUSE operations with #[instrument] and trace decision points
- Instrument gRPC handlers (10 methods) with span correlation
- Add span instrumentation to the spawned health monitor, indexer, and watcher tasks
- Add broadcast lag handling (RecvError::Lagged) in event subscribers
- Replace expect() calls in webhook.rs with proper error handling
- Add logging to database operations in patterns.rs, collections.rs, and artwork.rs
- Add Drop impl logging for PluginManager and WatchHandle
- Update systemd service with rate limiting and journal output
- Add logrotate config and example config.toml with logging section
This commit is contained in:
Alexander
2026-05-13 11:21:51 +02:00
parent bc9fa36646
commit 5ac33987c0
32 changed files with 1646 additions and 177 deletions
+45 -19
View File
@@ -5,7 +5,7 @@ use std::collections::HashMap;
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::sync::mpsc;
use tracing::{debug, info, warn};
use tracing::{debug, info, info_span, Instrument};
pub struct HealthMonitor {
origins: DashMap<OriginId, Arc<dyn Origin>>,
@@ -150,22 +150,32 @@ impl HealthMonitor {
pub fn start(self: Arc<Self>) -> HealthCheckHandle {
let (stop_tx, mut stop_rx) = mpsc::channel::<()>(1);
let monitor = self.clone();
let interval_secs = monitor.check_interval.as_secs();
tokio::spawn(async move {
let mut interval = tokio::time::interval(monitor.check_interval);
info!(
interval_secs = interval_secs,
origin_count = monitor.origins.len(),
"Health monitor starting"
);
loop {
tokio::select! {
_ = interval.tick() => {
monitor.check_all().await;
}
_ = stop_rx.recv() => {
info!("Health monitor stopping");
break;
tokio::spawn(
async move {
let mut interval = tokio::time::interval(monitor.check_interval);
loop {
tokio::select! {
_ = interval.tick() => {
monitor.check_all().await;
}
_ = stop_rx.recv() => {
info!("Health monitor stopping");
break;
}
}
}
}
});
.instrument(info_span!("health_monitor")),
);
HealthCheckHandle { stop_tx }
}
@@ -199,14 +209,24 @@ impl HealthMonitor {
match status {
HealthStatus::Healthy => {
if state.status != HealthStatus::Healthy {
info!("Origin {} is now healthy", id);
info!(
origin_id = %id,
previous_status = ?state.status,
duration_ms = latency_ms,
"Origin health state transition to healthy"
);
}
state.status = HealthStatus::Healthy;
state.consecutive_failures = 0;
}
HealthStatus::Degraded => {
if state.status != HealthStatus::Degraded {
warn!("Origin {} is degraded", id);
info!(
origin_id = %id,
previous_status = ?state.status,
duration_ms = latency_ms,
"Origin health state transition to degraded"
);
}
state.status = HealthStatus::Degraded;
}
@@ -214,16 +234,22 @@ impl HealthMonitor {
state.consecutive_failures += 1;
if state.consecutive_failures >= threshold {
if state.status != HealthStatus::Unhealthy {
warn!(
"Origin {} is now unhealthy ({} failures)",
id, state.consecutive_failures
info!(
origin_id = %id,
previous_status = ?state.status,
consecutive_failures = state.consecutive_failures,
threshold = threshold,
duration_ms = latency_ms,
"Origin health state transition to unhealthy"
);
}
state.status = HealthStatus::Unhealthy;
} else {
debug!(
"Origin {} check failed ({}/{})",
id, state.consecutive_failures, threshold
origin_id = %id,
consecutive_failures = state.consecutive_failures,
threshold = threshold,
"Origin health check failed"
);
state.status = HealthStatus::Degraded;
}