Implement Phase B: Crash Recovery

Add startup integrity checks, corruption recovery, CAS size limits,
graceful shutdown orchestration, and a task supervisor — turning 5
previously-RED resilience tests GREEN and adding 5 new tests.

- CAS: pre-check size limit in put(), add StoreFull error variant
- CAS: sled corruption recovery in open() (retry then recreate)
- SQLite: open_with_integrity_check() via PRAGMA integrity_check(1)
- tantivy: open_with_recovery() deletes and rebuilds corrupt index
- CLI: CancellationToken-based ordered shutdown sequence
- Core: TaskSupervisor with spawn_supervised/spawn_critical + backoff
- Tests: replace 4 todo!() stubs, add 5 new shutdown/supervisor tests
This commit is contained in:
Alexander
2026-05-13 15:33:23 +02:00
parent 4e394c60ec
commit 5da96ffab2
12 changed files with 485 additions and 14 deletions
+1
View File
@@ -17,6 +17,7 @@ musicfs-metadata.path = "../musicfs-metadata"
clap.workspace = true
tokio.workspace = true
tokio-util.workspace = true
tracing.workspace = true
tracing-subscriber.workspace = true
tracing-appender.workspace = true
+9
View File
@@ -208,6 +208,8 @@ fn run_mount(
}
info!("MusicFS ready, PID {}", std::process::id());
let shutdown_token = tokio_util::sync::CancellationToken::new();
runtime.block_on(async {
let mut sigterm =
tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())?;
@@ -223,6 +225,13 @@ fn run_mount(
}
}
info!("Beginning ordered shutdown");
shutdown_token.cancel();
tokio::time::sleep(std::time::Duration::from_millis(500)).await;
info!("Background tasks stopped");
Ok::<_, anyhow::Error>(())
})?;