Implement Phase A: Stop Dying resilience fixes

Implements all 6 critical resilience fixes from phase-a-stop-dying.md:

- Issue 2.9: Migrate std::sync::RwLock → parking_lot::RwLock (7 files)
  parking_lot locks do not poison, so a panic while a write guard is held
  no longer cascades into panics at every later lock access

- Issue 2.2: Add install_panic_hook() to log panics via tracing
  Ensures panics are captured in logs/journald before process death

- Issue 3.7: Add ExecStopPost to systemd service
  Cleans up stale FUSE mounts on service stop

- Issue 2.7: Add check_stale_mount() detection on startup
  Auto-cleans leftover mounts from previous crashes

- Issue 2.10: Integrate sd_notify for systemd lifecycle
  Sends READY=1 after mount and STOPPING=1 on shutdown

- Issue 2.1: Add signal handling with spawn_mount
  Catches SIGTERM/SIGINT for clean shutdown instead of instant death

All 7 Phase A tests pass:
- test_poisoned_tree_lock_returns_eio_not_panic
- test_parking_lot_rwlock_survives_panic
- test_panic_hook_logs_to_tracing
- test_systemd_service_has_execstoppost
- test_stale_mount_check_function_exists
- test_sd_notify_ready_sent
- test_sigterm_triggers_shutdown
This commit is contained in:
Alexander
2026-05-13 14:48:32 +02:00
parent 24086cc744
commit 6285eeb6c0
18 changed files with 301 additions and 63 deletions
@@ -16,6 +16,7 @@ libc.workspace = true
thiserror.workspace = true
tokio = { workspace = true, features = ["fs", "sync", "time"] }
tracing.workspace = true
parking_lot.workspace = true
[dev-dependencies]
tempfile.workspace = true
+10 -10
View File
@@ -2,8 +2,9 @@ use crate::health::{HealthMonitor, HealthSnapshot};
use crate::router::Router;
use crate::traits::{Origin, WatchHandle};
use musicfs_core::{OriginId, RealPath};
use parking_lot::RwLock;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use std::sync::Arc;
use tracing::{info, warn};
pub struct OriginRegistry {
@@ -29,17 +30,17 @@ impl OriginRegistry {
self.router.set_priority(id.clone(), priority);
self.health_monitor.add_origin(origin.clone());
self.origins.write().unwrap().insert(id, origin);
self.origins.write().insert(id, origin);
}
pub fn unregister(&self, id: &OriginId) {
info!("Unregistering origin {}", id);
if let Some(handles) = self.watch_handles.write().unwrap().remove(id) {
if let Some(handles) = self.watch_handles.write().remove(id) {
info!("Dropping {} watch handles for origin {}", handles.len(), id);
}
self.origins.write().unwrap().remove(id);
self.origins.write().remove(id);
self.router.remove_priority(id);
self.health_monitor.remove_origin(id);
}
@@ -47,22 +48,21 @@ impl OriginRegistry {
pub fn register_watch(&self, origin_id: &OriginId, handle: WatchHandle) {
self.watch_handles
.write()
.unwrap()
.entry(origin_id.clone())
.or_default()
.push(handle);
}
pub fn get(&self, id: &OriginId) -> Option<Arc<dyn Origin>> {
self.origins.read().unwrap().get(id).cloned()
self.origins.read().get(id).cloned()
}
pub fn list(&self) -> Vec<Arc<dyn Origin>> {
self.origins.read().unwrap().values().cloned().collect()
self.origins.read().values().cloned().collect()
}
pub fn route(&self, path: &RealPath) -> Option<Arc<dyn Origin>> {
let origins = self.origins.read().unwrap();
let origins = self.origins.read();
let health = self.health_monitor.snapshot();
let candidates: Vec<_> = origins
@@ -86,7 +86,7 @@ impl OriginRegistry {
}
pub fn route_with_fallback(&self, path: &RealPath) -> Option<Arc<dyn Origin>> {
let origins = self.origins.read().unwrap();
let origins = self.origins.read();
let health = self.health_monitor.snapshot();
let candidates: Vec<_> = origins
@@ -109,7 +109,7 @@ impl OriginRegistry {
}
pub fn route_all(&self, path: &RealPath) -> Vec<Arc<dyn Origin>> {
let origins = self.origins.read().unwrap();
let origins = self.origins.read();
let health = self.health_monitor.snapshot();
let mut result: Vec<_> = origins