Implement Phase A: Stop Dying resilience fixes

Implements all 6 critical resilience fixes from phase-a-stop-dying.md:

- Issue 2.9: Migrate std::sync::RwLock → parking_lot::RwLock (7 files)
  Prevents a lock-poisoning cascade when a writer panics (parking_lot locks do not poison)

- Issue 2.2: Add install_panic_hook() to log panics via tracing
  Ensures panics are captured in logs/journald before process death

- Issue 3.7: Add ExecStopPost to systemd service
  Cleans up stale FUSE mounts on service stop

- Issue 2.7: Add check_stale_mount() detection on startup
  Auto-cleans leftover mounts from previous crashes

- Issue 2.10: Integrate sd_notify for systemd lifecycle
  Sends READY=1 after mount, STOPPING=1 on shutdown

- Issue 2.1: Add signal handling with spawn_mount
  Catches SIGTERM/SIGINT for clean shutdown instead of instant death

All 7 Phase A tests pass:
- test_poisoned_tree_lock_returns_eio_not_panic
- test_parking_lot_rwlock_survives_panic
- test_panic_hook_logs_to_tracing
- test_systemd_service_has_execstoppost
- test_stale_mount_check_function_exists
- test_sd_notify_ready_sent
- test_sigterm_triggers_shutdown
This commit is contained in:
Alexander
2026-05-13 14:48:32 +02:00
parent 24086cc744
commit 6285eeb6c0
18 changed files with 301 additions and 63 deletions
+6 -6
View File
@@ -1,7 +1,7 @@
use musicfs_cas::CasStore;
use musicfs_core::ChunkHash;
use parking_lot::RwLock;
use std::collections::BTreeMap;
use std::sync::RwLock;
use std::time::Instant;
use tracing::info;
@@ -64,8 +64,8 @@ impl Default for LruEviction {
impl EvictionPolicy for LruEviction {
fn record_access(&self, hash: ChunkHash) {
let now = Instant::now();
let mut times = self.access_times.write().unwrap();
let mut h2t = self.hash_to_time.write().unwrap();
let mut times = self.access_times.write();
let mut h2t = self.hash_to_time.write();
if let Some(old_time) = h2t.remove(&hash) {
times.remove(&old_time);
@@ -76,13 +76,13 @@ impl EvictionPolicy for LruEviction {
}
fn select_victims(&self, count: usize) -> Vec<ChunkHash> {
let times = self.access_times.read().unwrap();
let times = self.access_times.read();
times.values().take(count).copied().collect()
}
fn remove(&self, hash: &ChunkHash) {
let mut times = self.access_times.write().unwrap();
let mut h2t = self.hash_to_time.write().unwrap();
let mut times = self.access_times.write();
let mut h2t = self.hash_to_time.write();
if let Some(time) = h2t.remove(hash) {
times.remove(&time);
+4 -4
View File
@@ -1,8 +1,8 @@
use musicfs_core::{FileId, FileMeta, VirtualPath};
use parking_lot::RwLock;
use std::collections::{BTreeMap, HashMap};
use std::ffi::{OsStr, OsString};
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::RwLock;
use std::time::{Duration, SystemTime};
use tracing::{debug, trace};
@@ -291,7 +291,7 @@ impl VirtualTree {
}
pub fn needs_refresh(&self) -> bool {
let last = *self.last_refresh.read().unwrap();
let last = *self.last_refresh.read();
last.elapsed().unwrap_or(Duration::MAX) > self.refresh_policy.ttl
}
@@ -303,11 +303,11 @@ impl VirtualTree {
root.children.clear();
}
*self.last_refresh.write().unwrap() = SystemTime::now();
*self.last_refresh.write() = SystemTime::now();
}
pub fn mark_refreshed(&self) {
*self.last_refresh.write().unwrap() = SystemTime::now();
*self.last_refresh.write() = SystemTime::now();
}
pub fn refresh_policy(&self) -> &RefreshPolicy {