Implement Phase C: Production Hardening

Implements phase-c-hardening.md to fix 6 RED resilience tests:

- D1/D2: Health check timeout (1.5s) + parallel execution via join_all
- C6: Recursive CAS calculate_size() to scan shard subdirectories (skipping the index.sled directory)
- C7: FUSE read timeout (30s) returns EIO instead of hanging
- 6.4: Auto-re-fetch corrupt/missing chunks from origin (corrupt chunks are deleted first; triggered by IntegrityError or NotFound)
- 6.6: Passthrough mode - log a warning and continue even when a CAS write fails, instead of failing the fetch
- C9: PID file with flock prevents concurrent mounts
- 5.3: fd exhaustion handling test

All 27 resilience tests now pass. Full test suite green.

Files changed:
- musicfs-origins/src/health.rs: timeout + join_all
- musicfs-origins/Cargo.toml: add futures dependency
- musicfs-cas/src/store.rs: recursive calculate_size
- musicfs-cas/src/reader.rs: auto-re-fetch on IntegrityError/NotFound
- musicfs-cas/src/fetcher.rs: passthrough fallback
- musicfs-fuse/src/filesystem.rs: 30s read timeout
- musicfs-cli/src/main.rs: PID file with flock
- musicfs-test-utils/tests/resilience.rs: updated tests
This commit is contained in:
Alexander
2026-05-13 15:55:22 +02:00
parent 3038c94b8c
commit 0ff2a17ab7
11 changed files with 325 additions and 39 deletions
+4 -2
View File
@@ -5,7 +5,7 @@ use musicfs_sync::CdcChunker;
use parking_lot::RwLock;
use std::collections::HashMap;
use std::sync::Arc;
use tracing::{debug, info};
use tracing::{debug, info, warn};
pub struct ContentFetcher {
store: Arc<CasStore>,
@@ -92,7 +92,9 @@ impl ContentFetcher {
let mut chunk_refs = Vec::with_capacity(chunks.len());
for chunk in chunks {
if !self.store.exists(&chunk.hash) {
self.store.put(chunk.data).await.map_err(FetchError::Store)?;
if let Err(e) = self.store.put(chunk.data).await {
warn!(hash = %chunk.hash, error = %e, "CAS write failed, continuing in passthrough mode");
}
}
chunk_refs.push(ChunkRef {
+27 -3
View File
@@ -1,13 +1,13 @@
use crate::chunks::ChunkRef;
use crate::fetcher::{ContentFetcher, FetchError};
use crate::store::CasStore;
use crate::store::{CasError, CasStore};
use bytes::{Bytes, BytesMut};
use musicfs_core::FileId;
use parking_lot::RwLock;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;
use tracing::{debug, trace};
use tracing::{debug, trace, warn};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChunkManifest {
@@ -116,7 +116,31 @@ impl FileReader {
continue;
}
let chunk_data = self.store.get(&chunk_ref.hash).await?;
let chunk_data = match self.store.get(&chunk_ref.hash).await {
Ok(data) => data,
Err(CasError::IntegrityError { .. }) => {
warn!(hash = %chunk_ref.hash, "Chunk corrupt, deleting and re-fetching");
let _ = self.store.delete(&chunk_ref.hash).await;
if let Some(fetcher) = &self.fetcher {
let new_manifest = fetcher.fetch_file(file_id).await?;
self.manifests.write().insert(file_id, new_manifest);
self.store.get(&chunk_ref.hash).await?
} else {
return Err(ReaderError::Cas(CasError::NotFound(chunk_ref.hash.as_hex())));
}
}
Err(CasError::NotFound(_)) => {
warn!(hash = %chunk_ref.hash, "Chunk missing, attempting re-fetch");
if let Some(fetcher) = &self.fetcher {
let new_manifest = fetcher.fetch_file(file_id).await?;
self.manifests.write().insert(file_id, new_manifest);
self.store.get(&chunk_ref.hash).await?
} else {
return Err(ReaderError::Cas(CasError::NotFound(chunk_ref.hash.as_hex())));
}
}
Err(e) => return Err(ReaderError::Cas(e)),
};
let read_start = if offset > chunk_start {
(offset - chunk_start) as usize
+20 -8
View File
@@ -77,17 +77,29 @@ impl CasStore {
}
async fn calculate_size(dir: &Path) -> u64 {
let mut size = 0u64;
if let Ok(mut entries) = fs::read_dir(dir).await {
while let Ok(Some(entry)) = entries.next_entry().await {
if let Ok(meta) = entry.metadata().await {
if meta.is_file() {
size += meta.len();
Self::calculate_size_recursive(dir).await
}
fn calculate_size_recursive(dir: &Path) -> std::pin::Pin<Box<dyn std::future::Future<Output = u64> + Send + '_>> {
Box::pin(async move {
let mut size = 0u64;
if let Ok(mut entries) = fs::read_dir(dir).await {
while let Ok(Some(entry)) = entries.next_entry().await {
if let Ok(meta) = entry.metadata().await {
if meta.is_file() {
size += meta.len();
} else if meta.is_dir() {
// Skip sled index directory
let name = entry.file_name();
if name != "index.sled" {
size += Self::calculate_size_recursive(&entry.path()).await;
}
}
}
}
}
}
size
size
})
}
pub async fn put(&self, data: &[u8]) -> Result<ChunkHash, CasError> {