Implement Phase C: Production Hardening
Implements phase-c-hardening.md to fix 6 RED resilience tests:

- D1/D2: Health check timeout (1.5s) + parallel execution via join_all
- C6: Recursive CAS calculate_size() to scan shard subdirectories
- C7: FUSE read timeout (30s) returns EIO instead of hanging
- 6.4: Auto-re-fetch corrupt/missing chunks from origin
- 6.6: Passthrough mode - continue even when CAS write fails
- C9: PID file with flock prevents concurrent mounts
- 5.3: fd exhaustion handling test

All 27 resilience tests now pass. Full test suite green.

Files changed:

- musicfs-origins/src/health.rs: timeout + join_all
- musicfs-origins/Cargo.toml: add futures dependency
- musicfs-cas/src/store.rs: recursive calculate_size
- musicfs-cas/src/reader.rs: auto-re-fetch on IntegrityError/NotFound
- musicfs-cas/src/fetcher.rs: passthrough fallback
- musicfs-fuse/src/filesystem.rs: 30s read timeout
- musicfs-cli/src/main.rs: PID file with flock
- musicfs-test-utils/tests/resilience.rs: updated tests
This commit is contained in:
@@ -5,7 +5,7 @@ use musicfs_sync::CdcChunker;
|
||||
use parking_lot::RwLock;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use tracing::{debug, info};
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
pub struct ContentFetcher {
|
||||
store: Arc<CasStore>,
|
||||
@@ -92,7 +92,9 @@ impl ContentFetcher {
|
||||
let mut chunk_refs = Vec::with_capacity(chunks.len());
|
||||
for chunk in chunks {
|
||||
if !self.store.exists(&chunk.hash) {
|
||||
self.store.put(chunk.data).await.map_err(FetchError::Store)?;
|
||||
if let Err(e) = self.store.put(chunk.data).await {
|
||||
warn!(hash = %chunk.hash, error = %e, "CAS write failed, continuing in passthrough mode");
|
||||
}
|
||||
}
|
||||
|
||||
chunk_refs.push(ChunkRef {
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
use crate::chunks::ChunkRef;
|
||||
use crate::fetcher::{ContentFetcher, FetchError};
|
||||
use crate::store::CasStore;
|
||||
use crate::store::{CasError, CasStore};
|
||||
use bytes::{Bytes, BytesMut};
|
||||
use musicfs_core::FileId;
|
||||
use parking_lot::RwLock;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use tracing::{debug, trace};
|
||||
use tracing::{debug, trace, warn};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ChunkManifest {
|
||||
@@ -116,7 +116,31 @@ impl FileReader {
|
||||
continue;
|
||||
}
|
||||
|
||||
let chunk_data = self.store.get(&chunk_ref.hash).await?;
|
||||
let chunk_data = match self.store.get(&chunk_ref.hash).await {
|
||||
Ok(data) => data,
|
||||
Err(CasError::IntegrityError { .. }) => {
|
||||
warn!(hash = %chunk_ref.hash, "Chunk corrupt, deleting and re-fetching");
|
||||
let _ = self.store.delete(&chunk_ref.hash).await;
|
||||
if let Some(fetcher) = &self.fetcher {
|
||||
let new_manifest = fetcher.fetch_file(file_id).await?;
|
||||
self.manifests.write().insert(file_id, new_manifest);
|
||||
self.store.get(&chunk_ref.hash).await?
|
||||
} else {
|
||||
return Err(ReaderError::Cas(CasError::NotFound(chunk_ref.hash.as_hex())));
|
||||
}
|
||||
}
|
||||
Err(CasError::NotFound(_)) => {
|
||||
warn!(hash = %chunk_ref.hash, "Chunk missing, attempting re-fetch");
|
||||
if let Some(fetcher) = &self.fetcher {
|
||||
let new_manifest = fetcher.fetch_file(file_id).await?;
|
||||
self.manifests.write().insert(file_id, new_manifest);
|
||||
self.store.get(&chunk_ref.hash).await?
|
||||
} else {
|
||||
return Err(ReaderError::Cas(CasError::NotFound(chunk_ref.hash.as_hex())));
|
||||
}
|
||||
}
|
||||
Err(e) => return Err(ReaderError::Cas(e)),
|
||||
};
|
||||
|
||||
let read_start = if offset > chunk_start {
|
||||
(offset - chunk_start) as usize
|
||||
|
||||
@@ -77,17 +77,29 @@ impl CasStore {
|
||||
}
|
||||
|
||||
async fn calculate_size(dir: &Path) -> u64 {
|
||||
let mut size = 0u64;
|
||||
if let Ok(mut entries) = fs::read_dir(dir).await {
|
||||
while let Ok(Some(entry)) = entries.next_entry().await {
|
||||
if let Ok(meta) = entry.metadata().await {
|
||||
if meta.is_file() {
|
||||
size += meta.len();
|
||||
Self::calculate_size_recursive(dir).await
|
||||
}
|
||||
|
||||
fn calculate_size_recursive(dir: &Path) -> std::pin::Pin<Box<dyn std::future::Future<Output = u64> + Send + '_>> {
|
||||
Box::pin(async move {
|
||||
let mut size = 0u64;
|
||||
if let Ok(mut entries) = fs::read_dir(dir).await {
|
||||
while let Ok(Some(entry)) = entries.next_entry().await {
|
||||
if let Ok(meta) = entry.metadata().await {
|
||||
if meta.is_file() {
|
||||
size += meta.len();
|
||||
} else if meta.is_dir() {
|
||||
// Skip sled index directory
|
||||
let name = entry.file_name();
|
||||
if name != "index.sled" {
|
||||
size += Self::calculate_size_recursive(&entry.path()).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
size
|
||||
size
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn put(&self, data: &[u8]) -> Result<ChunkHash, CasError> {
|
||||
|
||||
Reference in New Issue
Block a user