Implement Week 5 CDC & Delta Detection with Oracle fixes

- Add CdcChunker using FastCDC v3 (16KB/64KB/256KB chunks)
- Add DeltaDetector with scan_origin() returning ScannedFile (no FileId assignment)
- Add OriginWatcher with inotify and 200ms debounce using tokio::spawn
- Fix LocalOrigin::read() to loop until all bytes read
- Add read_full() method to Origin trait
- Add mtime field to ChunkManifest
- Update ContentFetcher to use CDC chunking
- Update bandwidth reduction test to assert >90% (NFR-6.4)

Tests: 71 pass (+11 new)
This commit is contained in:
Alexander
2026-05-12 20:05:44 +02:00
parent 0e5a514015
commit 32c96701c8
12 changed files with 998 additions and 15 deletions
+32 -9
View File
@@ -1,6 +1,7 @@
use crate::{CasStore, ChunkManifest, ChunkRef};
use musicfs_core::{Event, EventBus, FileId, FileMeta, OriginId};
use musicfs_origins::Origin;
use musicfs_sync::CdcChunker;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use tracing::{debug, info};
@@ -10,6 +11,7 @@ pub struct ContentFetcher {
origins: RwLock<HashMap<OriginId, Arc<dyn Origin>>>,
file_meta: RwLock<HashMap<FileId, FileMeta>>,
event_bus: Option<Arc<EventBus>>,
chunker: CdcChunker,
}
impl ContentFetcher {
@@ -19,6 +21,7 @@ impl ContentFetcher {
origins: RwLock::new(HashMap::new()),
file_meta: RwLock::new(HashMap::new()),
event_bus: None,
chunker: CdcChunker::default(),
}
}
@@ -28,6 +31,7 @@ impl ContentFetcher {
origins: RwLock::new(HashMap::new()),
file_meta: RwLock::new(HashMap::new()),
event_bus: Some(event_bus),
chunker: CdcChunker::default(),
}
}
@@ -71,25 +75,44 @@ impl ContentFetcher {
);
let data = origin
.read(&meta.real_path.path, 0, meta.size as u32)
.read_full(&meta.real_path.path)
.await
.map_err(|e| FetchError::OriginRead(e.to_string()))?;
let hash = self.store.put(&data).await.map_err(FetchError::Store)?;
let mtime = meta
.mtime
.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_secs() as i64)
.unwrap_or(0);
let chunks = self.chunker.chunk_refs(&data);
info!("Chunked {:?} into {} chunks", file_id, chunks.len());
let mut chunk_refs = Vec::with_capacity(chunks.len());
for chunk in chunks {
if !self.store.exists(&chunk.hash) {
self.store.put(chunk.data).await.map_err(FetchError::Store)?;
}
chunk_refs.push(ChunkRef {
hash: chunk.hash,
offset: chunk.offset,
size: chunk.length,
});
}
let manifest = ChunkManifest {
file_id,
total_size: meta.size,
chunks: vec![ChunkRef {
hash,
offset: 0,
size: data.len() as u32,
}],
mtime,
chunks: chunk_refs,
};
debug!(
"Created manifest for {:?}: {} bytes, 1 chunk",
file_id, meta.size
"Created manifest for {:?}: {} bytes, {} chunks",
file_id,
meta.size,
manifest.chunks.len()
);
Ok(manifest)
+8 -1
View File
@@ -11,6 +11,7 @@ use std::sync::{Arc, RwLock};
/// Describes how a single file's content is laid out as a list of chunks.
pub struct ChunkManifest {
/// Identifier of the file this manifest describes.
pub file_id: FileId,
/// Total size of the file in bytes.
pub total_size: u64,
/// File modification time. The content fetcher fills this with whole
/// seconds since the Unix epoch, falling back to 0 when the file's
/// timestamp cannot be expressed relative to the epoch.
pub mtime: i64,
/// Chunk references; each one carries a content hash, an offset and a size.
pub chunks: Vec<ChunkRef>,
}
@@ -23,11 +24,12 @@ impl ChunkManifest {
rmp_serde::from_slice(data).ok()
}
// Builds a manifest from individually supplied values (file id, total size,
// mtime) plus an encoded chunk list.
//
// Returns `None` when `chunks_from_bytes` yields `None` for `chunk_blob`;
// otherwise returns the assembled manifest.
//
// NOTE(review): two signature lines appear below. The first one (without
// `mtime`) looks like the pre-change form retained by the diff view, and the
// second like its replacement — confirm against the actual source file.
pub fn from_db(file_id: FileId, total_size: u64, chunk_blob: &[u8]) -> Option<Self> {
pub fn from_db(file_id: FileId, total_size: u64, mtime: i64, chunk_blob: &[u8]) -> Option<Self> {
// `?` short-circuits with `None` if the chunk list cannot be decoded.
let chunks = Self::chunks_from_bytes(chunk_blob)?;
Some(Self {
file_id,
total_size,
mtime,
chunks,
})
}
@@ -166,6 +168,7 @@ mod tests {
reader.register_manifest(ChunkManifest {
file_id: FileId(1),
total_size: data.len() as u64,
mtime: 0,
chunks: vec![ChunkRef {
hash,
offset: 0,
@@ -193,6 +196,7 @@ mod tests {
reader.register_manifest(ChunkManifest {
file_id: FileId(1),
total_size: data.len() as u64,
mtime: 0,
chunks: vec![ChunkRef {
hash,
offset: 0,
@@ -222,6 +226,7 @@ mod tests {
reader.register_manifest(ChunkManifest {
file_id: FileId(1),
total_size: 8,
mtime: 0,
chunks: vec![
ChunkRef {
hash: hash1,
@@ -256,6 +261,7 @@ mod tests {
reader.register_manifest(ChunkManifest {
file_id: FileId(1),
total_size: data.len() as u64,
mtime: 0,
chunks: vec![ChunkRef {
hash,
offset: 0,
@@ -272,6 +278,7 @@ mod tests {
let manifest = ChunkManifest {
file_id: FileId(42),
total_size: 1024,
mtime: 0,
chunks: vec![ChunkRef {
hash: ChunkHash::from_bytes(b"test"),
offset: 0,