Implement Week 5 CDC & Delta Detection with Oracle fixes

- Add CdcChunker using FastCDC v3 (16KB/64KB/256KB chunks)
- Add DeltaDetector with scan_origin() returning ScannedFile (no FileId assignment)
- Add OriginWatcher with inotify and 200ms debounce using tokio::spawn
- Fix LocalOrigin::read() to loop until all bytes read
- Add read_full() method to Origin trait
- Add mtime field to ChunkManifest
- Update ContentFetcher to use CDC chunking
- Update bandwidth reduction test to assert >90% (NFR-6.4)

Tests: 71 pass (+11 new)
This commit is contained in:
Alexander
2026-05-12 20:05:44 +02:00
parent 0e5a514015
commit 32c96701c8
12 changed files with 998 additions and 15 deletions
+21 -3
View File
@@ -93,13 +93,31 @@ impl Origin for LocalOrigin {
let mut file = fs::File::open(&full_path).await?;
file.seek(std::io::SeekFrom::Start(offset)).await?;
let mut buffer = vec![0u8; size as usize];
let bytes_read = file.read(&mut buffer).await?;
buffer.truncate(bytes_read);
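// FIX: Loop until all requested bytes are read or EOF is reached.
// A single read() is allowed to return fewer bytes than requested (here it
// was capped at roughly 2MB per call), so one call cannot be relied on to
// satisfy a larger request.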
let mut buffer = Vec::with_capacity(size as usize);
let mut temp_buf = vec![0u8; 64 * 1024]; // 64KB chunks
let mut total_read = 0usize;
while total_read < size as usize {
let to_read = std::cmp::min(temp_buf.len(), size as usize - total_read);
let n = file.read(&mut temp_buf[..to_read]).await?;
if n == 0 {
break; // EOF
}
buffer.extend_from_slice(&temp_buf[..n]);
total_read += n;
}
Ok(buffer)
}
/// Reads the complete contents of the file at `path` into memory.
///
/// The path is first resolved with `self.full_path`, the resolved path is
/// logged at debug level, and the bytes returned by `fs::read` are handed
/// back to the caller.
///
/// # Errors
///
/// Any failure reported by `fs::read` is propagated through `?`.
async fn read_full(&self, path: &Path) -> Result<Vec<u8>> {
    let resolved = self.full_path(path);
    debug!("LocalOrigin::read_full({:?})", resolved);
    let contents = fs::read(&resolved).await?;
    Ok(contents)
}
async fn exists(&self, path: &Path) -> Result<bool> {
let full_path = self.full_path(path);
Ok(fs::try_exists(&full_path).await?)
@@ -26,6 +26,9 @@ pub trait Origin: Send + Sync {
async fn read(&self, path: &Path, offset: u64, size: u32) -> Result<Vec<u8>>;
/// Read entire file content (for CDC chunking of files <4GB)
async fn read_full(&self, path: &Path) -> Result<Vec<u8>>;
async fn exists(&self, path: &Path) -> Result<bool>;
async fn health(&self) -> HealthStatus;