Move the files around

This commit is contained in:
Alexander
2026-05-13 20:34:14 +02:00
parent 90e9683076
commit 305d027c8b
113 changed files with 650 additions and 3569 deletions
+29
View File
@@ -0,0 +1,29 @@
[package]
name = "musicfs-cas"
version.workspace = true
edition.workspace = true
[features]
default = []
failpoints = ["fail/failpoints"]
[dependencies]
fail = { workspace = true, optional = true }
musicfs-core = { path = "../musicfs-core" }
musicfs-origins = { path = "../musicfs-origins" }
musicfs-sync = { path = "../musicfs-sync" }
tokio.workspace = true
tracing.workspace = true
serde.workspace = true
sled.workspace = true
xxhash-rust.workspace = true
bytes.workspace = true
rmp-serde.workspace = true
hex.workspace = true
dirs.workspace = true
thiserror.workspace = true
parking_lot.workspace = true
[dev-dependencies]
tempfile.workspace = true
musicfs-cache = { path = "../musicfs-cache" }
+45
View File
@@ -0,0 +1,45 @@
use musicfs_core::ChunkHash;
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
/// On-disk location of a stored chunk together with its length.
///
/// `CasStore::put` serializes one of these with `rmp_serde` and stores it as
/// the index value for the chunk's hash.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChunkLocation {
    /// Path of the file holding the chunk bytes.
    pub path: PathBuf,
    /// Length of the chunk in bytes.
    pub size: u32,
}
/// One entry of a file's chunk list: which chunk, and which part of the file
/// it covers.
///
/// `FileReader::read` treats the entry as covering the byte range
/// `offset .. offset + size` of the file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChunkRef {
    /// Content hash identifying the chunk in the store.
    pub hash: ChunkHash,
    /// Byte offset of the chunk within the file.
    pub offset: u64,
    /// Length of the chunk in bytes.
    pub size: u32,
}
// Sanity checks for `ChunkHash` (defined in `musicfs_core`) as used by this crate.
#[cfg(test)]
mod tests {
    use super::*;

    /// The hex form of a hash is 16 characters long.
    #[test]
    fn test_chunk_hash_from_bytes() {
        let data = b"hello world";
        let hash = ChunkHash::from_bytes(data);
        assert_eq!(hash.as_hex().len(), 16);
    }

    /// Hashing the same bytes twice yields equal hashes.
    #[test]
    fn test_chunk_hash_deterministic() {
        let data = b"test data";
        let hash1 = ChunkHash::from_bytes(data);
        let hash2 = ChunkHash::from_bytes(data);
        assert_eq!(hash1, hash2);
    }

    /// `as_hex` followed by `from_hex` restores the original hash.
    #[test]
    fn test_chunk_hash_hex_roundtrip() {
        let data = b"roundtrip test";
        let hash = ChunkHash::from_bytes(data);
        let hex = hash.as_hex();
        let restored = ChunkHash::from_hex(&hex).unwrap();
        assert_eq!(hash, restored);
    }
}
+284
View File
@@ -0,0 +1,284 @@
use crate::{CasStore, ChunkManifest, ChunkRef};
use musicfs_core::{Event, EventBus, FileId, FileMeta, OriginId};
use musicfs_origins::Origin;
use musicfs_sync::CdcChunker;
use parking_lot::RwLock;
use std::collections::HashMap;
use std::sync::Arc;
use tracing::{debug, info, warn};
/// Reads whole files from registered origins, splits them into chunks, and
/// writes the chunks into a [`CasStore`].
pub struct ContentFetcher {
    /// Destination store for fetched chunks.
    store: Arc<CasStore>,
    /// Registered origins, keyed by their id.
    origins: RwLock<HashMap<OriginId, Arc<dyn Origin>>>,
    /// Registered file metadata, keyed by file id.
    file_meta: RwLock<HashMap<FileId, FileMeta>>,
    /// Optional bus used by `emit_access_event`; `None` disables event publishing.
    event_bus: Option<Arc<EventBus>>,
    /// Chunker used to split fetched file contents.
    chunker: CdcChunker,
}
impl ContentFetcher {
/// Creates a fetcher with no registered origins or files and no event bus.
pub fn new(store: Arc<CasStore>) -> Self {
    let origins = RwLock::new(HashMap::new());
    let file_meta = RwLock::new(HashMap::new());
    let chunker = CdcChunker::default();
    Self {
        store,
        origins,
        file_meta,
        event_bus: None,
        chunker,
    }
}
pub fn with_event_bus(store: Arc<CasStore>, event_bus: Arc<EventBus>) -> Self {
Self {
store,
origins: RwLock::new(HashMap::new()),
file_meta: RwLock::new(HashMap::new()),
event_bus: Some(event_bus),
chunker: CdcChunker::default(),
}
}
/// Registers `origin` under its own id, replacing any origin already
/// registered with that id.
pub fn register_origin(&self, origin: Arc<dyn Origin>) {
    let key = origin.id().clone();
    let mut registry = self.origins.write();
    registry.insert(key, origin);
}
/// Registers metadata for a single file, replacing any entry with the same id.
pub fn register_file(&self, meta: FileMeta) {
    let id = meta.id;
    let mut table = self.file_meta.write();
    table.insert(id, meta);
}
/// Registers metadata for many files under a single write lock.
///
/// Entries are inserted in iteration order, so a later item with the same id
/// replaces an earlier one.
pub fn register_files(&self, files: impl IntoIterator<Item = FileMeta>) {
    let mut table = self.file_meta.write();
    table.extend(files.into_iter().map(|meta| (meta.id, meta)));
}
/// Reads the whole file from its origin, chunks it, stores the chunks, and
/// returns the resulting manifest.
///
/// Notes on the behavior as implemented:
/// - `total_size` in the manifest is copied from the registered metadata,
///   not from the number of bytes actually read.
/// - `mtime` is whole seconds since the Unix epoch, or 0 when the registered
///   time is before the epoch.
/// - A failed store write is only logged; the chunk is still listed in the
///   manifest.
///
/// # Errors
/// - [`FetchError::FileNotFound`] if `file_id` was never registered.
/// - [`FetchError::OriginNotFound`] if the file's origin is not registered.
/// - [`FetchError::OriginRead`] if the origin fails to read the file.
pub async fn fetch_file(&self, file_id: FileId) -> Result<ChunkManifest, FetchError> {
    // Clone out of each registry inside a single statement so that no lock
    // guard is alive across an `.await`.
    let meta = self
        .file_meta
        .read()
        .get(&file_id)
        .cloned()
        .ok_or(FetchError::FileNotFound(file_id))?;
    let wanted_origin = &meta.real_path.origin_id;
    let Some(origin) = self.origins.read().get(wanted_origin).cloned() else {
        return Err(FetchError::OriginNotFound(wanted_origin.clone()));
    };

    info!("Fetching file {:?} from origin {}", file_id, origin.id());
    let data = match origin.read_full(&meta.real_path.path).await {
        Ok(contents) => contents,
        Err(e) => return Err(FetchError::OriginRead(e.to_string())),
    };

    let mtime = match meta.mtime.duration_since(std::time::UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs() as i64,
        Err(_) => 0,
    };

    let chunks = self.chunker.chunk_refs(&data);
    info!("Chunked {:?} into {} chunks", file_id, chunks.len());

    let mut chunk_refs = Vec::with_capacity(chunks.len());
    for chunk in chunks {
        let already_stored = self.store.exists(&chunk.hash);
        if !already_stored {
            match self.store.put(chunk.data).await {
                Ok(_) => {}
                Err(e) => {
                    warn!(hash = %chunk.hash, error = %e, "CAS write failed, continuing in passthrough mode");
                }
            }
        }
        chunk_refs.push(ChunkRef {
            hash: chunk.hash,
            offset: chunk.offset,
            size: chunk.length,
        });
    }

    let manifest = ChunkManifest {
        file_id,
        total_size: meta.size,
        mtime,
        chunks: chunk_refs,
    };
    debug!(
        "Created manifest for {:?}: {} bytes, {} chunks",
        file_id,
        meta.size,
        manifest.chunks.len()
    );
    Ok(manifest)
}
/// Returns a manifest for `file_id`.
///
/// Currently this always delegates to [`ContentFetcher::fetch_file`], so the
/// file is read from its origin on every call.
pub async fn ensure_cached(&self, file_id: FileId) -> Result<ChunkManifest, FetchError> {
    let manifest = self.fetch_file(file_id).await?;
    Ok(manifest)
}
/// Returns a clone of the registered metadata for `file_id`, if any.
pub fn get_file_meta(&self, file_id: FileId) -> Option<FileMeta> {
    let table = self.file_meta.read();
    table.get(&file_id).cloned()
}
/// Publishes an `Event::FileAccessed` for `meta` describing a read of `size`
/// bytes at `offset`. Does nothing when no event bus was configured.
pub fn emit_access_event(&self, meta: &FileMeta, offset: u64, size: u32) {
    let Some(bus) = self.event_bus.as_ref() else {
        return;
    };
    let event = Event::FileAccessed {
        file_id: meta.id,
        path: meta.virtual_path.clone(),
        origin_id: meta.real_path.origin_id.clone(),
        offset,
        size,
    };
    bus.publish(event);
}
}
/// Errors returned by [`ContentFetcher`].
#[derive(Debug, thiserror::Error)]
pub enum FetchError {
    /// No metadata is registered for the requested file id.
    #[error("File not found: {0:?}")]
    FileNotFound(FileId),
    /// The file's origin id has no registered origin.
    #[error("Origin not found: {0}")]
    OriginNotFound(OriginId),
    /// The origin failed to read the file; carries the origin error's message.
    #[error("Origin read error: {0}")]
    OriginRead(String),
    /// Error from the chunk store.
    // NOTE(review): nothing in this module constructs this variant —
    // `fetch_file` only logs store write failures. Confirm it is still needed.
    #[error("Store error: {0}")]
    Store(#[from] crate::CasError),
}
// Unit tests for `ContentFetcher`, using a temp-dir `LocalOrigin` and a temp-dir store.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::CasConfig;
    use musicfs_core::{RealPath, VirtualPath};
    use musicfs_origins::LocalOrigin;
    use std::path::PathBuf;
    use std::time::SystemTime;
    use tempfile::TempDir;

    /// A small file is fetched into one chunk whose stored bytes equal the file contents.
    #[tokio::test]
    async fn test_fetch_file() {
        let cas_dir = TempDir::new().unwrap();
        let origin_dir = TempDir::new().unwrap();
        std::fs::write(origin_dir.path().join("test.flac"), b"fake audio data").unwrap();
        let config = CasConfig {
            chunks_dir: cas_dir.path().join("chunks"),
            ..Default::default()
        };
        let store = Arc::new(CasStore::open(config).await.unwrap());
        let fetcher = ContentFetcher::new(store.clone());
        let origin = Arc::new(LocalOrigin::new("local", origin_dir.path()));
        fetcher.register_origin(origin);
        let meta = FileMeta {
            id: FileId(1),
            virtual_path: VirtualPath::new("/Artist/Album/test.flac"),
            real_path: RealPath {
                origin_id: OriginId::from("local"),
                path: PathBuf::from("/test.flac"),
            },
            size: 15,
            mtime: SystemTime::now(),
            content_hash: None,
            audio: None,
        };
        fetcher.register_file(meta);
        let manifest = fetcher.fetch_file(FileId(1)).await.unwrap();
        assert_eq!(manifest.total_size, 15);
        assert_eq!(manifest.chunks.len(), 1);
        let data = store.get(&manifest.chunks[0].hash).await.unwrap();
        assert_eq!(&data[..], b"fake audio data");
    }

    /// Fetching an unregistered file id yields `FileNotFound`.
    #[tokio::test]
    async fn test_fetch_file_not_found() {
        let cas_dir = TempDir::new().unwrap();
        let config = CasConfig {
            chunks_dir: cas_dir.path().join("chunks"),
            ..Default::default()
        };
        let store = Arc::new(CasStore::open(config).await.unwrap());
        let fetcher = ContentFetcher::new(store);
        let result = fetcher.fetch_file(FileId(999)).await;
        assert!(matches!(result, Err(FetchError::FileNotFound(_))));
    }

    /// `emit_access_event` publishes a `FileAccessed` event on the configured bus.
    #[tokio::test]
    async fn test_fetch_emits_event() {
        let cas_dir = TempDir::new().unwrap();
        let origin_dir = TempDir::new().unwrap();
        std::fs::write(origin_dir.path().join("test.flac"), b"audio").unwrap();
        let config = CasConfig {
            chunks_dir: cas_dir.path().join("chunks"),
            ..Default::default()
        };
        let store = Arc::new(CasStore::open(config).await.unwrap());
        let event_bus = Arc::new(EventBus::default());
        // Subscribe before emitting so the event can be received.
        let mut rx = event_bus.subscribe();
        let fetcher = ContentFetcher::with_event_bus(store, event_bus);
        let origin = Arc::new(LocalOrigin::new("local", origin_dir.path()));
        fetcher.register_origin(origin);
        let meta = FileMeta {
            id: FileId(1),
            virtual_path: VirtualPath::new("/Artist/test.flac"),
            real_path: RealPath {
                origin_id: OriginId::from("local"),
                path: PathBuf::from("/test.flac"),
            },
            size: 5,
            mtime: SystemTime::now(),
            content_hash: None,
            audio: None,
        };
        fetcher.register_file(meta.clone());
        fetcher.emit_access_event(&meta, 0, 5);
        let event = rx.try_recv().unwrap();
        assert!(matches!(event, Event::FileAccessed { .. }));
    }

    /// A registered file whose origin was never registered yields `OriginNotFound`.
    #[tokio::test]
    async fn test_fetch_origin_not_found() {
        let cas_dir = TempDir::new().unwrap();
        let config = CasConfig {
            chunks_dir: cas_dir.path().join("chunks"),
            ..Default::default()
        };
        let store = Arc::new(CasStore::open(config).await.unwrap());
        let fetcher = ContentFetcher::new(store);
        let meta = FileMeta {
            id: FileId(1),
            virtual_path: VirtualPath::new("/test.flac"),
            real_path: RealPath {
                origin_id: OriginId::from("nonexistent"),
                path: PathBuf::from("/test.flac"),
            },
            size: 100,
            mtime: SystemTime::now(),
            content_hash: None,
            audio: None,
        };
        fetcher.register_file(meta);
        let result = fetcher.fetch_file(FileId(1)).await;
        assert!(matches!(result, Err(FetchError::OriginNotFound(_))));
    }
}
+9
View File
@@ -0,0 +1,9 @@
//! Content-addressed chunk storage for musicfs.
//!
//! - `store`: the on-disk chunk store ([`CasStore`]).
//! - `fetcher`: pulls files from origins into the store ([`ContentFetcher`]).
//! - `reader`: serves byte-range reads from stored chunks ([`FileReader`]).
//! - `chunks`: shared chunk descriptor types.

mod chunks;
mod fetcher;
mod reader;
mod store;

pub use chunks::{ChunkLocation, ChunkRef};
pub use fetcher::{ContentFetcher, FetchError};
pub use reader::{ChunkManifest, FileReader, ReaderError};
pub use store::{CasConfig, CasError, CasStore, DedupStats};
+332
View File
@@ -0,0 +1,332 @@
use crate::chunks::ChunkRef;
use crate::fetcher::{ContentFetcher, FetchError};
use crate::store::{CasError, CasStore};
use bytes::{Bytes, BytesMut};
use musicfs_core::FileId;
use parking_lot::RwLock;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;
use tracing::{debug, trace, warn};
/// Describes how a file is assembled from stored chunks.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChunkManifest {
    /// File this manifest belongs to.
    pub file_id: FileId,
    /// File length in bytes; `FileReader::read` clamps reads to this value.
    pub total_size: u64,
    /// Modification time; `ContentFetcher::fetch_file` fills this with whole
    /// seconds since the Unix epoch.
    pub mtime: i64,
    /// Chunks making up the file.
    pub chunks: Vec<ChunkRef>,
}
impl ChunkManifest {
pub fn chunks_to_bytes(&self) -> Vec<u8> {
rmp_serde::to_vec(&self.chunks).unwrap_or_default()
}
/// Decodes a chunk list produced by `chunks_to_bytes`.
///
/// Returns `None` when `data` is not a valid MessagePack encoding of a chunk list.
pub fn chunks_from_bytes(data: &[u8]) -> Option<Vec<ChunkRef>> {
    match rmp_serde::from_slice::<Vec<ChunkRef>>(data) {
        Ok(chunks) => Some(chunks),
        Err(_) => None,
    }
}
/// Rebuilds a manifest from separately stored fields plus the serialized
/// chunk list.
///
/// Returns `None` when `chunk_blob` cannot be decoded.
pub fn from_db(
    file_id: FileId,
    total_size: u64,
    mtime: i64,
    chunk_blob: &[u8],
) -> Option<Self> {
    Self::chunks_from_bytes(chunk_blob).map(|chunks| Self {
        file_id,
        total_size,
        mtime,
        chunks,
    })
}
}
/// Serves byte-range reads of files by stitching together chunks from a
/// [`CasStore`].
pub struct FileReader {
    /// Store the chunk bytes are read from.
    store: Arc<CasStore>,
    /// Optional fetcher used to build missing manifests and to re-fetch
    /// missing or corrupt chunks.
    fetcher: Option<Arc<ContentFetcher>>,
    /// In-memory manifest cache, keyed by file id.
    manifests: RwLock<HashMap<FileId, ChunkManifest>>,
}
impl FileReader {
/// Creates a reader with no fetcher; only files with a registered manifest
/// can be read.
pub fn new(store: Arc<CasStore>) -> Self {
    let manifests = RwLock::new(HashMap::new());
    Self {
        store,
        fetcher: None,
        manifests,
    }
}
pub fn with_fetcher(store: Arc<CasStore>, fetcher: Arc<ContentFetcher>) -> Self {
Self {
store,
fetcher: Some(fetcher),
manifests: RwLock::new(HashMap::new()),
}
}
/// Caches `manifest` under its file id, replacing any previous manifest for
/// that file.
pub fn register_manifest(&self, manifest: ChunkManifest) {
    let key = manifest.file_id;
    self.manifests.write().insert(key, manifest);
}
/// Returns the cached manifest for `file_id`, or builds one through the
/// fetcher and caches it.
///
/// # Errors
/// [`ReaderError::ManifestNotFound`] on a cache miss when no fetcher is
/// configured; otherwise any error from the fetcher.
async fn get_or_fetch_manifest(&self, file_id: FileId) -> Result<ChunkManifest, ReaderError> {
    // The read guard is a statement-scoped temporary, so it is released
    // before any `.await` below.
    let cached = self.manifests.read().get(&file_id).cloned();
    if let Some(hit) = cached {
        trace!(file_id = ?file_id, "manifest cache hit");
        return Ok(hit);
    }
    trace!(file_id = ?file_id, "manifest cache miss");

    let fetcher = self
        .fetcher
        .as_ref()
        .ok_or(ReaderError::ManifestNotFound(file_id))?;
    let fetched = fetcher.ensure_cached(file_id).await?;
    self.manifests.write().insert(file_id, fetched.clone());
    Ok(fetched)
}
pub async fn read(
&self,
file_id: FileId,
offset: u64,
size: u32,
) -> Result<Bytes, ReaderError> {
let manifest = self.get_or_fetch_manifest(file_id).await?;
if let Some(fetcher) = &self.fetcher {
if let Some(meta) = fetcher.get_file_meta(file_id) {
fetcher.emit_access_event(&meta, offset, size);
}
}
if offset >= manifest.total_size {
return Ok(Bytes::new());
}
let end = std::cmp::min(offset + size as u64, manifest.total_size);
let mut result = BytesMut::with_capacity((end - offset) as usize);
let mut chunks_read = 0u32;
for chunk_ref in &manifest.chunks {
let chunk_start = chunk_ref.offset;
let chunk_end = chunk_ref.offset + chunk_ref.size as u64;
if chunk_end <= offset || chunk_start >= end {
continue;
}
let chunk_data = match self.store.get(&chunk_ref.hash).await {
Ok(data) => data,
Err(CasError::IntegrityError { .. }) => {
warn!(hash = %chunk_ref.hash, "Chunk corrupt, deleting and re-fetching");
let _ = self.store.delete(&chunk_ref.hash).await;
if let Some(fetcher) = &self.fetcher {
let new_manifest = fetcher.fetch_file(file_id).await?;
self.manifests.write().insert(file_id, new_manifest);
self.store.get(&chunk_ref.hash).await?
} else {
return Err(ReaderError::Cas(CasError::NotFound(
chunk_ref.hash.as_hex(),
)));
}
}
Err(CasError::NotFound(_)) => {
warn!(hash = %chunk_ref.hash, "Chunk missing, attempting re-fetch");
if let Some(fetcher) = &self.fetcher {
let new_manifest = fetcher.fetch_file(file_id).await?;
self.manifests.write().insert(file_id, new_manifest);
self.store.get(&chunk_ref.hash).await?
} else {
return Err(ReaderError::Cas(CasError::NotFound(
chunk_ref.hash.as_hex(),
)));
}
}
Err(e) => return Err(ReaderError::Cas(e)),
};
let read_start = if offset > chunk_start {
(offset - chunk_start) as usize
} else {
0
};
let read_end = if end < chunk_end {
(end - chunk_start) as usize
} else {
chunk_ref.size as usize
};
result.extend_from_slice(&chunk_data[read_start..read_end]);
chunks_read += 1;
}
let bytes_read = result.len() as u64;
debug!(file_id = ?file_id, offset, size, chunks_read, bytes_read, "read completed");
Ok(result.freeze())
}
}
/// Errors returned by [`FileReader`].
#[derive(Debug, thiserror::Error)]
pub enum ReaderError {
    /// No manifest is cached for the file and no fetcher is configured.
    #[error("Manifest not found for file {0:?}")]
    ManifestNotFound(FileId),
    /// The fetcher failed while building or rebuilding a manifest.
    #[error("Fetch error: {0}")]
    Fetch(#[from] FetchError),
    /// The chunk store returned an error.
    #[error("CAS error: {0}")]
    Cas(#[from] crate::store::CasError),
}
// Unit tests for `FileReader` with hand-registered manifests (no fetcher).
#[cfg(test)]
mod tests {
    use super::*;
    use crate::store::CasConfig;
    use musicfs_core::ChunkHash;
    use tempfile::TempDir;

    /// Reading a whole single-chunk file returns its bytes.
    #[tokio::test]
    async fn test_file_reader_simple() {
        let dir = TempDir::new().unwrap();
        let config = CasConfig {
            chunks_dir: dir.path().join("chunks"),
            ..Default::default()
        };
        let store = Arc::new(CasStore::open(config).await.unwrap());
        let data = b"Hello, World!";
        let hash = store.put(data).await.unwrap();
        let reader = FileReader::new(store);
        reader.register_manifest(ChunkManifest {
            file_id: FileId(1),
            total_size: data.len() as u64,
            mtime: 0,
            chunks: vec![ChunkRef {
                hash,
                offset: 0,
                size: data.len() as u32,
            }],
        });
        let result = reader.read(FileId(1), 0, data.len() as u32).await.unwrap();
        assert_eq!(&result[..], data);
    }

    /// A read in the middle of a chunk returns only the requested slice.
    #[tokio::test]
    async fn test_file_reader_partial() {
        let dir = TempDir::new().unwrap();
        let config = CasConfig {
            chunks_dir: dir.path().join("chunks"),
            ..Default::default()
        };
        let store = Arc::new(CasStore::open(config).await.unwrap());
        let data = b"ABCDEFGHIJ";
        let hash = store.put(data).await.unwrap();
        let reader = FileReader::new(store);
        reader.register_manifest(ChunkManifest {
            file_id: FileId(1),
            total_size: data.len() as u64,
            mtime: 0,
            chunks: vec![ChunkRef {
                hash,
                offset: 0,
                size: data.len() as u32,
            }],
        });
        let result = reader.read(FileId(1), 3, 4).await.unwrap();
        assert_eq!(&result[..], b"DEFG");
    }

    /// A read spanning a chunk boundary concatenates the tail of one chunk
    /// with the head of the next.
    #[tokio::test]
    async fn test_file_reader_multi_chunk() {
        let dir = TempDir::new().unwrap();
        let config = CasConfig {
            chunks_dir: dir.path().join("chunks"),
            ..Default::default()
        };
        let store = Arc::new(CasStore::open(config).await.unwrap());
        let chunk1 = b"AAAA";
        let chunk2 = b"BBBB";
        let hash1 = store.put(chunk1).await.unwrap();
        let hash2 = store.put(chunk2).await.unwrap();
        let reader = FileReader::new(store);
        reader.register_manifest(ChunkManifest {
            file_id: FileId(1),
            total_size: 8,
            mtime: 0,
            chunks: vec![
                ChunkRef {
                    hash: hash1,
                    offset: 0,
                    size: 4,
                },
                ChunkRef {
                    hash: hash2,
                    offset: 4,
                    size: 4,
                },
            ],
        });
        let result = reader.read(FileId(1), 2, 4).await.unwrap();
        assert_eq!(&result[..], b"AABB");
    }

    /// Reading past the end of the file returns an empty buffer.
    #[tokio::test]
    async fn test_file_reader_eof() {
        let dir = TempDir::new().unwrap();
        let config = CasConfig {
            chunks_dir: dir.path().join("chunks"),
            ..Default::default()
        };
        let store = Arc::new(CasStore::open(config).await.unwrap());
        let data = b"short";
        let hash = store.put(data).await.unwrap();
        let reader = FileReader::new(store);
        reader.register_manifest(ChunkManifest {
            file_id: FileId(1),
            total_size: data.len() as u64,
            mtime: 0,
            chunks: vec![ChunkRef {
                hash,
                offset: 0,
                size: data.len() as u32,
            }],
        });
        let result = reader.read(FileId(1), 100, 10).await.unwrap();
        assert!(result.is_empty());
    }

    /// The chunk list survives a `chunks_to_bytes` / `chunks_from_bytes` round trip.
    #[test]
    fn test_chunk_manifest_serialization() {
        let manifest = ChunkManifest {
            file_id: FileId(42),
            total_size: 1024,
            mtime: 0,
            chunks: vec![ChunkRef {
                hash: ChunkHash::from_bytes(b"test"),
                offset: 0,
                size: 1024,
            }],
        };
        let bytes = manifest.chunks_to_bytes();
        let restored = ChunkManifest::chunks_from_bytes(&bytes).unwrap();
        assert_eq!(restored.len(), 1);
        assert_eq!(restored[0].size, 1024);
    }
}
+396
View File
@@ -0,0 +1,396 @@
use crate::chunks::ChunkLocation;
use bytes::Bytes;
use musicfs_core::ChunkHash;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicU64, Ordering};
use tokio::fs;
use tracing::{debug, info, trace, warn};
#[cfg(feature = "failpoints")]
use fail::fail_point;
/// Default cap on the total size of stored chunks: 10 GiB.
const DEFAULT_MAX_SIZE_10GB: u64 = 10 * 1024 * 1024 * 1024;
/// Default number of shard directory levels. `chunk_path` consumes two hex
/// characters per level, i.e. up to 256 directories at each level.
const DEFAULT_SHARD_LEVELS_256_SUBDIRS: u8 = 2;
/// Configuration for a [`CasStore`].
#[derive(Debug, Clone)]
pub struct CasConfig {
    /// Root directory for chunk files; the sled index lives in
    /// `index.sled` underneath it.
    pub chunks_dir: PathBuf,
    /// Maximum total chunk bytes. When 0, `put` skips the capacity check and
    /// `get` skips integrity verification.
    pub max_size: u64,
    /// Number of two-hex-character directory levels placed in front of each
    /// chunk file name.
    pub shard_levels: u8,
}
impl Default for CasConfig {
    /// Places chunks under `<cache dir>/musicfs/chunks`, falling back to a
    /// relative `.cache` directory when `dirs::cache_dir()` returns `None`,
    /// and applies the default size limit and shard depth.
    fn default() -> Self {
        let base = match dirs::cache_dir() {
            Some(dir) => dir,
            None => PathBuf::from(".cache"),
        };
        let chunks_dir = base.join("musicfs").join("chunks");
        Self {
            chunks_dir,
            max_size: DEFAULT_MAX_SIZE_10GB,
            shard_levels: DEFAULT_SHARD_LEVELS_256_SUBDIRS,
        }
    }
}
/// Content-addressed chunk store: chunk files on disk plus a sled index.
pub struct CasStore {
    /// Directory, size limit and shard depth.
    config: CasConfig,
    /// sled database keyed by raw hash bytes; values are serialized
    /// `ChunkLocation`s.
    index: sled::Db,
    /// Running total of stored chunk bytes. Initialized by scanning the chunk
    /// directory in `open`, then adjusted by `put` and `delete`.
    current_size: AtomicU64,
}
impl CasStore {
/// Opens (creating if needed) the store rooted at `config.chunks_dir`.
///
/// The sled index lives in `index.sled` under that directory. If it cannot
/// be opened, the open is retried once; if that also fails, the index
/// directory is deleted and recreated empty. Chunk files are not touched.
/// The current size is then computed by scanning the chunk directory.
///
/// # Errors
/// `CasError::Io` for filesystem failures and `CasError::Sled` if the index
/// cannot be opened even after being recreated.
pub async fn open(config: CasConfig) -> Result<Self, CasError> {
    fs::create_dir_all(&config.chunks_dir).await?;
    let index_path = config.chunks_dir.join("index.sled");
    let index = match sled::open(&index_path) {
        Ok(db) => db,
        Err(e) => {
            warn!(error = %e, path = ?index_path, "sled index corrupted, attempting recovery");
            match sled::Config::new().path(&index_path).open() {
                Ok(db) => {
                    info!("sled index repaired successfully");
                    db
                }
                Err(repair_err) => {
                    warn!(error = %repair_err, "sled repair failed, recreating index");
                    // Async removal keeps the runtime thread unblocked; an
                    // already-missing directory is not an error.
                    match fs::remove_dir_all(&index_path).await {
                        Ok(()) => {}
                        Err(io_err) if io_err.kind() == std::io::ErrorKind::NotFound => {}
                        Err(io_err) => return Err(CasError::Io(io_err)),
                    }
                    sled::open(&index_path)?
                }
            }
        }
    };
    let current_size = Self::calculate_size(&config.chunks_dir).await;
    Ok(Self {
        config,
        index,
        current_size: AtomicU64::new(current_size),
    })
}
/// Total size in bytes of all regular files under `dir`, excluding any
/// directory named `index.sled`.
async fn calculate_size(dir: &Path) -> u64 {
    Self::calculate_size_recursive(dir).await
}

/// Recursive worker for `calculate_size`.
///
/// Returns a boxed future because an `async fn` cannot call itself directly.
/// Unreadable directories and entries are skipped and contribute nothing.
fn calculate_size_recursive(
    dir: &Path,
) -> std::pin::Pin<Box<dyn std::future::Future<Output = u64> + Send + '_>> {
    Box::pin(async move {
        let Ok(mut entries) = fs::read_dir(dir).await else {
            return 0u64;
        };
        let mut total = 0u64;
        while let Ok(Some(entry)) = entries.next_entry().await {
            let Ok(meta) = entry.metadata().await else {
                continue;
            };
            if meta.is_file() {
                total += meta.len();
                continue;
            }
            // Skip sled index directory
            if meta.is_dir() && entry.file_name() != "index.sled" {
                total += Self::calculate_size_recursive(&entry.path()).await;
            }
        }
        total
    })
}
/// Stores `data` as a chunk and returns its content hash.
///
/// If a file already exists at the chunk's path, nothing is written and the
/// hash is returned (dedup hit). Otherwise the capacity limit is checked
/// (unless `max_size` is 0), the file is written, an index entry is
/// inserted, and the size counter is increased.
///
/// # Errors
/// `CasError::StoreFull` when the write would exceed `max_size`;
/// `CasError::Io`, `CasError::Serialization` or `CasError::Sled` for
/// failures while writing the file or the index entry.
pub async fn put(&self, data: &[u8]) -> Result<ChunkHash, CasError> {
    let hash = ChunkHash::from_bytes(data);
    let path = self.chunk_path(&hash);
    // Dedup: an existing file at this path is taken as the same content.
    // NOTE(review): the content of the existing file is not checked here.
    if path.exists() {
        trace!(hash = %hash, size_bytes = data.len(), "dedup hit");
        return Ok(hash);
    }
    // A `max_size` of 0 disables the capacity check.
    if self.config.max_size > 0 {
        let new_size = self.current_size.load(Ordering::SeqCst) + data.len() as u64;
        if new_size > self.config.max_size {
            warn!(
                current_size = self.current_size.load(Ordering::SeqCst),
                chunk_size = data.len(),
                max_size = self.config.max_size,
                "CAS store full, rejecting write"
            );
            return Err(CasError::StoreFull {
                current: self.current_size.load(Ordering::SeqCst),
                max: self.config.max_size,
            });
        }
    }
    if let Some(parent) = path.parent() {
        fs::create_dir_all(parent).await?;
    }
    // Failure-injection hook: fail before anything is written.
    #[cfg(feature = "failpoints")]
    fail_point!("cas-put-before-write", |_| {
        Err(CasError::Io(std::io::Error::new(
            std::io::ErrorKind::Other,
            "Failpoint: cas-put-before-write",
        )))
    });
    fs::write(&path, data).await?;
    // Failure-injection hook: the chunk file exists, but the index entry and
    // size counter have not been updated yet.
    #[cfg(feature = "failpoints")]
    fail_point!("cas-put-after-write-before-index", |_| {
        Err(CasError::Io(std::io::Error::new(
            std::io::ErrorKind::Other,
            "Failpoint: cas-put-after-write-before-index",
        )))
    });
    let location = ChunkLocation {
        path: path.clone(),
        // NOTE(review): `as u32` truncates for chunks of 4 GiB or more.
        size: data.len() as u32,
    };
    self.index.insert(
        hash.0.as_slice(),
        rmp_serde::to_vec(&location).map_err(|e| CasError::Serialization(e.to_string()))?,
    )?;
    // Counted only after the index insert succeeds.
    self.current_size
        .fetch_add(data.len() as u64, Ordering::SeqCst);
    debug!(hash = %hash, size_bytes = data.len(), "chunk stored");
    Ok(hash)
}
/// Reads the chunk stored under `hash`.
///
/// The file is read directly and a "not found" I/O error is mapped to
/// `CasError::NotFound`. A chunk that disappears concurrently is therefore
/// still reported as `NotFound`, which callers such as `FileReader` use to
/// trigger a re-fetch, and no blocking existence check runs in async code.
///
/// # Errors
/// `CasError::NotFound` if no chunk file exists, `CasError::IntegrityError`
/// if the bytes do not hash to `hash`, `CasError::Io` for other read failures.
pub async fn get(&self, hash: &ChunkHash) -> Result<Bytes, CasError> {
    let path = self.chunk_path(hash);
    let data = match fs::read(&path).await {
        Ok(data) => data,
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
            return Err(CasError::NotFound(hash.as_hex()));
        }
        Err(e) => return Err(CasError::Io(e)),
    };
    // NOTE(review): integrity verification is gated on `max_size > 0`, the
    // same flag that disables the capacity limit in `put`. Kept as-is;
    // confirm that stores without a size limit should skip verification.
    if self.config.max_size > 0 {
        self.verify_integrity(hash, &data)?;
    }
    debug!(hash = %hash, size_bytes = data.len(), "chunk retrieved");
    Ok(Bytes::from(data))
}
/// Returns whether a chunk file for `hash` is present on disk.
///
/// Only the filesystem is consulted, not the index.
pub fn exists(&self, hash: &ChunkHash) -> bool {
    let candidate = self.chunk_path(hash);
    candidate.exists()
}
/// Re-hashes `data` and checks that it matches `expected`.
///
/// # Errors
/// `CasError::IntegrityError` carrying both hashes in hex on mismatch.
fn verify_integrity(&self, expected: &ChunkHash, data: &[u8]) -> Result<(), CasError> {
    let actual = ChunkHash::from_bytes(data);
    if actual == *expected {
        return Ok(());
    }
    warn!(
        "Chunk integrity failure: expected {}, got {}",
        expected, actual
    );
    Err(CasError::IntegrityError {
        expected: expected.as_hex(),
        actual: actual.as_hex(),
    })
}
/// Builds the on-disk path for `hash`.
///
/// The layout is `chunks_dir/<hex[0..2]>/<hex[2..4]>/.../<full hex>`, with
/// one two-character directory per shard level. Levels that would run past
/// the end of the hex string are omitted.
fn chunk_path(&self, hash: &ChunkHash) -> PathBuf {
    let hex = hash.as_hex();
    let levels = usize::from(self.config.shard_levels);
    let mut path = self.config.chunks_dir.clone();
    let shard_dirs = (0..levels).filter_map(|level| hex.get(level * 2..level * 2 + 2));
    for shard in shard_dirs {
        path.push(shard);
    }
    path.push(&hex);
    path
}
pub async fn delete(&self, hash: &ChunkHash) -> Result<(), CasError> {
let path = self.chunk_path(hash);
if path.exists() {
let meta = fs::metadata(&path).await?;
fs::remove_file(&path).await?;
self.index.remove(hash.0.as_slice())?;
self.current_size.fetch_sub(meta.len(), Ordering::SeqCst);
debug!(hash = %hash, size_bytes = meta.len(), "chunk deleted");
}
Ok(())
}
/// Current value of the store's running byte counter.
pub fn current_size(&self) -> u64 {
    let counter = &self.current_size;
    counter.load(Ordering::SeqCst)
}
pub fn max_size(&self) -> u64 {
self.config.max_size
}
/// Iterates over the hashes recorded in the index.
///
/// Entries that fail to read and keys that are not exactly 8 bytes long are
/// skipped silently.
pub fn list_chunks(&self) -> impl Iterator<Item = ChunkHash> + '_ {
    self.index.iter().filter_map(|entry| {
        let (key, _value) = entry.ok()?;
        let raw = <[u8; 8]>::try_from(&key[..]).ok()?;
        Some(ChunkHash(raw))
    })
}
/// Snapshot of store statistics.
///
/// Both `chunks_stored` and `chunks_unique` are set to the number of index
/// entries, so `DedupStats::dedup_ratio` on the result is always 0.
pub fn dedup_stats(&self) -> DedupStats {
    let indexed = self.index.len() as u64;
    let size_bytes = self.current_size();
    let size_limit_bytes = self.config.max_size;
    DedupStats {
        chunks_stored: indexed,
        chunks_unique: indexed,
        size_bytes,
        size_limit_bytes,
    }
}
}
/// Store statistics as reported by `CasStore::dedup_stats`.
#[derive(Debug, Clone)]
pub struct DedupStats {
    /// Number of chunks stored.
    pub chunks_stored: u64,
    /// Number of distinct chunks.
    pub chunks_unique: u64,
    /// Bytes currently used by chunk files.
    pub size_bytes: u64,
    /// Configured size limit in bytes.
    pub size_limit_bytes: u64,
}

impl DedupStats {
    /// Fraction of stored chunks that are duplicates:
    /// `1 - chunks_unique / chunks_stored`, or `0.0` when nothing is stored.
    pub fn dedup_ratio(&self) -> f64 {
        match self.chunks_stored {
            0 => 0.0,
            stored => {
                let unique_share = self.chunks_unique as f64 / stored as f64;
                1.0 - unique_share
            }
        }
    }
}
/// Errors returned by [`CasStore`].
#[derive(Debug, thiserror::Error)]
pub enum CasError {
    /// Filesystem error.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),
    /// Error from the sled index.
    #[error("Sled error: {0}")]
    Sled(#[from] sled::Error),
    /// No chunk is stored under the given hash (hex form).
    #[error("Chunk not found: {0}")]
    NotFound(String),
    /// The stored bytes do not hash to the requested hash.
    #[error("Integrity error: expected {expected}, got {actual}")]
    IntegrityError { expected: String, actual: String },
    /// A `ChunkLocation` could not be serialized for the index.
    #[error("Serialization error: {0}")]
    Serialization(String),
    /// The write would push the store past its configured `max_size`.
    #[error("Store full: {current} / {max} bytes")]
    StoreFull { current: u64, max: u64 },
}
// Unit tests for `CasStore` against a temp-dir store with a 1 MiB limit.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Opens a store in a fresh temp dir. The `TempDir` is returned so the
    /// directory outlives the store for the duration of the test.
    async fn test_store() -> (CasStore, TempDir) {
        let dir = TempDir::new().unwrap();
        let config = CasConfig {
            chunks_dir: dir.path().join("chunks"),
            max_size: 1024 * 1024,
            shard_levels: 2,
        };
        let store = CasStore::open(config).await.unwrap();
        (store, dir)
    }

    /// A stored chunk can be read back unchanged.
    #[tokio::test]
    async fn test_cas_put_get() {
        let (store, _dir) = test_store().await;
        let data = b"test chunk data";
        let hash = store.put(data).await.unwrap();
        let retrieved = store.get(&hash).await.unwrap();
        assert_eq!(&retrieved[..], data);
    }

    /// Storing the same bytes twice yields the same hash.
    #[tokio::test]
    async fn test_cas_dedup() {
        let (store, _dir) = test_store().await;
        let data = b"duplicate data";
        let hash1 = store.put(data).await.unwrap();
        let hash2 = store.put(data).await.unwrap();
        assert_eq!(hash1, hash2);
    }

    /// `exists` is true for stored chunks and false for unknown hashes.
    #[tokio::test]
    async fn test_cas_exists() {
        let (store, _dir) = test_store().await;
        let data = b"existence test";
        let hash = store.put(data).await.unwrap();
        assert!(store.exists(&hash));
        let fake_hash = ChunkHash::from_bytes(b"nonexistent");
        assert!(!store.exists(&fake_hash));
    }

    /// A deleted chunk no longer exists.
    #[tokio::test]
    async fn test_cas_delete() {
        let (store, _dir) = test_store().await;
        let data = b"delete me";
        let hash = store.put(data).await.unwrap();
        assert!(store.exists(&hash));
        store.delete(&hash).await.unwrap();
        assert!(!store.exists(&hash));
    }

    /// An untampered chunk passes the integrity check on read.
    #[tokio::test]
    async fn test_cas_integrity() {
        let (store, _dir) = test_store().await;
        let data = b"integrity test";
        let hash = store.put(data).await.unwrap();
        let retrieved = store.get(&hash).await.unwrap();
        assert_eq!(&retrieved[..], data);
    }

    /// A repeated `put` of the same bytes does not add an index entry.
    #[tokio::test]
    async fn test_cas_dedup_stats() {
        let (store, _dir) = test_store().await;
        store.put(b"chunk1").await.unwrap();
        store.put(b"chunk2").await.unwrap();
        store.put(b"chunk1").await.unwrap();
        let stats = store.dedup_stats();
        assert_eq!(stats.chunks_stored, 2);
        assert_eq!(stats.chunks_unique, 2);
    }
}
+203
View File
@@ -0,0 +1,203 @@
use musicfs_cache::TreeBuilder;
use musicfs_cas::{CasConfig, CasStore, ChunkManifest, ChunkRef, ContentFetcher, FileReader};
use musicfs_core::{FileId, FileMeta, OriginId, RealPath, VirtualPath};
use musicfs_origins::LocalOrigin;
use std::path::PathBuf;
use std::sync::{Arc, RwLock};
use std::time::SystemTime;
use tempfile::TempDir;
/// Builds a `FileMeta` with the given id, virtual path and size; the origin
/// is always `"test"`, the real path `/test`, and the mtime "now".
fn make_file_meta(id: i64, vpath: &str, size: u64) -> FileMeta {
    let real_path = RealPath {
        origin_id: OriginId::from("test"),
        path: PathBuf::from("/test"),
    };
    FileMeta {
        id: FileId(id),
        virtual_path: VirtualPath::new(vpath),
        real_path,
        size,
        mtime: SystemTime::now(),
        content_hash: None,
        audio: None,
    }
}
/// A chunk stored directly in the store can be read back through a
/// `FileReader` with a hand-registered manifest; a tree is built alongside
/// for the same file.
#[tokio::test]
async fn test_cas_and_tree_integration() {
    let dir = TempDir::new().unwrap();
    let config = CasConfig {
        chunks_dir: dir.path().join("chunks"),
        ..Default::default()
    };
    let store = Arc::new(CasStore::open(config).await.unwrap());
    let file_data = b"This is test audio file content for testing.";
    let chunk_hash = store.put(file_data).await.unwrap();
    let mut builder = TreeBuilder::new();
    builder.add_file(&make_file_meta(
        1,
        "/Artist/Album/Track.flac",
        file_data.len() as u64,
    ));
    // The tree is built but not inspected by this test.
    let _tree = Arc::new(RwLock::new(builder.build()));
    let reader = Arc::new(FileReader::new(store.clone()));
    reader.register_manifest(ChunkManifest {
        file_id: FileId(1),
        total_size: file_data.len() as u64,
        mtime: 0,
        chunks: vec![ChunkRef {
            hash: chunk_hash,
            offset: 0,
            size: file_data.len() as u32,
        }],
    });
    let result = reader
        .read(FileId(1), 0, file_data.len() as u32)
        .await
        .unwrap();
    assert_eq!(&result[..], file_data);
}
/// A chunk written by one store instance is readable after the store is
/// dropped and reopened on the same directory.
#[tokio::test]
async fn test_cache_persistence() {
    let dir = TempDir::new().unwrap();
    let config = CasConfig {
        chunks_dir: dir.path().join("chunks"),
        ..Default::default()
    };
    let data = b"persistent data";
    // The first store is dropped at the end of this block, before reopening.
    let hash = {
        let store = CasStore::open(config.clone()).await.unwrap();
        store.put(data).await.unwrap()
    };
    let store = CasStore::open(config).await.unwrap();
    let retrieved = store.get(&hash).await.unwrap();
    assert_eq!(&retrieved[..], data);
}
/// Storing identical content twice returns the same hash and does not grow
/// the store.
#[tokio::test]
async fn test_deduplication() {
    let dir = TempDir::new().unwrap();
    let config = CasConfig {
        chunks_dir: dir.path().join("chunks"),
        ..Default::default()
    };
    let store = CasStore::open(config).await.unwrap();
    let data = b"duplicate this content";
    let hash1 = store.put(data).await.unwrap();
    let size_after_first = store.current_size();
    let hash2 = store.put(data).await.unwrap();
    let size_after_second = store.current_size();
    assert_eq!(hash1, hash2);
    assert_eq!(size_after_first, size_after_second);
}
/// Fetching a registered file from a local origin produces a one-chunk
/// manifest whose chunk, read from the store, equals the file contents.
#[tokio::test]
async fn test_fetcher_cache_miss_flow() {
    let origin_dir = TempDir::new().unwrap();
    let cas_dir = TempDir::new().unwrap();
    let test_content = b"This is audio content that will be fetched on cache miss";
    let test_file_path = origin_dir.path().join("test.flac");
    std::fs::write(&test_file_path, test_content).unwrap();
    let config = CasConfig {
        chunks_dir: cas_dir.path().join("chunks"),
        ..Default::default()
    };
    let store = Arc::new(CasStore::open(config).await.unwrap());
    let origin_id = OriginId::from("test-origin");
    let origin = Arc::new(LocalOrigin::new(
        origin_id.clone(),
        origin_dir.path().to_path_buf(),
    ));
    let fetcher = ContentFetcher::new(store.clone());
    fetcher.register_origin(origin);
    let file_id = FileId(42);
    let file_meta = FileMeta {
        id: file_id,
        virtual_path: VirtualPath::new("/Artist/Album/test.flac"),
        real_path: RealPath {
            origin_id,
            path: PathBuf::from("/test.flac"),
        },
        size: test_content.len() as u64,
        mtime: SystemTime::now(),
        content_hash: None,
        audio: None,
    };
    fetcher.register_file(file_meta);
    let manifest = fetcher.fetch_file(file_id).await.unwrap();
    assert_eq!(manifest.file_id, file_id);
    assert_eq!(manifest.total_size, test_content.len() as u64);
    assert_eq!(manifest.chunks.len(), 1);
    let chunk_data = store.get(&manifest.chunks[0].hash).await.unwrap();
    assert_eq!(&chunk_data[..], test_content);
}
/// A reader backed by a fetcher serves a file that has no pre-registered
/// manifest, and serves a second, partial read of the same file.
#[tokio::test]
async fn test_reader_with_fetcher_integration() {
    let origin_dir = TempDir::new().unwrap();
    let cas_dir = TempDir::new().unwrap();
    let test_content = b"Audio file content for reader integration test";
    let test_file_path = origin_dir.path().join("song.flac");
    std::fs::write(&test_file_path, test_content).unwrap();
    let config = CasConfig {
        chunks_dir: cas_dir.path().join("chunks"),
        ..Default::default()
    };
    let store = Arc::new(CasStore::open(config).await.unwrap());
    let origin_id = OriginId::from("local");
    let origin = Arc::new(LocalOrigin::new(
        origin_id.clone(),
        origin_dir.path().to_path_buf(),
    ));
    let fetcher = ContentFetcher::new(store.clone());
    fetcher.register_origin(origin);
    let file_id = FileId(100);
    let file_meta = FileMeta {
        id: file_id,
        virtual_path: VirtualPath::new("/Test/song.flac"),
        real_path: RealPath {
            origin_id,
            path: PathBuf::from("/song.flac"),
        },
        size: test_content.len() as u64,
        mtime: SystemTime::now(),
        content_hash: None,
        audio: None,
    };
    fetcher.register_file(file_meta);
    let reader = FileReader::with_fetcher(store, Arc::new(fetcher));
    // First read: no manifest is registered, so the reader must go through the fetcher.
    let result = reader
        .read(file_id, 0, test_content.len() as u32)
        .await
        .unwrap();
    assert_eq!(&result[..], test_content);
    // Second read of the same file, this time only a prefix.
    let result2 = reader.read(file_id, 0, 10).await.unwrap();
    assert_eq!(&result2[..], &test_content[..10]);
}