Add Week 9 Smart Features: collections, artwork, predictive prefetch

Smart Collections (musicfs-search/src/collections.rs):
- CollectionStore with thread-safe Mutex<Connection>
- CollectionQuery enum: Match, DateRange, RecentlyAdded/Played, MostPlayed, Genre, Compound
- Built-in collections for Recently Added, 80s/90s Music

Artwork Extraction & Caching:
- ArtworkExtractor using symphonia Visual (musicfs-metadata)
- ArtworkCache with CAS storage + on-demand resize (musicfs-cache)
- ArtType: Front/Back/Other, ArtSize: Thumbnail/Medium/Full

Predictive Prefetching:
- PatternStore tracks access patterns with sequence prediction
- PrefetchEngine listens to FileAccessed events, prefetches predictions
- PrefetchOps exposes /.prefetch/ virtual directory with status/hints

Oracle review fixes applied:
- CollectionStore uses Mutex for thread safety
- FileAccessed event now includes file_id for canonical correlation
- JSON parse warnings in collection deserialization

130 tests pass (15 new tests added)
This commit is contained in:
Alexander
2026-05-13 07:21:28 +02:00
parent 3cb6dfcaf8
commit 34d05b7a49
18 changed files with 1933 additions and 0 deletions
+4
View File
@@ -6,6 +6,7 @@ edition.workspace = true
[dependencies]
musicfs-core = { path = "../musicfs-core" }
musicfs-cas = { path = "../musicfs-cas" }
musicfs-metadata = { path = "../musicfs-metadata" }
rusqlite = { workspace = true, features = ["bundled"] }
sled.workspace = true
tokio.workspace = true
@@ -13,6 +14,9 @@ tracing.workspace = true
thiserror.workspace = true
serde.workspace = true
rmp-serde.workspace = true
image.workspace = true
parking_lot.workspace = true
chrono.workspace = true
[dev-dependencies]
tempfile.workspace = true
+196
View File
@@ -0,0 +1,196 @@
use image::ImageFormat;
use musicfs_cas::CasStore;
use musicfs_core::ChunkHash;
use musicfs_metadata::artwork::{ArtSize, ArtType, Artwork};
use std::io::Cursor;
use std::path::Path;
use std::sync::Arc;
use tracing::debug;
/// Largest raw artwork payload, in bytes, that `ArtworkCache::store` accepts (10 * 1024 * 1024).
const MAX_ARTWORK_INPUT_SIZE: usize = 10 * 1024 * 1024;
/// Artwork cache: image bytes live in a [`CasStore`], while a SQLite `artwork` table maps
/// `(file_id, art_type)` to the hash of the stored bytes.
pub struct ArtworkCache {
/// Content-addressed store that holds the artwork bytes.
store: Arc<CasStore>,
/// Location of the SQLite database; every operation opens its own connection to this path.
db_path: std::path::PathBuf,
}
/// Row-shaped description of one cached artwork entry.
///
/// The fields mirror the columns of the `artwork` table. Nothing in the visible part of this
/// file constructs this type.
#[derive(Debug)]
pub struct CachedArtwork {
/// Identifier of the file the artwork belongs to.
pub file_id: i64,
/// Artwork kind as text; `ArtworkCache::store` writes "front", "back" or "other".
pub art_type: String,
/// Hash under which the image bytes are stored in the CAS.
pub chunk_hash: ChunkHash,
/// Image width (presumably pixels; confirm against the extractor).
pub width: u32,
/// Image height (presumably pixels; confirm against the extractor).
pub height: u32,
}
impl ArtworkCache {
/// Builds a cache on top of `store`, making sure the `artwork` index table exists in the
/// SQLite database at `db_path`.
///
/// The connection opened here is used for schema setup only and is dropped before returning;
/// later operations reopen the database from the remembered path.
///
/// # Errors
/// Returns `ArtworkError::Database` if the database cannot be opened or the table cannot be
/// created.
pub fn new(store: Arc<CasStore>, db_path: &Path) -> Result<Self, ArtworkError> {
    const CREATE_ARTWORK_TABLE: &str = "CREATE TABLE IF NOT EXISTS artwork (
id INTEGER PRIMARY KEY,
file_id INTEGER NOT NULL,
art_type TEXT NOT NULL,
chunk_hash TEXT NOT NULL,
width INTEGER NOT NULL,
height INTEGER NOT NULL,
UNIQUE(file_id, art_type)
)";

    let db_path = db_path.to_path_buf();
    let connection = rusqlite::Connection::open(&db_path)?;
    connection.execute(CREATE_ARTWORK_TABLE, [])?;
    Ok(Self { store, db_path })
}
/// Stores `artwork` for `file_id` and returns the hash of the stored bytes.
///
/// The bytes are written to the CAS first; the `(file_id, art_type)` index row is then
/// inserted or replaced on a blocking thread using a fresh SQLite connection.
///
/// # Errors
/// * `ArtworkError::ImageTooLarge` if the payload exceeds `MAX_ARTWORK_INPUT_SIZE`.
/// * `ArtworkError::Cas` if the CAS write fails.
/// * `ArtworkError::Database` if the index row cannot be written.
/// * `ArtworkError::SpawnBlocking` if the blocking task cannot be joined.
pub async fn store(&self, file_id: i64, artwork: &Artwork) -> Result<ChunkHash, ArtworkError> {
    let byte_len = artwork.data.len();
    if byte_len > MAX_ARTWORK_INPUT_SIZE {
        return Err(ArtworkError::ImageTooLarge(byte_len));
    }

    let hash = self.store.put(&artwork.data).await?;

    // Everything the blocking closure needs is copied out up front so it can be `'static`.
    let kind = String::from(match artwork.art_type {
        ArtType::Front => "front",
        ArtType::Back => "back",
        ArtType::Other => "other",
    });
    let (width, height) = (artwork.width, artwork.height);
    let hash_hex = hash.to_hex();
    let db_path = self.db_path.clone();

    let write_row = move || -> Result<(), ArtworkError> {
        let db = rusqlite::Connection::open(&db_path)?;
        db.execute(
            "INSERT OR REPLACE INTO artwork
(file_id, art_type, chunk_hash, width, height)
VALUES (?1, ?2, ?3, ?4, ?5)",
            rusqlite::params![file_id, kind, hash_hex, width, height],
        )?;
        Ok(())
    };

    match tokio::task::spawn_blocking(write_row).await {
        Ok(outcome) => outcome?,
        Err(join_err) => return Err(ArtworkError::SpawnBlocking(join_err.to_string())),
    }

    debug!("Cached artwork for file {}", file_id);
    Ok(hash)
}
pub async fn get(
&self,
file_id: i64,
art_type: &str,
size: ArtSize,
) -> Result<Option<Vec<u8>>, ArtworkError> {
let db_path = self.db_path.clone();
let art_type_clone = art_type.to_string();
let hash_hex: Option<String> = tokio::task::spawn_blocking(move || {
let db = rusqlite::Connection::open(&db_path)?;
db.query_row(
"SELECT chunk_hash FROM artwork WHERE file_id = ?1 AND art_type = ?2",
rusqlite::params![file_id, art_type_clone],
|row| row.get(0),
)
.ok()
.ok_or(ArtworkError::NotFound)
})
.await
.map_err(|e| ArtworkError::SpawnBlocking(e.to_string()))?
.ok();
match hash_hex {
Some(hex) => {
let hash = ChunkHash::from_hex(&hex).ok_or(ArtworkError::InvalidHash)?;
let data = self.store.get(&hash).await?;
match size {
ArtSize::Full => Ok(Some(data.to_vec())),
ArtSize::Thumbnail | ArtSize::Medium => {
let resized = self.resize_on_demand(&data, size)?;
Ok(Some(resized))
}
}
}
None => Ok(None),
}
}
/// Reports whether an index row exists for `file_id` / `art_type`.
///
/// The count query runs on a blocking thread with its own SQLite connection.
///
/// # Errors
/// * `ArtworkError::Database` if the database cannot be opened or queried.
/// * `ArtworkError::SpawnBlocking` if the blocking task cannot be joined.
pub async fn has(&self, file_id: i64, art_type: &str) -> Result<bool, ArtworkError> {
    let db_path = self.db_path.clone();
    let kind = art_type.to_string();

    let row_exists = move || -> Result<bool, ArtworkError> {
        let db = rusqlite::Connection::open(&db_path)?;
        let matching: i64 = db.query_row(
            "SELECT COUNT(*) FROM artwork WHERE file_id = ?1 AND art_type = ?2",
            rusqlite::params![file_id, kind],
            |row| row.get(0),
        )?;
        Ok(matching > 0)
    };

    match tokio::task::spawn_blocking(row_exists).await {
        Ok(answer) => answer,
        Err(join_err) => Err(ArtworkError::SpawnBlocking(join_err.to_string())),
    }
}
/// Produces a version of `data` whose width and height both fit within the limit for `size`.
///
/// The limit comes from `size.max_dimension()`, falling back to 300 when that is `None`.
/// Images already within the limit are returned byte-for-byte in their original encoding;
/// larger ones are shrunk with `thumbnail` and re-encoded as JPEG.
///
/// # Errors
/// * `ArtworkError::InvalidImage` if `data` cannot be decoded.
/// * `ArtworkError::ResizeFailed` if JPEG encoding fails.
fn resize_on_demand(&self, data: &[u8], size: ArtSize) -> Result<Vec<u8>, ArtworkError> {
    let max_dim = size.max_dimension().unwrap_or(300);
    let img = image::load_from_memory(data).map_err(|_| ArtworkError::InvalidImage)?;
    if img.width() <= max_dim && img.height() <= max_dim {
        return Ok(data.to_vec());
    }

    // JPEG has no alpha channel and the encoder rejects colour types it cannot represent
    // (e.g. 16-bit or RGBA sources), so normalise to 8-bit RGB before encoding.
    let rgb = image::DynamicImage::ImageRgb8(img.thumbnail(max_dim, max_dim).to_rgb8());

    let mut output = Vec::new();
    rgb.write_to(&mut Cursor::new(&mut output), ImageFormat::Jpeg)
        .map_err(|_| ArtworkError::ResizeFailed)?;
    Ok(output)
}
}
/// Failures reported by `ArtworkCache`.
#[derive(Debug, thiserror::Error)]
pub enum ArtworkError {
/// A SQLite operation failed.
#[error("database error: {0}")]
Database(#[from] rusqlite::Error),
/// The content-addressed store reported an error.
#[error("CAS error: {0}")]
Cas(#[from] musicfs_cas::CasError),
/// The hash text stored in the index could not be parsed by `ChunkHash::from_hex`.
#[error("invalid hash")]
InvalidHash,
/// No artwork matched the request.
#[error("artwork not found")]
NotFound,
/// The payload handed to `store` exceeded `MAX_ARTWORK_INPUT_SIZE`; carries the actual size.
#[error("image too large: {0} bytes (max 10MB)")]
ImageTooLarge(usize),
/// The cached bytes could not be decoded as an image.
#[error("invalid image data")]
InvalidImage,
/// The resized image could not be encoded.
#[error("resize failed")]
ResizeFailed,
/// A `spawn_blocking` task could not be joined; carries the join error's text.
#[error("spawn_blocking error: {0}")]
SpawnBlocking(String),
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the artwork input cap at ten mebibytes.
    #[test]
    fn test_max_artwork_size() {
        let ten_mib: usize = 10 * 1024 * 1024;
        assert_eq!(MAX_ARTWORK_INPUT_SIZE, ten_mib);
    }
}
+6
View File
@@ -1,11 +1,17 @@
mod artwork;
mod db;
mod eviction;
mod metadata;
mod patterns;
mod prefetch;
mod tree;
pub use artwork::{ArtworkCache, ArtworkError, CachedArtwork};
pub use db::Database;
pub use eviction::{EvictionError, EvictionPolicy, LruEviction};
pub use metadata::MetadataCache;
pub use patterns::{AccessContext, AccessPattern, PatternError, PatternStore};
pub use prefetch::{PrefetchConfig, PrefetchEngine, PrefetchHandle};
pub use tree::{
DirNode, FileNode, Inode, RefreshPolicy, TreeBuilder, VirtualNode, VirtualTree, ROOT_INODE,
};
@@ -0,0 +1,282 @@
use musicfs_core::FileId;
use parking_lot::{Mutex, RwLock};
use std::collections::HashMap;
use std::path::Path;
use std::time::{SystemTime, UNIX_EPOCH};
/// One observed file access.
///
/// Nothing in the visible part of this file constructs this type; `PatternStore::record`
/// works from its arguments directly.
#[derive(Debug, Clone)]
pub struct AccessPattern {
/// File that was accessed.
pub file_id: FileId,
/// When the access happened.
pub timestamp: SystemTime,
/// Extra metadata about the access.
pub context: AccessContext,
/// Hour bucket of the access (presumably 0-23, matching `PatternStore::record`; confirm).
pub hour_of_day: u8,
}
/// Optional metadata describing what was being played when a file was accessed.
///
/// `PatternStore::record` currently accepts this value but does not read it.
#[derive(Debug, Clone, Default)]
pub struct AccessContext {
/// Album the file belongs to, if known.
pub album_id: Option<i64>,
/// Track number within the album, if known.
pub track_number: Option<u32>,
/// Artist name, if known.
pub artist: Option<String>,
}
/// Access-pattern tracker backed by SQLite, with in-memory maps used to answer predictions.
pub struct PatternStore {
/// SQLite connection holding the `access_log` and `sequence_counts` tables.
db: Mutex<rusqlite::Connection>,
/// How often an access to the first file was immediately followed by the second.
sequence_counts: RwLock<HashMap<(FileId, FileId), u32>>,
/// Files accessed per hour bucket, in the order they were recorded.
time_patterns: RwLock<HashMap<u8, Vec<FileId>>>,
/// Retention window for `access_log`; `record` treats it as a number of days.
max_history: usize,
}
impl PatternStore {
pub fn new(db_path: &Path, max_history: usize) -> Result<Self, PatternError> {
let db = rusqlite::Connection::open(db_path)?;
db.execute(
"CREATE TABLE IF NOT EXISTS access_log (
id INTEGER PRIMARY KEY,
file_id INTEGER NOT NULL,
access_time INTEGER NOT NULL,
hour_of_day INTEGER NOT NULL
)",
[],
)?;
db.execute(
"CREATE INDEX IF NOT EXISTS idx_access_log_file ON access_log(file_id)",
[],
)?;
db.execute(
"CREATE INDEX IF NOT EXISTS idx_access_log_time ON access_log(access_time)",
[],
)?;
db.execute(
"CREATE TABLE IF NOT EXISTS sequence_counts (
from_file_id INTEGER NOT NULL,
to_file_id INTEGER NOT NULL,
count INTEGER NOT NULL DEFAULT 1,
PRIMARY KEY (from_file_id, to_file_id)
)",
[],
)?;
let sequence_counts = {
let mut map = HashMap::new();
let mut stmt = db.prepare("SELECT from_file_id, to_file_id, count FROM sequence_counts")?;
let rows = stmt.query_map([], |row| {
Ok((
(
FileId(row.get::<_, i64>(0)?),
FileId(row.get::<_, i64>(1)?),
),
row.get::<_, u32>(2)?,
))
})?;
for row in rows {
let (key, count) = row?;
map.insert(key, count);
}
map
};
Ok(Self {
db: Mutex::new(db),
sequence_counts: RwLock::new(sequence_counts),
time_patterns: RwLock::new(HashMap::new()),
max_history,
})
}
pub fn record(&self, file_id: FileId, _context: AccessContext) -> Result<(), PatternError> {
let now = SystemTime::now();
let timestamp = now.duration_since(UNIX_EPOCH).unwrap().as_secs() as i64;
let hour = (timestamp / 3600 % 24) as u8;
let db = self.db.lock();
db.execute(
"INSERT INTO access_log (file_id, access_time, hour_of_day) VALUES (?1, ?2, ?3)",
rusqlite::params![file_id.0, timestamp, hour],
)?;
{
let mut time_patterns = self.time_patterns.write();
time_patterns.entry(hour).or_default().push(file_id);
}
let prev_file_id: Option<i64> = db
.query_row(
"SELECT file_id FROM access_log WHERE id = (SELECT MAX(id) - 1 FROM access_log)",
[],
|row| row.get(0),
)
.ok();
if let Some(prev_id) = prev_file_id {
let prev = FileId(prev_id);
{
let mut sequences = self.sequence_counts.write();
*sequences.entry((prev, file_id)).or_insert(0) += 1;
}
db.execute(
"INSERT INTO sequence_counts (from_file_id, to_file_id, count)
VALUES (?1, ?2, 1)
ON CONFLICT(from_file_id, to_file_id) DO UPDATE SET count = count + 1",
rusqlite::params![prev_id, file_id.0],
)?;
}
let cutoff = timestamp - (self.max_history as i64 * 86400);
db.execute("DELETE FROM access_log WHERE access_time < ?1", [cutoff])?;
Ok(())
}
/// Returns up to `limit` files most often accessed directly after `current`.
///
/// Only transitions seen at least twice are considered. Results are ordered by descending
/// count; equal counts are ordered by ascending file id so the output (and which candidates
/// survive the `limit` cut) is the same on every call rather than depending on hash-map
/// iteration order.
pub fn predict_next(&self, current: FileId, limit: usize) -> Vec<FileId> {
    let mut predictions: Vec<(FileId, u32)> = {
        let sequences = self.sequence_counts.read();
        sequences
            .iter()
            .filter(|((from, _), count)| *from == current && **count >= 2)
            .map(|((_, to), count)| (*to, *count))
            .collect()
    };

    predictions.sort_unstable_by(|a, b| {
        b.1.cmp(&a.1).then_with(|| (a.0).0.cmp(&(b.0).0))
    });
    predictions.truncate(limit);
    predictions.into_iter().map(|(id, _)| id).collect()
}
/// Returns up to `limit` files recorded in the given `hour` bucket, newest first.
///
/// Entries are not de-duplicated: a file recorded several times in that hour can appear
/// several times. An hour with no recorded accesses yields an empty vector.
pub fn predict_for_time(&self, hour: u8, limit: usize) -> Vec<FileId> {
    let by_hour = self.time_patterns.read();
    match by_hour.get(&hour) {
        Some(history) => {
            // The newest entries sit at the tail; take that tail and flip it.
            let tail_start = history.len().saturating_sub(limit);
            history[tail_start..].iter().rev().copied().collect()
        }
        None => Vec::new(),
    }
}
pub fn recently_played(&self, days: u32) -> Result<Vec<FileId>, PatternError> {
let cutoff = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_secs() as i64
- (days as i64 * 86400);
let db = self.db.lock();
let mut stmt = db.prepare(
"SELECT DISTINCT file_id FROM access_log WHERE access_time >= ?1 ORDER BY access_time DESC",
)?;
let files: Vec<FileId> = stmt
.query_map([cutoff], |row| Ok(FileId(row.get(0)?)))?
.filter_map(|r| r.ok())
.collect();
Ok(files)
}
pub fn most_played(&self, limit: u32) -> Result<Vec<FileId>, PatternError> {
let db = self.db.lock();
let mut stmt = db.prepare(
"SELECT file_id, COUNT(*) as play_count FROM access_log
GROUP BY file_id ORDER BY play_count DESC LIMIT ?1",
)?;
let files: Vec<FileId> = stmt
.query_map([limit], |row| Ok(FileId(row.get(0)?)))?
.filter_map(|r| r.ok())
.collect();
Ok(files)
}
}
/// Failures reported by `PatternStore`.
#[derive(Debug, thiserror::Error)]
pub enum PatternError {
/// A SQLite operation failed.
#[error("database error: {0}")]
Database(#[from] rusqlite::Error),
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Opens a store on `patterns.db` inside `dir` with the 30-day history every test uses.
    fn open_store(dir: &TempDir) -> PatternStore {
        PatternStore::new(&dir.path().join("patterns.db"), 30).unwrap()
    }

    /// Records each id in `ids`, in order, and repeats the whole pass `rounds` times.
    fn replay(store: &PatternStore, ids: &[i64], rounds: usize) {
        for _ in 0..rounds {
            for &id in ids {
                store.record(FileId(id), AccessContext::default()).unwrap();
            }
        }
    }

    /// A repeated 1 -> 2 -> 3 sequence makes 2 the top prediction after 1.
    #[test]
    fn test_pattern_prediction() {
        let dir = TempDir::new().unwrap();
        let store = open_store(&dir);
        replay(&store, &[1, 2, 3], 5);

        let predictions = store.predict_next(FileId(1), 3);
        assert!(!predictions.is_empty());
        assert_eq!(predictions[0], FileId(2));
    }

    /// Transition counts written by one store instance are visible to the next one.
    #[test]
    fn test_pattern_persistence() {
        let dir = TempDir::new().unwrap();
        {
            let writer = open_store(&dir);
            replay(&writer, &[1, 2], 3);
        }
        {
            let reader = open_store(&dir);
            let predictions = reader.predict_next(FileId(1), 3);
            assert!(!predictions.is_empty());
            assert_eq!(predictions[0], FileId(2));
        }
    }

    /// Files recorded just now show up in the seven-day window.
    #[test]
    fn test_recently_played() {
        let dir = TempDir::new().unwrap();
        let store = open_store(&dir);
        replay(&store, &[100, 200], 1);

        let recent = store.recently_played(7).unwrap();
        assert!(recent.contains(&FileId(100)));
        assert!(recent.contains(&FileId(200)));
    }

    /// The file with the most recorded accesses ranks first.
    #[test]
    fn test_most_played() {
        let dir = TempDir::new().unwrap();
        let store = open_store(&dir);
        replay(&store, &[1], 5);
        replay(&store, &[2], 2);

        let most = store.most_played(10).unwrap();
        assert_eq!(most[0], FileId(1));
    }
}
@@ -0,0 +1,202 @@
use crate::patterns::{AccessContext, PatternStore};
use musicfs_cas::ContentFetcher;
use musicfs_core::{Event, EventBus, FileId};
use parking_lot::Mutex as ParkingMutex;
use std::collections::HashSet;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::Semaphore;
use tokio::task::JoinHandle;
use tracing::{debug, info, warn};
/// Default `PrefetchConfig::lookahead`: how many predicted files to request per access.
const DEFAULT_PREFETCH_LOOKAHEAD: usize = 3;
/// Default `PrefetchConfig::max_concurrent`: initial number of prefetch permits.
const DEFAULT_MAX_CONCURRENT: usize = 2;
/// Default `PrefetchConfig::cooldown`, in milliseconds.
const DEFAULT_COOLDOWN_MS: u64 = 100;
/// Tunables for `PrefetchEngine`.
#[derive(Debug, Clone)]
pub struct PrefetchConfig {
/// Maximum number of predictions requested from the pattern store per file access.
pub lookahead: usize,
/// Number of permits the engine's semaphore is created with.
pub max_concurrent: usize,
/// Pause taken by the event loop after handling a file access.
pub cooldown: Duration,
/// When `false`, file-access events are neither recorded nor used for prefetching.
pub enabled: bool,
}
impl Default for PrefetchConfig {
fn default() -> Self {
Self {
lookahead: DEFAULT_PREFETCH_LOOKAHEAD,
max_concurrent: DEFAULT_MAX_CONCURRENT,
cooldown: Duration::from_millis(DEFAULT_COOLDOWN_MS),
enabled: true,
}
}
}
/// Listens for file-access events and warms the cache with the files predicted to come next.
pub struct PrefetchEngine {
/// Settings; `start` clones them once for the event loop it spawns.
config: PrefetchConfig,
/// Fetcher whose `ensure_cached` performs the actual prefetch.
fetcher: Arc<ContentFetcher>,
/// Files that currently have a prefetch pending, used to skip duplicates.
in_flight: Arc<ParkingMutex<HashSet<FileId>>>,
/// Permits bounding how many prefetches run at once.
semaphore: Arc<Semaphore>,
/// Flag polled by the event loop; shared with `PrefetchHandle`.
running: Arc<AtomicBool>,
}
/// Handle to a running prefetch loop, returned by `PrefetchEngine::start`.
pub struct PrefetchHandle {
/// Join handle of the spawned event-loop task.
handle: JoinHandle<()>,
/// Same flag the event loop polls; clearing it asks the loop to exit.
running: Arc<AtomicBool>,
}
impl PrefetchHandle {
pub async fn stop(self) {
self.running.store(false, Ordering::SeqCst);
let _ = self.handle.await;
}
}
impl PrefetchEngine {
/// Creates a stopped engine.
///
/// The semaphore gets `config.max_concurrent` permits. `_pattern_store` is accepted but not
/// kept; the store the engine actually uses is the one passed to `start`.
pub fn new(
    config: PrefetchConfig,
    _pattern_store: Arc<PatternStore>,
    fetcher: Arc<ContentFetcher>,
) -> Self {
    let permits = config.max_concurrent;
    let in_flight = Arc::new(ParkingMutex::new(HashSet::new()));
    let running = Arc::new(AtomicBool::new(false));
    Self {
        semaphore: Arc::new(Semaphore::new(permits)),
        in_flight,
        running,
        fetcher,
        config,
    }
}
pub fn start(
self: Arc<Self>,
event_bus: Arc<EventBus>,
pattern_store: Arc<PatternStore>,
) -> PrefetchHandle {
self.running.store(true, Ordering::SeqCst);
let running = self.running.clone();
let config = self.config.clone();
let fetcher = self.fetcher.clone();
let in_flight = self.in_flight.clone();
let semaphore = self.semaphore.clone();
let running_inner = running.clone();
let handle = tokio::spawn(async move {
let mut rx = event_bus.subscribe();
while running_inner.load(Ordering::SeqCst) {
match tokio::time::timeout(Duration::from_secs(1), rx.recv()).await {
Ok(Ok(event)) => {
if let Event::FileAccessed { file_id, .. } = event {
if config.enabled {
let ctx = AccessContext::default();
if let Err(e) = pattern_store.record(file_id, ctx) {
warn!("Failed to record access pattern: {}", e);
continue;
}
let predictions =
pattern_store.predict_next(file_id, config.lookahead);
for predicted_id in predictions {
prefetch_file(
predicted_id,
&fetcher,
&in_flight,
&semaphore,
)
.await;
}
tokio::time::sleep(config.cooldown).await;
}
}
}
Ok(Err(_)) => break,
Err(_) => continue,
}
}
info!("Prefetch engine stopped");
});
PrefetchHandle { handle, running }
}
pub fn is_running(&self) -> bool {
self.running.load(Ordering::SeqCst)
}
/// Number of files that currently have a prefetch pending.
pub fn in_flight_count(&self) -> usize {
    let pending = self.in_flight.lock();
    pending.len()
}
/// Replaces the stored configuration.
///
/// An event loop that `start` has already spawned keeps the clone it took at that point.
pub fn update_config(&mut self, config: PrefetchConfig) {
    let _previous = std::mem::replace(&mut self.config, config);
}
}
async fn prefetch_file(
file_id: FileId,
fetcher: &Arc<ContentFetcher>,
in_flight: &Arc<ParkingMutex<HashSet<FileId>>>,
semaphore: &Arc<Semaphore>,
) {
{
let mut guard = in_flight.lock();
if guard.contains(&file_id) {
debug!("Skipping prefetch for {:?} - already in flight", file_id);
return;
}
guard.insert(file_id);
}
let permit = match semaphore.clone().try_acquire_owned() {
Ok(p) => p,
Err(_) => {
debug!("Skipping prefetch for {:?} - concurrency limit", file_id);
in_flight.lock().remove(&file_id);
return;
}
};
let fetcher = fetcher.clone();
let in_flight = in_flight.clone();
tokio::spawn(async move {
debug!("Prefetching file {:?}", file_id);
match fetcher.ensure_cached(file_id).await {
Ok(manifest) => {
info!(
"Prefetched {:?}: {} chunks, {} bytes",
file_id,
manifest.chunks.len(),
manifest.total_size
);
}
Err(e) => {
debug!("Prefetch failed for {:?}: {}", file_id, e);
}
}
in_flight.lock().remove(&file_id);
drop(permit);
});
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration is enabled, looks three files ahead and allows two
    /// concurrent prefetches.
    #[test]
    fn test_prefetch_config_defaults() {
        let PrefetchConfig {
            lookahead,
            max_concurrent,
            enabled,
            ..
        } = PrefetchConfig::default();

        assert_eq!(lookahead, 3);
        assert_eq!(max_concurrent, 2);
        assert!(enabled);
    }
}