Add Week 9 Smart Features: collections, artwork, predictive prefetch

Smart Collections (musicfs-search/src/collections.rs):
- CollectionStore with thread-safe Mutex<Connection>
- CollectionQuery enum: Match, DateRange, RecentlyAdded/Played, MostPlayed, Genre, Compound
- Builtin collections for Recently Added, 80s/90s Music

Artwork Extraction & Caching:
- ArtworkExtractor using symphonia Visual (musicfs-metadata)
- ArtworkCache with CAS storage + on-demand resize (musicfs-cache)
- ArtType: Front/Back/Other, ArtSize: Thumbnail/Medium/Full

Predictive Prefetching:
- PatternStore tracks access patterns with sequence prediction
- PrefetchEngine listens to FileAccessed events, prefetches predictions
- PrefetchOps exposes /.prefetch/ virtual directory with status/hints

Oracle review fixes applied:
- CollectionStore uses Mutex for thread safety
- FileAccessed event now includes file_id for canonical correlation
- JSON parse warnings in collection deserialization

130 tests pass (15 new tests added)
This commit is contained in:
Alexander
2026-05-13 07:21:28 +02:00
parent 3cb6dfcaf8
commit 34d05b7a49
18 changed files with 1933 additions and 0 deletions
+4
View File
@@ -6,6 +6,7 @@ edition.workspace = true
[dependencies]
musicfs-core = { path = "../musicfs-core" }
musicfs-cas = { path = "../musicfs-cas" }
musicfs-metadata = { path = "../musicfs-metadata" }
rusqlite = { workspace = true, features = ["bundled"] }
sled.workspace = true
tokio.workspace = true
@@ -13,6 +14,9 @@ tracing.workspace = true
thiserror.workspace = true
serde.workspace = true
rmp-serde.workspace = true
image.workspace = true
parking_lot.workspace = true
chrono.workspace = true
[dev-dependencies]
tempfile.workspace = true
+196
View File
@@ -0,0 +1,196 @@
use image::ImageFormat;
use musicfs_cas::CasStore;
use musicfs_core::ChunkHash;
use musicfs_metadata::artwork::{ArtSize, ArtType, Artwork};
use std::io::Cursor;
use std::path::Path;
use std::sync::Arc;
use tracing::debug;
const MAX_ARTWORK_INPUT_SIZE: usize = 10 * 1024 * 1024;
pub struct ArtworkCache {
store: Arc<CasStore>,
db_path: std::path::PathBuf,
}
#[derive(Debug)]
pub struct CachedArtwork {
pub file_id: i64,
pub art_type: String,
pub chunk_hash: ChunkHash,
pub width: u32,
pub height: u32,
}
impl ArtworkCache {
pub fn new(store: Arc<CasStore>, db_path: &Path) -> Result<Self, ArtworkError> {
let db = rusqlite::Connection::open(db_path)?;
db.execute(
"CREATE TABLE IF NOT EXISTS artwork (
id INTEGER PRIMARY KEY,
file_id INTEGER NOT NULL,
art_type TEXT NOT NULL,
chunk_hash TEXT NOT NULL,
width INTEGER NOT NULL,
height INTEGER NOT NULL,
UNIQUE(file_id, art_type)
)",
[],
)?;
Ok(Self {
store,
db_path: db_path.to_path_buf(),
})
}
pub async fn store(&self, file_id: i64, artwork: &Artwork) -> Result<ChunkHash, ArtworkError> {
if artwork.data.len() > MAX_ARTWORK_INPUT_SIZE {
return Err(ArtworkError::ImageTooLarge(artwork.data.len()));
}
let hash = self.store.put(&artwork.data).await?;
let art_type_str = match artwork.art_type {
ArtType::Front => "front",
ArtType::Back => "back",
ArtType::Other => "other",
};
let db_path = self.db_path.clone();
let art_type_clone = art_type_str.to_string();
let hash_hex = hash.to_hex();
let width = artwork.width;
let height = artwork.height;
tokio::task::spawn_blocking(move || {
let db = rusqlite::Connection::open(&db_path)?;
db.execute(
"INSERT OR REPLACE INTO artwork
(file_id, art_type, chunk_hash, width, height)
VALUES (?1, ?2, ?3, ?4, ?5)",
rusqlite::params![file_id, art_type_clone, hash_hex, width, height],
)?;
Ok::<_, ArtworkError>(())
})
.await
.map_err(|e| ArtworkError::SpawnBlocking(e.to_string()))??;
debug!("Cached artwork for file {}", file_id);
Ok(hash)
}
pub async fn get(
&self,
file_id: i64,
art_type: &str,
size: ArtSize,
) -> Result<Option<Vec<u8>>, ArtworkError> {
let db_path = self.db_path.clone();
let art_type_clone = art_type.to_string();
let hash_hex: Option<String> = tokio::task::spawn_blocking(move || {
let db = rusqlite::Connection::open(&db_path)?;
db.query_row(
"SELECT chunk_hash FROM artwork WHERE file_id = ?1 AND art_type = ?2",
rusqlite::params![file_id, art_type_clone],
|row| row.get(0),
)
.ok()
.ok_or(ArtworkError::NotFound)
})
.await
.map_err(|e| ArtworkError::SpawnBlocking(e.to_string()))?
.ok();
match hash_hex {
Some(hex) => {
let hash = ChunkHash::from_hex(&hex).ok_or(ArtworkError::InvalidHash)?;
let data = self.store.get(&hash).await?;
match size {
ArtSize::Full => Ok(Some(data.to_vec())),
ArtSize::Thumbnail | ArtSize::Medium => {
let resized = self.resize_on_demand(&data, size)?;
Ok(Some(resized))
}
}
}
None => Ok(None),
}
}
pub async fn has(&self, file_id: i64, art_type: &str) -> Result<bool, ArtworkError> {
let db_path = self.db_path.clone();
let art_type_clone = art_type.to_string();
tokio::task::spawn_blocking(move || {
let db = rusqlite::Connection::open(&db_path)?;
let count: i64 = db.query_row(
"SELECT COUNT(*) FROM artwork WHERE file_id = ?1 AND art_type = ?2",
rusqlite::params![file_id, art_type_clone],
|row| row.get(0),
)?;
Ok(count > 0)
})
.await
.map_err(|e| ArtworkError::SpawnBlocking(e.to_string()))?
}
fn resize_on_demand(&self, data: &[u8], size: ArtSize) -> Result<Vec<u8>, ArtworkError> {
let max_dim = size.max_dimension().unwrap_or(300);
let img = image::load_from_memory(data).map_err(|_| ArtworkError::InvalidImage)?;
if img.width() <= max_dim && img.height() <= max_dim {
return Ok(data.to_vec());
}
let resized = img.thumbnail(max_dim, max_dim);
let mut output = Vec::new();
let mut cursor = Cursor::new(&mut output);
resized
.write_to(&mut cursor, ImageFormat::Jpeg)
.map_err(|_| ArtworkError::ResizeFailed)?;
Ok(output)
}
}
#[derive(Debug, thiserror::Error)]
pub enum ArtworkError {
#[error("database error: {0}")]
Database(#[from] rusqlite::Error),
#[error("CAS error: {0}")]
Cas(#[from] musicfs_cas::CasError),
#[error("invalid hash")]
InvalidHash,
#[error("artwork not found")]
NotFound,
#[error("image too large: {0} bytes (max 10MB)")]
ImageTooLarge(usize),
#[error("invalid image data")]
InvalidImage,
#[error("resize failed")]
ResizeFailed,
#[error("spawn_blocking error: {0}")]
SpawnBlocking(String),
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_max_artwork_size() {
assert_eq!(MAX_ARTWORK_INPUT_SIZE, 10 * 1024 * 1024);
}
}
+6
View File
@@ -1,11 +1,17 @@
mod artwork;
mod db;
mod eviction;
mod metadata;
mod patterns;
mod prefetch;
mod tree;
pub use artwork::{ArtworkCache, ArtworkError, CachedArtwork};
pub use db::Database;
pub use eviction::{EvictionError, EvictionPolicy, LruEviction};
pub use metadata::MetadataCache;
pub use patterns::{AccessContext, AccessPattern, PatternError, PatternStore};
pub use prefetch::{PrefetchConfig, PrefetchEngine, PrefetchHandle};
pub use tree::{
DirNode, FileNode, Inode, RefreshPolicy, TreeBuilder, VirtualNode, VirtualTree, ROOT_INODE,
};
@@ -0,0 +1,282 @@
use musicfs_core::FileId;
use parking_lot::{Mutex, RwLock};
use std::collections::HashMap;
use std::path::Path;
use std::time::{SystemTime, UNIX_EPOCH};
#[derive(Debug, Clone)]
pub struct AccessPattern {
pub file_id: FileId,
pub timestamp: SystemTime,
pub context: AccessContext,
pub hour_of_day: u8,
}
#[derive(Debug, Clone, Default)]
pub struct AccessContext {
pub album_id: Option<i64>,
pub track_number: Option<u32>,
pub artist: Option<String>,
}
pub struct PatternStore {
db: Mutex<rusqlite::Connection>,
sequence_counts: RwLock<HashMap<(FileId, FileId), u32>>,
time_patterns: RwLock<HashMap<u8, Vec<FileId>>>,
max_history: usize,
}
impl PatternStore {
pub fn new(db_path: &Path, max_history: usize) -> Result<Self, PatternError> {
let db = rusqlite::Connection::open(db_path)?;
db.execute(
"CREATE TABLE IF NOT EXISTS access_log (
id INTEGER PRIMARY KEY,
file_id INTEGER NOT NULL,
access_time INTEGER NOT NULL,
hour_of_day INTEGER NOT NULL
)",
[],
)?;
db.execute(
"CREATE INDEX IF NOT EXISTS idx_access_log_file ON access_log(file_id)",
[],
)?;
db.execute(
"CREATE INDEX IF NOT EXISTS idx_access_log_time ON access_log(access_time)",
[],
)?;
db.execute(
"CREATE TABLE IF NOT EXISTS sequence_counts (
from_file_id INTEGER NOT NULL,
to_file_id INTEGER NOT NULL,
count INTEGER NOT NULL DEFAULT 1,
PRIMARY KEY (from_file_id, to_file_id)
)",
[],
)?;
let sequence_counts = {
let mut map = HashMap::new();
let mut stmt = db.prepare("SELECT from_file_id, to_file_id, count FROM sequence_counts")?;
let rows = stmt.query_map([], |row| {
Ok((
(
FileId(row.get::<_, i64>(0)?),
FileId(row.get::<_, i64>(1)?),
),
row.get::<_, u32>(2)?,
))
})?;
for row in rows {
let (key, count) = row?;
map.insert(key, count);
}
map
};
Ok(Self {
db: Mutex::new(db),
sequence_counts: RwLock::new(sequence_counts),
time_patterns: RwLock::new(HashMap::new()),
max_history,
})
}
pub fn record(&self, file_id: FileId, _context: AccessContext) -> Result<(), PatternError> {
let now = SystemTime::now();
let timestamp = now.duration_since(UNIX_EPOCH).unwrap().as_secs() as i64;
let hour = (timestamp / 3600 % 24) as u8;
let db = self.db.lock();
db.execute(
"INSERT INTO access_log (file_id, access_time, hour_of_day) VALUES (?1, ?2, ?3)",
rusqlite::params![file_id.0, timestamp, hour],
)?;
{
let mut time_patterns = self.time_patterns.write();
time_patterns.entry(hour).or_default().push(file_id);
}
let prev_file_id: Option<i64> = db
.query_row(
"SELECT file_id FROM access_log WHERE id = (SELECT MAX(id) - 1 FROM access_log)",
[],
|row| row.get(0),
)
.ok();
if let Some(prev_id) = prev_file_id {
let prev = FileId(prev_id);
{
let mut sequences = self.sequence_counts.write();
*sequences.entry((prev, file_id)).or_insert(0) += 1;
}
db.execute(
"INSERT INTO sequence_counts (from_file_id, to_file_id, count)
VALUES (?1, ?2, 1)
ON CONFLICT(from_file_id, to_file_id) DO UPDATE SET count = count + 1",
rusqlite::params![prev_id, file_id.0],
)?;
}
let cutoff = timestamp - (self.max_history as i64 * 86400);
db.execute("DELETE FROM access_log WHERE access_time < ?1", [cutoff])?;
Ok(())
}
pub fn predict_next(&self, current: FileId, limit: usize) -> Vec<FileId> {
let sequences = self.sequence_counts.read();
let mut predictions: Vec<_> = sequences
.iter()
.filter(|((from, _), count)| *from == current && **count >= 2)
.map(|((_, to), count)| (*to, *count))
.collect();
predictions.sort_by(|a, b| b.1.cmp(&a.1));
predictions
.into_iter()
.take(limit)
.map(|(id, _)| id)
.collect()
}
pub fn predict_for_time(&self, hour: u8, limit: usize) -> Vec<FileId> {
let time_patterns = self.time_patterns.read();
time_patterns
.get(&hour)
.map(|files| files.iter().rev().take(limit).copied().collect())
.unwrap_or_default()
}
pub fn recently_played(&self, days: u32) -> Result<Vec<FileId>, PatternError> {
let cutoff = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_secs() as i64
- (days as i64 * 86400);
let db = self.db.lock();
let mut stmt = db.prepare(
"SELECT DISTINCT file_id FROM access_log WHERE access_time >= ?1 ORDER BY access_time DESC",
)?;
let files: Vec<FileId> = stmt
.query_map([cutoff], |row| Ok(FileId(row.get(0)?)))?
.filter_map(|r| r.ok())
.collect();
Ok(files)
}
pub fn most_played(&self, limit: u32) -> Result<Vec<FileId>, PatternError> {
let db = self.db.lock();
let mut stmt = db.prepare(
"SELECT file_id, COUNT(*) as play_count FROM access_log
GROUP BY file_id ORDER BY play_count DESC LIMIT ?1",
)?;
let files: Vec<FileId> = stmt
.query_map([limit], |row| Ok(FileId(row.get(0)?)))?
.filter_map(|r| r.ok())
.collect();
Ok(files)
}
}
#[derive(Debug, thiserror::Error)]
pub enum PatternError {
#[error("database error: {0}")]
Database(#[from] rusqlite::Error),
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
#[test]
fn test_pattern_prediction() {
let dir = TempDir::new().unwrap();
let db_path = dir.path().join("patterns.db");
let store = PatternStore::new(&db_path, 30).unwrap();
let ctx = AccessContext::default();
for _ in 0..5 {
store.record(FileId(1), ctx.clone()).unwrap();
store.record(FileId(2), ctx.clone()).unwrap();
store.record(FileId(3), ctx.clone()).unwrap();
}
let predictions = store.predict_next(FileId(1), 3);
assert!(!predictions.is_empty());
assert_eq!(predictions[0], FileId(2));
}
#[test]
fn test_pattern_persistence() {
let dir = TempDir::new().unwrap();
let db_path = dir.path().join("patterns.db");
let ctx = AccessContext::default();
{
let store = PatternStore::new(&db_path, 30).unwrap();
for _ in 0..3 {
store.record(FileId(1), ctx.clone()).unwrap();
store.record(FileId(2), ctx.clone()).unwrap();
}
}
{
let store = PatternStore::new(&db_path, 30).unwrap();
let predictions = store.predict_next(FileId(1), 3);
assert!(!predictions.is_empty());
assert_eq!(predictions[0], FileId(2));
}
}
#[test]
fn test_recently_played() {
let dir = TempDir::new().unwrap();
let db_path = dir.path().join("patterns.db");
let store = PatternStore::new(&db_path, 30).unwrap();
let ctx = AccessContext::default();
store.record(FileId(100), ctx.clone()).unwrap();
store.record(FileId(200), ctx.clone()).unwrap();
let recent = store.recently_played(7).unwrap();
assert!(recent.contains(&FileId(100)));
assert!(recent.contains(&FileId(200)));
}
#[test]
fn test_most_played() {
let dir = TempDir::new().unwrap();
let db_path = dir.path().join("patterns.db");
let store = PatternStore::new(&db_path, 30).unwrap();
let ctx = AccessContext::default();
for _ in 0..5 {
store.record(FileId(1), ctx.clone()).unwrap();
}
for _ in 0..2 {
store.record(FileId(2), ctx.clone()).unwrap();
}
let most = store.most_played(10).unwrap();
assert_eq!(most[0], FileId(1));
}
}
@@ -0,0 +1,202 @@
use crate::patterns::{AccessContext, PatternStore};
use musicfs_cas::ContentFetcher;
use musicfs_core::{Event, EventBus, FileId};
use parking_lot::Mutex as ParkingMutex;
use std::collections::HashSet;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::Semaphore;
use tokio::task::JoinHandle;
use tracing::{debug, info, warn};
const DEFAULT_PREFETCH_LOOKAHEAD: usize = 3;
const DEFAULT_MAX_CONCURRENT: usize = 2;
const DEFAULT_COOLDOWN_MS: u64 = 100;
#[derive(Debug, Clone)]
pub struct PrefetchConfig {
pub lookahead: usize,
pub max_concurrent: usize,
pub cooldown: Duration,
pub enabled: bool,
}
impl Default for PrefetchConfig {
fn default() -> Self {
Self {
lookahead: DEFAULT_PREFETCH_LOOKAHEAD,
max_concurrent: DEFAULT_MAX_CONCURRENT,
cooldown: Duration::from_millis(DEFAULT_COOLDOWN_MS),
enabled: true,
}
}
}
pub struct PrefetchEngine {
config: PrefetchConfig,
fetcher: Arc<ContentFetcher>,
in_flight: Arc<ParkingMutex<HashSet<FileId>>>,
semaphore: Arc<Semaphore>,
running: Arc<AtomicBool>,
}
pub struct PrefetchHandle {
handle: JoinHandle<()>,
running: Arc<AtomicBool>,
}
impl PrefetchHandle {
pub async fn stop(self) {
self.running.store(false, Ordering::SeqCst);
let _ = self.handle.await;
}
}
impl PrefetchEngine {
pub fn new(
config: PrefetchConfig,
_pattern_store: Arc<PatternStore>,
fetcher: Arc<ContentFetcher>,
) -> Self {
let semaphore = Arc::new(Semaphore::new(config.max_concurrent));
Self {
config,
fetcher,
in_flight: Arc::new(ParkingMutex::new(HashSet::new())),
semaphore,
running: Arc::new(AtomicBool::new(false)),
}
}
pub fn start(
self: Arc<Self>,
event_bus: Arc<EventBus>,
pattern_store: Arc<PatternStore>,
) -> PrefetchHandle {
self.running.store(true, Ordering::SeqCst);
let running = self.running.clone();
let config = self.config.clone();
let fetcher = self.fetcher.clone();
let in_flight = self.in_flight.clone();
let semaphore = self.semaphore.clone();
let running_inner = running.clone();
let handle = tokio::spawn(async move {
let mut rx = event_bus.subscribe();
while running_inner.load(Ordering::SeqCst) {
match tokio::time::timeout(Duration::from_secs(1), rx.recv()).await {
Ok(Ok(event)) => {
if let Event::FileAccessed { file_id, .. } = event {
if config.enabled {
let ctx = AccessContext::default();
if let Err(e) = pattern_store.record(file_id, ctx) {
warn!("Failed to record access pattern: {}", e);
continue;
}
let predictions =
pattern_store.predict_next(file_id, config.lookahead);
for predicted_id in predictions {
prefetch_file(
predicted_id,
&fetcher,
&in_flight,
&semaphore,
)
.await;
}
tokio::time::sleep(config.cooldown).await;
}
}
}
Ok(Err(_)) => break,
Err(_) => continue,
}
}
info!("Prefetch engine stopped");
});
PrefetchHandle { handle, running }
}
pub fn is_running(&self) -> bool {
self.running.load(Ordering::SeqCst)
}
pub fn in_flight_count(&self) -> usize {
self.in_flight.lock().len()
}
pub fn update_config(&mut self, config: PrefetchConfig) {
self.config = config;
}
}
async fn prefetch_file(
file_id: FileId,
fetcher: &Arc<ContentFetcher>,
in_flight: &Arc<ParkingMutex<HashSet<FileId>>>,
semaphore: &Arc<Semaphore>,
) {
{
let mut guard = in_flight.lock();
if guard.contains(&file_id) {
debug!("Skipping prefetch for {:?} - already in flight", file_id);
return;
}
guard.insert(file_id);
}
let permit = match semaphore.clone().try_acquire_owned() {
Ok(p) => p,
Err(_) => {
debug!("Skipping prefetch for {:?} - concurrency limit", file_id);
in_flight.lock().remove(&file_id);
return;
}
};
let fetcher = fetcher.clone();
let in_flight = in_flight.clone();
tokio::spawn(async move {
debug!("Prefetching file {:?}", file_id);
match fetcher.ensure_cached(file_id).await {
Ok(manifest) => {
info!(
"Prefetched {:?}: {} chunks, {} bytes",
file_id,
manifest.chunks.len(),
manifest.total_size
);
}
Err(e) => {
debug!("Prefetch failed for {:?}: {}", file_id, e);
}
}
in_flight.lock().remove(&file_id);
drop(permit);
});
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_prefetch_config_defaults() {
let config = PrefetchConfig::default();
assert_eq!(config.lookahead, 3);
assert_eq!(config.max_concurrent, 2);
assert!(config.enabled);
}
}
@@ -129,6 +129,7 @@ impl ContentFetcher {
pub fn emit_access_event(&self, meta: &FileMeta, offset: u64, size: u32) {
if let Some(bus) = &self.event_bus {
bus.publish(Event::FileAccessed {
file_id: meta.id,
path: meta.virtual_path.clone(),
origin_id: meta.real_path.origin_id.clone(),
offset,
@@ -40,6 +40,7 @@ pub enum Event {
path: VirtualPath,
},
FileAccessed {
file_id: FileId,
path: VirtualPath,
origin_id: OriginId,
offset: u64,
@@ -1,3 +1,5 @@
mod prefetch;
mod search;
pub use prefetch::PrefetchOps;
pub use search::SearchOps;
@@ -0,0 +1,293 @@
use fuser::{FileAttr, FileType, ReplyAttr, ReplyData, ReplyDirectory, ReplyEntry};
use musicfs_cache::{PatternStore, PrefetchConfig, PrefetchEngine};
use musicfs_cas::ContentFetcher;
use musicfs_core::{EventBus, FileId};
use std::sync::Arc;
use std::time::{Duration, SystemTime};
const PREFETCH_DIR_INODE: u64 = 0xFFFF_FFFF_0000_0002;
const PREFETCH_STATUS_INODE: u64 = 0xFFFF_FFFF_0000_0003;
const PREFETCH_HINTS_BASE: u64 = 0xFFFF_FFFF_2000_0000;
pub struct PrefetchOps {
pattern_store: Arc<PatternStore>,
engine: Option<Arc<PrefetchEngine>>,
uid: u32,
gid: u32,
}
impl PrefetchOps {
pub fn new(pattern_store: Arc<PatternStore>, uid: u32, gid: u32) -> Self {
Self {
pattern_store,
engine: None,
uid,
gid,
}
}
pub fn with_engine(
pattern_store: Arc<PatternStore>,
fetcher: Arc<ContentFetcher>,
config: PrefetchConfig,
uid: u32,
gid: u32,
) -> Self {
let engine = Arc::new(PrefetchEngine::new(config, pattern_store.clone(), fetcher));
Self {
pattern_store,
engine: Some(engine),
uid,
gid,
}
}
pub fn start_engine(
&self,
event_bus: Arc<EventBus>,
) -> Option<musicfs_cache::PrefetchHandle> {
self.engine
.as_ref()
.map(|e| e.clone().start(event_bus, self.pattern_store.clone()))
}
pub fn is_prefetch_dir_name(name: &str) -> bool {
name == ".prefetch"
}
pub fn is_prefetch_inode(inode: u64) -> bool {
inode == PREFETCH_DIR_INODE
|| inode == PREFETCH_STATUS_INODE
|| inode >= PREFETCH_HINTS_BASE
}
pub fn prefetch_dir_inode() -> u64 {
PREFETCH_DIR_INODE
}
pub fn lookup_prefetch_dir(&self, reply: ReplyEntry) {
let attr = self.dir_attr(PREFETCH_DIR_INODE);
reply.entry(&Duration::from_secs(60), &attr, 0);
}
pub fn lookup_status(&self, reply: ReplyEntry) {
let status = self.generate_status();
let attr = self.file_attr(PREFETCH_STATUS_INODE, status.len() as u64);
reply.entry(&Duration::from_secs(1), &attr, 0);
}
pub fn lookup_hint(&self, name: &str, reply: ReplyEntry) {
if let Some(inode) = self.hint_name_to_inode(name) {
let attr = self.file_attr(inode, 256);
reply.entry(&Duration::from_secs(1), &attr, 0);
} else {
reply.error(libc::ENOENT);
}
}
pub fn getattr_prefetch_dir(&self, reply: ReplyAttr) {
let attr = self.dir_attr(PREFETCH_DIR_INODE);
reply.attr(&Duration::from_secs(60), &attr);
}
pub fn getattr_status(&self, reply: ReplyAttr) {
let status = self.generate_status();
let attr = self.file_attr(PREFETCH_STATUS_INODE, status.len() as u64);
reply.attr(&Duration::from_secs(1), &attr);
}
pub fn getattr_hint(&self, inode: u64, reply: ReplyAttr) {
let attr = self.file_attr(inode, 256);
reply.attr(&Duration::from_secs(1), &attr);
}
pub fn readdir_prefetch_root(&self, offset: i64, mut reply: ReplyDirectory) {
let entries: Vec<(u64, FileType, &str)> = vec![
(PREFETCH_DIR_INODE, FileType::Directory, "."),
(1, FileType::Directory, ".."),
(PREFETCH_STATUS_INODE, FileType::RegularFile, "status"),
];
let recently_played = self.pattern_store.recently_played(7).unwrap_or_default();
let predictions: Vec<(u64, FileType, String)> = recently_played
.iter()
.take(10)
.enumerate()
.map(|(i, file_id)| {
let inode = PREFETCH_HINTS_BASE + i as u64;
let name = format!("hint_{:04}", file_id.0);
(inode, FileType::RegularFile, name)
})
.collect();
for (i, (inode, kind, name)) in entries.iter().enumerate().skip(offset as usize) {
if reply.add(*inode, (i + 1) as i64, *kind, *name) {
reply.ok();
return;
}
}
let base_offset = entries.len();
for (i, (inode, kind, name)) in predictions.iter().enumerate() {
let entry_offset = base_offset + i;
if entry_offset < offset as usize {
continue;
}
if reply.add(*inode, (entry_offset + 1) as i64, *kind, name) {
break;
}
}
reply.ok();
}
pub fn read_status(&self, offset: i64, size: u32, reply: ReplyData) {
let status = self.generate_status();
let start = offset as usize;
let end = std::cmp::min(start + size as usize, status.len());
if start >= status.len() {
reply.data(&[]);
} else {
reply.data(&status.as_bytes()[start..end]);
}
}
pub fn read_hint(&self, inode: u64, offset: i64, size: u32, reply: ReplyData) {
let file_id = self.inode_to_file_id(inode);
let predictions = self.pattern_store.predict_next(file_id, 5);
let content = predictions
.iter()
.map(|id| format!("{}", id.0))
.collect::<Vec<_>>()
.join("\n");
let start = offset as usize;
let end = std::cmp::min(start + size as usize, content.len());
if start >= content.len() {
reply.data(&[]);
} else {
reply.data(&content.as_bytes()[start..end]);
}
}
fn generate_status(&self) -> String {
let engine_status = if let Some(engine) = &self.engine {
format!(
"running: {}\nin_flight: {}",
engine.is_running(),
engine.in_flight_count()
)
} else {
"engine: disabled".to_string()
};
let most_played = self
.pattern_store
.most_played(5)
.unwrap_or_default()
.iter()
.map(|id| format!("{}", id.0))
.collect::<Vec<_>>()
.join(", ");
format!(
"MusicFS Prefetch Status\n\
=======================\n\
{}\n\
most_played: [{}]\n",
engine_status, most_played
)
}
fn hint_name_to_inode(&self, name: &str) -> Option<u64> {
if name.starts_with("hint_") {
let id_str = name.strip_prefix("hint_")?;
let id: i64 = id_str.parse().ok()?;
Some(PREFETCH_HINTS_BASE + id as u64)
} else {
None
}
}
fn inode_to_file_id(&self, inode: u64) -> FileId {
FileId((inode - PREFETCH_HINTS_BASE) as i64)
}
fn dir_attr(&self, inode: u64) -> FileAttr {
FileAttr {
ino: inode,
size: 0,
blocks: 0,
atime: SystemTime::UNIX_EPOCH,
mtime: SystemTime::UNIX_EPOCH,
ctime: SystemTime::UNIX_EPOCH,
crtime: SystemTime::UNIX_EPOCH,
kind: FileType::Directory,
perm: 0o555,
nlink: 2,
uid: self.uid,
gid: self.gid,
rdev: 0,
blksize: 512,
flags: 0,
}
}
fn file_attr(&self, inode: u64, size: u64) -> FileAttr {
FileAttr {
ino: inode,
size,
blocks: (size + 511) / 512,
atime: SystemTime::UNIX_EPOCH,
mtime: SystemTime::UNIX_EPOCH,
ctime: SystemTime::UNIX_EPOCH,
crtime: SystemTime::UNIX_EPOCH,
kind: FileType::RegularFile,
perm: 0o444,
nlink: 1,
uid: self.uid,
gid: self.gid,
rdev: 0,
blksize: 512,
flags: 0,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
#[test]
fn test_prefetch_ops_new() {
let dir = TempDir::new().unwrap();
let pattern_store = Arc::new(PatternStore::new(&dir.path().join("patterns.db"), 30).unwrap());
let _ops = PrefetchOps::new(pattern_store, 1000, 1000);
}
#[test]
fn test_is_prefetch_inode() {
assert!(PrefetchOps::is_prefetch_inode(PREFETCH_DIR_INODE));
assert!(PrefetchOps::is_prefetch_inode(PREFETCH_STATUS_INODE));
assert!(PrefetchOps::is_prefetch_inode(PREFETCH_HINTS_BASE));
assert!(PrefetchOps::is_prefetch_inode(PREFETCH_HINTS_BASE + 100));
assert!(!PrefetchOps::is_prefetch_inode(1));
assert!(!PrefetchOps::is_prefetch_inode(1000));
}
#[test]
fn test_hint_name_to_inode() {
let dir = TempDir::new().unwrap();
let pattern_store = Arc::new(PatternStore::new(&dir.path().join("patterns.db"), 30).unwrap());
let ops = PrefetchOps::new(pattern_store, 1000, 1000);
assert_eq!(ops.hint_name_to_inode("hint_0001"), Some(PREFETCH_HINTS_BASE + 1));
assert_eq!(ops.hint_name_to_inode("hint_9999"), Some(PREFETCH_HINTS_BASE + 9999));
assert_eq!(ops.hint_name_to_inode("invalid"), None);
}
}
@@ -8,3 +8,4 @@ musicfs-core = { path = "../musicfs-core" }
symphonia.workspace = true
thiserror.workspace = true
tracing.workspace = true
image.workspace = true
@@ -0,0 +1,116 @@
use image::ImageFormat;
use std::io::Cursor;
use symphonia::core::meta::Visual;
use tracing::debug;
#[derive(Debug, Clone)]
pub struct Artwork {
pub art_type: ArtType,
pub mime_type: String,
pub width: u32,
pub height: u32,
pub data: Vec<u8>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ArtType {
Front,
Back,
Other,
}
#[derive(Debug, Clone, Copy)]
pub enum ArtSize {
Thumbnail,
Medium,
Full,
}
impl ArtSize {
pub fn max_dimension(&self) -> Option<u32> {
match self {
ArtSize::Thumbnail => Some(150),
ArtSize::Medium => Some(300),
ArtSize::Full => None,
}
}
}
pub struct ArtworkExtractor;
impl ArtworkExtractor {
pub fn extract_from_visual(visual: &Visual) -> Option<Artwork> {
let data = visual.data.to_vec();
let img = image::load_from_memory(&data).ok()?;
let art_type = match visual.usage {
Some(symphonia::core::meta::StandardVisualKey::FrontCover) => ArtType::Front,
Some(symphonia::core::meta::StandardVisualKey::BackCover) => ArtType::Back,
_ => ArtType::Other,
};
let mime_type = if visual.media_type.is_empty() {
"image/jpeg".to_string()
} else {
visual.media_type.clone()
};
Some(Artwork {
art_type,
mime_type,
width: img.width(),
height: img.height(),
data,
})
}
pub fn resize(artwork: &Artwork, size: ArtSize) -> Option<Artwork> {
let max_dim = size.max_dimension()?;
if artwork.width <= max_dim && artwork.height <= max_dim {
return Some(artwork.clone());
}
let img = image::load_from_memory(&artwork.data).ok()?;
let resized = img.thumbnail(max_dim, max_dim);
let mut output = Vec::new();
let mut cursor = Cursor::new(&mut output);
resized.write_to(&mut cursor, ImageFormat::Jpeg).ok()?;
debug!(
"Resized artwork from {}x{} to {}x{}",
artwork.width,
artwork.height,
resized.width(),
resized.height()
);
Some(Artwork {
art_type: artwork.art_type,
mime_type: "image/jpeg".to_string(),
width: resized.width(),
height: resized.height(),
data: output,
})
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_art_size_dimensions() {
assert_eq!(ArtSize::Thumbnail.max_dimension(), Some(150));
assert_eq!(ArtSize::Medium.max_dimension(), Some(300));
assert_eq!(ArtSize::Full.max_dimension(), None);
}
#[test]
fn test_art_type_equality() {
assert_eq!(ArtType::Front, ArtType::Front);
assert_ne!(ArtType::Front, ArtType::Back);
}
}
@@ -1,3 +1,5 @@
pub mod artwork;
mod parser;
pub use artwork::{ArtSize, ArtType, Artwork, ArtworkExtractor};
pub use parser::MetadataParser;
+3
View File
@@ -12,6 +12,9 @@ parking_lot.workspace = true
tokio = { workspace = true, features = ["sync", "time"] }
tracing.workspace = true
thiserror.workspace = true
rusqlite.workspace = true
serde.workspace = true
serde_json.workspace = true
[dev-dependencies]
tempfile.workspace = true
@@ -0,0 +1,307 @@
use parking_lot::Mutex;
use serde::{Deserialize, Serialize};
use std::path::Path;
use std::time::{Duration, SystemTime};
use tracing::warn;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SmartCollection {
pub id: i64,
pub name: String,
pub query: CollectionQuery,
pub created_at: SystemTime,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum CollectionQuery {
Match {
field: String,
pattern: String,
},
DateRange {
field: String,
start: i32,
end: i32,
},
RecentlyAdded {
days: u32,
},
RecentlyPlayed {
days: u32,
},
MostPlayed {
limit: u32,
},
Genre {
genre: String,
},
Compound {
op: BoolOp,
children: Vec<CollectionQuery>,
},
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum BoolOp {
And,
Or,
}
impl CollectionQuery {
pub fn to_tantivy_query(&self) -> String {
match self {
CollectionQuery::Match { field, pattern } => {
format!("{}:{}", field, pattern)
}
CollectionQuery::DateRange { field, start, end } => {
format!("{}:[{} TO {}]", field, start, end)
}
CollectionQuery::Genre { genre } => {
format!("genre:{}", genre)
}
CollectionQuery::Compound { op, children } => {
let sep = match op {
BoolOp::And => " AND ",
BoolOp::Or => " OR ",
};
let parts: Vec<_> = children
.iter()
.map(|c| format!("({})", c.to_tantivy_query()))
.collect();
parts.join(sep)
}
_ => String::new(),
}
}
pub fn is_dynamic(&self) -> bool {
matches!(
self,
CollectionQuery::RecentlyAdded { .. }
| CollectionQuery::RecentlyPlayed { .. }
| CollectionQuery::MostPlayed { .. }
)
}
}
pub struct CollectionStore {
db: Mutex<rusqlite::Connection>,
}
impl CollectionStore {
pub fn new(db_path: &Path) -> Result<Self, CollectionError> {
let db = rusqlite::Connection::open(db_path)?;
db.execute(
"CREATE TABLE IF NOT EXISTS collections (
id INTEGER PRIMARY KEY,
name TEXT UNIQUE NOT NULL,
query_json TEXT NOT NULL,
created_at INTEGER NOT NULL
)",
[],
)?;
Ok(Self { db: Mutex::new(db) })
}
pub fn create(
&self,
name: &str,
query: CollectionQuery,
) -> Result<SmartCollection, CollectionError> {
let query_json = serde_json::to_string(&query)?;
let now = SystemTime::now()
.duration_since(SystemTime::UNIX_EPOCH)
.unwrap()
.as_secs() as i64;
let db = self.db.lock();
db.execute(
"INSERT INTO collections (name, query_json, created_at) VALUES (?1, ?2, ?3)",
rusqlite::params![name, query_json, now],
)?;
let id = db.last_insert_rowid();
Ok(SmartCollection {
id,
name: name.to_string(),
query,
created_at: SystemTime::UNIX_EPOCH + Duration::from_secs(now as u64),
})
}
pub fn list(&self) -> Result<Vec<SmartCollection>, CollectionError> {
let db = self.db.lock();
let mut stmt = db.prepare("SELECT id, name, query_json, created_at FROM collections")?;
let collections = stmt.query_map([], |row| {
let query_json: String = row.get(2)?;
let created_secs: i64 = row.get(3)?;
let query = match serde_json::from_str(&query_json) {
Ok(q) => q,
Err(e) => {
warn!("Failed to parse collection query JSON: {}", e);
CollectionQuery::Match {
field: "title".to_string(),
pattern: "*".to_string(),
}
}
};
Ok(SmartCollection {
id: row.get(0)?,
name: row.get(1)?,
query,
created_at: SystemTime::UNIX_EPOCH + Duration::from_secs(created_secs as u64),
})
})?;
collections
.collect::<Result<Vec<_>, _>>()
.map_err(CollectionError::from)
}
pub fn get(&self, name: &str) -> Result<Option<SmartCollection>, CollectionError> {
let db = self.db.lock();
let mut stmt =
db.prepare("SELECT id, name, query_json, created_at FROM collections WHERE name = ?1")?;
let result = stmt
.query_row([name], |row| {
let query_json: String = row.get(2)?;
let created_secs: i64 = row.get(3)?;
let query = match serde_json::from_str(&query_json) {
Ok(q) => q,
Err(e) => {
warn!("Failed to parse collection query JSON: {}", e);
CollectionQuery::Match {
field: "title".to_string(),
pattern: "*".to_string(),
}
}
};
Ok(SmartCollection {
id: row.get(0)?,
name: row.get(1)?,
query,
created_at: SystemTime::UNIX_EPOCH + Duration::from_secs(created_secs as u64),
})
})
.ok();
Ok(result)
}
pub fn delete(&self, name: &str) -> Result<(), CollectionError> {
let db = self.db.lock();
db.execute("DELETE FROM collections WHERE name = ?1", [name])?;
Ok(())
}
}
pub fn builtin_collections() -> Vec<SmartCollection> {
vec![
SmartCollection {
id: -1,
name: "Recently Added".to_string(),
query: CollectionQuery::RecentlyAdded { days: 30 },
created_at: SystemTime::UNIX_EPOCH,
},
SmartCollection {
id: -2,
name: "80s Music".to_string(),
query: CollectionQuery::DateRange {
field: "year".to_string(),
start: 1980,
end: 1989,
},
created_at: SystemTime::UNIX_EPOCH,
},
SmartCollection {
id: -3,
name: "90s Music".to_string(),
query: CollectionQuery::DateRange {
field: "year".to_string(),
start: 1990,
end: 1999,
},
created_at: SystemTime::UNIX_EPOCH,
},
]
}
#[derive(Debug, thiserror::Error)]
pub enum CollectionError {
#[error("database error: {0}")]
Database(#[from] rusqlite::Error),
#[error("serialization error: {0}")]
Serialization(#[from] serde_json::Error),
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
#[test]
fn test_collection_crud() {
let dir = TempDir::new().unwrap();
let db_path = dir.path().join("collections.db");
let store = CollectionStore::new(&db_path).unwrap();
let collection = store
.create("Jazz", CollectionQuery::Genre { genre: "Jazz".to_string() })
.unwrap();
assert_eq!(collection.name, "Jazz");
let collections = store.list().unwrap();
assert_eq!(collections.len(), 1);
store.delete("Jazz").unwrap();
let collections = store.list().unwrap();
assert_eq!(collections.len(), 0);
}
#[test]
fn test_compound_query() {
let query = CollectionQuery::Compound {
op: BoolOp::And,
children: vec![
CollectionQuery::Genre { genre: "Metal".to_string() },
CollectionQuery::DateRange {
field: "year".to_string(),
start: 1980,
end: 1989,
},
],
};
let tantivy_query = query.to_tantivy_query();
assert!(tantivy_query.contains("genre:Metal"));
assert!(tantivy_query.contains("year:[1980 TO 1989]"));
assert!(tantivy_query.contains(" AND "));
}
#[test]
fn test_builtin_collections() {
let builtins = builtin_collections();
assert_eq!(builtins.len(), 3);
assert!(builtins.iter().any(|c| c.name == "Recently Added"));
}
#[test]
fn test_dynamic_query_detection() {
assert!(CollectionQuery::RecentlyAdded { days: 30 }.is_dynamic());
assert!(CollectionQuery::RecentlyPlayed { days: 7 }.is_dynamic());
assert!(CollectionQuery::MostPlayed { limit: 100 }.is_dynamic());
assert!(!CollectionQuery::Genre { genre: "Rock".to_string() }.is_dynamic());
}
}
+5
View File
@@ -1,7 +1,12 @@
mod collections;
mod index;
mod indexer;
mod query;
pub use collections::{
builtin_collections, BoolOp, CollectionError, CollectionQuery, CollectionStore,
SmartCollection,
};
pub use index::{SearchError, SearchHit, SearchIndex};
pub use indexer::{Indexer, IndexerHandle, MetadataLookup};
pub use query::SearchQueryBuilder;