feat: add metadata enrichment integration with music-agregator

- Add SyncedFile message and subdir scoping to RescanOrigin proto
- Add label, album_type, cover_url fields to UpdateMetadataRequest/MetadataResponse
- Implement OriginScanner: walk, hash, diff, ingest with live FUSE tree and content fetcher registration
- Add enrichment DB columns: enrichment_source, enriched_at, enrichment_attempts, genres_json, label, album_type, cover_url
- Add EnrichmentUpdate struct and update_enrichment DB method
- Wire BatchUpdateMetadata to write enrichment fields alongside audio metadata
- Wire gRPC server into CLI mount command with --grpc-port flag
- Pass VirtualTree and ContentFetcher to scanner so rescanned files are immediately visible and readable via FUSE
This commit is contained in:
Alexander
2026-05-17 23:32:18 +02:00
parent 18024dbc62
commit b88583707d
12 changed files with 595 additions and 42 deletions
+1
View File
@@ -7,6 +7,7 @@ pub mod proto {
}
mod metadata;
pub mod scanner;
mod search_service;
mod server;
mod webhook;
+55 -15
View File
@@ -5,7 +5,7 @@ use crate::proto::musicfs::v1::{
ClearOverlayRequest, ClearOverlayResponse, GetMetadataRequest, ImportMetadataRequest,
ImportProgress, MetadataResponse, UpdateMetadataRequest, UpdateMetadataResponse,
};
use musicfs_cache::Database;
use musicfs_cache::{Database, EnrichmentUpdate};
use musicfs_core::{AudioMeta, FileId, VirtualPath};
use std::sync::Arc;
use tokio::sync::mpsc;
@@ -63,6 +63,9 @@ impl MetadataServiceImpl {
channels: meta.channels,
bits_per_sample: meta.bits_per_sample,
encoder: meta.encoder.clone(),
label: None,
album_type: None,
cover_url: None,
custom_tags: Default::default(),
}
}
@@ -160,24 +163,40 @@ impl MetadataService for MetadataServiceImpl {
let audio_meta = Self::request_to_audio_meta(&req);
match self.db.update_metadata(file_id, &audio_meta) {
Ok(()) => {
debug!(file_id = req.file_id, "Metadata updated successfully");
Ok(Response::new(UpdateMetadataResponse {
file_id: req.file_id,
success: true,
error_message: None,
}))
}
Err(e) => {
warn!(file_id = req.file_id, error = %e, "Failed to update metadata");
Ok(Response::new(UpdateMetadataResponse {
if let Err(e) = self.db.update_metadata(file_id, &audio_meta) {
warn!(file_id = req.file_id, error = %e, "Failed to update metadata");
return Ok(Response::new(UpdateMetadataResponse {
file_id: req.file_id,
success: false,
error_message: Some(e.to_string()),
}));
}
if req.label.is_some() || req.album_type.is_some() || req.cover_url.is_some() {
let enrichment = EnrichmentUpdate {
label: req.label.clone(),
album_type: req.album_type.clone(),
cover_url: req.cover_url.clone(),
genres_json: None,
primary_genre: None,
source: "orchestrator".to_string(),
};
if let Err(e) = self.db.update_enrichment(file_id, &enrichment) {
warn!(file_id = req.file_id, error = %e, "Failed to update enrichment");
return Ok(Response::new(UpdateMetadataResponse {
file_id: req.file_id,
success: false,
error_message: Some(e.to_string()),
}))
}));
}
}
debug!(file_id = req.file_id, "Metadata updated successfully");
Ok(Response::new(UpdateMetadataResponse {
file_id: req.file_id,
success: true,
error_message: None,
}))
}
#[instrument(level = "info", skip(self, request), fields(method = "clear_overlay"))]
@@ -239,7 +258,28 @@ impl MetadataService for MetadataServiceImpl {
let error_message = if let Some(ref metadata_req) = item.metadata {
let audio_meta = MetadataServiceImpl::request_to_audio_meta(metadata_req);
match db.update_metadata(file_id, &audio_meta) {
Ok(()) => None,
Ok(()) => {
if metadata_req.label.is_some()
|| metadata_req.album_type.is_some()
|| metadata_req.cover_url.is_some()
{
let enrichment = EnrichmentUpdate {
label: metadata_req.label.clone(),
album_type: metadata_req.album_type.clone(),
cover_url: metadata_req.cover_url.clone(),
genres_json: None,
primary_genre: None,
source: "orchestrator".to_string(),
};
if let Err(e) = db.update_enrichment(file_id, &enrichment) {
Some(e.to_string())
} else {
None
}
} else {
None
}
}
Err(e) => Some(e.to_string()),
}
} else {
+261
View File
@@ -0,0 +1,261 @@
use musicfs_cache::{Database, VirtualTree};
use musicfs_cas::ContentFetcher;
use musicfs_core::{AudioMeta, Error, Event, EventBus, FileId, FileMeta, OriginId, RealPath, Result, VirtualPath};
use musicfs_metadata::MetadataParser;
use parking_lot::RwLock;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::UNIX_EPOCH;
use tokio::sync::mpsc;
use tracing::{info, warn};
/// Summary returned by `OriginScanner::scan` once a scan has finished.
pub struct ScanResult {
/// One entry per file that was newly ingested during the scan.
pub new_files: Vec<SyncedFileInfo>,
/// Count of changed files. `OriginScanner::scan` in this file always sets
/// this to 0 (no change detection is visible here).
pub changed: u32,
/// Count of deleted files. `OriginScanner::scan` in this file always sets
/// this to 0 (no deletion detection is visible here).
pub deleted: u32,
/// Number of discovered audio files that already had a database record for
/// their origin-relative path and were therefore skipped.
pub unchanged: u32,
/// Sum of the sizes, in bytes, of the newly ingested files.
pub bytes_synced: u64,
}
/// Describes a single file that a scan newly ingested.
pub struct SyncedFileInfo {
/// The file's path on disk as walked by the scanner, rendered with
/// `Path::display`.
pub path: String,
/// Identifier returned by the database upsert for this file.
pub file_id: FileId,
/// The virtual path the file was registered under, as a plain string.
pub virtual_path: String,
}
/// Progress update emitted by the scanner on its progress channel.
///
/// Updates are sent with `try_send`, so they are dropped when the channel is
/// full; consumers must not rely on seeing every update.
#[derive(Debug, Clone)]
pub struct ScanProgress {
/// Name of the current phase. The scanner in this file emits `"scanning"`,
/// `"hashing"` and `"indexing"`.
pub phase: String,
/// 1-based position within the current phase (during `"scanning"`: the
/// number of audio files found so far).
pub current: u32,
/// Number of items in the current phase; sent as 0 during `"scanning"`,
/// where the total is not yet known.
pub total: u32,
/// Path of the file the update refers to, rendered with `Path::display`.
pub current_path: String,
/// Bytes ingested so far; sent as 0 during `"scanning"` and `"hashing"`.
pub bytes_synced: u64,
}
/// Walks an origin directory, ingests audio files that are not yet in the
/// database and makes them visible through the shared virtual tree and
/// content fetcher.
pub struct OriginScanner {
// Database used to look up existing files and upsert new ones.
db: Arc<Database>,
// Bus on which a `FileAdded` event is published for every ingested file.
event_bus: Arc<EventBus>,
// Shared virtual tree; ingested files are inserted under a write lock.
tree: Arc<RwLock<VirtualTree>>,
// Content fetcher with which every ingested file is registered.
fetcher: Arc<ContentFetcher>,
// Parser used to read audio metadata from each new file.
parser: MetadataParser,
}
impl OriginScanner {
pub fn new(
db: Arc<Database>,
event_bus: Arc<EventBus>,
tree: Arc<RwLock<VirtualTree>>,
fetcher: Arc<ContentFetcher>,
) -> Self {
Self {
db,
event_bus,
tree,
fetcher,
parser: MetadataParser,
}
}
pub async fn scan(
&self,
origin_id: &OriginId,
origin_root: &Path,
subdir: Option<&str>,
progress_tx: mpsc::Sender<ScanProgress>,
) -> Result<ScanResult> {
let scan_root = match subdir {
Some(sub) if !sub.is_empty() => origin_root.join(sub),
_ => origin_root.to_path_buf(),
};
if !scan_root.exists() {
return Err(Error::Origin(format!(
"scan path does not exist: {}",
scan_root.display()
)));
}
// Phase 1: Scanning
let audio_files = self.collect_audio_files(&scan_root, &progress_tx)?;
let total_files = audio_files.len() as u32;
info!(files = total_files, "scan phase complete");
// Phase 2: Hashing + categorization
let mut new_files = Vec::new();
let mut unchanged = 0u32;
for (i, abs_path) in audio_files.iter().enumerate() {
let _ = progress_tx.try_send(ScanProgress {
phase: "hashing".to_string(),
current: i as u32 + 1,
total: total_files,
current_path: abs_path.display().to_string(),
bytes_synced: 0,
});
let rel_path = abs_path.strip_prefix(origin_root).unwrap_or(abs_path);
let existing = self.db.get_file_by_real_path(origin_id, rel_path)?;
if existing.is_some() {
unchanged += 1;
continue;
}
let size = std::fs::metadata(abs_path)
.map(|m| m.len())
.unwrap_or(0);
new_files.push(DiscoveredFile {
abs_path: abs_path.clone(),
rel_path: rel_path.to_path_buf(),
size,
});
}
info!(
new = new_files.len(),
unchanged = unchanged,
"hash phase complete"
);
// Phase 3: Indexing
let mut synced = Vec::new();
let mut bytes_synced = 0u64;
let ingest_total = new_files.len() as u32;
for (i, file) in new_files.iter().enumerate() {
let _ = progress_tx.try_send(ScanProgress {
phase: "indexing".to_string(),
current: i as u32 + 1,
total: ingest_total,
current_path: file.abs_path.display().to_string(),
bytes_synced,
});
let audio_meta = match self.parser.parse_file(&file.abs_path) {
Ok(meta) => meta,
Err(e) => {
warn!(path = %file.abs_path.display(), error = %e, "parse failed, using defaults");
AudioMeta::default()
}
};
let virtual_path = derive_virtual_path(&audio_meta, &file.rel_path);
let file_id = self.db.upsert_file(
origin_id,
&file.rel_path,
&virtual_path,
&audio_meta,
UNIX_EPOCH,
file.size,
)?;
let file_meta = FileMeta {
id: file_id,
virtual_path: virtual_path.clone(),
real_path: RealPath {
origin_id: origin_id.clone(),
path: file.rel_path.clone(),
},
size: file.size,
mtime: UNIX_EPOCH,
content_hash: None,
audio: Some(audio_meta),
};
{
let mut tree = self.tree.write();
tree.insert_file(&file_meta);
}
self.fetcher.register_file(file_meta.clone());
self.event_bus.publish(Event::FileAdded {
path: virtual_path.clone(),
origin_id: origin_id.clone(),
});
bytes_synced += file.size;
synced.push(SyncedFileInfo {
path: file.abs_path.display().to_string(),
file_id,
virtual_path: virtual_path.as_str().to_string(),
});
}
Ok(ScanResult {
new_files: synced,
changed: 0,
deleted: 0,
unchanged,
bytes_synced,
})
}
fn collect_audio_files(
&self,
scan_root: &Path,
progress_tx: &mpsc::Sender<ScanProgress>,
) -> Result<Vec<PathBuf>> {
let mut files = Vec::new();
self.walk_dir(scan_root, &mut files, progress_tx)?;
Ok(files)
}
fn walk_dir(
&self,
dir: &Path,
files: &mut Vec<PathBuf>,
progress_tx: &mpsc::Sender<ScanProgress>,
) -> Result<()> {
let entries = std::fs::read_dir(dir)
.map_err(|e| Error::Origin(format!("read_dir {}: {}", dir.display(), e)))?;
for entry in entries.flatten() {
let path = entry.path();
if path.is_dir() {
self.walk_dir(&path, files, progress_tx)?;
} else if is_audio_file(&path) {
files.push(path.clone());
let _ = progress_tx.try_send(ScanProgress {
phase: "scanning".to_string(),
current: files.len() as u32,
total: 0,
current_path: path.display().to_string(),
bytes_synced: 0,
});
}
}
Ok(())
}
}
/// Builds the virtual path `/<artist>/<album>/<filename>` for a file.
///
/// The artist and album come from `meta`; the file name is the last component
/// of `rel_path` (or `"unknown"` when it is missing or not valid UTF-8).
///
/// Tag values are free-form text, so they are made safe for use as a single
/// path component: `/` and NUL are replaced with `_` (otherwise an artist such
/// as `AC/DC` would create an extra directory level), and a value that is
/// missing, empty, `.` or `..` falls back to `Unknown Artist` / `Unknown Album`.
fn derive_virtual_path(meta: &AudioMeta, rel_path: &Path) -> VirtualPath {
    // Turns an optional tag value into exactly one path component.
    fn component(tag: Option<&str>, fallback: &str) -> String {
        let cleaned: String = tag
            .unwrap_or("")
            .chars()
            .map(|c| if matches!(c, '/' | '\0') { '_' } else { c })
            .collect();
        match cleaned.as_str() {
            "" | "." | ".." => fallback.to_string(),
            _ => cleaned,
        }
    }

    let artist = component(meta.artist.as_deref(), "Unknown Artist");
    let album = component(meta.album.as_deref(), "Unknown Album");
    let filename = rel_path
        .file_name()
        .and_then(|n| n.to_str())
        .unwrap_or("unknown");
    VirtualPath::new(format!("/{}/{}/{}", artist, album, filename))
}
/// Reports whether `path` carries one of the recognised audio file
/// extensions, compared case-insensitively.
///
/// Paths without an extension, or whose extension is not valid UTF-8, are not
/// treated as audio files.
fn is_audio_file(path: &Path) -> bool {
    const AUDIO_EXTENSIONS: [&str; 7] = ["flac", "mp3", "ogg", "wav", "m4a", "aac", "opus"];

    match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => AUDIO_EXTENSIONS
            .iter()
            .any(|known| ext.eq_ignore_ascii_case(known)),
        None => false,
    }
}
/// A walked audio file that has no database record yet and is queued for
/// ingestion.
struct DiscoveredFile {
// Path of the file as produced by the directory walk.
abs_path: PathBuf,
// `abs_path` with the origin root stripped; used as the file's real path
// within the origin.
rel_path: PathBuf,
// File size in bytes, read from the filesystem metadata.
size: u64,
}
+112 -20
View File
@@ -2,11 +2,11 @@ use crate::proto::musicfs::v1::{
music_fs_server::MusicFs, CacheStats, ClearCacheRequest, ClearCacheResponse, Empty, Event,
EventFilter, HealthStatus, MountState, OriginHealthResponse, OriginRequest, OriginsResponse,
PrefetchProgress, PrefetchRequest, SearchRequest, SearchResponse, SearchResult,
ShutdownRequest, StatusResponse, SyncProgress, TierStats,
ShutdownRequest, StatusResponse, SyncProgress, SyncedFile, TierStats,
};
use musicfs_core::{Event as CoreEvent, EventBus};
use std::sync::Arc;
use std::time::{Duration, Instant};
use std::time::Instant;
use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream;
use tonic::{Request, Response, Status};
@@ -16,14 +16,30 @@ pub struct MusicFsServer {
start_time: Instant,
event_bus: Arc<EventBus>,
version: String,
scanner: Arc<crate::scanner::OriginScanner>,
origin_root: std::path::PathBuf,
}
impl MusicFsServer {
pub fn new(event_bus: Arc<EventBus>) -> Self {
pub fn new(
event_bus: Arc<EventBus>,
db: Arc<musicfs_cache::Database>,
tree: Arc<parking_lot::RwLock<musicfs_cache::VirtualTree>>,
fetcher: Arc<musicfs_cas::ContentFetcher>,
origin_root: std::path::PathBuf,
) -> Self {
let scanner = Arc::new(crate::scanner::OriginScanner::new(
db,
event_bus.clone(),
tree,
fetcher,
));
Self {
start_time: Instant::now(),
event_bus,
version: env!("CARGO_PKG_VERSION").to_string(),
scanner,
origin_root,
}
}
@@ -368,24 +384,85 @@ impl MusicFs for MusicFsServer {
request: Request<OriginRequest>,
) -> Result<Response<Self::RescanOriginStream>, Status> {
let req = request.into_inner();
info!(origin_id = %req.origin_id, "gRPC rescan_origin started");
let subdir = req.subdir.as_deref().filter(|s| !s.is_empty());
info!(
origin_id = %req.origin_id,
subdir = ?subdir,
"gRPC rescan_origin started"
);
let (tx, rx) = mpsc::channel(32);
let (progress_tx, mut progress_rx) = mpsc::channel::<crate::scanner::ScanProgress>(64);
let origin_id = musicfs_core::OriginId::from(req.origin_id.as_str());
let scanner = self.scanner.clone();
let origin_root = self.origin_root.clone();
let subdir_owned = subdir.map(|s| s.to_string());
tokio::spawn(async move {
let phases = ["scanning", "indexing", "complete"];
for (i, phase) in phases.iter().enumerate() {
let progress = SyncProgress {
phase: phase.to_string(),
current: i as u32 + 1,
total: phases.len() as u32,
current_path: String::new(),
bytes_synced: 0,
};
if tx.send(Ok(progress)).await.is_err() {
break;
let forward_handle = {
let tx = tx.clone();
tokio::spawn(async move {
while let Some(progress) = progress_rx.recv().await {
let proto = SyncProgress {
phase: progress.phase,
current: progress.current,
total: progress.total,
current_path: progress.current_path,
bytes_synced: progress.bytes_synced,
new_files: vec![],
};
if tx.send(Ok(proto)).await.is_err() {
break;
}
}
})
};
let result = scanner
.scan(
&origin_id,
&origin_root,
subdir_owned.as_deref(),
progress_tx,
)
.await;
forward_handle.abort();
match result {
Ok(scan_result) => {
let synced_files: Vec<SyncedFile> = scan_result
.new_files
.iter()
.map(|f| SyncedFile {
path: f.path.clone(),
file_id: f.file_id.0,
virtual_path: f.virtual_path.clone(),
})
.collect();
let _ = tx
.send(Ok(SyncProgress {
phase: "complete".to_string(),
current: scan_result.new_files.len() as u32
+ scan_result.changed
+ scan_result.deleted,
total: scan_result.new_files.len() as u32
+ scan_result.changed
+ scan_result.deleted
+ scan_result.unchanged,
current_path: String::new(),
bytes_synced: scan_result.bytes_synced,
new_files: synced_files,
}))
.await;
}
Err(e) => {
let _ = tx
.send(Err(Status::internal(format!("rescan failed: {}", e))))
.await;
}
tokio::time::sleep(Duration::from_millis(100)).await;
}
});
@@ -438,10 +515,26 @@ impl MusicFs for MusicFsServer {
mod tests {
use super::*;
/// Builds a `MusicFsServer` for tests, wired to an in-memory database, a
/// freshly built virtual tree and a CAS store whose chunks live in a
/// temporary directory.
///
/// The temporary directory handle is returned alongside the server so the
/// caller can keep it alive for the duration of the test.
async fn make_test_server() -> (MusicFsServer, tempfile::TempDir) {
let event_bus = Arc::new(EventBus::new(16));
let db = Arc::new(musicfs_cache::Database::open_memory().unwrap());
let tree = Arc::new(parking_lot::RwLock::new(
musicfs_cache::TreeBuilder::new().build(),
));
let dir = tempfile::tempdir().unwrap();
// Point the CAS store at the temporary directory; all other settings
// keep their defaults.
let cfg = musicfs_cas::CasConfig {
chunks_dir: dir.path().join("chunks"),
..Default::default()
};
let store = Arc::new(musicfs_cas::CasStore::open(cfg).await.unwrap());
let fetcher = Arc::new(musicfs_cas::ContentFetcher::new(store));
// Fixed origin root path handed to the server; nothing is created at
// this path here.
let origin_root = std::path::PathBuf::from("/tmp/test-origin");
(MusicFsServer::new(event_bus, db, tree, fetcher, origin_root), dir)
}
#[tokio::test]
async fn test_get_status() {
let event_bus = Arc::new(EventBus::new(16));
let server = MusicFsServer::new(event_bus);
let (server, _dir) = make_test_server().await;
let response = server.get_status(Request::new(Empty {})).await.unwrap();
let status = response.into_inner();
@@ -452,8 +545,7 @@ mod tests {
#[tokio::test]
async fn test_get_cache_stats() {
let event_bus = Arc::new(EventBus::new(16));
let server = MusicFsServer::new(event_bus);
let (server, _dir) = make_test_server().await;
let response = server
.get_cache_stats(Request::new(Empty {}))