feat: initial implementation of metadata aggregator

- gRPC service with MusicBrainz provider
- PostgreSQL schema with migrations
- Service layer with database-first caching
- Repository pattern for data access
- YAML configuration support
- Research documentation for 17 music metadata projects
This commit is contained in:
Alexander
2026-04-28 16:27:14 +02:00
commit a1f6701bac
163 changed files with 95884 additions and 0 deletions
+23
View File
@@ -0,0 +1,23 @@
# PostgreSQL 16 container with a named data volume, init scripts and a
# custom server configuration file.
services:
  postgres:
    image: postgres:16-alpine
    container_name: metadata-postgres
    environment:
      # NOTE(review): credentials are hard-coded here; presumably this file is
      # for local development only -- confirm before reusing it elsewhere.
      POSTGRES_USER: metadata
      POSTGRES_PASSWORD: metadata
      POSTGRES_DB: metadata
    ports:
      - "5432:5432"
    volumes:
      # Named volume so the database survives container re-creation.
      - postgres_data:/var/lib/postgresql/data
      # The image entrypoint executes the files in this directory only when the
      # data directory is empty (first start).
      # NOTE(review): every *.sql file in the directory is run in name order; if
      # "down" migrations live next to the "up" ones they are executed too --
      # verify the resulting order is what you expect.
      - ./migrations:/docker-entrypoint-initdb.d:ro
      - ./postgresql.conf:/etc/postgresql/postgresql.conf:ro
    # The server reads the mounted file instead of the configuration generated
    # in the data directory, so any non-default setting the container relies on
    # (for example listen_addresses) has to be present in that file.
    command: postgres -c config_file=/etc/postgresql/postgresql.conf
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U metadata -d metadata"]
      interval: 5s
      timeout: 5s
      retries: 5

volumes:
  postgres_data:
@@ -0,0 +1 @@
-- Remove the pg_prewarm extension; IF EXISTS makes this a no-op when it is not installed.
DROP EXTENSION IF EXISTS pg_prewarm;
@@ -0,0 +1 @@
-- Install the pg_prewarm extension unless it is already present in this database.
CREATE EXTENSION IF NOT EXISTS pg_prewarm;
@@ -0,0 +1,33 @@
-- Tear down the schema: secondary indexes first, then tables from the most
-- dependent (external IDs, content, junction tables) down to the core entities.
-- Every statement uses IF EXISTS, so objects that are already gone are skipped.

-- Secondary indexes.
DROP INDEX IF EXISTS
    idx_playlist_tracks_position,
    idx_lyrics_track_id,
    idx_genres_name,
    idx_albums_release_date,
    idx_albums_source,
    idx_albums_upc,
    idx_tracks_source,
    idx_tracks_isrc,
    idx_artists_source,
    idx_artists_name;

-- External ID mappings.
DROP TABLE IF EXISTS
    track_external_ids,
    album_external_ids,
    artist_external_ids;

-- Content tables.
DROP TABLE IF EXISTS
    playlist_tracks,
    playlists,
    lyrics;

-- Relationship (junction) tables.
DROP TABLE IF EXISTS
    similar_artists,
    album_genres,
    artist_genres,
    work_artists,
    album_tracks,
    album_artists,
    track_artists;

-- Core entities; referencing tables are listed before the tables they
-- reference (albums before labels, tracks before works).
DROP TABLE IF EXISTS
    genres,
    albums,
    labels,
    tracks,
    works,
    artists;
@@ -0,0 +1,199 @@
-- Core Entities
--
-- Each table has a UUID primary key filled in by gen_random_uuid().
-- All of them except genres also carry:
--   * source / source_id        : origin of the row; source is mandatory
--   * created_at / updated_at   : both default to now(); no trigger is defined
--                                 in this migration, so updated_at only changes
--                                 when a writer sets it explicitly

-- Artists (name is the only mandatory descriptive field).
CREATE TABLE artists (
    id              UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    name            TEXT NOT NULL,
    sort_name       TEXT,
    artist_type     TEXT,
    country         TEXT,
    formed_date     DATE,
    disbanded_date  DATE,
    description     TEXT,
    image_url       TEXT,
    source          TEXT NOT NULL,
    source_id       TEXT,
    created_at      TIMESTAMPTZ DEFAULT now(),
    updated_at      TIMESTAMPTZ DEFAULT now()
);

-- Works; tracks may point at a work through tracks.work_id.
CREATE TABLE works (
    id          UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    title       TEXT NOT NULL,
    work_type   TEXT,
    language    TEXT,
    source      TEXT NOT NULL,
    source_id   TEXT,
    created_at  TIMESTAMPTZ DEFAULT now(),
    updated_at  TIMESTAMPTZ DEFAULT now()
);

-- Tracks; work_id is optional and has no ON DELETE action, so a work that
-- is still referenced by a track cannot be deleted.
CREATE TABLE tracks (
    id           UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    work_id      UUID REFERENCES works(id),
    title        TEXT NOT NULL,
    duration_ms  INT,
    isrc         TEXT,
    explicit     BOOLEAN DEFAULT false,
    source       TEXT NOT NULL,
    source_id    TEXT,
    created_at   TIMESTAMPTZ DEFAULT now(),
    updated_at   TIMESTAMPTZ DEFAULT now()
);

-- Record labels.
CREATE TABLE labels (
    id            UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    name          TEXT NOT NULL,
    country       TEXT,
    founded_date  DATE,
    source        TEXT NOT NULL,
    source_id     TEXT,
    created_at    TIMESTAMPTZ DEFAULT now(),
    updated_at    TIMESTAMPTZ DEFAULT now()
);

-- Albums; label_id is optional and, like tracks.work_id, has no ON DELETE
-- action.
CREATE TABLE albums (
    id            UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    label_id      UUID REFERENCES labels(id),
    title         TEXT NOT NULL,
    album_type    TEXT,
    release_date  DATE,
    upc           TEXT,
    total_tracks  INT,
    total_discs   INT DEFAULT 1,
    cover_url     TEXT,
    source        TEXT NOT NULL,
    source_id     TEXT,
    created_at    TIMESTAMPTZ DEFAULT now(),
    updated_at    TIMESTAMPTZ DEFAULT now()
);

-- Genres form an optional hierarchy through the self-referencing parent_id;
-- names are unique across the whole table.
CREATE TABLE genres (
    id         UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    name       TEXT NOT NULL UNIQUE,
    parent_id  UUID REFERENCES genres(id)
);
-- Relationships
--
-- Junction tables. Every foreign key here is ON DELETE CASCADE, so deleting
-- an entity also deletes its link rows.

-- Artists credited on a track; role is part of the key, so one artist can be
-- linked to the same track once per role.
CREATE TABLE track_artists (
    track_id   UUID REFERENCES tracks(id)  ON DELETE CASCADE,
    artist_id  UUID REFERENCES artists(id) ON DELETE CASCADE,
    role       TEXT DEFAULT 'primary',
    position   INT  DEFAULT 0,
    PRIMARY KEY (track_id, artist_id, role)
);

-- Artists credited on an album; same key shape as track_artists.
CREATE TABLE album_artists (
    album_id   UUID REFERENCES albums(id)  ON DELETE CASCADE,
    artist_id  UUID REFERENCES artists(id) ON DELETE CASCADE,
    role       TEXT DEFAULT 'primary',
    position   INT  DEFAULT 0,
    PRIMARY KEY (album_id, artist_id, role)
);

-- Track listing of an album; a track can be linked to a given album only once.
CREATE TABLE album_tracks (
    album_id      UUID REFERENCES albums(id) ON DELETE CASCADE,
    track_id      UUID REFERENCES tracks(id) ON DELETE CASCADE,
    disc_number   INT DEFAULT 1,
    track_number  INT NOT NULL,
    PRIMARY KEY (album_id, track_id)
);

-- Artists attached to a work; role defaults to 'writer'.
CREATE TABLE work_artists (
    work_id    UUID REFERENCES works(id)   ON DELETE CASCADE,
    artist_id  UUID REFERENCES artists(id) ON DELETE CASCADE,
    role       TEXT DEFAULT 'writer',
    PRIMARY KEY (work_id, artist_id, role)
);

-- Genre tags per artist.
CREATE TABLE artist_genres (
    artist_id  UUID REFERENCES artists(id) ON DELETE CASCADE,
    genre_id   UUID REFERENCES genres(id)  ON DELETE CASCADE,
    PRIMARY KEY (artist_id, genre_id)
);

-- Genre tags per album.
CREATE TABLE album_genres (
    album_id  UUID REFERENCES albums(id) ON DELETE CASCADE,
    genre_id  UUID REFERENCES genres(id) ON DELETE CASCADE,
    PRIMARY KEY (album_id, genre_id)
);

-- Directed artist-to-artist similarity edge with a score (default 0.5).
CREATE TABLE similar_artists (
    artist_id          UUID REFERENCES artists(id) ON DELETE CASCADE,
    similar_artist_id  UUID REFERENCES artists(id) ON DELETE CASCADE,
    score              REAL DEFAULT 0.5,
    PRIMARY KEY (artist_id, similar_artist_id)
);
-- Content

-- Lyrics for a track: plain text in content, structured data in the JSONB
-- column synced_content. Rows are removed together with their track.
CREATE TABLE lyrics (
    id              UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    track_id        UUID REFERENCES tracks(id) ON DELETE CASCADE,
    content         TEXT,
    synced_content  JSONB,
    language        TEXT,
    source          TEXT NOT NULL,
    source_id       TEXT,
    created_at      TIMESTAMPTZ DEFAULT now()
);

-- Playlists; public unless is_public is set otherwise.
CREATE TABLE playlists (
    id           UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    name         TEXT NOT NULL,
    description  TEXT,
    is_public    BOOLEAN DEFAULT true,
    cover_url    TEXT,
    created_at   TIMESTAMPTZ DEFAULT now(),
    updated_at   TIMESTAMPTZ DEFAULT now()
);

-- Playlist entries. The key is (playlist_id, track_id), so a track can occur
-- at most once per playlist; position is mandatory but not constrained to be
-- unique.
CREATE TABLE playlist_tracks (
    playlist_id  UUID REFERENCES playlists(id) ON DELETE CASCADE,
    track_id     UUID REFERENCES tracks(id)    ON DELETE CASCADE,
    position     INT NOT NULL,
    added_at     TIMESTAMPTZ DEFAULT now(),
    PRIMARY KEY (playlist_id, track_id)
);
-- External IDs
--
-- One row per (entity, source, source_id) triple, with an optional URL and
-- the time the mapping was fetched. Rows are removed together with the entity
-- they belong to.

-- Identifiers of an artist in other sources.
CREATE TABLE artist_external_ids (
    artist_id   UUID REFERENCES artists(id) ON DELETE CASCADE,
    source      TEXT NOT NULL,
    source_id   TEXT NOT NULL,
    url         TEXT,
    fetched_at  TIMESTAMPTZ DEFAULT now(),
    PRIMARY KEY (artist_id, source, source_id)
);

-- Identifiers of an album in other sources.
CREATE TABLE album_external_ids (
    album_id    UUID REFERENCES albums(id) ON DELETE CASCADE,
    source      TEXT NOT NULL,
    source_id   TEXT NOT NULL,
    url         TEXT,
    fetched_at  TIMESTAMPTZ DEFAULT now(),
    PRIMARY KEY (album_id, source, source_id)
);

-- Identifiers of a track in other sources.
CREATE TABLE track_external_ids (
    track_id    UUID REFERENCES tracks(id) ON DELETE CASCADE,
    source      TEXT NOT NULL,
    source_id   TEXT NOT NULL,
    url         TEXT,
    fetched_at  TIMESTAMPTZ DEFAULT now(),
    PRIMARY KEY (track_id, source, source_id)
);
-- Indexes

-- Lookup indexes on the core entities. The isrc / upc indexes are partial:
-- rows where the column is NULL are left out of the index.
CREATE INDEX idx_artists_name ON artists(name);
CREATE INDEX idx_artists_source ON artists(source, source_id);
CREATE INDEX idx_tracks_isrc ON tracks(isrc) WHERE isrc IS NOT NULL;
CREATE INDEX idx_tracks_source ON tracks(source, source_id);
CREATE INDEX idx_albums_upc ON albums(upc) WHERE upc IS NOT NULL;
CREATE INDEX idx_albums_source ON albums(source, source_id);
CREATE INDEX idx_albums_release_date ON albums(release_date);

-- genres(name) gets no explicit index: the UNIQUE constraint on that column
-- already creates a unique index, so a second one would only add write cost.

CREATE INDEX idx_lyrics_track_id ON lyrics(track_id);
CREATE INDEX idx_playlist_tracks_position ON playlist_tracks(playlist_id, position);

-- Foreign-key support indexes. PostgreSQL does not index referencing columns
-- automatically. Each column below is either not indexed at all or is not the
-- leading column of its table's primary key, so without these indexes every
-- delete on the referenced table (cascaded or merely checked) has to scan the
-- referencing table.
CREATE INDEX idx_tracks_work_id ON tracks(work_id);
CREATE INDEX idx_albums_label_id ON albums(label_id);
CREATE INDEX idx_genres_parent_id ON genres(parent_id);
CREATE INDEX idx_track_artists_artist_id ON track_artists(artist_id);
CREATE INDEX idx_album_artists_artist_id ON album_artists(artist_id);
CREATE INDEX idx_album_tracks_track_id ON album_tracks(track_id);
CREATE INDEX idx_work_artists_artist_id ON work_artists(artist_id);
CREATE INDEX idx_artist_genres_genre_id ON artist_genres(genre_id);
CREATE INDEX idx_album_genres_genre_id ON album_genres(genre_id);
CREATE INDEX idx_similar_artists_similar_artist_id ON similar_artists(similar_artist_id);
CREATE INDEX idx_playlist_tracks_track_id ON playlist_tracks(track_id);
+9
View File
@@ -0,0 +1,9 @@
# Connections
# When the server is started with `-c config_file=<this file>` (as the Compose
# command does), this file is used instead of the configuration generated in
# the data directory. Without an explicit value the server falls back to its
# built-in default of 'localhost', which makes the published container port
# unreachable from outside the container even though a socket-based
# pg_isready healthcheck still succeeds.
listen_addresses = '*'

# pg_prewarm: load the library at server start and let its background worker
# periodically record and restore the contents of shared buffers.
shared_preload_libraries = 'pg_prewarm'
pg_prewarm.autoprewarm = true
# Interval between dumps of the buffer list, in seconds.
pg_prewarm.autoprewarm_interval = 300

# Memory
shared_buffers = 256MB
# Planner hint about how much cache is available; this is not an allocation.
effective_cache_size = 768MB
# Per sort/hash operation, so one query can use several multiples of this.
work_mem = 16MB
maintenance_work_mem = 128MB