feat: initial implementation of metadata aggregator

- gRPC service with MusicBrainz provider
- PostgreSQL schema with migrations
- Service layer with database-first caching
- Repository pattern for data access
- YAML configuration support
- Research documentation for 17 music metadata projects
This commit is contained in:
Alexander
2026-04-28 16:27:14 +02:00
commit a1f6701bac
163 changed files with 95884 additions and 0 deletions
+5
View File
@@ -0,0 +1,5 @@
package repository
import "errors"
// ErrNotFound is the sentinel error reported when a lookup matches no record.
// Callers should test for it with errors.Is.
var ErrNotFound = errors.New("not found")
+238
View File
@@ -0,0 +1,238 @@
package postgres
import (
"context"
"errors"
"time"
"github.com/jackc/pgx/v5"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/metadata-agregator/internal/domain"
"github.com/metadata-agregator/internal/repository"
)
// AlbumRepository reads and writes albums through a pgx connection pool.
type AlbumRepository struct {
// pool is the connection pool every query and transaction is issued on.
pool *pgxpool.Pool
}
// NewAlbumRepository builds an AlbumRepository that issues its queries on pool.
func NewAlbumRepository(pool *pgxpool.Pool) *AlbumRepository {
	repo := new(AlbumRepository)
	repo.pool = pool
	return repo
}
// GetByID fetches the album whose albums.id equals id and then attaches its
// related records via loadRelations.
//
// It returns repository.ErrNotFound when no row matches; any other scan or
// relation-loading error is returned unchanged.
func (r *AlbumRepository) GetByID(ctx context.Context, id string) (*domain.Album, error) {
	const selectByID = `
SELECT id, title, album_type, release_date, upc, total_tracks, total_discs,
cover_url, source, source_id
FROM albums
WHERE id = $1`

	found, err := r.scanAlbum(ctx, selectByID, id)
	if err != nil {
		return nil, err
	}
	if relErr := r.loadRelations(ctx, found); relErr != nil {
		return nil, relErr
	}
	return found, nil
}
// GetByExternalID fetches the album linked to the (source, sourceID) pair in
// album_external_ids and then attaches its related records via loadRelations.
//
// It returns repository.ErrNotFound when no row matches; any other scan or
// relation-loading error is returned unchanged.
func (r *AlbumRepository) GetByExternalID(ctx context.Context, source, sourceID string) (*domain.Album, error) {
	const selectByExternalID = `
SELECT a.id, a.title, a.album_type, a.release_date, a.upc, a.total_tracks,
a.total_discs, a.cover_url, a.source, a.source_id
FROM albums a
JOIN album_external_ids e ON a.id = e.album_id
WHERE e.source = $1 AND e.source_id = $2`

	found, err := r.scanAlbum(ctx, selectByExternalID, source, sourceID)
	if err != nil {
		return nil, err
	}
	if relErr := r.loadRelations(ctx, found); relErr != nil {
		return nil, relErr
	}
	return found, nil
}
// GetByArtistID returns one page of the albums credited to an artist.
//
// Despite the parameter name, artistID is matched against
// artist_external_ids.source_id (a provider identifier), not against the
// internal artists.id column.
//
// Total is the number of distinct matching albums regardless of paging. Items
// holds at most limit albums starting at offset, newest release first with
// undated albums last; ties are broken by album id so that consecutive pages
// neither repeat nor skip rows. Relations (external IDs) are not loaded for
// the returned albums.
func (r *AlbumRepository) GetByArtistID(ctx context.Context, artistID string, limit, offset int) (*domain.SearchResult[domain.Album], error) {
	countQuery := `
SELECT COUNT(DISTINCT a.id)
FROM albums a
JOIN album_artists aa ON a.id = aa.album_id
JOIN artist_external_ids ae ON aa.artist_id = ae.artist_id
WHERE ae.source_id = $1`
	searchQuery := `
SELECT DISTINCT a.id, a.title, a.album_type, a.release_date, a.upc,
a.total_tracks, a.total_discs, a.cover_url, a.source, a.source_id
FROM albums a
JOIN album_artists aa ON a.id = aa.album_id
JOIN artist_external_ids ae ON aa.artist_id = ae.artist_id
WHERE ae.source_id = $1
ORDER BY a.release_date DESC NULLS LAST, a.id
LIMIT $2 OFFSET $3`

	var total int
	if err := r.pool.QueryRow(ctx, countQuery, artistID).Scan(&total); err != nil {
		return nil, err
	}

	rows, err := r.pool.Query(ctx, searchQuery, artistID, limit, offset)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var albums []domain.Album
	for rows.Next() {
		album, err := r.scanAlbumFromRow(rows)
		if err != nil {
			return nil, err
		}
		albums = append(albums, *album)
	}
	// rows.Next reports false both at end of data and when reading fails, so
	// the error must be checked explicitly to avoid returning a truncated page
	// as if it were complete.
	if err := rows.Err(); err != nil {
		return nil, err
	}

	return &domain.SearchResult[domain.Album]{
		Items:  albums,
		Total:  total,
		Limit:  limit,
		Offset: offset,
	}, nil
}
// Save upserts an album together with its external IDs and artist links in a
// single transaction.
//
// The album row is keyed on id; on conflict the descriptive columns are
// overwritten and updated_at is refreshed, while source/source_id keep their
// stored values. The source/source_id written on insert come from the first
// entry of album.ExternalIDs, or are empty strings when there is none.
// External IDs are upserted per (album_id, source, source_id); artist links
// are inserted and left untouched if they already exist.
func (r *AlbumRepository) Save(ctx context.Context, album *domain.Album) error {
	const (
		upsertAlbum = `
INSERT INTO albums (id, title, album_type, release_date, upc, total_tracks,
total_discs, cover_url, source, source_id)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
ON CONFLICT (id) DO UPDATE SET
title = EXCLUDED.title,
album_type = EXCLUDED.album_type,
release_date = EXCLUDED.release_date,
upc = EXCLUDED.upc,
total_tracks = EXCLUDED.total_tracks,
total_discs = EXCLUDED.total_discs,
cover_url = EXCLUDED.cover_url,
updated_at = now()`
		upsertExternalID = `
INSERT INTO album_external_ids (album_id, source, source_id, url)
VALUES ($1, $2, $3, $4)
ON CONFLICT (album_id, source, source_id) DO UPDATE SET
url = EXCLUDED.url,
fetched_at = now()`
		linkArtist = `
INSERT INTO album_artists (album_id, artist_id, role, position)
VALUES ($1, $2, $3, $4)
ON CONFLICT (album_id, artist_id, role) DO NOTHING`
	)

	tx, err := r.pool.Begin(ctx)
	if err != nil {
		return err
	}
	// Undo partial work on every early return; the rollback result is
	// intentionally ignored.
	defer tx.Rollback(ctx)

	// The first external ID, if any, is recorded as the album's primary source.
	var primarySource, primarySourceID string
	if ids := album.ExternalIDs; len(ids) > 0 {
		primarySource, primarySourceID = ids[0].Source, ids[0].SourceID
	}

	if _, err := tx.Exec(ctx, upsertAlbum,
		album.ID, album.Title, nullString(album.Type), album.ReleaseDate,
		nullString(album.UPC), album.TotalTracks, album.TotalDiscs,
		nullString(album.CoverURL), primarySource, primarySourceID,
	); err != nil {
		return err
	}

	for _, link := range album.ExternalIDs {
		if _, err := tx.Exec(ctx, upsertExternalID,
			album.ID, link.Source, link.SourceID, nullString(link.URL),
		); err != nil {
			return err
		}
	}

	for _, credit := range album.Artists {
		if _, err := tx.Exec(ctx, linkArtist,
			album.ID, credit.Artist.ID, credit.Role, credit.Position,
		); err != nil {
			return err
		}
	}

	return tx.Commit(ctx)
}
// scanAlbum runs a single-row query on the pool and decodes the result with
// scanAlbumRow.
func (r *AlbumRepository) scanAlbum(ctx context.Context, query string, args ...any) (*domain.Album, error) {
	return r.scanAlbumRow(r.pool.QueryRow(ctx, query, args...))
}
// scanAlbumFromRow decodes the current row into an album by delegating to
// scanAlbumRow; it exists so callers iterating a result set have a dedicated
// entry point.
func (r *AlbumRepository) scanAlbumFromRow(row pgx.Row) (*domain.Album, error) {
	album, err := r.scanAlbumRow(row)
	return album, err
}
// scanAlbumRow decodes one row of the ten-column album projection (id, title,
// album_type, release_date, upc, total_tracks, total_discs, cover_url,
// source, source_id) into a domain.Album.
//
// NULL text columns become empty strings and NULL counters stay zero. The
// source and source_id columns are read but not copied onto the album. A
// pgx.ErrNoRows scan error is translated to repository.ErrNotFound.
func (r *AlbumRepository) scanAlbumRow(row pgx.Row) (*domain.Album, error) {
	var (
		kind, barcode, cover  *string
		released              *time.Time
		trackCount, discCount *int
		// Scanned only so the column count matches the projection.
		src   string
		srcID *string
	)
	out := new(domain.Album)

	err := row.Scan(
		&out.ID, &out.Title, &kind, &released, &barcode,
		&trackCount, &discCount, &cover, &src, &srcID,
	)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return nil, repository.ErrNotFound
	case err != nil:
		return nil, err
	}

	out.Type = derefString(kind)
	out.ReleaseDate = released
	out.UPC = derefString(barcode)
	out.CoverURL = derefString(cover)
	if trackCount != nil {
		out.TotalTracks = *trackCount
	}
	if discCount != nil {
		out.TotalDiscs = *discCount
	}
	return out, nil
}
// loadRelations appends every album_external_ids row for album.ID to
// album.ExternalIDs. A NULL url is stored as an empty string.
//
// Only external IDs are loaded here; artist links are not read back.
func (r *AlbumRepository) loadRelations(ctx context.Context, album *domain.Album) error {
	const selectExternalIDs = `SELECT source, source_id, url FROM album_external_ids WHERE album_id = $1`

	rows, err := r.pool.Query(ctx, selectExternalIDs, album.ID)
	if err != nil {
		return err
	}
	defer rows.Close()

	for rows.Next() {
		var (
			link    domain.ExternalID
			linkURL *string
		)
		if scanErr := rows.Scan(&link.Source, &link.SourceID, &linkURL); scanErr != nil {
			return scanErr
		}
		link.URL = derefString(linkURL)
		album.ExternalIDs = append(album.ExternalIDs, link)
	}
	return rows.Err()
}
+260
View File
@@ -0,0 +1,260 @@
package postgres
import (
"context"
"errors"
"time"
"github.com/jackc/pgx/v5"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/metadata-agregator/internal/domain"
"github.com/metadata-agregator/internal/repository"
)
// ArtistRepository reads and writes artists through a pgx connection pool.
type ArtistRepository struct {
// pool is the connection pool every query and transaction is issued on.
pool *pgxpool.Pool
}
// NewArtistRepository builds an ArtistRepository that issues its queries on pool.
func NewArtistRepository(pool *pgxpool.Pool) *ArtistRepository {
	repo := new(ArtistRepository)
	repo.pool = pool
	return repo
}
// GetByID fetches the artist whose artists.id equals id and then loads its
// external IDs.
//
// It returns repository.ErrNotFound when no row matches; any other scan or
// loading error is returned unchanged.
func (r *ArtistRepository) GetByID(ctx context.Context, id string) (*domain.Artist, error) {
	const selectByID = `
SELECT id, name, sort_name, artist_type, country, formed_date, disbanded_date,
description, image_url, source, source_id
FROM artists
WHERE id = $1`

	found, err := r.scanArtist(ctx, selectByID, id)
	if err != nil {
		return nil, err
	}
	if loadErr := r.loadExternalIDs(ctx, found); loadErr != nil {
		return nil, loadErr
	}
	return found, nil
}
// GetByExternalID fetches the artist linked to the (source, sourceID) pair in
// artist_external_ids and then loads its external IDs.
//
// It returns repository.ErrNotFound when no row matches; any other scan or
// loading error is returned unchanged.
func (r *ArtistRepository) GetByExternalID(ctx context.Context, source, sourceID string) (*domain.Artist, error) {
	const selectByExternalID = `
SELECT a.id, a.name, a.sort_name, a.artist_type, a.country, a.formed_date,
a.disbanded_date, a.description, a.image_url, a.source, a.source_id
FROM artists a
JOIN artist_external_ids e ON a.id = e.artist_id
WHERE e.source = $1 AND e.source_id = $2`

	found, err := r.scanArtist(ctx, selectByExternalID, source, sourceID)
	if err != nil {
		return nil, err
	}
	if loadErr := r.loadExternalIDs(ctx, found); loadErr != nil {
		return nil, loadErr
	}
	return found, nil
}
// Search returns one page of artists whose name contains query,
// case-insensitively (ILIKE '%query%').
//
// Total is the number of matching artists regardless of paging. Items holds
// at most limit artists starting at offset, ordered by name and then by id so
// that consecutive pages neither repeat nor skip artists sharing a name.
// External IDs are not loaded for the returned artists.
//
// NOTE: query is placed between the % wildcards without escaping, so any % or
// _ characters inside it act as LIKE wildcards.
func (r *ArtistRepository) Search(ctx context.Context, query string, limit, offset int) (*domain.SearchResult[domain.Artist], error) {
	countQuery := `SELECT COUNT(*) FROM artists WHERE name ILIKE $1`
	searchQuery := `
SELECT id, name, sort_name, artist_type, country, formed_date, disbanded_date,
description, image_url, source, source_id
FROM artists
WHERE name ILIKE $1
ORDER BY name, id
LIMIT $2 OFFSET $3`

	pattern := "%" + query + "%"

	var total int
	if err := r.pool.QueryRow(ctx, countQuery, pattern).Scan(&total); err != nil {
		return nil, err
	}

	rows, err := r.pool.Query(ctx, searchQuery, pattern, limit, offset)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var artists []domain.Artist
	for rows.Next() {
		artist, err := r.scanArtistFromRow(rows)
		if err != nil {
			return nil, err
		}
		artists = append(artists, *artist)
	}
	// rows.Next reports false both at end of data and when reading fails, so
	// the error must be checked explicitly to avoid returning a truncated page
	// as if it were complete.
	if err := rows.Err(); err != nil {
		return nil, err
	}

	return &domain.SearchResult[domain.Artist]{
		Items:  artists,
		Total:  total,
		Limit:  limit,
		Offset: offset,
	}, nil
}
// Save upserts an artist together with its external IDs in a single
// transaction.
//
// The artist row is keyed on id; on conflict the descriptive columns are
// overwritten and updated_at is refreshed, while source/source_id keep their
// stored values. The source/source_id written on insert come from the first
// entry of artist.ExternalIDs, or are empty strings when there is none.
// External IDs are upserted per (artist_id, source, source_id).
func (r *ArtistRepository) Save(ctx context.Context, artist *domain.Artist) error {
	const (
		upsertArtist = `
INSERT INTO artists (id, name, sort_name, artist_type, country, formed_date,
disbanded_date, description, image_url, source, source_id)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
ON CONFLICT (id) DO UPDATE SET
name = EXCLUDED.name,
sort_name = EXCLUDED.sort_name,
artist_type = EXCLUDED.artist_type,
country = EXCLUDED.country,
formed_date = EXCLUDED.formed_date,
disbanded_date = EXCLUDED.disbanded_date,
description = EXCLUDED.description,
image_url = EXCLUDED.image_url,
updated_at = now()`
		upsertExternalID = `
INSERT INTO artist_external_ids (artist_id, source, source_id, url)
VALUES ($1, $2, $3, $4)
ON CONFLICT (artist_id, source, source_id) DO UPDATE SET
url = EXCLUDED.url,
fetched_at = now()`
	)

	tx, err := r.pool.Begin(ctx)
	if err != nil {
		return err
	}
	// Undo partial work on every early return; the rollback result is
	// intentionally ignored.
	defer tx.Rollback(ctx)

	// The first external ID, if any, is recorded as the artist's primary source.
	var primarySource, primarySourceID string
	if ids := artist.ExternalIDs; len(ids) > 0 {
		primarySource, primarySourceID = ids[0].Source, ids[0].SourceID
	}

	if _, err := tx.Exec(ctx, upsertArtist,
		artist.ID, artist.Name, nullString(artist.SortName), nullString(artist.Type),
		nullString(artist.Country), artist.FormedDate, artist.DisbandedDate,
		nullString(artist.Description), nullString(artist.ImageURL),
		primarySource, primarySourceID,
	); err != nil {
		return err
	}

	for _, link := range artist.ExternalIDs {
		if _, err := tx.Exec(ctx, upsertExternalID,
			artist.ID, link.Source, link.SourceID, nullString(link.URL),
		); err != nil {
			return err
		}
	}

	return tx.Commit(ctx)
}
// scanArtist runs a single-row query on the pool and decodes the result into
// a domain.Artist.
//
// Column decoding is shared with scanArtistFromRow; on top of that, a
// pgx.ErrNoRows scan error is translated to repository.ErrNotFound. Any other
// error is returned unchanged.
func (r *ArtistRepository) scanArtist(ctx context.Context, query string, args ...any) (*domain.Artist, error) {
	decoded, err := r.scanArtistFromRow(r.pool.QueryRow(ctx, query, args...))
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return nil, repository.ErrNotFound
	case err != nil:
		return nil, err
	}
	return decoded, nil
}
// scanArtistFromRow decodes one row of the eleven-column artist projection
// (id, name, sort_name, artist_type, country, formed_date, disbanded_date,
// description, image_url, source, source_id) into a domain.Artist.
//
// NULL text columns become empty strings. The source and source_id columns
// are read but not copied onto the artist. Scan errors are returned as-is,
// without any not-found translation.
func (r *ArtistRepository) scanArtistFromRow(row pgx.Row) (*domain.Artist, error) {
	var (
		sortKey, kind, countryCode, bio, image *string
		formed, disbanded                      *time.Time
		// Scanned only so the column count matches the projection.
		src   string
		srcID *string
	)
	out := new(domain.Artist)

	if err := row.Scan(
		&out.ID, &out.Name, &sortKey, &kind, &countryCode,
		&formed, &disbanded, &bio, &image, &src, &srcID,
	); err != nil {
		return nil, err
	}

	out.SortName = derefString(sortKey)
	out.Type = derefString(kind)
	out.Country = derefString(countryCode)
	out.FormedDate = formed
	out.DisbandedDate = disbanded
	out.Description = derefString(bio)
	out.ImageURL = derefString(image)
	return out, nil
}
// loadExternalIDs appends every artist_external_ids row for artist.ID to
// artist.ExternalIDs. A NULL url is stored as an empty string.
func (r *ArtistRepository) loadExternalIDs(ctx context.Context, artist *domain.Artist) error {
	const selectExternalIDs = `SELECT source, source_id, url FROM artist_external_ids WHERE artist_id = $1`

	rows, err := r.pool.Query(ctx, selectExternalIDs, artist.ID)
	if err != nil {
		return err
	}
	defer rows.Close()

	for rows.Next() {
		var (
			link    domain.ExternalID
			linkURL *string
		)
		if scanErr := rows.Scan(&link.Source, &link.SourceID, &linkURL); scanErr != nil {
			return scanErr
		}
		link.URL = derefString(linkURL)
		artist.ExternalIDs = append(artist.ExternalIDs, link)
	}
	return rows.Err()
}
// nullString converts s into a query argument: the empty string becomes nil
// (stored as SQL NULL), anything else becomes a pointer to a copy of s.
func nullString(s string) *string {
	if len(s) > 0 {
		return &s
	}
	return nil
}
// derefString is the inverse of nullString: a nil pointer yields the empty
// string, otherwise the pointed-to value is returned.
func derefString(s *string) string {
	var out string
	if s != nil {
		out = *s
	}
	return out
}
+226
View File
@@ -0,0 +1,226 @@
package postgres
import (
"context"
"errors"
"github.com/jackc/pgx/v5"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/metadata-agregator/internal/domain"
"github.com/metadata-agregator/internal/repository"
)
// TrackRepository reads and writes tracks through a pgx connection pool.
type TrackRepository struct {
// pool is the connection pool every query and transaction is issued on.
pool *pgxpool.Pool
}
// NewTrackRepository builds a TrackRepository that issues its queries on pool.
func NewTrackRepository(pool *pgxpool.Pool) *TrackRepository {
	repo := new(TrackRepository)
	repo.pool = pool
	return repo
}
// GetByID fetches the track whose tracks.id equals id and then loads its
// external IDs.
//
// It returns repository.ErrNotFound when no row matches; any other scan or
// loading error is returned unchanged.
func (r *TrackRepository) GetByID(ctx context.Context, id string) (*domain.Track, error) {
	const selectByID = `
SELECT id, title, duration_ms, isrc, explicit, source, source_id
FROM tracks
WHERE id = $1`

	found, err := r.scanTrack(ctx, selectByID, id)
	if err != nil {
		return nil, err
	}
	if loadErr := r.loadExternalIDs(ctx, found); loadErr != nil {
		return nil, loadErr
	}
	return found, nil
}
// GetByExternalID fetches the track linked to the (source, sourceID) pair in
// track_external_ids and then loads its external IDs.
//
// It returns repository.ErrNotFound when no row matches; any other scan or
// loading error is returned unchanged.
func (r *TrackRepository) GetByExternalID(ctx context.Context, source, sourceID string) (*domain.Track, error) {
	const selectByExternalID = `
SELECT t.id, t.title, t.duration_ms, t.isrc, t.explicit, t.source, t.source_id
FROM tracks t
JOIN track_external_ids e ON t.id = e.track_id
WHERE e.source = $1 AND e.source_id = $2`

	found, err := r.scanTrack(ctx, selectByExternalID, source, sourceID)
	if err != nil {
		return nil, err
	}
	if loadErr := r.loadExternalIDs(ctx, found); loadErr != nil {
		return nil, loadErr
	}
	return found, nil
}
// GetByISRC fetches a track whose tracks.isrc equals isrc and then loads its
// external IDs. If several tracks share the ISRC, only the first row the
// database returns is used.
//
// It returns repository.ErrNotFound when no row matches; any other scan or
// loading error is returned unchanged.
func (r *TrackRepository) GetByISRC(ctx context.Context, isrc string) (*domain.Track, error) {
	const selectByISRC = `
SELECT id, title, duration_ms, isrc, explicit, source, source_id
FROM tracks
WHERE isrc = $1`

	found, err := r.scanTrack(ctx, selectByISRC, isrc)
	if err != nil {
		return nil, err
	}
	if loadErr := r.loadExternalIDs(ctx, found); loadErr != nil {
		return nil, loadErr
	}
	return found, nil
}
// GetByAlbumID lists the tracks of an album ordered by disc number and then
// track number.
//
// Despite the parameter name, albumID is matched against
// album_external_ids.source_id (a provider identifier), not against the
// internal albums.id column. Each returned track carries the DiscNumber and
// TrackNumber from album_tracks; external IDs are not loaded. NULL
// duration_ms/explicit leave the zero value and a NULL isrc becomes "".
//
// If iteration ends with an error, the tracks read so far are returned
// together with that error.
func (r *TrackRepository) GetByAlbumID(ctx context.Context, albumID string) ([]domain.Track, error) {
	const selectAlbumTracks = `
SELECT t.id, t.title, t.duration_ms, t.isrc, t.explicit, t.source, t.source_id,
at.disc_number, at.track_number
FROM tracks t
JOIN album_tracks at ON t.id = at.track_id
JOIN album_external_ids ae ON at.album_id = ae.album_id
WHERE ae.source_id = $1
ORDER BY at.disc_number, at.track_number`

	rows, err := r.pool.Query(ctx, selectAlbumTracks, albumID)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var result []domain.Track
	for rows.Next() {
		var (
			item    domain.Track
			length  *int
			code    *string
			flagged *bool
			// Scanned only so the column count matches the projection.
			src   string
			srcID *string
		)
		if scanErr := rows.Scan(
			&item.ID, &item.Title, &length, &code, &flagged,
			&src, &srcID, &item.DiscNumber, &item.TrackNumber,
		); scanErr != nil {
			return nil, scanErr
		}

		item.ISRC = derefString(code)
		if length != nil {
			item.DurationMs = *length
		}
		if flagged != nil {
			item.Explicit = *flagged
		}
		result = append(result, item)
	}
	return result, rows.Err()
}
// Save upserts a track together with its external IDs in a single
// transaction.
//
// The track row is keyed on id; on conflict title, duration_ms, isrc and
// explicit are overwritten and updated_at is refreshed, while
// source/source_id keep their stored values. The source/source_id written on
// insert come from the first entry of track.ExternalIDs, or are empty strings
// when there is none. External IDs are upserted per
// (track_id, source, source_id).
func (r *TrackRepository) Save(ctx context.Context, track *domain.Track) error {
	const (
		upsertTrack = `
INSERT INTO tracks (id, title, duration_ms, isrc, explicit, source, source_id)
VALUES ($1, $2, $3, $4, $5, $6, $7)
ON CONFLICT (id) DO UPDATE SET
title = EXCLUDED.title,
duration_ms = EXCLUDED.duration_ms,
isrc = EXCLUDED.isrc,
explicit = EXCLUDED.explicit,
updated_at = now()`
		upsertExternalID = `
INSERT INTO track_external_ids (track_id, source, source_id, url)
VALUES ($1, $2, $3, $4)
ON CONFLICT (track_id, source, source_id) DO UPDATE SET
url = EXCLUDED.url,
fetched_at = now()`
	)

	tx, err := r.pool.Begin(ctx)
	if err != nil {
		return err
	}
	// Undo partial work on every early return; the rollback result is
	// intentionally ignored.
	defer tx.Rollback(ctx)

	// The first external ID, if any, is recorded as the track's primary source.
	var primarySource, primarySourceID string
	if ids := track.ExternalIDs; len(ids) > 0 {
		primarySource, primarySourceID = ids[0].Source, ids[0].SourceID
	}

	if _, err := tx.Exec(ctx, upsertTrack,
		track.ID, track.Title, track.DurationMs, nullString(track.ISRC),
		track.Explicit, primarySource, primarySourceID,
	); err != nil {
		return err
	}

	for _, link := range track.ExternalIDs {
		if _, err := tx.Exec(ctx, upsertExternalID,
			track.ID, link.Source, link.SourceID, nullString(link.URL),
		); err != nil {
			return err
		}
	}

	return tx.Commit(ctx)
}
// scanTrack runs a single-row query on the pool and decodes the seven-column
// track projection (id, title, duration_ms, isrc, explicit, source,
// source_id) into a domain.Track.
//
// NULL duration_ms/explicit leave the zero value and a NULL isrc becomes "".
// The source and source_id columns are read but not copied onto the track. A
// pgx.ErrNoRows scan error is translated to repository.ErrNotFound.
func (r *TrackRepository) scanTrack(ctx context.Context, query string, args ...any) (*domain.Track, error) {
	var (
		length  *int
		code    *string
		flagged *bool
		// Scanned only so the column count matches the projection.
		src   string
		srcID *string
	)
	out := new(domain.Track)

	err := r.pool.QueryRow(ctx, query, args...).Scan(
		&out.ID, &out.Title, &length, &code, &flagged, &src, &srcID,
	)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return nil, repository.ErrNotFound
	case err != nil:
		return nil, err
	}

	out.ISRC = derefString(code)
	if length != nil {
		out.DurationMs = *length
	}
	if flagged != nil {
		out.Explicit = *flagged
	}
	return out, nil
}
// loadExternalIDs appends every track_external_ids row for track.ID to
// track.ExternalIDs. A NULL url is stored as an empty string.
func (r *TrackRepository) loadExternalIDs(ctx context.Context, track *domain.Track) error {
	const selectExternalIDs = `SELECT source, source_id, url FROM track_external_ids WHERE track_id = $1`

	rows, err := r.pool.Query(ctx, selectExternalIDs, track.ID)
	if err != nil {
		return err
	}
	defer rows.Close()

	for rows.Next() {
		var (
			link    domain.ExternalID
			linkURL *string
		)
		if scanErr := rows.Scan(&link.Source, &link.SourceID, &linkURL); scanErr != nil {
			return scanErr
		}
		link.URL = derefString(linkURL)
		track.ExternalIDs = append(track.ExternalIDs, link)
	}
	return rows.Err()
}
+29
View File
@@ -0,0 +1,29 @@
package repository
import (
"context"
"github.com/metadata-agregator/internal/domain"
)
// ArtistRepository is the storage contract for artists.
type ArtistRepository interface {
// GetByID looks an artist up by its identifier.
GetByID(ctx context.Context, id string) (*domain.Artist, error)
// GetByExternalID looks an artist up by a provider name and the
// identifier that provider assigned.
GetByExternalID(ctx context.Context, source, sourceID string) (*domain.Artist, error)
// Search returns a page of artists matching query, bounded by limit and
// offset.
Search(ctx context.Context, query string, limit, offset int) (*domain.SearchResult[domain.Artist], error)
// Save persists the artist.
Save(ctx context.Context, artist *domain.Artist) error
}
// AlbumRepository is the storage contract for albums.
type AlbumRepository interface {
// GetByID looks an album up by its identifier.
GetByID(ctx context.Context, id string) (*domain.Album, error)
// GetByExternalID looks an album up by a provider name and the
// identifier that provider assigned.
GetByExternalID(ctx context.Context, source, sourceID string) (*domain.Album, error)
// GetByArtistID returns a page of an artist's albums, bounded by limit
// and offset.
// NOTE(review): which kind of artist identifier is expected (internal id
// or provider id) is not stated by this signature; confirm against the
// implementation in use.
GetByArtistID(ctx context.Context, artistID string, limit, offset int) (*domain.SearchResult[domain.Album], error)
// Save persists the album.
Save(ctx context.Context, album *domain.Album) error
}
// TrackRepository is the storage contract for tracks.
type TrackRepository interface {
// GetByID looks a track up by its identifier.
GetByID(ctx context.Context, id string) (*domain.Track, error)
// GetByExternalID looks a track up by a provider name and the
// identifier that provider assigned.
GetByExternalID(ctx context.Context, source, sourceID string) (*domain.Track, error)
// GetByISRC looks a track up by its ISRC code.
GetByISRC(ctx context.Context, isrc string) (*domain.Track, error)
// GetByAlbumID lists the tracks belonging to an album.
// NOTE(review): which kind of album identifier is expected (internal id
// or provider id) is not stated by this signature; confirm against the
// implementation in use.
GetByAlbumID(ctx context.Context, albumID string) ([]domain.Track, error)
// Save persists the track.
Save(ctx context.Context, track *domain.Track) error
}