feat: initial implementation of metadata aggregator

- gRPC service with MusicBrainz provider
- PostgreSQL schema with migrations
- Service layer with database-first caching
- Repository pattern for data access
- YAML configuration support
- Research documentation for 17 music metadata projects
This commit is contained in:
Alexander
2026-04-28 16:27:14 +02:00
parent a1f6701bac
commit de674376ed
5 changed files with 686 additions and 0 deletions
+104
View File
@@ -0,0 +1,104 @@
# Metadata Aggregator
gRPC service for aggregating music metadata from multiple providers with PostgreSQL caching.
## Features
- **Multi-provider support** - Currently MusicBrainz, extensible for Spotify, Discogs, etc.
- **Database-first caching** - Check local DB before hitting external APIs
- **Rate limiting** - Respects provider API limits (1 req/sec for MusicBrainz)
- **Provider-only mode** - Works without database for quick testing
## Quick Start
```bash
# Build
nix build .#server
# Run (no database)
./result/bin/server
# Run (with database)
./result/bin/server -config config.yaml
```
## Configuration
```yaml
server:
port: 50051
database:
host: localhost
port: 5432
user: metadata
password: metadata
name: metadata
sslmode: disable
```
Or via environment: `DATABASE_URL=postgres://user:pass@host:5432/db`
## Database
```bash
cd database
docker compose up -d
```
PostgreSQL 16 with `pg_prewarm` for automatic cache warming.
## API
```bash
# Search artists
grpcurl -plaintext -d '{"query": "Radiohead"}' localhost:50051 metadata.v1.MetadataService/SearchArtists
# Get artist
grpcurl -plaintext -d '{"id": "a74b1b7f-71a5-4011-9441-d0b5e4122711"}' localhost:50051 metadata.v1.MetadataService/GetArtist
# Get album
grpcurl -plaintext -d '{"id": "b1392450-e666-3926-a536-22c65f834433"}' localhost:50051 metadata.v1.MetadataService/GetAlbum
# Get album tracks
grpcurl -plaintext -d '{"album_id": "b1392450-e666-3926-a536-22c65f834433"}' localhost:50051 metadata.v1.MetadataService/GetAlbumTracks
```
## Project Structure
```
├── cmd/server/ # Server entrypoint
├── internal/
│ ├── config/ # YAML configuration
│ ├── domain/ # Domain types
│ ├── provider/ # External API clients
│ │ └── musicbrainz/ # MusicBrainz implementation
│ ├── repository/ # Database access
│ │ └── postgres/ # PostgreSQL implementation
│ ├── server/ # gRPC handlers
│ └── service/ # Business logic (DB-first caching)
├── pkg/gen/ # Generated protobuf code
├── proto/ # Proto definitions
├── database/ # Docker compose + migrations
└── tests/e2e/ # End-to-end tests
```
## Development
```bash
# Enter dev shell
nix develop
# Generate protobuf
buf generate
# Run tests
go test ./...
# Run e2e tests (requires network)
go test -v ./tests/e2e/...
```
## License
MIT
+137
View File
@@ -0,0 +1,137 @@
package main
import (
"context"
"flag"
"fmt"
"log"
"net"
"os"
"os/signal"
"syscall"
"time"
"github.com/jackc/pgx/v5/pgxpool"
"google.golang.org/grpc"
"google.golang.org/grpc/reflection"
"github.com/metadata-agregator/internal/config"
"github.com/metadata-agregator/internal/provider/musicbrainz"
"github.com/metadata-agregator/internal/repository/postgres"
"github.com/metadata-agregator/internal/server"
"github.com/metadata-agregator/internal/service"
metadatav1 "github.com/metadata-agregator/pkg/gen/metadata/v1"
)
func main() {
configPath := flag.String("config", "", "path to config file")
flag.Parse()
cfg, err := config.Load(*configPath)
if err != nil {
log.Fatalf("failed to load config: %v", err)
}
ctx := context.Background()
services, cleanup := buildServices(ctx, cfg)
defer cleanup()
addr := fmt.Sprintf(":%d", cfg.Server.Port)
lis, err := net.Listen("tcp", addr)
if err != nil {
log.Fatalf("failed to listen: %v", err)
}
grpcServer := grpc.NewServer()
metadatav1.RegisterMetadataServiceServer(grpcServer, server.NewMetadataServer(services))
reflection.Register(grpcServer)
go gracefulShutdown(grpcServer)
log.Printf("gRPC server listening on %s", addr)
if err := grpcServer.Serve(lis); err != nil {
log.Fatalf("failed to serve: %v", err)
}
}
func buildServices(ctx context.Context, cfg *config.Config) (map[metadatav1.Provider]*service.MetadataService, func()) {
mb := musicbrainz.New()
services := make(map[metadatav1.Provider]*service.MetadataService)
dbURL := cfg.Database.DSN()
if dbURL == "" {
dbURL = os.Getenv("DATABASE_URL")
}
if dbURL == "" {
log.Println("no database configured, running in provider-only mode")
services[metadatav1.Provider_PROVIDER_MUSICBRAINZ] = service.NewMetadataService(
&noopArtistRepo{},
&noopAlbumRepo{},
&noopTrackRepo{},
mb,
)
return services, func() {}
}
pool, err := connectDB(ctx, dbURL)
if err != nil {
log.Printf("database connection failed: %v, running in provider-only mode", err)
services[metadatav1.Provider_PROVIDER_MUSICBRAINZ] = service.NewMetadataService(
&noopArtistRepo{},
&noopAlbumRepo{},
&noopTrackRepo{},
mb,
)
return services, func() {}
}
artistRepo := postgres.NewArtistRepository(pool)
albumRepo := postgres.NewAlbumRepository(pool)
trackRepo := postgres.NewTrackRepository(pool)
services[metadatav1.Provider_PROVIDER_MUSICBRAINZ] = service.NewMetadataService(
artistRepo,
albumRepo,
trackRepo,
mb,
)
log.Println("database connected, caching enabled")
return services, func() { pool.Close() }
}
func connectDB(ctx context.Context, dbURL string) (*pgxpool.Pool, error) {
ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
defer cancel()
config, err := pgxpool.ParseConfig(dbURL)
if err != nil {
return nil, err
}
config.MaxConns = 10
config.MinConns = 2
pool, err := pgxpool.NewWithConfig(ctx, config)
if err != nil {
return nil, err
}
if err := pool.Ping(ctx); err != nil {
pool.Close()
return nil, err
}
return pool, nil
}
func gracefulShutdown(server *grpc.Server) {
sigCh := make(chan os.Signal, 1)
signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
<-sigCh
log.Println("shutting down...")
server.GracefulStop()
}
+66
View File
@@ -0,0 +1,66 @@
package main
import (
"context"
"github.com/metadata-agregator/internal/domain"
"github.com/metadata-agregator/internal/repository"
)
type noopArtistRepo struct{}
func (r *noopArtistRepo) GetByID(ctx context.Context, id string) (*domain.Artist, error) {
return nil, repository.ErrNotFound
}
func (r *noopArtistRepo) GetByExternalID(ctx context.Context, source, sourceID string) (*domain.Artist, error) {
return nil, repository.ErrNotFound
}
func (r *noopArtistRepo) Search(ctx context.Context, query string, limit, offset int) (*domain.SearchResult[domain.Artist], error) {
return &domain.SearchResult[domain.Artist]{}, nil
}
func (r *noopArtistRepo) Save(ctx context.Context, artist *domain.Artist) error {
return nil
}
type noopAlbumRepo struct{}
func (r *noopAlbumRepo) GetByID(ctx context.Context, id string) (*domain.Album, error) {
return nil, repository.ErrNotFound
}
func (r *noopAlbumRepo) GetByExternalID(ctx context.Context, source, sourceID string) (*domain.Album, error) {
return nil, repository.ErrNotFound
}
func (r *noopAlbumRepo) GetByArtistID(ctx context.Context, artistID string, limit, offset int) (*domain.SearchResult[domain.Album], error) {
return &domain.SearchResult[domain.Album]{}, nil
}
func (r *noopAlbumRepo) Save(ctx context.Context, album *domain.Album) error {
return nil
}
type noopTrackRepo struct{}
func (r *noopTrackRepo) GetByID(ctx context.Context, id string) (*domain.Track, error) {
return nil, repository.ErrNotFound
}
func (r *noopTrackRepo) GetByExternalID(ctx context.Context, source, sourceID string) (*domain.Track, error) {
return nil, repository.ErrNotFound
}
func (r *noopTrackRepo) GetByISRC(ctx context.Context, isrc string) (*domain.Track, error) {
return nil, repository.ErrNotFound
}
func (r *noopTrackRepo) GetByAlbumID(ctx context.Context, albumID string) ([]domain.Track, error) {
return nil, nil
}
func (r *noopTrackRepo) Save(ctx context.Context, track *domain.Track) error {
return nil
}
+161
View File
@@ -0,0 +1,161 @@
package server
import (
"github.com/metadata-agregator/internal/domain"
metadatav1 "github.com/metadata-agregator/pkg/gen/metadata/v1"
)
func toProtoArtist(d *domain.Artist) *metadatav1.Artist {
if d == nil {
return nil
}
a := &metadatav1.Artist{
Id: d.ID,
Name: d.Name,
SortName: d.SortName,
ArtistType: d.Type,
Country: d.Country,
Description: d.Description,
ImageUrl: d.ImageURL,
}
if d.FormedDate != nil {
a.FormedDate = d.FormedDate.Format("2006-01-02")
}
if d.DisbandedDate != nil {
a.DisbandedDate = d.DisbandedDate.Format("2006-01-02")
}
for _, g := range d.Genres {
a.Genres = append(a.Genres, &metadatav1.Genre{
Id: g.ID,
Name: g.Name,
})
}
for _, e := range d.ExternalIDs {
a.ExternalIds = append(a.ExternalIds, &metadatav1.ExternalID{
Source: e.Source,
SourceId: e.SourceID,
Url: e.URL,
})
}
return a
}
func toProtoAlbum(d *domain.Album) *metadatav1.Album {
if d == nil {
return nil
}
a := &metadatav1.Album{
Id: d.ID,
Title: d.Title,
AlbumType: d.Type,
Upc: d.UPC,
TotalTracks: int32(d.TotalTracks),
TotalDiscs: int32(d.TotalDiscs),
CoverUrl: d.CoverURL,
}
if d.ReleaseDate != nil {
a.ReleaseDate = d.ReleaseDate.Format("2006-01-02")
}
for _, ac := range d.Artists {
a.Artists = append(a.Artists, toProtoArtistCredit(&ac))
}
if d.Label != nil {
a.Label = &metadatav1.Label{
Id: d.Label.ID,
Name: d.Label.Name,
Country: d.Label.Country,
}
}
for _, g := range d.Genres {
a.Genres = append(a.Genres, &metadatav1.Genre{
Id: g.ID,
Name: g.Name,
})
}
for _, e := range d.ExternalIDs {
a.ExternalIds = append(a.ExternalIds, &metadatav1.ExternalID{
Source: e.Source,
SourceId: e.SourceID,
Url: e.URL,
})
}
return a
}
func toProtoTrack(d *domain.Track) *metadatav1.Track {
if d == nil {
return nil
}
t := &metadatav1.Track{
Id: d.ID,
Title: d.Title,
DurationMs: int32(d.DurationMs),
Isrc: d.ISRC,
Explicit: d.Explicit,
DiscNumber: int32(d.DiscNumber),
TrackNumber: int32(d.TrackNumber),
}
for _, ac := range d.Artists {
t.Artists = append(t.Artists, toProtoArtistCredit(&ac))
}
if d.Work != nil {
t.Work = toProtoWork(d.Work)
}
for _, e := range d.ExternalIDs {
t.ExternalIds = append(t.ExternalIds, &metadatav1.ExternalID{
Source: e.Source,
SourceId: e.SourceID,
Url: e.URL,
})
}
return t
}
func toProtoWork(d *domain.Work) *metadatav1.Work {
if d == nil {
return nil
}
w := &metadatav1.Work{
Id: d.ID,
Title: d.Title,
WorkType: d.Type,
Language: d.Language,
}
for _, c := range d.Composers {
w.Composers = append(w.Composers, toProtoArtistCredit(&c))
}
return w
}
func toProtoArtistCredit(d *domain.ArtistCredit) *metadatav1.ArtistCredit {
if d == nil {
return nil
}
return &metadatav1.ArtistCredit{
Artist: toProtoArtist(&d.Artist),
Role: d.Role,
Position: int32(d.Position),
JoinPhrase: d.JoinPhrase,
}
}
+218
View File
@@ -0,0 +1,218 @@
package server
import (
"context"
"errors"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"github.com/metadata-agregator/internal/provider/musicbrainz"
"github.com/metadata-agregator/internal/repository"
"github.com/metadata-agregator/internal/service"
metadatav1 "github.com/metadata-agregator/pkg/gen/metadata/v1"
)
type MetadataServer struct {
metadatav1.UnimplementedMetadataServiceServer
services map[metadatav1.Provider]*service.MetadataService
}
func NewMetadataServer(services map[metadatav1.Provider]*service.MetadataService) *MetadataServer {
return &MetadataServer{services: services}
}
func (s *MetadataServer) getService(p metadatav1.Provider) (*service.MetadataService, error) {
if p == metadatav1.Provider_PROVIDER_UNSPECIFIED {
p = metadatav1.Provider_PROVIDER_MUSICBRAINZ
}
svc, ok := s.services[p]
if !ok {
return nil, status.Errorf(codes.InvalidArgument, "unknown provider: %v", p)
}
return svc, nil
}
func (s *MetadataServer) GetArtist(ctx context.Context, req *metadatav1.GetArtistRequest) (*metadatav1.Artist, error) {
svc, err := s.getService(req.Provider)
if err != nil {
return nil, err
}
var id string
switch v := req.Identifier.(type) {
case *metadatav1.GetArtistRequest_Id:
id = v.Id
case *metadatav1.GetArtistRequest_External:
id = v.External.SourceId
default:
return nil, status.Error(codes.InvalidArgument, "identifier required")
}
artist, err := svc.GetArtist(ctx, id)
if err != nil {
return nil, toGRPCError(err)
}
return toProtoArtist(artist), nil
}
func (s *MetadataServer) SearchArtists(ctx context.Context, req *metadatav1.SearchArtistsRequest) (*metadatav1.SearchArtistsResponse, error) {
svc, err := s.getService(req.Provider)
if err != nil {
return nil, err
}
limit := int(req.Limit)
if limit <= 0 {
limit = 25
}
result, err := svc.SearchArtists(ctx, req.Query, limit, int(req.Offset))
if err != nil {
return nil, toGRPCError(err)
}
resp := &metadatav1.SearchArtistsResponse{
Total: int32(result.Total),
}
for _, a := range result.Items {
resp.Artists = append(resp.Artists, toProtoArtist(&a))
}
return resp, nil
}
func (s *MetadataServer) GetAlbum(ctx context.Context, req *metadatav1.GetAlbumRequest) (*metadatav1.Album, error) {
svc, err := s.getService(req.Provider)
if err != nil {
return nil, err
}
var id string
switch v := req.Identifier.(type) {
case *metadatav1.GetAlbumRequest_Id:
id = v.Id
case *metadatav1.GetAlbumRequest_External:
id = v.External.SourceId
default:
return nil, status.Error(codes.InvalidArgument, "identifier required")
}
album, err := svc.GetAlbum(ctx, id)
if err != nil {
return nil, toGRPCError(err)
}
return toProtoAlbum(album), nil
}
func (s *MetadataServer) GetArtistAlbums(ctx context.Context, req *metadatav1.GetArtistAlbumsRequest) (*metadatav1.GetArtistAlbumsResponse, error) {
svc, err := s.getService(req.Provider)
if err != nil {
return nil, err
}
limit := int(req.Limit)
if limit <= 0 {
limit = 25
}
result, err := svc.GetArtistAlbums(ctx, req.ArtistId, limit, int(req.Offset))
if err != nil {
return nil, toGRPCError(err)
}
resp := &metadatav1.GetArtistAlbumsResponse{
Total: int32(result.Total),
}
for _, a := range result.Items {
resp.Albums = append(resp.Albums, toProtoAlbum(&a))
}
return resp, nil
}
func (s *MetadataServer) GetTrack(ctx context.Context, req *metadatav1.GetTrackRequest) (*metadatav1.Track, error) {
svc, err := s.getService(req.Provider)
if err != nil {
return nil, err
}
var track *metadatav1.Track
switch v := req.Identifier.(type) {
case *metadatav1.GetTrackRequest_Id:
t, err := svc.GetTrack(ctx, v.Id)
if err != nil {
return nil, toGRPCError(err)
}
track = toProtoTrack(t)
case *metadatav1.GetTrackRequest_External:
t, err := svc.GetTrack(ctx, v.External.SourceId)
if err != nil {
return nil, toGRPCError(err)
}
track = toProtoTrack(t)
case *metadatav1.GetTrackRequest_Isrc:
t, err := svc.GetTrackByISRC(ctx, v.Isrc)
if err != nil {
return nil, toGRPCError(err)
}
track = toProtoTrack(t)
default:
return nil, status.Error(codes.InvalidArgument, "identifier required")
}
return track, nil
}
func (s *MetadataServer) GetAlbumTracks(ctx context.Context, req *metadatav1.GetAlbumTracksRequest) (*metadatav1.GetAlbumTracksResponse, error) {
svc, err := s.getService(req.Provider)
if err != nil {
return nil, err
}
tracks, err := svc.GetAlbumTracks(ctx, req.AlbumId)
if err != nil {
return nil, toGRPCError(err)
}
resp := &metadatav1.GetAlbumTracksResponse{}
for _, t := range tracks {
resp.Tracks = append(resp.Tracks, toProtoTrack(&t))
}
return resp, nil
}
func (s *MetadataServer) SyncArtist(ctx context.Context, req *metadatav1.SyncArtistRequest) (*metadatav1.SyncArtistResponse, error) {
return nil, status.Error(codes.Unimplemented, "sync not yet implemented")
}
func toGRPCError(err error) error {
if err == nil {
return nil
}
if errors.Is(err, repository.ErrNotFound) {
return status.Error(codes.NotFound, "not found")
}
if errors.Is(err, musicbrainz.ErrNotFound) {
return status.Error(codes.NotFound, "not found")
}
if errors.Is(err, musicbrainz.ErrRateLimited) {
return status.Error(codes.ResourceExhausted, "rate limited")
}
return status.Errorf(codes.Internal, "internal error: %v", err)
}