From de674376edcbd2880c1c75a61e81ee5c49a2feb1 Mon Sep 17 00:00:00 2001 From: Alexander Date: Tue, 28 Apr 2026 16:27:14 +0200 Subject: [PATCH] feat: initial implementation of metadata aggregator - gRPC service with MusicBrainz provider - PostgreSQL schema with migrations - Service layer with database-first caching - Repository pattern for data access - YAML configuration support - Research documentation for 17 music metadata projects --- README.md | 104 ++++++++++++++++++ cmd/server/main.go | 137 ++++++++++++++++++++++++ cmd/server/noop_repo.go | 66 ++++++++++++ internal/server/mapper.go | 161 ++++++++++++++++++++++++++++ internal/server/server.go | 218 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 686 insertions(+) create mode 100644 README.md create mode 100644 cmd/server/main.go create mode 100644 cmd/server/noop_repo.go create mode 100644 internal/server/mapper.go create mode 100644 internal/server/server.go diff --git a/README.md b/README.md new file mode 100644 index 0000000..b9ac289 --- /dev/null +++ b/README.md @@ -0,0 +1,104 @@ +# Metadata Aggregator + +gRPC service for aggregating music metadata from multiple providers with PostgreSQL caching. + +## Features + +- **Multi-provider support** - Currently MusicBrainz, extensible for Spotify, Discogs, etc. +- **Database-first caching** - Check local DB before hitting external APIs +- **Rate limiting** - Respects provider API limits (1 req/sec for MusicBrainz) +- **Provider-only mode** - Works without database for quick testing + +## Quick Start + +```bash +# Build +nix build .#server + +# Run (no database) +./result/bin/server + +# Run (with database) +./result/bin/server -config config.yaml +``` + +## Configuration + +```yaml +server: + port: 50051 + +database: + host: localhost + port: 5432 + user: metadata + password: metadata + name: metadata + sslmode: disable +``` + +Or via environment: `DATABASE_URL=postgres://user:pass@host:5432/db` + +## Database + +```bash +cd database +docker compose up -d +``` + +PostgreSQL 16 with `pg_prewarm` for automatic cache warming. + +## API + +```bash +# Search artists +grpcurl -plaintext -d '{"query": "Radiohead"}' localhost:50051 metadata.v1.MetadataService/SearchArtists + +# Get artist +grpcurl -plaintext -d '{"id": "a74b1b7f-71a5-4011-9441-d0b5e4122711"}' localhost:50051 metadata.v1.MetadataService/GetArtist + +# Get album +grpcurl -plaintext -d '{"id": "b1392450-e666-3926-a536-22c65f834433"}' localhost:50051 metadata.v1.MetadataService/GetAlbum + +# Get album tracks +grpcurl -plaintext -d '{"album_id": "b1392450-e666-3926-a536-22c65f834433"}' localhost:50051 metadata.v1.MetadataService/GetAlbumTracks +``` + +## Project Structure + +``` +├── cmd/server/ # Server entrypoint +├── internal/ +│ ├── config/ # YAML configuration +│ ├── domain/ # Domain types +│ ├── provider/ # External API clients +│ │ └── musicbrainz/ # MusicBrainz implementation +│ ├── repository/ # Database access +│ │ └── postgres/ # PostgreSQL implementation +│ ├── server/ # gRPC handlers +│ └── service/ # Business logic (DB-first caching) +├── pkg/gen/ # Generated protobuf code +├── proto/ # Proto definitions +├── database/ # Docker compose + migrations +└── tests/e2e/ # End-to-end tests +``` + +## Development + +```bash +# Enter dev shell +nix develop + +# Generate protobuf +buf generate + +# Run tests +go test ./... + +# Run e2e tests (requires network) +go test -v ./tests/e2e/... +``` + +## License + +MIT diff --git a/cmd/server/main.go b/cmd/server/main.go new file mode 100644 index 0000000..6677b35 --- /dev/null +++ b/cmd/server/main.go @@ -0,0 +1,137 @@ +package main + +import ( + "context" + "flag" + "fmt" + "log" + "net" + "os" + "os/signal" + "syscall" + "time" + + "github.com/jackc/pgx/v5/pgxpool" + "google.golang.org/grpc" + "google.golang.org/grpc/reflection" + + "github.com/metadata-agregator/internal/config" + "github.com/metadata-agregator/internal/provider/musicbrainz" + "github.com/metadata-agregator/internal/repository/postgres" + "github.com/metadata-agregator/internal/server" + "github.com/metadata-agregator/internal/service" + metadatav1 "github.com/metadata-agregator/pkg/gen/metadata/v1" +) + +func main() { + configPath := flag.String("config", "", "path to config file") + flag.Parse() + + cfg, err := config.Load(*configPath) + if err != nil { + log.Fatalf("failed to load config: %v", err) + } + + ctx := context.Background() + + services, cleanup := buildServices(ctx, cfg) + defer cleanup() + + addr := fmt.Sprintf(":%d", cfg.Server.Port) + + lis, err := net.Listen("tcp", addr) + if err != nil { + log.Fatalf("failed to listen: %v", err) + } + + grpcServer := grpc.NewServer() + metadatav1.RegisterMetadataServiceServer(grpcServer, server.NewMetadataServer(services)) + reflection.Register(grpcServer) + + go gracefulShutdown(grpcServer) + + log.Printf("gRPC server listening on %s", addr) + if err := grpcServer.Serve(lis); err != nil { + log.Fatalf("failed to serve: %v", err) + } +} + +func buildServices(ctx context.Context, cfg *config.Config) (map[metadatav1.Provider]*service.MetadataService, func()) { + mb := musicbrainz.New() + services := make(map[metadatav1.Provider]*service.MetadataService) + + dbURL := cfg.Database.DSN() + if dbURL == "" { + dbURL = os.Getenv("DATABASE_URL") + } + + if dbURL == "" { + log.Println("no database configured, running in provider-only mode") + services[metadatav1.Provider_PROVIDER_MUSICBRAINZ] = service.NewMetadataService( + &noopArtistRepo{}, + &noopAlbumRepo{}, + &noopTrackRepo{}, + mb, + ) + return services, func() {} + } + + pool, err := connectDB(ctx, dbURL) + if err != nil { + log.Printf("database connection failed: %v, running in provider-only mode", err) + services[metadatav1.Provider_PROVIDER_MUSICBRAINZ] = service.NewMetadataService( + &noopArtistRepo{}, + &noopAlbumRepo{}, + &noopTrackRepo{}, + mb, + ) + return services, func() {} + } + + artistRepo := postgres.NewArtistRepository(pool) + albumRepo := postgres.NewAlbumRepository(pool) + trackRepo := postgres.NewTrackRepository(pool) + + services[metadatav1.Provider_PROVIDER_MUSICBRAINZ] = service.NewMetadataService( + artistRepo, + albumRepo, + trackRepo, + mb, + ) + + log.Println("database connected, caching enabled") + return services, func() { pool.Close() } +} + +func connectDB(ctx context.Context, dbURL string) (*pgxpool.Pool, error) { + ctx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + + config, err := pgxpool.ParseConfig(dbURL) + if err != nil { + return nil, err + } + + config.MaxConns = 10 + config.MinConns = 2 + + pool, err := pgxpool.NewWithConfig(ctx, config) + if err != nil { + return nil, err + } + + if err := pool.Ping(ctx); err != nil { + pool.Close() + return nil, err + } + + return pool, nil +} + +func gracefulShutdown(server *grpc.Server) { + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) + <-sigCh + log.Println("shutting down...") + server.GracefulStop() +} diff --git a/cmd/server/noop_repo.go b/cmd/server/noop_repo.go new file mode 100644 index 0000000..86edca3 --- /dev/null +++ b/cmd/server/noop_repo.go @@ -0,0 +1,66 @@ +package main + +import ( + "context" + + "github.com/metadata-agregator/internal/domain" + "github.com/metadata-agregator/internal/repository" +) + +type noopArtistRepo struct{} + +func (r *noopArtistRepo) GetByID(ctx context.Context, id string) (*domain.Artist, error) { + return nil, repository.ErrNotFound +} + +func (r *noopArtistRepo) GetByExternalID(ctx context.Context, source, sourceID string) (*domain.Artist, error) { + return nil, repository.ErrNotFound +} + +func (r *noopArtistRepo) Search(ctx context.Context, query string, limit, offset int) (*domain.SearchResult[domain.Artist], error) { + return &domain.SearchResult[domain.Artist]{}, nil +} + +func (r *noopArtistRepo) Save(ctx context.Context, artist *domain.Artist) error { + return nil +} + +type noopAlbumRepo struct{} + +func (r *noopAlbumRepo) GetByID(ctx context.Context, id string) (*domain.Album, error) { + return nil, repository.ErrNotFound +} + +func (r *noopAlbumRepo) GetByExternalID(ctx context.Context, source, sourceID string) (*domain.Album, error) { + return nil, repository.ErrNotFound +} + +func (r *noopAlbumRepo) GetByArtistID(ctx context.Context, artistID string, limit, offset int) (*domain.SearchResult[domain.Album], error) { + return &domain.SearchResult[domain.Album]{}, nil +} + +func (r *noopAlbumRepo) Save(ctx context.Context, album *domain.Album) error { + return nil +} + +type noopTrackRepo struct{} + +func (r *noopTrackRepo) GetByID(ctx context.Context, id string) (*domain.Track, error) { + return nil, repository.ErrNotFound +} + +func (r *noopTrackRepo) GetByExternalID(ctx context.Context, source, sourceID string) (*domain.Track, error) { + return nil, repository.ErrNotFound +} + +func (r *noopTrackRepo) GetByISRC(ctx context.Context, isrc string) (*domain.Track, error) { + return nil, repository.ErrNotFound +} + +func (r *noopTrackRepo) GetByAlbumID(ctx context.Context, albumID string) ([]domain.Track, error) { + return nil, nil +} + +func (r *noopTrackRepo) Save(ctx context.Context, track *domain.Track) error { + return nil +} diff --git a/internal/server/mapper.go b/internal/server/mapper.go new file mode 100644 index 0000000..504bd0a --- /dev/null +++ b/internal/server/mapper.go @@ -0,0 +1,161 @@ +package server + +import ( + "github.com/metadata-agregator/internal/domain" + metadatav1 "github.com/metadata-agregator/pkg/gen/metadata/v1" +) + +func toProtoArtist(d *domain.Artist) *metadatav1.Artist { + if d == nil { + return nil + } + + a := &metadatav1.Artist{ + Id: d.ID, + Name: d.Name, + SortName: d.SortName, + ArtistType: d.Type, + Country: d.Country, + Description: d.Description, + ImageUrl: d.ImageURL, + } + + if d.FormedDate != nil { + a.FormedDate = d.FormedDate.Format("2006-01-02") + } + if d.DisbandedDate != nil { + a.DisbandedDate = d.DisbandedDate.Format("2006-01-02") + } + + for _, g := range d.Genres { + a.Genres = append(a.Genres, &metadatav1.Genre{ + Id: g.ID, + Name: g.Name, + }) + } + + for _, e := range d.ExternalIDs { + a.ExternalIds = append(a.ExternalIds, &metadatav1.ExternalID{ + Source: e.Source, + SourceId: e.SourceID, + Url: e.URL, + }) + } + + return a +} + +func toProtoAlbum(d *domain.Album) *metadatav1.Album { + if d == nil { + return nil + } + + a := &metadatav1.Album{ + Id: d.ID, + Title: d.Title, + AlbumType: d.Type, + Upc: d.UPC, + TotalTracks: int32(d.TotalTracks), + TotalDiscs: int32(d.TotalDiscs), + CoverUrl: d.CoverURL, + } + + if d.ReleaseDate != nil { + a.ReleaseDate = d.ReleaseDate.Format("2006-01-02") + } + + for _, ac := range d.Artists { + a.Artists = append(a.Artists, toProtoArtistCredit(&ac)) + } + + if d.Label != nil { + a.Label = &metadatav1.Label{ + Id: d.Label.ID, + Name: d.Label.Name, + Country: d.Label.Country, + } + } + + for _, g := range d.Genres { + a.Genres = append(a.Genres, &metadatav1.Genre{ + Id: g.ID, + Name: g.Name, + }) + } + + for _, e := range d.ExternalIDs { + a.ExternalIds = append(a.ExternalIds, &metadatav1.ExternalID{ + Source: e.Source, + SourceId: e.SourceID, + Url: e.URL, + }) + } + + return a +} + +func toProtoTrack(d *domain.Track) *metadatav1.Track { + if d == nil { + return nil + } + + t := &metadatav1.Track{ + Id: d.ID, + Title: d.Title, + DurationMs: int32(d.DurationMs), + Isrc: d.ISRC, + Explicit: d.Explicit, + DiscNumber: int32(d.DiscNumber), + TrackNumber: int32(d.TrackNumber), + } + + for _, ac := range d.Artists { + t.Artists = append(t.Artists, toProtoArtistCredit(&ac)) + } + + if d.Work != nil { + t.Work = toProtoWork(d.Work) + } + + for _, e := range d.ExternalIDs { + t.ExternalIds = append(t.ExternalIds, &metadatav1.ExternalID{ + Source: e.Source, + SourceId: e.SourceID, + Url: e.URL, + }) + } + + return t +} + +func toProtoWork(d *domain.Work) *metadatav1.Work { + if d == nil { + return nil + } + + w := &metadatav1.Work{ + Id: d.ID, + Title: d.Title, + WorkType: d.Type, + Language: d.Language, + } + + for _, c := range d.Composers { + w.Composers = append(w.Composers, toProtoArtistCredit(&c)) + } + + return w +} + +func toProtoArtistCredit(d *domain.ArtistCredit) *metadatav1.ArtistCredit { + if d == nil { + return nil + } + + return &metadatav1.ArtistCredit{ + Artist: toProtoArtist(&d.Artist), + Role: d.Role, + Position: int32(d.Position), + JoinPhrase: d.JoinPhrase, + } +} diff --git a/internal/server/server.go b/internal/server/server.go new file mode 100644 index 0000000..9c6f44e --- /dev/null +++ b/internal/server/server.go @@ -0,0 +1,218 @@ +package server + +import ( + "context" + "errors" + + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + + "github.com/metadata-agregator/internal/provider/musicbrainz" + "github.com/metadata-agregator/internal/repository" + "github.com/metadata-agregator/internal/service" + metadatav1 "github.com/metadata-agregator/pkg/gen/metadata/v1" +) + +type MetadataServer struct { + metadatav1.UnimplementedMetadataServiceServer + services map[metadatav1.Provider]*service.MetadataService +} + +func NewMetadataServer(services map[metadatav1.Provider]*service.MetadataService) *MetadataServer { + return &MetadataServer{services: services} +} + +func (s *MetadataServer) getService(p metadatav1.Provider) (*service.MetadataService, error) { + if p == metadatav1.Provider_PROVIDER_UNSPECIFIED { + p = metadatav1.Provider_PROVIDER_MUSICBRAINZ + } + + svc, ok := s.services[p] + if !ok { + return nil, status.Errorf(codes.InvalidArgument, "unknown provider: %v", p) + } + + return svc, nil +} + +func (s *MetadataServer) GetArtist(ctx context.Context, req *metadatav1.GetArtistRequest) (*metadatav1.Artist, error) { + svc, err := s.getService(req.Provider) + if err != nil { + return nil, err + } + + var id string + switch v := req.Identifier.(type) { + case *metadatav1.GetArtistRequest_Id: + id = v.Id + case *metadatav1.GetArtistRequest_External: + id = v.External.SourceId + default: + return nil, status.Error(codes.InvalidArgument, "identifier required") + } + + artist, err := svc.GetArtist(ctx, id) + if err != nil { + return nil, toGRPCError(err) + } + + return toProtoArtist(artist), nil +} + +func (s *MetadataServer) SearchArtists(ctx context.Context, req *metadatav1.SearchArtistsRequest) (*metadatav1.SearchArtistsResponse, error) { + svc, err := s.getService(req.Provider) + if err != nil { + return nil, err + } + + limit := int(req.Limit) + if limit <= 0 { + limit = 25 + } + + result, err := svc.SearchArtists(ctx, req.Query, limit, int(req.Offset)) + if err != nil { + return nil, toGRPCError(err) + } + + resp := &metadatav1.SearchArtistsResponse{ + Total: int32(result.Total), + } + + for _, a := range result.Items { + resp.Artists = append(resp.Artists, toProtoArtist(&a)) + } + + return resp, nil +} + +func (s *MetadataServer) GetAlbum(ctx context.Context, req *metadatav1.GetAlbumRequest) (*metadatav1.Album, error) { + svc, err := s.getService(req.Provider) + if err != nil { + return nil, err + } + + var id string + switch v := req.Identifier.(type) { + case *metadatav1.GetAlbumRequest_Id: + id = v.Id + case *metadatav1.GetAlbumRequest_External: + id = v.External.SourceId + default: + return nil, status.Error(codes.InvalidArgument, "identifier required") + } + + album, err := svc.GetAlbum(ctx, id) + if err != nil { + return nil, toGRPCError(err) + } + + return toProtoAlbum(album), nil +} + +func (s *MetadataServer) GetArtistAlbums(ctx context.Context, req *metadatav1.GetArtistAlbumsRequest) (*metadatav1.GetArtistAlbumsResponse, error) { + svc, err := s.getService(req.Provider) + if err != nil { + return nil, err + } + + limit := int(req.Limit) + if limit <= 0 { + limit = 25 + } + + result, err := svc.GetArtistAlbums(ctx, req.ArtistId, limit, int(req.Offset)) + if err != nil { + return nil, toGRPCError(err) + } + + resp := &metadatav1.GetArtistAlbumsResponse{ + Total: int32(result.Total), + } + + for _, a := range result.Items { + resp.Albums = append(resp.Albums, toProtoAlbum(&a)) + } + + return resp, nil +} + +func (s *MetadataServer) GetTrack(ctx context.Context, req *metadatav1.GetTrackRequest) (*metadatav1.Track, error) { + svc, err := s.getService(req.Provider) + if err != nil { + return nil, err + } + + var track *metadatav1.Track + + switch v := req.Identifier.(type) { + case *metadatav1.GetTrackRequest_Id: + t, err := svc.GetTrack(ctx, v.Id) + if err != nil { + return nil, toGRPCError(err) + } + track = toProtoTrack(t) + + case *metadatav1.GetTrackRequest_External: + t, err := svc.GetTrack(ctx, v.External.SourceId) + if err != nil { + return nil, toGRPCError(err) + } + track = toProtoTrack(t) + + case *metadatav1.GetTrackRequest_Isrc: + t, err := svc.GetTrackByISRC(ctx, v.Isrc) + if err != nil { + return nil, toGRPCError(err) + } + track = toProtoTrack(t) + + default: + return nil, status.Error(codes.InvalidArgument, "identifier required") + } + + return track, nil +} + +func (s *MetadataServer) GetAlbumTracks(ctx context.Context, req *metadatav1.GetAlbumTracksRequest) (*metadatav1.GetAlbumTracksResponse, error) { + svc, err := s.getService(req.Provider) + if err != nil { + return nil, err + } + + tracks, err := svc.GetAlbumTracks(ctx, req.AlbumId) + if err != nil { + return nil, toGRPCError(err) + } + + resp := &metadatav1.GetAlbumTracksResponse{} + for _, t := range tracks { + resp.Tracks = append(resp.Tracks, toProtoTrack(&t)) + } + + return resp, nil +} + +func (s *MetadataServer) SyncArtist(ctx context.Context, req *metadatav1.SyncArtistRequest) (*metadatav1.SyncArtistResponse, error) { + return nil, status.Error(codes.Unimplemented, "sync not yet implemented") +} + +func toGRPCError(err error) error { + if err == nil { + return nil + } + + if errors.Is(err, repository.ErrNotFound) { + return status.Error(codes.NotFound, "not found") + } + + if errors.Is(err, musicbrainz.ErrNotFound) { + return status.Error(codes.NotFound, "not found") + } + + if errors.Is(err, musicbrainz.ErrRateLimited) { + return status.Error(codes.ResourceExhausted, "rate limited") + } + + return status.Errorf(codes.Internal, "internal error: %v", err) +}