feat: initial implementation of metadata aggregator

- gRPC service with MusicBrainz provider
- PostgreSQL schema with migrations
- Service layer with database-first caching
- Repository pattern for data access
- YAML configuration support
- Research documentation for 17 music metadata projects
This commit is contained in:
Alexander
2026-04-28 16:27:14 +02:00
commit a1f6701bac
163 changed files with 95884 additions and 0 deletions
+127
View File
@@ -0,0 +1,127 @@
package musicbrainz
import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"
	"time"

	"golang.org/x/time/rate"
)
const (
	// baseURL is the prefix every request path is appended to.
	baseURL = "https://musicbrainz.org/ws/2"
	// userAgent is sent as the User-Agent header on every request.
	userAgent = "MetadataAggregator/0.1.0 (https://github.com/metadata-agregator)"
)
// client issues HTTP requests to the MusicBrainz web service.
type client struct {
	// http executes the outgoing requests.
	http *http.Client
	// limiter is waited on before each request is built and sent.
	limiter *rate.Limiter
}
// newClient returns a client whose HTTP requests time out after 30 seconds
// and whose limiter is configured for one event per second with a burst of
// one.
func newClient() *client {
	httpClient := &http.Client{Timeout: 30 * time.Second}
	perSecond := rate.NewLimiter(rate.Every(time.Second), 1)

	return &client{http: httpClient, limiter: perSecond}
}
// get performs a rate-limited GET against the MusicBrainz web service and
// returns the raw response body.
//
// endpoint is the path below baseURL and is used verbatim, so callers must
// escape any untrusted path segment themselves. params holds the query
// parameters; it may be nil and is never modified. "fmt=json" is always
// added to the query.
//
// A 404 response yields ErrNotFound and a 503 response yields
// ErrRateLimited. Any other non-200 status yields an error carrying the
// status code and the beginning of the response body.
func (c *client) get(ctx context.Context, endpoint string, params url.Values) ([]byte, error) {
	// Upper bound on how much of an error response is copied into the error
	// text, so a huge error page cannot balloon memory or logs.
	const maxErrorBody = 4 << 10

	if err := c.limiter.Wait(ctx); err != nil {
		return nil, fmt.Errorf("rate limiter: %w", err)
	}

	// Work on a copy so the caller's map is not changed as a side effect.
	query := make(url.Values, len(params)+1)
	for key, values := range params {
		query[key] = values
	}
	query.Set("fmt", "json")

	reqURL := fmt.Sprintf("%s/%s?%s", baseURL, endpoint, query.Encode())
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil)
	if err != nil {
		return nil, fmt.Errorf("create request: %w", err)
	}
	req.Header.Set("User-Agent", userAgent)
	req.Header.Set("Accept", "application/json")

	resp, err := c.http.Do(req)
	if err != nil {
		return nil, fmt.Errorf("do request: %w", err)
	}
	defer resp.Body.Close()

	switch resp.StatusCode {
	case http.StatusOK:
		body, err := io.ReadAll(resp.Body)
		if err != nil {
			return nil, fmt.Errorf("read response: %w", err)
		}
		return body, nil
	case http.StatusNotFound:
		return nil, ErrNotFound
	case http.StatusServiceUnavailable:
		return nil, ErrRateLimited
	default:
		// Best effort: the body only adds detail to the message, so a failed
		// or truncated read does not deserve a different error.
		body, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBody))
		return nil, fmt.Errorf("unexpected status %d: %s", resp.StatusCode, body)
	}
}
// lookup fetches a single entity by ID via GET <entity>/<id>.
//
// inc lists the sub-resources to include; the values are joined with "+"
// and sent as the "inc" query parameter. When inc is empty the parameter is
// omitted. The raw body, or the error from get, is returned.
func (c *client) lookup(ctx context.Context, entity, id string, inc []string) ([]byte, error) {
	params := url.Values{}
	if len(inc) > 0 {
		params.Set("inc", strings.Join(inc, "+"))
	}

	// id is supplied by the provider's callers; escape it so characters such
	// as "/" or "?" cannot alter the request path or query string.
	return c.get(ctx, entity+"/"+url.PathEscape(id), params)
}
// browse lists entities of type entity that are linked to another entity,
// via GET <entity>?<linkedEntity>=<linkedID>.
//
// limit and offset are sent as the "limit" and "offset" query parameters.
// inc lists the sub-resources to include; the values are joined with "+"
// and sent as "inc", which is omitted when inc is empty. The raw body, or
// the error from get, is returned.
func (c *client) browse(ctx context.Context, entity, linkedEntity, linkedID string, limit, offset int, inc []string) ([]byte, error) {
	params := url.Values{}
	params.Set(linkedEntity, linkedID)
	params.Set("limit", fmt.Sprintf("%d", limit))
	params.Set("offset", fmt.Sprintf("%d", offset))
	if len(inc) > 0 {
		params.Set("inc", strings.Join(inc, "+"))
	}

	return c.get(ctx, entity, params)
}
// search queries the index for entity, sending query, limit and offset as
// the "query", "limit" and "offset" parameters. The raw body, or the error
// from get, is returned.
func (c *client) search(ctx context.Context, entity, query string, limit, offset int) ([]byte, error) {
	values := url.Values{
		"query":  {query},
		"limit":  {fmt.Sprintf("%d", limit)},
		"offset": {fmt.Sprintf("%d", offset)},
	}

	return c.get(ctx, entity, values)
}
// decode unmarshals JSON data into a freshly allocated T and returns a
// pointer to it. On failure it returns nil and the json error wrapped with
// a "decode: " prefix.
func decode[T any](data []byte) (*T, error) {
	out := new(T)

	err := json.Unmarshal(data, out)
	if err != nil {
		return nil, fmt.Errorf("decode: %w", err)
	}

	return out, nil
}
+8
View File
@@ -0,0 +1,8 @@
package musicbrainz
import "errors"
var (
	// ErrNotFound is returned when the service answers 404 or when a
	// browse/ISRC response contains no entries.
	ErrNotFound = errors.New("not found")
	// ErrRateLimited is returned when the service answers 503.
	ErrRateLimited = errors.New("rate limited")
)
+212
View File
@@ -0,0 +1,212 @@
package musicbrainz
import (
"fmt"
"time"
"github.com/metadata-agregator/internal/domain"
)
// mapArtist converts a decoded MusicBrainz artist into a domain.Artist.
//
// A nil input yields nil. The disambiguation text becomes the description,
// life-span dates that parse become the formed/disbanded dates, and the
// first "image" relation that carries a URL supplies the image URL. The
// result always holds one external ID pointing back at MusicBrainz.
func mapArtist(mb *mbArtist) *domain.Artist {
	if mb == nil {
		return nil
	}

	self := domain.ExternalID{
		Source:   "musicbrainz",
		SourceID: mb.ID,
		URL:      fmt.Sprintf("https://musicbrainz.org/artist/%s", mb.ID),
	}
	out := &domain.Artist{
		ID:          mb.ID,
		Name:        mb.Name,
		SortName:    mb.SortName,
		Type:        mb.Type,
		Country:     mb.Country,
		Description: mb.Disambiguation,
		ExternalIDs: []domain.ExternalID{self},
	}

	// Dates that are missing or fail to parse leave the field untouched.
	if begin := mb.LifeSpan.Begin; begin != "" {
		if formed := parseDate(begin); formed != nil {
			out.FormedDate = formed
		}
	}
	if end := mb.LifeSpan.End; end != "" {
		if disbanded := parseDate(end); disbanded != nil {
			out.DisbandedDate = disbanded
		}
	}

	for i := range mb.Genres {
		genre := &mb.Genres[i]
		out.Genres = append(out.Genres, domain.Genre{ID: genre.ID, Name: genre.Name})
	}

	for i := range mb.Relations {
		rel := &mb.Relations[i]
		if rel.Type != "image" || rel.URL == nil {
			continue
		}
		out.ImageURL = rel.URL.Resource
		break
	}

	return out
}
// mapAlbum converts a MusicBrainz release group into a domain.Album.
//
// A nil release group yields nil. release is optional: when non-nil it
// contributes the barcode (as UPC), the first label entry, the track and
// disc totals, and a Cover Art Archive front-cover URL if the release
// reports having one.
func mapAlbum(mb *mbReleaseGroup, release *mbRelease) *domain.Album {
	if mb == nil {
		return nil
	}

	self := domain.ExternalID{
		Source:   "musicbrainz",
		SourceID: mb.ID,
		URL:      fmt.Sprintf("https://musicbrainz.org/release-group/%s", mb.ID),
	}
	out := &domain.Album{
		ID:          mb.ID,
		Title:       mb.Title,
		Type:        mb.PrimaryType,
		ExternalIDs: []domain.ExternalID{self},
	}

	if first := mb.FirstReleaseDate; first != "" {
		out.ReleaseDate = parseDate(first)
	}

	for i := range mb.ArtistCredit {
		out.Artists = append(out.Artists, mapArtistCredit(&mb.ArtistCredit[i], "primary"))
	}
	for i := range mb.Genres {
		genre := &mb.Genres[i]
		out.Genres = append(out.Genres, domain.Genre{ID: genre.ID, Name: genre.Name})
	}

	// Everything below needs a concrete release.
	if release == nil {
		return out
	}

	out.UPC = release.Barcode
	if info := release.LabelInfo; len(info) > 0 && info[0].Label != nil {
		out.Label = mapLabel(info[0].Label)
	}
	for i := range release.Media {
		out.TotalTracks += release.Media[i].TrackCount
	}
	out.TotalDiscs = len(release.Media)
	if release.CoverArtArchive.Front {
		out.CoverURL = fmt.Sprintf("https://coverartarchive.org/release/%s/front", release.ID)
	}

	return out
}
// mapTrack converts a MusicBrainz recording into a domain.Track placed at
// the given disc and track number.
//
// A nil recording yields nil. The first ISRC, if any, is used, and the
// first relation that targets a work and carries one is mapped into the
// track's Work.
func mapTrack(mb *mbRecording, discNum, trackNum int) *domain.Track {
	if mb == nil {
		return nil
	}

	self := domain.ExternalID{
		Source:   "musicbrainz",
		SourceID: mb.ID,
		URL:      fmt.Sprintf("https://musicbrainz.org/recording/%s", mb.ID),
	}
	out := &domain.Track{
		ID:          mb.ID,
		Title:       mb.Title,
		DurationMs:  mb.Length,
		DiscNumber:  discNum,
		TrackNumber: trackNum,
		ExternalIDs: []domain.ExternalID{self},
	}

	if isrcs := mb.ISRCs; len(isrcs) > 0 {
		out.ISRC = isrcs[0]
	}

	for i := range mb.ArtistCredit {
		out.Artists = append(out.Artists, mapArtistCredit(&mb.ArtistCredit[i], "primary"))
	}

	for i := range mb.Relations {
		rel := &mb.Relations[i]
		if rel.TargetType != "work" || rel.Work == nil {
			continue
		}
		out.Work = mapWork(rel.Work)
		break
	}

	return out
}
// mapWork converts a MusicBrainz work into a domain.Work.
//
// A nil work yields nil. Every relation that targets an artist and carries
// one is added to Composers; the relation type is kept as the role when it
// is "composer", "lyricist" or "writer", and any other type is recorded as
// "writer".
func mapWork(mb *mbWork) *domain.Work {
	if mb == nil {
		return nil
	}

	out := &domain.Work{
		ID:       mb.ID,
		Title:    mb.Title,
		Type:     mb.Type,
		Language: mb.Language,
	}

	for i := range mb.Relations {
		rel := &mb.Relations[i]
		if rel.TargetType != "artist" || rel.Artist == nil {
			continue
		}

		role := "writer"
		switch rel.Type {
		case "composer", "lyricist", "writer":
			role = rel.Type
		}

		out.Composers = append(out.Composers, domain.ArtistCredit{
			Artist: *mapArtist(rel.Artist),
			Role:   role,
		})
	}

	return out
}
// mapLabel converts a MusicBrainz label into a domain.Label, copying its
// ID, name and country. A nil label yields nil.
func mapLabel(mb *mbLabel) *domain.Label {
	if mb == nil {
		return nil
	}

	out := domain.Label{
		ID:      mb.ID,
		Name:    mb.Name,
		Country: mb.Country,
	}
	return &out
}
// mapArtistCredit converts one artist-credit entry into a
// domain.ArtistCredit with the given role and the entry's join phrase. The
// Artist field is filled only when the entry carries an artist. ac must not
// be nil.
func mapArtistCredit(ac *mbArtistCredit, defaultRole string) domain.ArtistCredit {
	var credit domain.ArtistCredit
	credit.Role = defaultRole
	credit.JoinPhrase = ac.JoinPhrase

	// mapArtist returns nil exactly when the credit has no artist attached.
	if artist := mapArtist(ac.Artist); artist != nil {
		credit.Artist = *artist
	}

	return credit
}
// parseDate parses a date given as "YYYY-MM-DD", "YYYY-MM" or "YYYY",
// trying the layouts in that order. It returns nil when s matches none of
// them.
func parseDate(s string) *time.Time {
	for _, layout := range [...]string{"2006-01-02", "2006-01", "2006"} {
		parsed, err := time.Parse(layout, s)
		if err != nil {
			continue
		}
		return &parsed
	}

	return nil
}
+282
View File
@@ -0,0 +1,282 @@
package musicbrainz
import (
"context"
"encoding/json"
"fmt"
"strings"
"github.com/metadata-agregator/internal/domain"
)
// Provider answers metadata lookups by calling the MusicBrainz web service.
type Provider struct {
	// client performs the HTTP requests behind every method.
	client *client
}
// New returns a Provider backed by a freshly created client.
func New() *Provider {
	p := new(Provider)
	p.client = newClient()
	return p
}
// Name returns the identifier of this provider, "musicbrainz".
func (p *Provider) Name() string {
	const name = "musicbrainz"
	return name
}
// GetArtist looks up the artist with the given ID, requesting its genres
// and URL relations, and returns it mapped to the domain model.
func (p *Provider) GetArtist(ctx context.Context, id string) (*domain.Artist, error) {
	includes := []string{"genres", "url-rels"}

	raw, err := p.client.lookup(ctx, "artist", id, includes)
	if err != nil {
		return nil, fmt.Errorf("lookup artist: %w", err)
	}

	parsed, err := decode[mbArtist](raw)
	if err != nil {
		return nil, err
	}

	return mapArtist(parsed), nil
}
// SearchArtists runs an artist search for query.
//
// A limit outside 1..100 is replaced by 25. The query is passed through
// escapeQuery and sent as "artist:<escaped>". The result reports the
// response's total count together with the effective limit and the offset
// that was requested.
func (p *Provider) SearchArtists(ctx context.Context, query string, limit, offset int) (*domain.SearchResult[domain.Artist], error) {
	if limit < 1 || limit > 100 {
		limit = 25
	}

	lucene := "artist:" + escapeQuery(query)
	raw, err := p.client.search(ctx, "artist", lucene, limit, offset)
	if err != nil {
		return nil, fmt.Errorf("search artists: %w", err)
	}

	var page struct {
		Count   int         `json:"count"`
		Offset  int         `json:"offset"`
		Artists []*mbArtist `json:"artists"`
	}
	if err := decodeInto(raw, &page); err != nil {
		return nil, err
	}

	found := &domain.SearchResult[domain.Artist]{
		Total:  page.Count,
		Limit:  limit,
		Offset: offset,
	}
	for _, entry := range page.Artists {
		mapped := mapArtist(entry)
		if mapped == nil {
			continue
		}
		found.Items = append(found.Items, *mapped)
	}

	return found, nil
}
// GetAlbum looks up the release group with the given ID, requesting its
// releases, artist credits and genres. When the response lists releases,
// the one chosen by selectCanonicalRelease enriches the mapped album.
func (p *Provider) GetAlbum(ctx context.Context, id string) (*domain.Album, error) {
	includes := []string{"releases", "artist-credits", "genres"}

	raw, err := p.client.lookup(ctx, "release-group", id, includes)
	if err != nil {
		return nil, fmt.Errorf("lookup release-group: %w", err)
	}

	group, err := decode[mbReleaseGroup](raw)
	if err != nil {
		return nil, err
	}

	var canonical *mbRelease
	if len(group.Releases) != 0 {
		canonical = selectCanonicalRelease(group.Releases)
	}

	return mapAlbum(group, canonical), nil
}
// GetArtistAlbums browses the release groups linked to artistID,
// requesting artist credits.
//
// A limit outside 1..100 is replaced by 25. The result reports the total
// and offset from the response together with the effective limit. Albums
// are mapped without release-level details.
func (p *Provider) GetArtistAlbums(ctx context.Context, artistID string, limit, offset int) (*domain.SearchResult[domain.Album], error) {
	if limit < 1 || limit > 100 {
		limit = 25
	}

	raw, err := p.client.browse(ctx, "release-group", "artist", artistID, limit, offset, []string{"artist-credits"})
	if err != nil {
		return nil, fmt.Errorf("browse release-groups: %w", err)
	}

	var page struct {
		ReleaseGroupCount  int               `json:"release-group-count"`
		ReleaseGroupOffset int               `json:"release-group-offset"`
		ReleaseGroups      []*mbReleaseGroup `json:"release-groups"`
	}
	if err := decodeInto(raw, &page); err != nil {
		return nil, err
	}

	found := &domain.SearchResult[domain.Album]{
		Total:  page.ReleaseGroupCount,
		Limit:  limit,
		Offset: page.ReleaseGroupOffset,
	}
	for _, group := range page.ReleaseGroups {
		mapped := mapAlbum(group, nil)
		if mapped == nil {
			continue
		}
		found.Items = append(found.Items, *mapped)
	}

	return found, nil
}
// GetTrack looks up the recording with the given ID, requesting artist
// credits, ISRCs and work relations. The mapped track has disc and track
// number zero because a bare recording has no position on a release.
func (p *Provider) GetTrack(ctx context.Context, id string) (*domain.Track, error) {
	includes := []string{"artist-credits", "isrcs", "work-rels"}

	raw, err := p.client.lookup(ctx, "recording", id, includes)
	if err != nil {
		return nil, fmt.Errorf("lookup recording: %w", err)
	}

	recording, err := decode[mbRecording](raw)
	if err != nil {
		return nil, err
	}

	return mapTrack(recording, 0, 0), nil
}
// GetAlbumTracks returns the tracks of the release chosen to represent the
// release group albumID.
//
// It makes two requests: a browse of up to 100 releases in the group, then
// a lookup of the release picked by selectCanonicalRelease with its
// recordings, artist credits and ISRCs. ErrNotFound is returned when the
// group has no releases. Tracks are returned medium by medium, numbered
// with the medium and track positions.
func (p *Provider) GetAlbumTracks(ctx context.Context, albumID string) ([]domain.Track, error) {
	listing, err := p.client.browse(ctx, "release", "release-group", albumID, 100, 0, nil)
	if err != nil {
		return nil, fmt.Errorf("browse releases: %w", err)
	}

	var page struct {
		Releases []*mbRelease `json:"releases"`
	}
	if err := decodeInto(listing, &page); err != nil {
		return nil, err
	}
	if len(page.Releases) == 0 {
		return nil, ErrNotFound
	}

	chosen := selectCanonicalRelease(page.Releases)
	detail, err := p.client.lookup(ctx, "release", chosen.ID, []string{"recordings", "artist-credits", "isrcs"})
	if err != nil {
		return nil, fmt.Errorf("lookup release: %w", err)
	}

	full, err := decode[mbRelease](detail)
	if err != nil {
		return nil, err
	}

	var out []domain.Track
	for m := range full.Media {
		medium := &full.Media[m]
		for i := range medium.Tracks {
			entry := &medium.Tracks[i]
			mapped := mapTrack(&entry.Recording, medium.Position, entry.Position)
			if mapped == nil {
				continue
			}
			out = append(out, *mapped)
		}
	}

	return out, nil
}
// GetTrackByISRC resolves an ISRC to a track. It requests "isrc/<isrc>",
// returns ErrNotFound when no recording is listed, and otherwise fetches
// the first listed recording through GetTrack.
func (p *Provider) GetTrackByISRC(ctx context.Context, isrc string) (*domain.Track, error) {
	raw, err := p.client.get(ctx, "isrc/"+isrc, nil)
	if err != nil {
		return nil, fmt.Errorf("lookup isrc: %w", err)
	}

	var page struct {
		Recordings []*mbRecording `json:"recordings"`
	}
	if err := decodeInto(raw, &page); err != nil {
		return nil, err
	}

	matches := page.Recordings
	if len(matches) == 0 {
		return nil, ErrNotFound
	}

	return p.GetTrack(ctx, matches[0].ID)
}
// GetLabel looks up the label with the given ID, without any includes, and
// returns it mapped to the domain model.
func (p *Provider) GetLabel(ctx context.Context, id string) (*domain.Label, error) {
	raw, err := p.client.lookup(ctx, "label", id, nil)
	if err != nil {
		return nil, fmt.Errorf("lookup label: %w", err)
	}

	label, err := decode[mbLabel](raw)
	if err != nil {
		return nil, err
	}

	return mapLabel(label), nil
}
// GetWork looks up the work with the given ID, requesting its artist
// relations, and returns it mapped to the domain model.
func (p *Provider) GetWork(ctx context.Context, id string) (*domain.Work, error) {
	includes := []string{"artist-rels"}

	raw, err := p.client.lookup(ctx, "work", id, includes)
	if err != nil {
		return nil, fmt.Errorf("lookup work: %w", err)
	}

	work, err := decode[mbWork](raw)
	if err != nil {
		return nil, err
	}

	return mapWork(work), nil
}
// selectCanonicalRelease picks the release that best represents a release
// group, or nil when releases is empty.
//
// Each release is scored: status "Official" adds 100 and "Promotion" adds
// 50; a first medium in "Digital Media" format adds 20 and "CD" adds 15; a
// non-empty barcode adds 5. The highest score wins and the earliest release
// wins a tie.
func selectCanonicalRelease(releases []*mbRelease) *mbRelease {
	var winner *mbRelease
	top := -1 // below any real score, so the first release always qualifies

	for _, candidate := range releases {
		points := 0

		if candidate.Status == "Official" {
			points += 100
		} else if candidate.Status == "Promotion" {
			points += 50
		}

		if len(candidate.Media) > 0 {
			format := candidate.Media[0].Format
			if format == "Digital Media" {
				points += 20
			} else if format == "CD" {
				points += 15
			}
		}

		if candidate.Barcode != "" {
			points += 5
		}

		// Strictly greater keeps the earlier release on equal scores.
		if points > top {
			top, winner = points, candidate
		}
	}

	return winner
}
// escapeQuery turns free text into a double-quoted search phrase in which
// every Lucene special character is preceded by one backslash.
//
// The text is escaped in a single pass. Replacing the characters one after
// another with backslash handled last re-escapes the backslashes that the
// earlier replacements inserted, turning "a+b" into `"a\\+b"`, which
// searches for a literal backslash. An empty input yields `""`.
func escapeQuery(s string) string {
	const special = `+-&|!(){}[]^"~*?:/\`

	var b strings.Builder
	b.Grow(len(s) + 2)
	b.WriteByte('"')
	// Every special character is ASCII, so scanning bytes is safe for UTF-8
	// input: an ASCII byte never occurs inside a multi-byte sequence.
	for i := 0; i < len(s); i++ {
		c := s[i]
		if strings.IndexByte(special, c) >= 0 {
			b.WriteByte('\\')
		}
		b.WriteByte(c)
	}
	b.WriteByte('"')

	return b.String()
}
// decodeInto unmarshals JSON data into v, which must be a non-nil pointer.
// On failure it returns the json error wrapped with a "decode: " prefix,
// the same form decode uses, so every decoding failure in this package
// reads alike while errors.Is and errors.As still reach the cause.
func decodeInto(data []byte, v any) error {
	if err := json.Unmarshal(data, v); err != nil {
		return fmt.Errorf("decode: %w", err)
	}
	return nil
}
+138
View File
@@ -0,0 +1,138 @@
package musicbrainz
// mbArtist is the JSON shape of an artist in MusicBrainz responses.
// mapArtist converts it to the domain model.
type mbArtist struct {
	ID string `json:"id"`
	Name string `json:"name"`
	SortName string `json:"sort-name"`
	Type string `json:"type"`
	Country string `json:"country"`
	// Disambiguation becomes the domain artist's description.
	Disambiguation string `json:"disambiguation"`
	LifeSpan mbLifeSpan `json:"life-span"`
	Genres []mbGenre `json:"genres"`
	// Relations is searched for the first "image" relation with a URL.
	Relations []mbRelation `json:"relations"`
}
// mbLifeSpan is the "life-span" object of an artist. Begin and End are date
// strings that mapArtist hands to parseDate.
type mbLifeSpan struct {
	Begin string `json:"begin"`
	End string `json:"end"`
	Ended bool `json:"ended"`
}
// mbReleaseGroup is the JSON shape of a release group in MusicBrainz
// responses. mapAlbum converts it to the domain album.
type mbReleaseGroup struct {
	ID string `json:"id"`
	Title string `json:"title"`
	PrimaryType string `json:"primary-type"`
	// FirstReleaseDate is a date string parsed with parseDate.
	FirstReleaseDate string `json:"first-release-date"`
	ArtistCredit []mbArtistCredit `json:"artist-credit"`
	Genres []mbGenre `json:"genres"`
	// Releases is what GetAlbum passes to selectCanonicalRelease.
	Releases []*mbRelease `json:"releases"`
}
// mbRelease is the JSON shape of a release in MusicBrainz responses.
type mbRelease struct {
	ID string `json:"id"`
	Title string `json:"title"`
	// Status feeds the score in selectCanonicalRelease.
	Status string `json:"status"`
	Date string `json:"date"`
	Country string `json:"country"`
	// Barcode is copied into the album's UPC by mapAlbum.
	Barcode string `json:"barcode"`
	LabelInfo []mbLabelInfo `json:"label-info"`
	Media []mbMedium `json:"media"`
	ReleaseGroup *mbReleaseGroup `json:"release-group"`
	ArtistCredit []mbArtistCredit `json:"artist-credit"`
	CoverArtArchive mbCoverArtArchive `json:"cover-art-archive"`
}
// mbCoverArtArchive is the "cover-art-archive" object of a release.
type mbCoverArtArchive struct {
	Artwork bool `json:"artwork"`
	// Front gates whether mapAlbum sets a front-cover URL.
	Front bool `json:"front"`
	Back bool `json:"back"`
}
// mbLabelInfo is one entry of a release's "label-info" list.
type mbLabelInfo struct {
	CatalogNumber string `json:"catalog-number"`
	// Label may be nil; mapAlbum checks for that before mapping it.
	Label *mbLabel `json:"label"`
}
// mbLabel is the JSON shape of a label in MusicBrainz responses. mapLabel
// converts it to the domain model.
type mbLabel struct {
	ID string `json:"id"`
	Name string `json:"name"`
	Country string `json:"country"`
}
// mbMedium is one entry of a release's "media" list.
type mbMedium struct {
	// Position is used as the disc number of the medium's tracks.
	Position int `json:"position"`
	// Format of the first medium feeds selectCanonicalRelease's score.
	Format string `json:"format"`
	// TrackCount is summed over all media for the album's track total.
	TrackCount int `json:"track-count"`
	Tracks []mbTrack `json:"tracks"`
}
// mbTrack is one entry of a medium's "tracks" list.
type mbTrack struct {
	ID string `json:"id"`
	Number string `json:"number"`
	Title string `json:"title"`
	Length int `json:"length"`
	// Position is used as the track number by GetAlbumTracks.
	Position int `json:"position"`
	// Recording is what GetAlbumTracks maps into the domain track.
	Recording mbRecording `json:"recording"`
}
// mbRecording is the JSON shape of a recording in MusicBrainz responses.
// mapTrack converts it to the domain track.
type mbRecording struct {
	ID string `json:"id"`
	Title string `json:"title"`
	// Length is copied into the domain track's DurationMs.
	Length int `json:"length"`
	// ISRCs: only the first entry is used by mapTrack.
	ISRCs []string `json:"isrcs"`
	ArtistCredit []mbArtistCredit `json:"artist-credit"`
	// Relations is searched for the first relation targeting a work.
	Relations []mbRelation `json:"relations"`
}
// mbWork is the JSON shape of a work in MusicBrainz responses. mapWork
// converts it to the domain model.
type mbWork struct {
	ID string `json:"id"`
	Title string `json:"title"`
	Type string `json:"type"`
	Language string `json:"language"`
	ISWCs []string `json:"iswcs"`
	// Relations: artist relations become the work's composers in mapWork.
	Relations []mbRelation `json:"relations"`
}
// mbArtistCredit is one entry of an "artist-credit" list.
type mbArtistCredit struct {
	Name string `json:"name"`
	JoinPhrase string `json:"joinphrase"`
	// Artist may be nil; mapArtistCredit checks for that.
	Artist *mbArtist `json:"artist"`
}
// mbGenre is one entry of a "genres" list. The mappers copy ID and Name
// into the domain genre.
type mbGenre struct {
	ID string `json:"id"`
	Name string `json:"name"`
	Count int `json:"count"`
}
// mbRelation is one entry of a "relations" list. URL, Artist and Work are
// optional pointers, and the mappers check each for nil before use.
type mbRelation struct {
	// Type is the relation kind, e.g. "image" or "composer" as matched by
	// the mappers.
	Type string `json:"type"`
	TypeID string `json:"type-id"`
	Direction string `json:"direction"`
	// TargetType is matched against "work" and "artist" by the mappers.
	TargetType string `json:"target-type"`
	URL *mbURL `json:"url"`
	Artist *mbArtist `json:"artist"`
	Work *mbWork `json:"work"`
	Attributes []string `json:"attributes"`
}
// mbURL is the "url" object of a relation.
type mbURL struct {
	ID string `json:"id"`
	// Resource is used as the artist image URL by mapArtist.
	Resource string `json:"resource"`
}
// mbSearchResponse is a generic envelope for a search response whose
// results arrive under the "artists" key.
// NOTE(review): no use of this type is visible in this section; the
// provider's SearchArtists decodes into an anonymous struct instead.
type mbSearchResponse[T any] struct {
	Created string `json:"created"`
	Count int `json:"count"`
	Offset int `json:"offset"`
	Artists []T `json:"artists,omitempty"`
}
// mbBrowseResponse is a generic envelope for a browse response carrying
// either release groups or releases, each with its own count and offset.
// NOTE(review): no use of this type is visible in this section; the
// provider's browse callers decode into anonymous structs instead.
type mbBrowseResponse[T any] struct {
	ReleaseGroupCount int `json:"release-group-count"`
	ReleaseGroupOffset int `json:"release-group-offset"`
	ReleaseGroups []T `json:"release-groups,omitempty"`
	ReleaseCount int `json:"release-count"`
	ReleaseOffset int `json:"release-offset"`
	Releases []T `json:"releases,omitempty"`
}
+25
View File
@@ -0,0 +1,25 @@
package provider
import (
"context"
"github.com/metadata-agregator/internal/domain"
)
// Provider is the set of lookups a metadata source offers. Every method
// that fetches data takes a context and returns domain types.
type Provider interface {
	// Name returns the provider's identifier.
	Name() string
	// GetArtist returns the artist with the given ID.
	GetArtist(ctx context.Context, id string) (*domain.Artist, error)
	// SearchArtists returns one page of artists matching query.
	SearchArtists(ctx context.Context, query string, limit, offset int) (*domain.SearchResult[domain.Artist], error)
	// GetAlbum returns the album with the given ID.
	GetAlbum(ctx context.Context, id string) (*domain.Album, error)
	// GetArtistAlbums returns one page of the albums of artistID.
	GetArtistAlbums(ctx context.Context, artistID string, limit, offset int) (*domain.SearchResult[domain.Album], error)
	// GetTrack returns the track with the given ID.
	GetTrack(ctx context.Context, id string) (*domain.Track, error)
	// GetAlbumTracks returns the tracks of albumID.
	GetAlbumTracks(ctx context.Context, albumID string) ([]domain.Track, error)
	// GetTrackByISRC returns the track identified by isrc.
	GetTrackByISRC(ctx context.Context, isrc string) (*domain.Track, error)
	// GetLabel returns the label with the given ID.
	GetLabel(ctx context.Context, id string) (*domain.Label, error)
	// GetWork returns the work with the given ID.
	GetWork(ctx context.Context, id string) (*domain.Work, error)
}