Files
metadata-agregator/docs/research/bedrock-api/analysis/INTEGRATIONS.md
T
Alexander a1f6701bac feat: initial implementation of metadata aggregator
- gRPC service with MusicBrainz provider
- PostgreSQL schema with migrations
- Service layer with database-first caching
- Repository pattern for data access
- YAML configuration support
- Research documentation for 17 music metadata projects
2026-04-28 16:28:53 +02:00

38 KiB

Bedrock-API Platform Integrations

Integration Overview

Platform Status API Type Auth Method Streaming Special Features
Spotify Full Partner API OAuth 2.0 No Full discography, high-quality metadata
SoundCloud Full api-v2 Client ID Yes Progressive MP3, batch hydration, /resolve
Deezer Full Public API None No Concurrent fetching, no auth required
YouTube Music Full Innertube Cookies Yes 7-client fallback, itag priority, WEB_REMIX
Yandex Music Stub N/A N/A No Placeholder only
VK Music Stub N/A N/A No Placeholder only

Active Integrations: 4
Stub Integrations: 2

Spotify Integration

API Details

File: providers/spotify.go
Library: spotapi-go (submodule wrapping zmb3/spotify/v2)
API Type: Spotify Partner API (not Web API)
Authentication: OAuth 2.0 Client Credentials flow

Authentication

Environment Variables:

SPOTIFY_CLIENT_ID=your_client_id
SPOTIFY_CLIENT_SECRET=your_client_secret

OAuth Flow:

// NewSpotifyProvider builds a SpotifyProvider from the SPOTIFY_CLIENT_ID and
// SPOTIFY_CLIENT_SECRET environment variables.
//
// It returns nil (after logging the reason) when either variable is empty or
// when obtaining the initial token fails, so callers must nil-check the result.
func NewSpotifyProvider() *SpotifyProvider {
    id, secret := os.Getenv("SPOTIFY_CLIENT_ID"), os.Getenv("SPOTIFY_CLIENT_SECRET")
    if id == "" || secret == "" {
        log.Println("[spotify] Credentials not configured, provider disabled")
        return nil
    }

    authenticator := spotifyauth.New(
        spotifyauth.WithClientID(id),
        spotifyauth.WithClientSecret(secret),
    )

    // The same background context is used for the token request and for the
    // HTTP client derived from it.
    background := context.Background()
    tok, err := authenticator.Token(background)
    if err != nil {
        log.Printf("[spotify] Auth failed: %v", err)
        return nil
    }

    return &SpotifyProvider{
        client: spotify.New(authenticator.Client(background, tok)),
        auth:   authenticator,
    }
}

Token Refresh: Handled automatically by spotapi-go wrapper

Search Implementation

Track Search:

// SearchTracks runs a track search for query, asking for at most limit
// results, and converts each hit into a pb.Track whose IDs use the
// spotify:{type}:{native_id} namespace.
//
// A failed search is returned wrapped with the "spotify search" prefix.
// Tracks that carry no artists are still returned; their Artist and ArtistId
// fields are left empty instead of indexing into an empty slice.
func (p *SpotifyProvider) SearchTracks(ctx context.Context, query string, limit int32) ([]*pb.Track, error) {
    results, err := p.client.Search(ctx, query, spotify.SearchTypeTrack, spotify.Limit(int(limit)))
    if err != nil {
        return nil, fmt.Errorf("spotify search: %w", err)
    }

    tracks := make([]*pb.Track, 0, len(results.Tracks.Tracks))
    for _, t := range results.Tracks.Tracks {
        track := &pb.Track{
            Id:       fmt.Sprintf("spotify:track:%s", t.ID),
            Title:    t.Name,
            Album:    t.Album.Name,
            AlbumId:  fmt.Sprintf("spotify:album:%s", t.Album.ID),
            Duration: int32(t.Duration / 1000), // ms to seconds
            CoverUrl: getCoverURL(t.Album.Images),
            Year:     extractYear(t.Album.ReleaseDate),
            Explicit: t.Explicit,
            Platform: pb.Platform_SPOTIFY,
        }

        // Only the first listed artist is reported; guard against an empty
        // list so a single odd result cannot panic the whole search.
        if len(t.Artists) > 0 {
            track.Artist = t.Artists[0].Name
            track.ArtistId = fmt.Sprintf("spotify:artist:%s", t.Artists[0].ID)
        }

        tracks = append(tracks, track)
    }

    return tracks, nil
}

Album Search: Similar pattern, uses spotify.SearchTypeAlbum
Artist Search: Similar pattern, uses spotify.SearchTypeArtist
Playlist Search: Similar pattern, uses spotify.SearchTypePlaylist

Metadata Retrieval

Get Track:

// GetTrack fetches a single track by id and converts it into a pb.Track,
// including its ISRC.
//
// A lookup failure is returned wrapped with the "get track" prefix. When the
// track lists no artists, Artist and ArtistId are left empty rather than
// indexing into an empty slice.
func (p *SpotifyProvider) GetTrack(ctx context.Context, id string) (*pb.Track, error) {
    track, err := p.client.GetTrack(ctx, spotify.ID(id))
    if err != nil {
        return nil, fmt.Errorf("get track: %w", err)
    }

    out := &pb.Track{
        Id:       fmt.Sprintf("spotify:track:%s", track.ID),
        Title:    track.Name,
        Album:    track.Album.Name,
        AlbumId:  fmt.Sprintf("spotify:album:%s", track.Album.ID),
        Duration: int32(track.Duration / 1000), // ms to seconds
        CoverUrl: getCoverURL(track.Album.Images),
        Year:     extractYear(track.Album.ReleaseDate),
        Explicit: track.Explicit,
        Isrc:     track.ExternalIDs.ISRC,
        Platform: pb.Platform_SPOTIFY,
    }

    // Only the first listed artist is reported.
    if len(track.Artists) > 0 {
        out.Artist = track.Artists[0].Name
        out.ArtistId = fmt.Sprintf("spotify:artist:%s", track.Artists[0].ID)
    }

    return out, nil
}

Get Album (with tracks):

// GetAlbum fetches an album by id and converts it, together with the tracks
// embedded in the album response, into a pb.Album.
//
// TrackCount comes from the page's Total, while Tracks holds only the entries
// present in album.Tracks.Tracks. A lookup failure is returned wrapped with
// the "get album" prefix. Missing artist lists (on the album or on a track)
// leave the corresponding artist fields empty instead of panicking.
func (p *SpotifyProvider) GetAlbum(ctx context.Context, id string) (*pb.Album, error) {
    album, err := p.client.GetAlbum(ctx, spotify.ID(id))
    if err != nil {
        return nil, fmt.Errorf("get album: %w", err)
    }

    tracks := make([]*pb.Track, 0, len(album.Tracks.Tracks))
    for _, t := range album.Tracks.Tracks {
        track := &pb.Track{
            Id:       fmt.Sprintf("spotify:track:%s", t.ID),
            Title:    t.Name,
            Duration: int32(t.Duration / 1000), // ms to seconds
            Platform: pb.Platform_SPOTIFY,
        }
        if len(t.Artists) > 0 {
            track.Artist = t.Artists[0].Name
        }
        tracks = append(tracks, track)
    }

    out := &pb.Album{
        Id:         fmt.Sprintf("spotify:album:%s", album.ID),
        Title:      album.Name,
        Year:       extractYear(album.ReleaseDate),
        CoverUrl:   getCoverURL(album.Images),
        TrackCount: int32(album.Tracks.Total),
        Tracks:     tracks,
        Genre:      getGenre(album.Genres),
        Label:      album.Label,
        Platform:   pb.Platform_SPOTIFY,
    }

    // Only the first listed album artist is reported.
    if len(album.Artists) > 0 {
        out.Artist = album.Artists[0].Name
        out.ArtistId = fmt.Sprintf("spotify:artist:%s", album.Artists[0].ID)
    }

    return out, nil
}

Get Artist (with discography):

// GetArtist loads an artist's profile and one page (limit 50) of the artist's
// albums, and returns both as a pb.Artist.
//
// Either request failing aborts the call; the error is wrapped with
// "get artist" or "get artist albums" respectively.
func (p *SpotifyProvider) GetArtist(ctx context.Context, id string) (*pb.Artist, error) {
    artistID := spotify.ID(id)

    artist, err := p.client.GetArtist(ctx, artistID)
    if err != nil {
        return nil, fmt.Errorf("get artist: %w", err)
    }

    // Album listing is a separate request; only this single page is used.
    page, err := p.client.GetArtistAlbums(ctx, artistID, spotify.Limit(50))
    if err != nil {
        return nil, fmt.Errorf("get artist albums: %w", err)
    }

    discography := make([]*pb.Album, len(page.Albums))
    for i := range page.Albums {
        entry := page.Albums[i]
        discography[i] = &pb.Album{
            Id:       fmt.Sprintf("spotify:album:%s", entry.ID),
            Title:    entry.Name,
            Year:     extractYear(entry.ReleaseDate),
            CoverUrl: getCoverURL(entry.Images),
            Platform: pb.Platform_SPOTIFY,
        }
    }

    result := &pb.Artist{
        Id:        fmt.Sprintf("spotify:artist:%s", artist.ID),
        Name:      artist.Name,
        ImageUrl:  getCoverURL(artist.Images),
        Genres:    artist.Genres,
        Followers: int64(artist.Followers.Total),
        Albums:    discography,
        Platform:  pb.Platform_SPOTIFY,
    }
    return result, nil
}

Streaming

No Direct Streaming:

// GetStreamURL always fails for Spotify: it ignores ctx and id and returns an
// empty URL together with a fixed error.
func (p *SpotifyProvider) GetStreamURL(ctx context.Context, id string) (string, error) {
    errNoStream := errors.New("spotify does not provide streaming URLs via partner API")
    return "", errNoStream
}

Bridge Resolution: Handled by resolver.go (searches SoundCloud/YouTube Music for matching track)

ID Namespacing

Format: spotify:{type}:{native_id}

Examples:

  • Track: spotify:track:3n3Ppam7vgaVa1iaRUc9Lp
  • Album: spotify:album:6DEjYFkNZh67HP7R9PSZvv
  • Artist: spotify:artist:0TnOYISbd1XYRBk9myaseg
  • Playlist: spotify:playlist:37i9dQZF1DXcBWIGoYBM5M

Rate Limiting

Spotify Limits: 180 requests per minute (partner API)

No Client-Side Limiting: Relies on Spotify API returning 429 errors

Error Handling:

// Translate a rate-limit failure into a dedicated error by looking for "429"
// in the error text; every other error is passed through unchanged.
// NOTE(review): substring matching also fires on any unrelated message that
// happens to contain "429" — confirm whether the client exposes a typed error.
if err != nil {
    if strings.Contains(err.Error(), "429") {
        return nil, errors.New("spotify rate limit exceeded")
    }
    return nil, err
}

Unique Features

  • ISRC Support: Returns International Standard Recording Code for tracks
  • Full Discography: Artist endpoint returns the artist's albums (the GetArtist code above requests a single page of up to 50, without pagination)
  • High-Quality Metadata: Rich metadata (genres, followers, release dates)
  • Explicit Content Flags: Tracks marked as explicit

SoundCloud Integration

API Details

File: providers/soundcloud.go
Library: Custom HTTP client (no official SDK)
API Type: SoundCloud api-v2 (public, undocumented)
Authentication: Client ID (no OAuth required)

Client ID Rotation

Environment Variable:

SOUNDCLOUD_CLIENT_IDS=id1,id2,id3,id4

Rotation Logic:

// SoundCloudProvider talks to the SoundCloud api-v2 endpoints using a pool of
// client IDs that is cycled round-robin.
type SoundCloudProvider struct {
    clientIDs  []string     // usable client IDs, in rotation order
    currentID  int          // index into clientIDs of the next ID to hand out
    mu         sync.Mutex   // guards currentID
    httpClient *http.Client // shared client with a 10s timeout
}

// NewSoundCloudProvider builds a provider from the comma-separated
// SOUNDCLOUD_CLIENT_IDS environment variable.
//
// Surrounding whitespace is trimmed from every entry and empty entries are
// dropped, so values such as "id1, id2,," are accepted. It returns nil (after
// logging) when the variable is unset or contains no usable ID, so callers
// must nil-check the result.
func NewSoundCloudProvider() *SoundCloudProvider {
    raw := os.Getenv("SOUNDCLOUD_CLIENT_IDS")

    fields := strings.Split(raw, ",")
    clientIDs := make([]string, 0, len(fields))
    for _, field := range fields {
        if id := strings.TrimSpace(field); id != "" {
            clientIDs = append(clientIDs, id)
        }
    }

    if len(clientIDs) == 0 {
        log.Println("[soundcloud] Client IDs not configured, provider disabled")
        return nil
    }

    return &SoundCloudProvider{
        clientIDs:  clientIDs,
        currentID:  0,
        httpClient: &http.Client{Timeout: 10 * time.Second},
    }
}

// getClientID returns the next client ID and advances the rotation index,
// wrapping around at the end of the list. It is safe for concurrent use.
// An empty string is returned when the provider holds no client IDs.
func (p *SoundCloudProvider) getClientID() string {
    p.mu.Lock()
    defer p.mu.Unlock()

    // Without this guard an empty pool would index out of range and take the
    // modulo of zero.
    if len(p.clientIDs) == 0 {
        return ""
    }

    id := p.clientIDs[p.currentID]
    p.currentID = (p.currentID + 1) % len(p.clientIDs)

    return id
}

Purpose: Avoid rate limiting by rotating through multiple client IDs

Search Implementation

Track Search:

// SearchTracks queries the api-v2 /search/tracks endpoint for query, asking
// for at most limit results, and converts the returned collection into
// pb.Track messages with soundcloud:track:{id} identifiers.
//
// The request is bound to ctx, so cancellation and deadlines are honoured.
// Transport failures are wrapped with "soundcloud search", non-200 responses
// are reported with their status code, and JSON failures are wrapped with
// "decode response".
func (p *SoundCloudProvider) SearchTracks(ctx context.Context, query string, limit int32) ([]*pb.Track, error) {
    endpoint := fmt.Sprintf("https://api-v2.soundcloud.com/search/tracks?q=%s&limit=%d&client_id=%s",
        url.QueryEscape(query),
        limit,
        url.QueryEscape(p.getClientID()),
    )

    req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
    if err != nil {
        return nil, fmt.Errorf("soundcloud search: %w", err)
    }

    resp, err := p.httpClient.Do(req)
    if err != nil {
        return nil, fmt.Errorf("soundcloud search: %w", err)
    }
    defer resp.Body.Close()

    // Error responses are not guaranteed to be JSON in the expected shape;
    // surface the status instead of a confusing decode error or empty result.
    if resp.StatusCode != http.StatusOK {
        return nil, fmt.Errorf("soundcloud search: unexpected status %d", resp.StatusCode)
    }

    var result struct {
        Collection []struct {
            ID    int64  `json:"id"`
            Title string `json:"title"`
            User  struct {
                Username string `json:"username"`
            } `json:"user"`
            ArtworkURL string `json:"artwork_url"`
            Duration   int32  `json:"duration"` // milliseconds
            Genre      string `json:"genre"`
            PlayCount  int64  `json:"playback_count"`
        } `json:"collection"`
    }

    if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
        return nil, fmt.Errorf("decode response: %w", err)
    }

    tracks := make([]*pb.Track, 0, len(result.Collection))
    for _, t := range result.Collection {
        tracks = append(tracks, &pb.Track{
            Id:        fmt.Sprintf("soundcloud:track:%d", t.ID),
            Title:     t.Title,
            Artist:    t.User.Username,
            Duration:  t.Duration / 1000, // ms to seconds
            CoverUrl:  t.ArtworkURL,
            Genre:     t.Genre,
            PlayCount: t.PlayCount,
            Platform:  pb.Platform_SOUNDCLOUD,
        })
    }

    return tracks, nil
}

Batch Hydration

Purpose: Fetch full track details for multiple IDs in single request

Implementation:

// hydrateTracks fetches track details for ids in batches of 30 and returns the
// converted tracks in the order the API returned them.
//
// The first failing batch aborts the whole call and its error is returned;
// no partial result is handed back.
func (p *SoundCloudProvider) hydrateTracks(ctx context.Context, ids []string) ([]*pb.Track, error) {
    // SoundCloud allows up to 30 IDs per request
    const batchSize = 30

    var allTracks []*pb.Track

    for start := 0; start < len(ids); start += batchSize {
        end := start + batchSize
        if end > len(ids) {
            end = len(ids)
        }

        batch, err := p.hydrateBatch(ctx, ids[start:end])
        if err != nil {
            return nil, err
        }
        allTracks = append(allTracks, batch...)
    }

    return allTracks, nil
}

// hydrateBatch performs one /tracks?ids=... request for batch. It lives in its
// own function so the response body is closed as soon as the batch has been
// decoded, rather than staying open until every batch has finished.
func (p *SoundCloudProvider) hydrateBatch(ctx context.Context, batch []string) ([]*pb.Track, error) {
    endpoint := fmt.Sprintf("https://api-v2.soundcloud.com/tracks?ids=%s&client_id=%s",
        strings.Join(batch, ","),
        p.getClientID(),
    )

    req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
    if err != nil {
        return nil, fmt.Errorf("hydrate batch: %w", err)
    }

    resp, err := p.httpClient.Do(req)
    if err != nil {
        return nil, fmt.Errorf("hydrate batch: %w", err)
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        return nil, fmt.Errorf("hydrate batch: unexpected status %d", resp.StatusCode)
    }

    var decoded []struct {
        ID       int64  `json:"id"`
        Title    string `json:"title"`
        Duration int32  `json:"duration"` // milliseconds
        // ... other fields
    }

    if err := json.NewDecoder(resp.Body).Decode(&decoded); err != nil {
        return nil, fmt.Errorf("decode batch: %w", err)
    }

    tracks := make([]*pb.Track, 0, len(decoded))
    for _, t := range decoded {
        tracks = append(tracks, &pb.Track{
            Id:       fmt.Sprintf("soundcloud:track:%d", t.ID),
            Title:    t.Title,
            Duration: t.Duration / 1000, // ms to seconds
            Platform: pb.Platform_SOUNDCLOUD,
        })
    }

    return tracks, nil
}

Use Case: Playlist retrieval (fetch details for all track IDs in playlist)

Stream URL Resolution

Progressive MP3 Selection:

// GetStreamURL resolves a playable URL for a SoundCloud track.
//
// id may be either the bare numeric track ID or the namespaced form
// "soundcloud:track:{id}"; the prefix is stripped before the request is built.
// The track's transcodings are scanned in order and the first progressive MP3
// entry whose transcoding URL yields a non-empty stream URL wins. Transcodings
// that fail to resolve are skipped. All requests are bound to ctx.
//
// Errors: a failure to load the track is wrapped with "get track"; when no
// transcoding produces a URL, "no progressive stream found" is returned.
func (p *SoundCloudProvider) GetStreamURL(ctx context.Context, id string) (string, error) {
    // fetchJSON issues a context-bound GET and decodes a 200 response into
    // dst. The body is closed before it returns, so calling it inside the loop
    // below does not accumulate open responses.
    fetchJSON := func(endpoint string, dst interface{}) error {
        req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
        if err != nil {
            return err
        }
        resp, err := p.httpClient.Do(req)
        if err != nil {
            return err
        }
        defer resp.Body.Close()

        if resp.StatusCode != http.StatusOK {
            return fmt.Errorf("unexpected status %d", resp.StatusCode)
        }
        return json.NewDecoder(resp.Body).Decode(dst)
    }

    // Get track info
    nativeID := strings.TrimPrefix(id, "soundcloud:track:")
    trackURL := fmt.Sprintf("https://api-v2.soundcloud.com/tracks/%s?client_id=%s",
        nativeID,
        p.getClientID(),
    )

    var track struct {
        Media struct {
            Transcodings []struct {
                URL    string `json:"url"`
                Format struct {
                    Protocol string `json:"protocol"`
                    MimeType string `json:"mime_type"`
                } `json:"format"`
            } `json:"transcodings"`
        } `json:"media"`
    }

    if err := fetchJSON(trackURL, &track); err != nil {
        return "", fmt.Errorf("get track: %w", err)
    }

    // Select progressive MP3 transcoding
    for _, t := range track.Media.Transcodings {
        if t.Format.Protocol != "progressive" || !strings.Contains(t.Format.MimeType, "mp3") {
            continue
        }

        // The transcoding URL may already carry a query string.
        sep := "?"
        if strings.Contains(t.URL, "?") {
            sep = "&"
        }

        var streamData struct {
            URL string `json:"url"`
        }

        // Fetch actual stream URL from transcoding URL
        if err := fetchJSON(t.URL+sep+"client_id="+p.getClientID(), &streamData); err != nil {
            continue
        }
        if streamData.URL == "" {
            continue // a response without a URL is not a usable stream
        }

        return streamData.URL, nil
    }

    return "", errors.New("no progressive stream found")
}

Stream Types:

  • Progressive: Direct HTTP download (preferred)
  • HLS: HTTP Live Streaming (not used)

Bitrate: Typically 128 kbps MP3

URL Resolution

Purpose: Convert SoundCloud URLs to track IDs

Implementation:

// ResolveURL asks the api-v2 /resolve endpoint which resource trackURL refers
// to and returns that resource's numeric ID formatted as a decimal string.
//
// The request is bound to ctx. Transport failures are wrapped with
// "resolve url", non-200 responses are reported with their status code, and
// JSON failures are wrapped with "decode response".
func (p *SoundCloudProvider) ResolveURL(ctx context.Context, trackURL string) (string, error) {
    resolveURL := fmt.Sprintf("https://api-v2.soundcloud.com/resolve?url=%s&client_id=%s",
        url.QueryEscape(trackURL),
        url.QueryEscape(p.getClientID()),
    )

    req, err := http.NewRequestWithContext(ctx, http.MethodGet, resolveURL, nil)
    if err != nil {
        return "", fmt.Errorf("resolve url: %w", err)
    }

    resp, err := p.httpClient.Do(req)
    if err != nil {
        return "", fmt.Errorf("resolve url: %w", err)
    }
    defer resp.Body.Close()

    // Without this check an error payload would decode to ID 0 and be
    // returned as the string "0".
    if resp.StatusCode != http.StatusOK {
        return "", fmt.Errorf("resolve url: unexpected status %d", resp.StatusCode)
    }

    var result struct {
        ID int64 `json:"id"`
    }

    if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
        return "", fmt.Errorf("decode response: %w", err)
    }

    return fmt.Sprintf("%d", result.ID), nil
}

Example:

Input:  https://soundcloud.com/artist/track-name
Output: 1234567890

Rate Limiting

SoundCloud Limits: Undocumented (estimated 1000 requests/hour per client ID)

Mitigation: Client ID rotation (4 IDs = 4000 requests/hour)

Error Handling:

// Excerpt from SearchTracks: on HTTP 429 the same search is issued again; the
// retry's getClientID call hands out the next client ID in the rotation.
// NOTE(review): no attempt limit is visible in this excerpt — if every client
// ID is rate limited the recursion would not terminate; confirm a bound exists.
if resp.StatusCode == 429 {
    log.Printf("[soundcloud] Rate limit hit, rotating client ID")
    return p.SearchTracks(ctx, query, limit) // Retry with next client ID
}

Unique Features

  • Client ID Rotation: Automatic rotation to avoid rate limits
  • Batch Hydration: Fetch 30 tracks in single request
  • URL Resolution: Convert web URLs to track IDs
  • Progressive Streaming: Direct MP3 download (no HLS complexity)

Deezer Integration

API Details

File: providers/deezer.go
Library: Custom HTTP client (no official Go SDK)
API Type: Deezer Public API
Authentication: None required

Search Implementation

Track Search:

// SearchTracks queries the Deezer public /search/track endpoint for query,
// asking for at most limit results, and converts the hits into pb.Track
// messages with deezer:-namespaced IDs.
//
// The request is bound to ctx. Transport failures are wrapped with
// "deezer search", non-200 responses are reported with their status code, and
// JSON failures are wrapped with "decode response".
func (p *DeezerProvider) SearchTracks(ctx context.Context, query string, limit int32) ([]*pb.Track, error) {
    endpoint := fmt.Sprintf("https://api.deezer.com/search/track?q=%s&limit=%d",
        url.QueryEscape(query),
        limit,
    )

    req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
    if err != nil {
        return nil, fmt.Errorf("deezer search: %w", err)
    }

    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        return nil, fmt.Errorf("deezer search: %w", err)
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        return nil, fmt.Errorf("deezer search: unexpected status %d", resp.StatusCode)
    }

    var result struct {
        Data []struct {
            ID     int64  `json:"id"`
            Title  string `json:"title"`
            Artist struct {
                ID   int64  `json:"id"`
                Name string `json:"name"`
            } `json:"artist"`
            Album struct {
                ID    int64  `json:"id"`
                Title string `json:"title"`
                Cover string `json:"cover_medium"`
            } `json:"album"`
            Duration int32 `json:"duration"` // seconds (not milliseconds)
        } `json:"data"`
    }

    if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
        return nil, fmt.Errorf("decode response: %w", err)
    }

    tracks := make([]*pb.Track, 0, len(result.Data))
    for _, t := range result.Data {
        tracks = append(tracks, &pb.Track{
            Id:       fmt.Sprintf("deezer:track:%d", t.ID),
            Title:    t.Title,
            Artist:   t.Artist.Name,
            ArtistId: fmt.Sprintf("deezer:artist:%d", t.Artist.ID),
            Album:    t.Album.Title,
            AlbumId:  fmt.Sprintf("deezer:album:%d", t.Album.ID),
            Duration: t.Duration, // Already in seconds
            CoverUrl: t.Album.Cover,
            Platform: pb.Platform_DEEZER,
        })
    }

    return tracks, nil
}

Concurrent Artist Data Fetching

Get Artist (parallel goroutines):

// GetArtist loads an artist's profile, albums and top tracks from the Deezer
// public API with three concurrent requests and returns the profile with its
// albums attached.
//
// All requests are bound to ctx. If any request fails (transport error,
// non-200 status, or undecodable JSON) the call returns nil and the first
// error that was recorded. The top-tracks result is fetched but not returned.
func (p *DeezerProvider) GetArtist(ctx context.Context, id string) (*pb.Artist, error) {
    var (
        wg        sync.WaitGroup
        mu        sync.Mutex // guards firstErr
        firstErr  error
        artist    *pb.Artist
        albums    []*pb.Album
        topTracks []*pb.Track
    )

    // fail records err if it is the first failure reported by any goroutine.
    fail := func(err error) {
        mu.Lock()
        defer mu.Unlock()
        if firstErr == nil {
            firstErr = err
        }
    }

    // getJSON issues a context-bound GET and decodes a 200 response into dst.
    getJSON := func(endpoint string, dst interface{}) error {
        req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
        if err != nil {
            return err
        }
        resp, err := http.DefaultClient.Do(req)
        if err != nil {
            return err
        }
        defer resp.Body.Close()

        if resp.StatusCode != http.StatusOK {
            return fmt.Errorf("unexpected status %d", resp.StatusCode)
        }
        return json.NewDecoder(resp.Body).Decode(dst)
    }

    // Each goroutine writes only its own result variable, and those variables
    // are read only after wg.Wait, so just the shared error needs the mutex.
    wg.Add(3)

    // Fetch artist info
    go func() {
        defer wg.Done()

        var data struct {
            ID      int64  `json:"id"`
            Name    string `json:"name"`
            Picture string `json:"picture_medium"`
            NbFan   int64  `json:"nb_fan"`
        }

        if err := getJSON(fmt.Sprintf("https://api.deezer.com/artist/%s", id), &data); err != nil {
            fail(fmt.Errorf("get artist: %w", err))
            return
        }

        artist = &pb.Artist{
            Id:        fmt.Sprintf("deezer:artist:%d", data.ID),
            Name:      data.Name,
            ImageUrl:  data.Picture,
            Followers: data.NbFan,
            Platform:  pb.Platform_DEEZER,
        }
    }()

    // Fetch artist albums
    go func() {
        defer wg.Done()

        var data struct {
            Data []struct {
                ID          int64  `json:"id"`
                Title       string `json:"title"`
                Cover       string `json:"cover_medium"`
                ReleaseDate string `json:"release_date"`
            } `json:"data"`
        }

        if err := getJSON(fmt.Sprintf("https://api.deezer.com/artist/%s/albums", id), &data); err != nil {
            fail(fmt.Errorf("get artist albums: %w", err))
            return
        }

        for _, a := range data.Data {
            albums = append(albums, &pb.Album{
                Id:       fmt.Sprintf("deezer:album:%d", a.ID),
                Title:    a.Title,
                CoverUrl: a.Cover,
                Year:     extractYear(a.ReleaseDate),
                Platform: pb.Platform_DEEZER,
            })
        }
    }()

    // Fetch artist top tracks
    go func() {
        defer wg.Done()

        var data struct {
            Data []struct {
                ID    int64  `json:"id"`
                Title string `json:"title"`
            } `json:"data"`
        }

        if err := getJSON(fmt.Sprintf("https://api.deezer.com/artist/%s/top?limit=10", id), &data); err != nil {
            fail(fmt.Errorf("get artist top tracks: %w", err))
            return
        }

        for _, t := range data.Data {
            topTracks = append(topTracks, &pb.Track{
                Id:       fmt.Sprintf("deezer:track:%d", t.ID),
                Title:    t.Title,
                Platform: pb.Platform_DEEZER,
            })
        }
    }()

    wg.Wait()

    if firstErr != nil {
        return nil, firstErr
    }

    artist.Albums = albums
    // topTracks not included in response (no field in Artist proto)

    return artist, nil
}

Performance: the 3 API calls run in parallel instead of sequentially, so total latency is bounded by the slowest call (up to ~3x faster)

Streaming

No Public Streaming:

// GetStreamURL always fails for Deezer: it ignores ctx and id and returns an
// empty URL together with a fixed error.
func (p *DeezerProvider) GetStreamURL(ctx context.Context, id string) (string, error) {
    errNoStream := errors.New("deezer public API does not provide streaming URLs")
    return "", errNoStream
}

Note: Deezer has streaming API, but requires paid partnership (not public)

Duration Handling

Deezer Returns Seconds (not milliseconds like Spotify):

// Illustrative excerpt: the Deezer duration is copied into pb.Track as-is,
// with no division by 1000.
track := &pb.Track{
    Duration: deezerTrack.Duration, // Already in seconds, no conversion needed
}

Rate Limiting

Deezer Limits: 50 requests per 5 seconds (public API)

No Client-Side Limiting: Relies on Deezer API returning 403 errors

Error Handling:

// Every HTTP 403 is reported as a rate-limit error.
// NOTE(review): a 403 returned for any other reason would be reported the
// same way — confirm against the Deezer API's error responses.
if resp.StatusCode == 403 {
    return nil, errors.New("deezer rate limit exceeded")
}

Unique Features

  • No Authentication: Public API, no credentials required
  • Concurrent Fetching: Artist data fetched in parallel
  • Fan Count: Returns follower count (nb_fan field)
  • Simple Integration: No OAuth, no client IDs, just HTTP GET

YouTube Music Integration

API Details

File: providers/youtube.go
Library: github.com/kkdai/youtube/v2
API Type: YouTube Innertube API (internal, undocumented)
Authentication: Cookies (optional, for age-restricted content)

7-Client Fallback Pool

Client Configurations:

// youtubeClients is the ordered pool of client identities tried when
// resolving a stream. name is the label used in log output; config is the
// client configuration used for the request.
var youtubeClients = []struct {
    name   string
    config youtube.ClientConfig
}{
    {"TVHTML5_SIMPLY_EMBEDDED", youtube.ClientConfig{
        ClientName:    "TVHTML5_SIMPLY_EMBEDDED_PLAYER",
        ClientVersion: "2.0",
    }},
    {"TVHTML5", youtube.ClientConfig{
        ClientName:    "TVHTML5",
        ClientVersion: "7.20230622",
    }},
    {"ANDROID_VR_1", youtube.ClientConfig{
        ClientName:        "ANDROID_VR",
        ClientVersion:     "1.37.35",
        AndroidSDKVersion: 30,
    }},
    {"ANDROID_VR_2", youtube.ClientConfig{
        ClientName:        "ANDROID_VR",
        ClientVersion:     "1.38.50",
        AndroidSDKVersion: 31,
    }},
    {"ANDROID", youtube.ClientConfig{
        ClientName:        "ANDROID",
        ClientVersion:     "18.20.39",
        AndroidSDKVersion: 33,
    }},
    {"IOS", youtube.ClientConfig{
        ClientName:    "IOS",
        ClientVersion: "18.20.3",
        DeviceModel:   "iPhone14,5",
    }},
    {"WEB", youtube.ClientConfig{
        ClientName:    "WEB",
        ClientVersion: "2.20230622.01.00",
    }},
}

Fallback Logic:

// GetStreamURL walks the youtubeClients pool in order and returns the first
// stream URL that selectBestFormat can pick from a client's formats.
//
// A client is skipped when fetching the video fails, when the first returned
// format carries a cipher, or when no suitable format is found. If every
// client is exhausted the call is delegated to fallbackToSoundCloud.
func (p *YouTubeProvider) GetStreamURL(ctx context.Context, id string) (string, error) {
    for i := range youtubeClients {
        entry := youtubeClients[i]

        yt := youtube.Client{Config: entry.config}
        // Cookies, when configured, are injected through a dedicated transport.
        if p.cookies != "" {
            yt.HTTPClient = &http.Client{
                Transport: &cookieTransport{cookies: p.cookies},
            }
        }

        video, err := yt.GetVideoContext(ctx, id)
        switch {
        case err != nil:
            log.Printf("[youtube] Client %s failed: %v", entry.name, err)

        case len(video.Formats) > 0 && video.Formats[0].Cipher != "":
            // Check for cipher (encrypted stream)
            log.Printf("[youtube] Client %s returned ciphered stream, skipping", entry.name)

        default:
            // Select best format
            if best := p.selectBestFormat(video.Formats); best != "" {
                log.Printf("[youtube] Client %s succeeded", entry.name)
                return best, nil
            }
        }
    }

    // All clients failed, fallback to SoundCloud
    log.Println("[youtube] All clients failed, falling back to SoundCloud")
    return p.fallbackToSoundCloud(ctx, id)
}

Why 7 Clients: Different clients have different capabilities and restrictions. Some work for age-restricted content, some avoid ciphered streams, some have better format availability.

Itag Priority (Audio Quality)

Format Selection:

// selectBestFormat picks the URL of the preferred audio format from formats.
//
// Itag 251 is preferred over itag 140; if neither yields a URL, the first
// format whose MIME type mentions "audio" but not "video" is used. Formats
// with an empty URL are never selected, so a preferred itag that cannot be
// played does not hide a usable lower-priority one. An empty string means no
// usable format was found.
func (p *YouTubeProvider) selectBestFormat(formats youtube.FormatList) string {
    // Priority: 251 (opus, ~160kbps) > 140 (aac, ~128kbps)
    itagPriority := []int{251, 140}

    for _, itag := range itagPriority {
        for _, format := range formats {
            if format.ItagNo == itag && format.URL != "" {
                return format.URL
            }
        }
    }

    // Fallback: first audio-only format
    for _, format := range formats {
        if format.URL == "" {
            continue
        }
        if strings.Contains(format.MimeType, "audio") && !strings.Contains(format.MimeType, "video") {
            return format.URL
        }
    }

    return ""
}

Itag Reference:

  • 251: Opus audio, ~160 kbps (best quality)
  • 140: AAC audio, ~128 kbps (good quality, better compatibility)

Metadata Client (WEB_REMIX)

Search Implementation:

// SearchTracks is shown here only as an outline: it prepares a WEB_REMIX
// client, the search endpoint URL and the request payload carrying query.
// The request itself, the result parsing and the return statement are elided
// from this excerpt.
func (p *YouTubeProvider) SearchTracks(ctx context.Context, query string, limit int32) ([]*pb.Track, error) {
    // Use WEB_REMIX client (YouTube Music, not regular YouTube)
    client := youtube.Client{
        Config: youtube.ClientConfig{
            ClientName:    "WEB_REMIX",
            ClientVersion: "1.20231122.01.00",
        },
    }
    
    // YouTube Music search endpoint
    searchURL := "https://music.youtube.com/youtubei/v1/search"
    
    // The payload repeats the client name and version used for the client
    // above and adds the search query.
    payload := map[string]interface{}{
        "context": map[string]interface{}{
            "client": map[string]interface{}{
                "clientName":    "WEB_REMIX",
                "clientVersion": "1.20231122.01.00",
            },
        },
        "query": query,
    }
    
    // Make request, parse music-specific results
    // ...
}

WEB_REMIX vs WEB: WEB_REMIX returns YouTube Music results (songs, albums, artists), WEB returns regular YouTube videos

Environment Variable:

YOUTUBE_COOKIES=cookie-string

Cookie Injection:

// cookieTransport is an http.RoundTripper that attaches a fixed Cookie header
// to every outgoing request before delegating to base.
type cookieTransport struct {
    cookies string            // raw value sent as the Cookie header
    base    http.RoundTripper // underlying transport; nil means http.DefaultTransport
}

// RoundTrip sends req through the base transport with the Cookie header set to
// t.cookies.
//
// The header is set on a clone of req, so the caller's request is left
// untouched (http.RoundTripper implementations should not modify the request
// they are given). When no cookies are configured the request is forwarded
// unchanged.
func (t *cookieTransport) RoundTrip(req *http.Request) (*http.Response, error) {
    base := t.base
    if base == nil {
        base = http.DefaultTransport
    }

    if t.cookies == "" {
        return base.RoundTrip(req)
    }

    // Clone deep-copies the header map, so the Set below cannot leak into req.
    outgoing := req.Clone(req.Context())
    outgoing.Header.Set("Cookie", t.cookies)

    return base.RoundTrip(outgoing)
}

// NewYouTubeProvider returns a provider whose cookie string is read from the
// YOUTUBE_COOKIES environment variable. The variable may be unset, in which
// case the provider carries an empty cookie string.
func NewYouTubeProvider() *YouTubeProvider {
    provider := new(YouTubeProvider)
    provider.cookies = os.Getenv("YOUTUBE_COOKIES")
    return provider
}

Use Case: Access age-restricted music videos (requires logged-in YouTube account cookies)

Cipher Handling

Problem: Some YouTube streams are encrypted (ciphered) and require JavaScript decryption

Solution: Skip ciphered streams, try next client

// Excerpt from the client fallback loop: only the first format is inspected;
// a non-empty Cipher on it makes the loop move on to the next client.
if len(video.Formats) > 0 && video.Formats[0].Cipher != "" {
    log.Printf("[youtube] Client %s returned ciphered stream, skipping", clientConfig.name)
    continue // Try next client
}

Fallback: If all clients return ciphered streams, fall back to SoundCloud

SoundCloud Fallback

Implementation:

// fallbackToSoundCloud tries to find a SoundCloud stream for a YouTube video.
//
// It loads the video's metadata, searches SoundCloud for "{artist} - {title}"
// and resolves a stream URL for the first hit. Errors from the metadata lookup
// are returned as-is; every later failure is reported with the
// "soundcloud fallback failed" prefix, wrapping the underlying error when
// there is one.
func (p *YouTubeProvider) fallbackToSoundCloud(ctx context.Context, videoID string) (string, error) {
    // Get video metadata
    video, err := p.getVideoMetadata(ctx, videoID)
    if err != nil {
        return "", err
    }

    // Search SoundCloud for "{artist} - {title}"
    query := fmt.Sprintf("%s - %s", video.Artist, video.Title)

    // The constructor returns nil when no client IDs are configured; calling
    // methods on that nil provider would dereference a nil pointer.
    soundcloudProvider := NewSoundCloudProvider()
    if soundcloudProvider == nil {
        return "", errors.New("soundcloud fallback failed: provider not configured")
    }

    tracks, err := soundcloudProvider.SearchTracks(ctx, query, 1)
    if err != nil {
        return "", fmt.Errorf("soundcloud fallback failed: %w", err)
    }
    if len(tracks) == 0 {
        return "", errors.New("soundcloud fallback failed")
    }

    // Search results carry namespaced IDs ("soundcloud:track:{id}"), while the
    // stream lookup builds its request URL from the bare native ID.
    nativeID := strings.TrimPrefix(tracks[0].Id, "soundcloud:track:")

    // Get stream URL from first SoundCloud result
    return soundcloudProvider.GetStreamURL(ctx, nativeID)
}

Use Case: When all YouTube clients fail (ciphered streams, geo-restrictions, etc.)

Rate Limiting

YouTube Limits: Undocumented (estimated 10,000 requests/day for Innertube API)

No Client-Side Limiting: Relies on YouTube API returning 429 errors

Error Handling:

// Report a rate-limit error when the error text contains "429".
// NOTE(review): errors without "429" fall through this check — the excerpt
// does not show how they are handled afterwards.
if err != nil && strings.Contains(err.Error(), "429") {
    return nil, errors.New("youtube rate limit exceeded")
}

Unique Features

  • 7-Client Fallback: Maximizes stream availability
  • Itag Priority: Selects best audio quality
  • WEB_REMIX Metadata: YouTube Music-specific search results
  • Cookie Support: Access age-restricted content
  • Cipher Avoidance: Skips encrypted streams
  • SoundCloud Fallback: Ultimate fallback when YouTube fails

Lyrics Integrations

LrcLib (Synced Lyrics)

File: bedrock_server/lrclib.go
API: https://lrclib.net/api/get
Authentication: None required
Format: LRC (timestamped lyrics)

Implementation:

// GetSyncedLyrics looks up time-synced lyrics on lrclib.net using the artist,
// title, album and duration from req, and returns the parsed LRC lines with
// Source set to "lrclib".
//
// The request is bound to ctx and additionally limited by a 5 second client
// timeout. A 404 yields "lyrics not found"; any other non-200 status is
// reported with its code; transport failures are wrapped with
// "lrclib request" and JSON failures with "decode response".
func (s *server) GetSyncedLyrics(ctx context.Context, req *pb.LyricsRequest) (*pb.SyncedLyricsResponse, error) {
    client := &http.Client{Timeout: 5 * time.Second}

    endpoint := fmt.Sprintf("https://lrclib.net/api/get?artist_name=%s&track_name=%s&album_name=%s&duration=%d",
        url.QueryEscape(req.Artist),
        url.QueryEscape(req.Title),
        url.QueryEscape(req.Album),
        req.Duration,
    )

    httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
    if err != nil {
        return nil, fmt.Errorf("lrclib request: %w", err)
    }

    resp, err := client.Do(httpReq)
    if err != nil {
        return nil, fmt.Errorf("lrclib request: %w", err)
    }
    defer resp.Body.Close()

    switch {
    case resp.StatusCode == http.StatusNotFound:
        return nil, errors.New("lyrics not found")
    case resp.StatusCode != http.StatusOK:
        // Other failures are surfaced explicitly instead of being decoded as
        // if they were a lyrics payload.
        return nil, fmt.Errorf("lrclib request: unexpected status %d", resp.StatusCode)
    }

    var result struct {
        SyncedLyrics string `json:"syncedLyrics"`
    }

    if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
        return nil, fmt.Errorf("decode response: %w", err)
    }

    // Parse LRC format
    lines := parseLRC(result.SyncedLyrics)

    return &pb.SyncedLyricsResponse{
        Lines:  lines,
        Source: "lrclib",
    }, nil
}

// parseLRC converts LRC text into lyric lines, one per "[timestamp]text" row.
//
// Rows are split on "\n"; surrounding whitespace (including a trailing "\r"
// from CRLF input) is trimmed from both the row and the lyric text. Rows that
// do not start with "[", have no closing "]", or whose tag does not begin with
// a digit (metadata such as "[ar:...]" or "[offset:...]") are skipped. The
// result is nil when no row qualifies.
func parseLRC(lrc string) []*pb.LyricLine {
    var lines []*pb.LyricLine

    for _, raw := range strings.Split(lrc, "\n") {
        line := strings.TrimSpace(raw)

        // Parse [mm:ss.xx] timestamp
        if !strings.HasPrefix(line, "[") {
            continue
        }

        parts := strings.SplitN(line, "]", 2)
        if len(parts) != 2 {
            continue
        }

        tag := parts[0][1:] // Remove leading [
        // A time tag starts with the minutes digits; anything else is a
        // metadata tag and must not become a lyric line at timestamp 0.
        if tag == "" || tag[0] < '0' || tag[0] > '9' {
            continue
        }

        lines = append(lines, &pb.LyricLine{
            Timestamp: parseTimestamp(tag),
            Text:      strings.TrimSpace(parts[1]),
        })
    }

    return lines
}

// parseTimestamp converts an LRC time tag ("mm:ss", "mm:ss.x", "mm:ss.xx" or
// "mm:ss.xxx") into milliseconds.
//
// The fractional part is read as a decimal fraction of a second, so ".5" is
// 500 ms, ".50" is 500 ms and ".345" is 345 ms; digits beyond millisecond
// precision are dropped. Any tag that is not of the expected shape, or whose
// numeric fields do not parse, yields 0.
func parseTimestamp(ts string) int32 {
    parts := strings.Split(ts, ":")
    if len(parts) != 2 {
        return 0
    }

    minutes, err := strconv.Atoi(parts[0])
    if err != nil {
        return 0
    }

    secondsParts := strings.Split(parts[1], ".")
    seconds, err := strconv.Atoi(secondsParts[0])
    if err != nil {
        return 0
    }

    millis := 0
    if len(secondsParts) > 1 && secondsParts[1] != "" {
        // Normalise the fraction to exactly three digits so its numeric
        // value is already in milliseconds regardless of the precision used.
        frac := secondsParts[1]
        if len(frac) > 3 {
            frac = frac[:3]
        }
        for len(frac) < 3 {
            frac += "0"
        }
        millis, err = strconv.Atoi(frac)
        if err != nil {
            return 0
        }
    }

    return int32(minutes*60*1000 + seconds*1000 + millis)
}

Matching: Artist + title + album + duration (all parameters improve match accuracy)

Timeout: 5 seconds (fast API)

Genius (Plain Lyrics)

File: bedrock_server/genius.go
Library: github.com/rhnvrm/lyric-api-go
Authentication: GENIUS_ACCESS_TOKEN environment variable
Format: Plain text + annotations

Implementation:

// GetLyrics looks up plain-text lyrics and their annotations on Genius.
//
// The access token is read from the GENIUS_ACCESS_TOKEN environment variable
// on every call. The first search hit for "<artist> <title>" is taken as the
// match. A failure to fetch annotations is logged and the response is
// returned without them; every other failure is returned as an error.
func (s *server) GetLyrics(ctx context.Context, req *pb.LyricsRequest) (*pb.LyricsResponse, error) {
    token := os.Getenv("GENIUS_ACCESS_TOKEN")
    if token == "" {
        return nil, errors.New("GENIUS_ACCESS_TOKEN not configured")
    }
    api := genius.NewClient(token)

    // Step 1: locate the song.
    found, err := api.Search(req.Artist + " " + req.Title)
    if err != nil {
        return nil, fmt.Errorf("genius search: %w", err)
    }
    if len(found.Hits) == 0 {
        return nil, errors.New("lyrics not found")
    }
    id := found.Hits[0].Result.ID

    // Step 2: the lyrics themselves are mandatory.
    text, err := api.GetLyrics(id)
    if err != nil {
        return nil, fmt.Errorf("get lyrics: %w", err)
    }

    // Step 3: annotations are best-effort only.
    notes, err := api.GetAnnotations(id)
    if err != nil {
        log.Printf("[genius] Failed to fetch annotations: %v", err)
        notes = nil // Continue without annotations
    }

    converted := make([]*pb.Annotation, 0, len(notes))
    for _, note := range notes {
        item := &pb.Annotation{
            Fragment:   note.Fragment,
            Annotation: note.Annotation,
        }
        converted = append(converted, item)
    }

    out := &pb.LyricsResponse{
        Lyrics:      text,
        Source:      "genius",
        Annotations: converted,
    }
    return out, nil
}

Annotations: Explanations of lyric meanings (unique to Genius)

No Explicit Timeout: The code sets no timeout of its own and relies on the library default (30 seconds)

Stub Integrations

Yandex Music

File: providers/yandex.go

Implementation:

// YandexProvider is the placeholder provider for Yandex Music. It carries no
// state; its methods only report that the provider is not implemented.
type YandexProvider struct{}

// Name returns the identifier of this provider, "yandex".
func (p *YandexProvider) Name() string {
    const providerName = "yandex"
    return providerName
}

// SearchTracks always fails: the Yandex provider is a stub. The arguments are
// ignored and the returned track slice is nil.
func (p *YandexProvider) SearchTracks(ctx context.Context, query string, limit int32) ([]*pb.Track, error) {
    notImplemented := errors.New("yandex provider not implemented")
    return nil, notImplemented
}

// All other methods return errors

Status: Placeholder only, no actual implementation

Reason: The Yandex Music API requires a partnership agreement (it is not publicly available)

VK Music

File: providers/vk.go

Implementation: Same as Yandex (stub only)

Status: Placeholder only, no actual implementation

Reason: The VK Music API requires a VK developer account and OAuth (complex setup)

Integration Comparison

Feature Spotify SoundCloud Deezer YouTube Music
Authentication OAuth 2.0 Client ID None Cookies (optional)
Streaming No Yes (MP3) No Yes (Opus/AAC)
Search Quality Excellent Good Good Excellent
Metadata Richness High Medium Medium High
Rate Limits 180/min ~1000/hr 50/5s ~10k/day
Reliability High Medium High Medium
Unique Features ISRC, discography Batch hydration No auth 7-client fallback
Complexity Medium Low Low High

Error Handling Patterns

Provider-Level Errors

Pattern: Log and continue (don't fail entire request)

// Query one provider; a failure is logged and recorded, never fatal.
tracks, err := provider.SearchTracks(ctx, query, limit)
if err != nil {
    log.Printf("[%s] Search failed: %v", provider.Name(), err)

    // Keep the failure so it can be reported alongside the other results.
    failure := &pb.ProviderError{
        Provider: provider.Name(),
        Message:  err.Error(),
    }
    errors = append(errors, failure)

    continue // Don't return, try other providers
}

Partial Response Handling

Pattern: Return successful results even if some providers fail

if len(errors) > 0 {
    if len(allTracks) == 0 {
        status = pb.ResponseStatus_ERROR
    } else {
        status = pb.ResponseStatus_PARTIAL
    }
}

return &pb.SearchTracksResponse{
    Tracks: allTracks,
    Status: status,
    Errors: errors,
}

Retry Logic

No Automatic Retries: Failed requests are not retried

Client Responsibility: Clients must implement retry logic if needed

Performance Optimization

Parallel Queries

All Providers Queried Simultaneously:

// wg counts the provider searches that are still running.
var wg sync.WaitGroup

// Start one goroutine per provider so the searches run concurrently.
for _, provider := range providers {
    wg.Add(1)
    // The provider is passed as an argument, giving each goroutine its own copy.
    go func(p trackProvider) {
        // Signal completion however the search ends.
        defer wg.Done()
        results, err := p.SearchTracks(ctx, query, limit)
        // Aggregate results
        // NOTE(review): the aggregation step is elided in this excerpt;
        // presumably it writes to state shared between goroutines and needs
        // synchronization — confirm against the real implementation.
    }(provider)
}

// Block until every goroutine started above has called Done.
wg.Wait()

Response Time: Limited by slowest provider (not sum of all providers)

Connection Pooling

HTTP Client Reuse: Each provider maintains persistent HTTP client

// SoundCloudProvider owns the HTTP client used for its requests.
type SoundCloudProvider struct {
    httpClient *http.Client
}

// NewSoundCloudProvider returns a provider with its own HTTP client: a
// 10-second request timeout and a transport that keeps up to 100 idle
// connections (10 per host) for at most 90 seconds each.
func NewSoundCloudProvider() *SoundCloudProvider {
    transport := &http.Transport{}
    transport.MaxIdleConns = 100
    transport.MaxIdleConnsPerHost = 10
    transport.IdleConnTimeout = 90 * time.Second

    client := &http.Client{
        Transport: transport,
        Timeout:   10 * time.Second,
    }

    return &SoundCloudProvider{httpClient: client}
}

Benefit: Avoid TCP handshake overhead on every request

Integration Recommendations for Metadata Aggregator

Adopt

  • Provider Interface Pattern: Clean abstraction for platform-specific logic
  • Parallel Queries: Fan-out concurrency for fast responses
  • Partial Response Handling: Resilient to individual provider failures
  • ID Namespacing: Prevents collisions, enables explicit routing

Avoid

  • No Caching: Do not run without a cache; implement Redis caching for metadata
  • No Rate Limiting: Do not send unthrottled requests; add client-side rate limiting per provider
  • Manual HTTP Clients: Avoid hand-rolled HTTP clients; consider using official SDKs where available

Enhance

  • Add More Providers: Discogs, MusicBrainz, Last.fm, etc.
  • Implement Caching: Cache metadata, search results, stream URLs
  • Add Circuit Breakers: Temporarily disable failing providers
  • Add Metrics: Track provider success rates, latencies, errors
  • Add Retry Logic: Exponential backoff for transient failures