Files
metadata-agregator/docs/research/meelo/analysis/CODEBASE.md
T
Alexander a1f6701bac feat: initial implementation of metadata aggregator
- gRPC service with MusicBrainz provider
- PostgreSQL schema with migrations
- Service layer with database-first caching
- Repository pattern for data access
- YAML configuration support
- Research documentation for 17 music metadata projects
2026-04-28 16:28:53 +02:00

23 KiB

Meelo Codebase

Repository Structure

Meelo/
├── server/           # NestJS backend
│   ├── src/
│   │   ├── artist/
│   │   ├── album/
│   │   ├── song/
│   │   ├── track/
│   │   ├── auth/
│   │   ├── search/
│   │   └── ...
│   ├── prisma/
│   │   ├── schema.prisma
│   │   └── migrations/
│   ├── test/
│   └── package.json
├── scanner/          # Go file scanner
│   ├── cmd/
│   ├── internal/
│   │   ├── scanner/
│   │   ├── fingerprint/
│   │   └── parser/
│   ├── go.mod
│   └── main.go
├── matcher/          # Python metadata matcher
│   ├── providers/
│   │   ├── musicbrainz.py
│   │   ├── genius.py
│   │   ├── wikipedia.py
│   │   └── ...
│   ├── main.py
│   ├── requirements.txt
│   └── tests/
├── front/            # Next.js frontend
│   ├── web/
│   │   ├── pages/
│   │   ├── components/
│   │   └── package.json
│   ├── mobile/
│   │   ├── App.tsx
│   │   └── package.json
│   └── shared/
│       ├── components/
│       ├── hooks/
│       └── state/
├── docker-compose.yml
├── docker-compose.dev.yml
├── docker-compose.local.yml
├── .env.example
├── biome.json
└── README.md

Server (NestJS)

Module Organization

NestJS organizes code into modules. Each module encapsulates related functionality.

Core Modules:

  • ArtistModule: Artist CRUD, relationships
  • AlbumModule: Album CRUD, releases
  • SongModule: Song CRUD, lyrics
  • TrackModule: Track CRUD, streaming
  • ReleaseModule: Release CRUD
  • GenreModule: Genre management
  • VideoModule: Video CRUD, streaming

Supporting Modules:

  • AuthModule: JWT authentication
  • UserModule: User management
  • LibraryModule: Library configuration
  • FileModule: File metadata
  • PlaylistModule: Playlist CRUD
  • LyricsModule: Lyrics storage

Integration Modules:

  • ExternalMetadataModule: Provider data
  • SearchModule: MeiliSearch integration
  • ScrobblerModule: Last.fm/ListenBrainz
  • StreamModule: Audio/video streaming
  • EventsModule: WebSocket events

Infrastructure Modules:

  • PrismaModule: Database ORM
  • MeiliSearchModule: Search client
  • RabbitMQModule: Message queue

Module Structure

Each module follows a consistent structure:

artist/
├── artist.module.ts       # Module definition
├── artist.controller.ts   # HTTP endpoints
├── artist.service.ts      # Business logic
├── artist.entity.ts       # Prisma entity (generated)
├── dto/
│   ├── create-artist.dto.ts
│   ├── update-artist.dto.ts
│   └── artist-response.dto.ts
└── artist.spec.ts         # Unit tests

Controller Example

/**
 * REST endpoints for artists, mounted under `/artists`.
 *
 * Every route is behind `JwtAuthGuard`; the routes that create, update or
 * delete an artist are additionally behind `AdminGuard`.
 */
@Controller('artists')
@UseGuards(JwtAuthGuard)
export class ArtistController {
  constructor(private readonly artistService: ArtistService) {}

  /**
   * Lists artists with pagination and sorting.
   *
   * Query-string values reach the handler as strings, so `skip` and `take`
   * go through an optional `ParseIntPipe`: absent values stay `undefined`
   * (the service applies its defaults) and present values become numbers.
   */
  @Get()
  async findAll(
    @Query('skip', new ParseIntPipe({ optional: true })) skip?: number,
    @Query('take', new ParseIntPipe({ optional: true })) take?: number,
    @Query('sortBy') sortBy?: string,
    @Query('sortOrder') sortOrder?: 'asc' | 'desc',
  ) {
    return this.artistService.findAll({ skip, take, sortBy, sortOrder });
  }

  /**
   * Returns a single artist.
   *
   * @param id Numeric artist id taken from the route.
   * @param include Relation names to load; accepts one value or a repeated
   *   query parameter.
   */
  @Get(':id')
  async findOne(
    @Param('id', ParseIntPipe) id: number,
    @Query('include') include?: string | string[],
  ) {
    // A single `?include=albums` arrives as a plain string; wrap it so the
    // service always receives a list of relation names.
    const relations = typeof include === 'string' ? [include] : include;
    return this.artistService.findOne(id, relations);
  }

  /** Creates an artist (admin only). */
  @Post()
  @UseGuards(AdminGuard)
  async create(@Body() createArtistDto: CreateArtistDto) {
    return this.artistService.create(createArtistDto);
  }

  /** Applies a partial update to an artist (admin only). */
  @Patch(':id')
  @UseGuards(AdminGuard)
  async update(
    @Param('id', ParseIntPipe) id: number,
    @Body() updateArtistDto: UpdateArtistDto,
  ) {
    return this.artistService.update(id, updateArtistDto);
  }

  /** Deletes an artist (admin only). */
  @Delete(':id')
  @UseGuards(AdminGuard)
  async remove(@Param('id', ParseIntPipe) id: number) {
    return this.artistService.remove(id);
  }
}

Service Example

/**
 * Business logic for artists: persistence through Prisma, plus keeping the
 * `artists` search index in step with every write.
 */
@Injectable()
export class ArtistService {
  constructor(
    private readonly prisma: PrismaService,
    private readonly meilisearch: MeiliSearchService,
  ) {}

  /**
   * Returns one page of artists together with the total artist count.
   *
   * @param params Paging and sorting options. Defaults: `skip = 0`,
   *   `take = 20`, `sortBy = 'name'`, `sortOrder = 'asc'`.
   * @returns The page (`items`), the `total` count, and the effective
   *   `skip` / `take`.
   */
  async findAll(params: {
    skip?: number;
    take?: number;
    sortBy?: string;
    sortOrder?: 'asc' | 'desc';
  }) {
    const { skip = 0, take = 20, sortBy = 'name', sortOrder = 'asc' } = params;

    // NOTE(review): `sortBy` is used verbatim as the `orderBy` key; callers
    // should only pass column names that exist on the artist model.
    const [items, total] = await Promise.all([
      this.prisma.artist.findMany({
        skip,
        take,
        orderBy: { [sortBy]: sortOrder },
        include: {
          illustration: true,
          _count: {
            select: { albums: true, songs: true },
          },
        },
      }),
      this.prisma.artist.count(),
    ]);

    return { items, total, skip, take };
  }

  /**
   * Loads one artist by id.
   *
   * @param id Artist id.
   * @param include Optional relation names to load alongside the artist.
   * @throws NotFoundException when no artist has this id.
   */
  async findOne(id: number, include?: string[]) {
    const includeOptions = this.buildIncludeOptions(include);

    const artist = await this.prisma.artist.findUnique({
      where: { id },
      include: includeOptions,
    });

    if (!artist) {
      throw new NotFoundException(`Artist with ID ${id} not found`);
    }

    return artist;
  }

  /**
   * Creates an artist, deriving its slug from the name, then indexes it.
   *
   * @returns The created artist row.
   */
  async create(data: CreateArtistDto) {
    const slug = this.generateSlug(data.name);

    const artist = await this.prisma.artist.create({
      data: {
        ...data,
        slug,
      },
    });

    await this.meilisearch.index('artists', artist);

    return artist;
  }

  /**
   * Updates an artist and refreshes its search document.
   *
   * @throws NotFoundException when no artist has this id.
   */
  async update(id: number, data: UpdateArtistDto) {
    const artist = await this.runOrNotFound(id, () =>
      this.prisma.artist.update({
        where: { id },
        data,
      }),
    );

    await this.meilisearch.update('artists', artist);

    return artist;
  }

  /**
   * Deletes an artist and removes it from the search index.
   *
   * @throws NotFoundException when no artist has this id.
   */
  async remove(id: number) {
    await this.runOrNotFound(id, () =>
      this.prisma.artist.delete({
        where: { id },
      }),
    );

    await this.meilisearch.delete('artists', id);
  }

  /**
   * Runs a Prisma write and turns its "record not found" failure (error code
   * `P2025`) into the same `NotFoundException` that `findOne` raises, so a
   * missing id produces a 404 instead of an unhandled error. Any other error
   * is rethrown untouched.
   */
  private async runOrNotFound<T>(id: number, operation: () => Promise<T>): Promise<T> {
    try {
      return await operation();
    } catch (error: unknown) {
      const code =
        typeof error === 'object' && error !== null
          ? (error as { code?: unknown }).code
          : undefined;
      if (code === 'P2025') {
        throw new NotFoundException(`Artist with ID ${id} not found`);
      }
      throw error;
    }
  }

  /**
   * Translates the requested relation names into a Prisma `include` object.
   * Unknown names are ignored; no names yields an empty object.
   */
  private buildIncludeOptions(include?: string[]) {
    if (!include) return {};

    const options: any = {};
    if (include.includes('albums')) options.albums = true;
    if (include.includes('songs')) options.songs = true;
    if (include.includes('videos')) options.videos = true;
    if (include.includes('areas')) options.areas = { include: { area: true } };
    if (include.includes('externalMetadata')) {
      options.externalMetadata = { include: { sources: true } };
    }

    return options;
  }

  /**
   * Builds a URL-friendly slug from an artist name.
   *
   * Accented Latin letters are folded to their base letter ("Björk" →
   * "bjork") and letters/digits of any script are kept, so names written in
   * non-Latin scripts no longer collapse to an empty slug. Pure-ASCII names
   * produce exactly the same slug as before.
   */
  private generateSlug(name: string): string {
    return name
      .normalize('NFKD')
      // Drop the combining diacritics that NFKD split off from base letters.
      .replace(/[\u0300-\u036f]/g, '')
      .toLowerCase()
      .replace(/[^\p{L}\p{N}]+/gu, '-')
      .replace(/^-|-$/g, '')
      // Recompose anything NFKD decomposed that was not a stripped diacritic.
      .normalize('NFC');
  }
}

DTO Example

/** Request body accepted when creating an artist. */
export class CreateArtistDto {
  /** Display name; must be a non-empty string. */
  @IsString()
  @IsNotEmpty()
  name: string;

  /** Optional string used in place of `name` for sorting. */
  @IsString()
  @IsOptional()
  sortName?: string;

  /** Optional list of integer area ids. */
  @IsArray()
  @IsInt({ each: true })
  @IsOptional()
  areaIds?: number[];
}

/**
 * Request body accepted when updating an artist, derived from
 * `CreateArtistDto` through `PartialType`.
 * NOTE(review): presumably the NestJS mapped-types helper that makes every
 * field optional — confirm against the file's imports.
 */
export class UpdateArtistDto extends PartialType(CreateArtistDto) {}

/** Shape of an artist as returned to API clients. */
export class ArtistResponseDto {
  id: number;
  name: string;
  slug: string;
  sortName?: string;
  illustration?: IllustrationDto;
  // Relation counts; optional, so they may be absent from a response.
  albumCount?: number;
  songCount?: number;
}

Testing

Jest tests for services and controllers:

// Unit tests for ArtistService with Prisma and MeiliSearch replaced by mocks.
describe('ArtistService', () => {
  let service: ArtistService;
  let prisma: PrismaService;

  beforeEach(async () => {
    const module: TestingModule = await Test.createTestingModule({
      providers: [
        ArtistService,
        {
          provide: PrismaService,
          useValue: {
            artist: {
              findMany: jest.fn(),
              findUnique: jest.fn(),
              // `findAll` calls `count`, and `jest.spyOn` refuses to spy on
              // a property that does not exist, so it must be mocked here.
              count: jest.fn(),
              create: jest.fn(),
              update: jest.fn(),
              delete: jest.fn(),
            },
          },
        },
        {
          provide: MeiliSearchService,
          useValue: {
            index: jest.fn(),
            update: jest.fn(),
            delete: jest.fn(),
          },
        },
      ],
    }).compile();

    service = module.get<ArtistService>(ArtistService);
    prisma = module.get<PrismaService>(PrismaService);
  });

  it('should find all artists', async () => {
    const mockArtists = [{ id: 1, name: 'Test Artist', slug: 'test-artist' }];
    jest.spyOn(prisma.artist, 'findMany').mockResolvedValue(mockArtists);
    jest.spyOn(prisma.artist, 'count').mockResolvedValue(1);

    const result = await service.findAll({});

    expect(result.items).toEqual(mockArtists);
    expect(result.total).toBe(1);
  });
});

Scanner (Go)

Package Structure

scanner/
├── cmd/
│   └── scanner/
│       └── main.go        # Entry point
├── internal/
│   ├── scanner/
│   │   ├── scanner.go     # Main scanner logic
│   │   └── watcher.go     # Filesystem watcher
│   ├── fingerprint/
│   │   └── acoustid.go    # AcoustID fingerprinting
│   ├── parser/
│   │   ├── metadata.go    # FFprobe metadata extraction
│   │   └── filename.go    # Regex filename parsing
│   ├── api/
│   │   └── client.go      # Server API client
│   └── config/
│       └── config.go      # Configuration loading
├── go.mod
└── go.sum

Main Entry Point

package main

import (
	"log"
	"os"

	"github.com/labstack/echo/v5"
	"meelo/scanner/internal/scanner"
	"meelo/scanner/internal/config"
)

// main loads the scanner configuration, registers the HTTP routes and then
// serves them on port 8133 until the server stops.
func main() {
	conf, loadErr := config.Load()
	if loadErr != nil {
		log.Fatalf("Failed to load config: %v", loadErr)
	}

	svc := scanner.New(conf)
	router := echo.New()

	// Status endpoints.
	router.GET("/", svc.HealthCheck)
	router.GET("/tasks", svc.ListTasks)

	// Endpoints that start work.
	router.POST("/scan", svc.ScanAll)
	router.POST("/scan/:libraryId", svc.ScanLibrary)
	router.POST("/clean", svc.CleanOrphans)
	router.POST("/refresh", svc.RefreshMetadata)

	serveErr := router.Start(":8133")
	log.Fatal(serveErr)
}

Scanner Logic

package scanner

import (
	"context"
	"log"
	"os"
	"path/filepath"
	"strings"

	"meelo/scanner/internal/api"
	"meelo/scanner/internal/config"
	"meelo/scanner/internal/fingerprint"
	"meelo/scanner/internal/parser"
)

// Scanner bundles the collaborators needed to process library files.
type Scanner struct {
	client      *api.Client            // server API client (library lookup, file registration)
	fingerprint *fingerprint.Generator // produces per-file fingerprints
	parser      *parser.Parser         // extracts metadata from files
}

// New builds a Scanner from cfg: the API client uses cfg.ServerURL and
// cfg.APIKey, and the parser is configured with cfg.TrackRegex.
func New(cfg *config.Config) *Scanner {
	sc := new(Scanner)
	sc.client = api.NewClient(cfg.ServerURL, cfg.APIKey)
	sc.fingerprint = fingerprint.New()
	sc.parser = parser.New(cfg.TrackRegex)
	return sc
}

// ScanLibrary looks up the library with the given id and walks its directory
// tree, handing every audio file to processFile.
//
// The walk stops with ctx.Err() as soon as ctx is cancelled or its deadline
// passes. It also stops on the first error reported by the filesystem walk
// or returned by processFile.
func (s *Scanner) ScanLibrary(ctx context.Context, libraryID int) error {
	library, err := s.client.GetLibrary(libraryID)
	if err != nil {
		return err
	}

	return filepath.Walk(library.Path, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}

		// Honour cancellation between entries so that a long scan can be
		// aborted instead of running to the end of the tree.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return ctxErr
		}

		if info.IsDir() || !s.isAudioFile(path) {
			return nil
		}

		return s.processFile(ctx, path, libraryID)
	})
}

// processFile extracts metadata, a fingerprint and a checksum for one file
// and registers the result with the server.
//
// A metadata failure is logged and the file is skipped (nil is returned so
// the scan continues). A fingerprint failure is logged and the file is
// registered without a usable fingerprint. Checksum and registration
// failures are returned to the caller.
func (s *Scanner) processFile(ctx context.Context, path string, libraryID int) error {
	tags, tagErr := s.parser.ExtractMetadata(path)
	if tagErr != nil {
		log.Printf("Failed to extract metadata from %s: %v", path, tagErr)
		return nil
	}

	acoustID, fpErr := s.fingerprint.Generate(path)
	if fpErr != nil {
		// Best effort: carry on with whatever value Generate returned.
		log.Printf("Failed to generate fingerprint for %s: %v", path, fpErr)
	}

	sum, sumErr := s.calculateChecksum(path)
	if sumErr != nil {
		return sumErr
	}

	registerErr := s.client.RegisterFile(&api.FileRegistration{
		Path:        path,
		Checksum:    sum,
		Fingerprint: acoustID,
		LibraryID:   libraryID,
		Metadata:    tags,
	})
	if registerErr != nil {
		return registerErr
	}

	log.Printf("Registered file: %s", path)
	return nil
}

// isAudioFile reports whether path ends in one of the supported audio
// extensions. The comparison ignores case, so "SONG.MP3" and "song.Flac"
// are recognised as well as their lowercase forms.
func (s *Scanner) isAudioFile(path string) bool {
	switch strings.ToLower(filepath.Ext(path)) {
	case ".mp3", ".flac", ".m4a", ".ogg", ".opus", ".wav":
		return true
	default:
		return false
	}
}

Metadata Extraction

package parser

import (
	"encoding/json"
	"os/exec"
)

// Parser extracts track metadata from files, falling back to the file path
// when tags are missing.
type Parser struct {
	trackRegex *regexp.Regexp // pattern applied to file paths by parseFilename
}

// New returns a Parser whose filename pattern is compiled from regex.
// It panics if regex is not a valid regular expression (regexp.MustCompile).
func New(regex string) *Parser {
	compiled := regexp.MustCompile(regex)
	p := new(Parser)
	p.trackRegex = compiled
	return p
}

// ExtractMetadata runs ffprobe on path and builds a Metadata value from the
// container tags, duration, bit rate and the first stream's codec.
//
// Any of Title, Artist or Album left empty by the tags is filled from the
// file path via parseFilename. Codec is left empty when ffprobe reports no
// streams. An error is returned when ffprobe fails or its output is not
// valid JSON.
func (p *Parser) ExtractMetadata(path string) (*Metadata, error) {
	// Run FFprobe
	cmd := exec.Command("ffprobe",
		"-v", "quiet",
		"-print_format", "json",
		"-show_format",
		"-show_streams",
		path,
	)

	output, err := cmd.Output()
	if err != nil {
		return nil, err
	}

	var probe ProbeResult
	if err := json.Unmarshal(output, &probe); err != nil {
		return nil, err
	}

	// Extract metadata from tags
	metadata := &Metadata{
		Title:    probe.Format.Tags.Title,
		Artist:   probe.Format.Tags.Artist,
		Album:    probe.Format.Tags.Album,
		Duration: probe.Format.Duration,
		Bitrate:  probe.Format.BitRate,
	}

	// Indexing Streams[0] unconditionally would panic on a file for which
	// ffprobe lists no streams.
	if len(probe.Streams) > 0 {
		metadata.Codec = probe.Streams[0].CodecName
	}

	// Parse filename if tags missing
	if metadata.Title == "" || metadata.Artist == "" || metadata.Album == "" {
		fileMetadata := p.parseFilename(path)
		if metadata.Title == "" {
			metadata.Title = fileMetadata.Title
		}
		if metadata.Artist == "" {
			metadata.Artist = fileMetadata.Artist
		}
		if metadata.Album == "" {
			metadata.Album = fileMetadata.Album
		}
	}

	return metadata, nil
}

// parseFilename applies the configured track regex to path and returns the
// text captured by the named groups "artist", "album" and "title".
//
// A field is left empty when the regex does not define the corresponding
// group; an empty Metadata is returned when the regex does not match.
func (p *Parser) parseFilename(path string) *Metadata {
	matches := p.trackRegex.FindStringSubmatch(path)
	if matches == nil {
		return &Metadata{}
	}

	// SubexpIndex returns -1 for a group name the pattern does not contain;
	// indexing matches with it directly would panic.
	group := func(name string) string {
		idx := p.trackRegex.SubexpIndex(name)
		if idx < 0 || idx >= len(matches) {
			return ""
		}
		return matches[idx]
	}

	return &Metadata{
		Artist: group("artist"),
		Album:  group("album"),
		Title:  group("title"),
	}
}

Testing

package scanner

import (
	"testing"
)

// TestIsAudioFile checks that audio extensions are accepted and other
// extensions are rejected.
func TestIsAudioFile(t *testing.T) {
	cases := []struct {
		path     string
		expected bool
	}{
		{"song.mp3", true},
		{"song.flac", true},
		{"song.txt", false},
		{"song.jpg", false},
	}

	subject := &Scanner{}

	for i := range cases {
		tc := cases[i]
		if got := subject.isAudioFile(tc.path); got != tc.expected {
			t.Errorf("isAudioFile(%s) = %v, want %v", tc.path, got, tc.expected)
		}
	}
}

Matcher (Python)

Package Structure

matcher/
├── providers/
│   ├── __init__.py
│   ├── base.py            # Base provider interface
│   ├── musicbrainz.py
│   ├── genius.py
│   ├── wikipedia.py
│   ├── wikidata.py
│   ├── discogs.py
│   ├── allmusic.py
│   ├── metacritic.py
│   └── lrclib.py
├── main.py                # FastAPI app + RabbitMQ consumer
├── config.py              # Configuration loading
├── aggregator.py          # Result aggregation
├── requirements.txt
└── tests/
    ├── test_musicbrainz.py
    ├── test_genius.py
    └── ...

Main Entry Point

from fastapi import FastAPI
from aio_pika import connect_robust
import asyncio

from providers import ProviderFactory
from aggregator import MetadataAggregator
from config import load_config

app = FastAPI()
config = load_config()

@app.get("/health")
async def health():
    """Liveness probe: always answers with a fixed "healthy" status."""
    payload = {"status": "healthy"}
    return payload

async def consume_events():
    """Consume the ``file.added`` queue and process each announced file.

    Each message is handled inside ``message.process()``, so the message's
    acknowledgement is tied to the outcome of ``process_file``.
    """
    connection = await connect_robust(config.rabbitmq_url)
    channel = await connection.channel()
    queue = await channel.declare_queue("file.added")

    async with queue.iterator() as queue_iter:
        async for message in queue_iter:
            async with message.process():
                # ``message.body`` is raw bytes while ``process_file`` takes
                # an integer id, so decode it first.
                # NOTE(review): assumes the payload is the decimal file id as
                # text — confirm against the publisher.
                file_id = int(message.body.decode("utf-8").strip())
                await process_file(file_id)

async def process_file(file_id: int):
    """Fetch a file, query every enabled provider, and push the merged result.

    Providers run concurrently. A provider that raises is logged and
    contributes ``None`` — the same value a provider returns when it finds
    nothing — so one failing provider cannot hand an exception object to the
    aggregator. The result list keeps one entry per provider, in provider
    order.
    """
    import logging

    log = logging.getLogger(__name__)

    # Fetch file metadata from Server
    file_data = await fetch_file(file_id)

    # Query providers in parallel
    factory = ProviderFactory(config)
    # Materialise once: the providers are iterated twice below.
    providers = list(factory.get_enabled_providers())

    tasks = [provider.fetch_metadata(file_data) for provider in providers]
    outcomes = await asyncio.gather(*tasks, return_exceptions=True)

    results = []
    for provider, outcome in zip(providers, outcomes):
        if isinstance(outcome, BaseException):
            log.error(
                "Provider %s failed for file %s",
                type(provider).__name__,
                file_id,
                exc_info=outcome,
            )
            results.append(None)
        else:
            results.append(outcome)

    # Aggregate results
    aggregator = MetadataAggregator(config.provider_order)
    metadata = aggregator.aggregate(results)

    # Push to Server
    await push_metadata(file_id, metadata)

if __name__ == "__main__":
    import uvicorn

    async def _serve():
        """Run the queue consumer and the HTTP server on the same event loop."""
        # ``uvicorn.run`` starts its own event loop, so a task scheduled on a
        # different loop beforehand would never execute. Start the consumer
        # from inside the loop that also serves HTTP.
        consumer = asyncio.create_task(consume_events())
        server = uvicorn.Server(uvicorn.Config(app, host="0.0.0.0", port=6789))
        try:
            await server.serve()
        finally:
            # Stop consuming once the HTTP server has shut down.
            consumer.cancel()
            await asyncio.gather(consumer, return_exceptions=True)

    asyncio.run(_serve())

Provider Base Class

from abc import ABC, abstractmethod
from typing import Optional

class Provider(ABC):
    """Abstract contract implemented by every metadata provider.

    Subclasses must implement all three coroutines below; each one returns a
    dict on success or ``None``.
    """

    def __init__(self, config):
        # Stored as-is so subclasses can read their settings from it.
        self.config = config

    @abstractmethod
    async def fetch_metadata(self, file_data: dict) -> Optional[dict]:
        """Look up metadata for the file described by ``file_data``."""
        return None

    @abstractmethod
    async def search_artist(self, name: str) -> Optional[dict]:
        """Look up an artist by ``name``."""
        return None

    @abstractmethod
    async def search_album(self, artist: str, album: str) -> Optional[dict]:
        """Look up an album by its ``artist`` and ``album`` title."""
        return None

MusicBrainz Provider

import musicbrainzngs as mb
from aiolimiter import AsyncLimiter

from providers.base import Provider

class MusicBrainzProvider(Provider):
    """Metadata provider backed by the MusicBrainz web service.

    Every web-service call goes through :meth:`_call`, which waits for the
    one-request-per-second limiter and runs the blocking ``musicbrainzngs``
    function in a worker thread so the event loop is not stalled.

    NOTE(review): the annotations below use ``Optional``; make sure
    ``from typing import Optional`` is among this module's imports.
    """

    def __init__(self, config):
        super().__init__(config)
        mb.set_useragent("Meelo", "1.0", "https://github.com/Arthi-chaud/Meelo")
        self.limiter = AsyncLimiter(1, 1)  # 1 request per second

    async def _call(self, func, *args, **kwargs):
        """Run one ``musicbrainzngs`` call under the rate limiter.

        The limiter is acquired per call, so a fingerprint lookup followed by
        a text search counts as two requests rather than one.
        """
        import asyncio
        from functools import partial

        async with self.limiter:
            loop = asyncio.get_running_loop()
            return await loop.run_in_executor(None, partial(func, *args, **kwargs))

    async def fetch_metadata(self, file_data: dict) -> Optional[dict]:
        """Return recording metadata for ``file_data``, or ``None``.

        Tries the fingerprint first (when present), then falls back to a
        text search on the artist / album / title found under
        ``file_data["metadata"]``.
        """
        # Try AcoustID fingerprint first
        fingerprint = file_data.get("fingerprint")
        if fingerprint:
            result = await self._query_by_fingerprint(fingerprint)
            if result:
                return result

        # Fallback to text search
        tags = file_data.get("metadata") or {}
        return await self._query_by_text(
            tags.get("artist"),
            tags.get("album"),
            tags.get("title"),
        )

    async def _query_by_fingerprint(self, fingerprint: str) -> Optional[dict]:
        """Look a recording up by fingerprint; ``None`` on no match or error."""
        # NOTE(review): musicbrainzngs documents PUID lookups as no longer
        # available on the server, so this call is expected to fail and fall
        # through to the text search. An AcoustID fingerprint needs the
        # AcoustID web service instead — confirm and replace.
        try:
            result = await self._call(mb.get_recordings_by_puid, fingerprint)
        except mb.WebServiceError:
            return None

        recordings = result.get("recording-list") or []
        return self._extract_metadata(recordings[0]) if recordings else None

    async def _query_by_text(self, artist: str, album: str, title: str) -> Optional[dict]:
        """Search recordings by whichever of artist/album/title are non-empty."""
        candidates = (("artist", artist), ("release", album), ("recording", title))
        fields = {key: value for key, value in candidates if value}
        if not fields:
            # Nothing to search for.
            return None

        try:
            result = await self._call(mb.search_recordings, limit=1, **fields)
        except mb.WebServiceError:
            return None

        recordings = result.get("recording-list") or []
        return self._extract_metadata(recordings[0]) if recordings else None

    async def search_artist(self, name: str) -> Optional[dict]:
        """Return ``{"name", "mbid"}`` for the best artist match, or ``None``."""
        if not name:
            return None

        try:
            result = await self._call(mb.search_artists, artist=name, limit=1)
        except mb.WebServiceError:
            return None

        artists = result.get("artist-list") or []
        if not artists:
            return None
        return {"name": artists[0]["name"], "mbid": artists[0]["id"]}

    async def search_album(self, artist: str, album: str) -> Optional[dict]:
        """Return ``{"title", "mbid"}`` for the best release match, or ``None``."""
        candidates = (("artist", artist), ("release", album))
        fields = {key: value for key, value in candidates if value}
        if not fields:
            return None

        try:
            result = await self._call(mb.search_releases, limit=1, **fields)
        except mb.WebServiceError:
            return None

        releases = result.get("release-list") or []
        if not releases:
            return None
        return {"title": releases[0]["title"], "mbid": releases[0]["id"]}

    def _extract_metadata(self, recording: dict) -> dict:
        """Flatten a recording entry into the provider's result dict."""
        return {
            "title": recording["title"],
            "artist": recording["artist-credit"][0]["artist"]["name"],
            "album": recording["release-list"][0]["title"] if recording.get("release-list") else None,
            "duration": recording.get("length"),
            "mbid": recording["id"],
        }

Testing

import pytest
from providers.musicbrainz import MusicBrainzProvider

@pytest.mark.asyncio
async def test_musicbrainz_search():
    """An artist search for "The Beatles" returns that name and an mbid."""
    mb_provider = MusicBrainzProvider({})

    artist = await mb_provider.search_artist("The Beatles")

    assert artist is not None
    assert artist["name"] == "The Beatles"
    assert "mbid" in artist

Front (Next.js)

Directory Structure

front/web/
├── pages/
│   ├── index.tsx          # Home page
│   ├── artists/
│   │   ├── index.tsx      # Artist list
│   │   └── [id].tsx       # Artist detail
│   ├── albums/
│   ├── songs/
│   ├── playlists/
│   └── settings/
├── components/
│   ├── ArtistCard.tsx
│   ├── AlbumCard.tsx
│   ├── TrackList.tsx
│   └── Player.tsx
├── hooks/
│   ├── useArtists.ts
│   ├── useAlbums.ts
│   └── usePlayback.ts
├── state/
│   ├── auth.ts            # Jotai atoms
│   ├── playback.ts
│   └── settings.ts
├── lib/
│   └── api.ts             # API client
└── styles/
    └── globals.css

API Client

import axios from 'axios';

/** Shared axios instance; requests are sent relative to `NEXT_PUBLIC_API_URL`. */
const api = axios.create({
  baseURL: process.env.NEXT_PUBLIC_API_URL,
});

// Attach the stored token as a Bearer credential when there is one.
api.interceptors.request.use((config) => {
  // `localStorage` exists only in the browser. This module is also evaluated
  // during server-side rendering, where reading it would throw a
  // ReferenceError, so skip the lookup there.
  const token =
    typeof window === 'undefined' ? null : window.localStorage.getItem('token');
  if (token) {
    config.headers.Authorization = `Bearer ${token}`;
  }
  return config;
});

/** Thin wrappers around the `/artists` endpoints; each returns the axios promise. */
export const artistsApi = {
  /** GET /artists with optional paging parameters. */
  getAll(params?: { skip?: number; take?: number }) {
    return api.get('/artists', { params });
  },

  /** GET /artists/:id, optionally asking for related records. */
  getOne(id: number, include?: string[]) {
    const query = { include };
    return api.get(`/artists/${id}`, { params: query });
  },

  /** POST /artists. */
  create(data: CreateArtistDto) {
    return api.post('/artists', data);
  },

  /** PATCH /artists/:id. */
  update(id: number, data: UpdateArtistDto) {
    return api.patch(`/artists/${id}`, data);
  },

  /** DELETE /artists/:id. */
  delete(id: number) {
    return api.delete(`/artists/${id}`);
  },
};

TanStack Query Hook

import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
import { artistsApi } from '../lib/api';

/**
 * Query hook for a page of artists. The paging parameters are part of the
 * query key, so each distinct page is cached separately.
 */
export function useArtists(params?: { skip?: number; take?: number }) {
  const fetchPage = () => artistsApi.getAll(params);

  return useQuery({
    queryKey: ['artists', params],
    queryFn: fetchPage,
  });
}

/**
 * Query hook for a single artist, keyed by the id and the requested
 * relations.
 */
export function useArtist(id: number, include?: string[]) {
  const fetchArtist = () => artistsApi.getOne(id, include);

  return useQuery({
    queryKey: ['artists', id, include],
    queryFn: fetchArtist,
  });
}

/**
 * Mutation hook that creates an artist and, on success, invalidates every
 * query whose key starts with 'artists'.
 */
export function useCreateArtist() {
  const client = useQueryClient();

  const refreshArtistQueries = () => {
    client.invalidateQueries({ queryKey: ['artists'] });
  };

  return useMutation({
    mutationFn: artistsApi.create,
    onSuccess: refreshArtistQueries,
  });
}

Component Example

import { useArtists } from '../hooks/useArtists';
import ArtistCard from '../components/ArtistCard';

/** Artist list page: shows the first 20 artists as a grid of cards. */
export default function ArtistsPage() {
  const { data, isLoading, error } = useArtists({ take: 20 });

  if (isLoading) return <div>Loading...</div>;
  if (error) return <div>Error loading artists</div>;

  // The query resolves to the axios response object, so the server payload
  // (`{ items, total, ... }`) sits under `.data`. Reading `data.items`
  // directly yields `undefined` and crashes on `.map`.
  const artists = data?.data?.items ?? [];

  return (
    <div>
      <h1>Artists</h1>
      <div className="grid">
        {artists.map((artist) => (
          <ArtistCard key={artist.id} artist={artist} />
        ))}
      </div>
    </div>
  );
}

Code Quality

Biome Configuration

{
  "formatter": {
    "enabled": true,
    "indentStyle": "tab",
    "lineWidth": 100
  },
  "linter": {
    "enabled": true,
    "rules": {
      "recommended": true
    }
  },
  "javascript": {
    "formatter": {
      "quoteStyle": "double"
    }
  }
}

Logging

Server (NestJS):

import { Logger } from '@nestjs/common';

// Logger instance labelled 'ArtistService'.
const logger = new Logger('ArtistService');
// Message plus an object carrying the artist's id.
logger.log('Artist created', { id: artist.id });
// Message plus the error's stack trace.
logger.error('Failed to create artist', error.stack);

Scanner (Go):

import "github.com/rs/zerolog/log"

// Info-level event with the file path attached as the "path" field.
log.Info().Str("path", path).Msg("File registered")
// Error-level event with err attached through Err.
log.Error().Err(err).Msg("Failed to extract metadata")

Matcher (Python):

import logging

# Module-level logger named after the current module.
logger = logging.getLogger(__name__)
# Pass values as arguments instead of pre-formatting with an f-string: the
# message is then only built when the record is actually emitted.
logger.info("Fetching metadata for file %s", file_id)
# exc_info=True attaches the traceback of the exception being handled.
logger.error("Provider failed: %s", provider_name, exc_info=True)

Summary

Meelo's codebase is organized into four microservices with clear separation of concerns. Server uses NestJS modules for domain logic, Prisma for database access, and Jest for testing. Scanner uses Go packages for file processing, FFprobe for metadata extraction, and AcoustID for fingerprinting. Matcher uses Python provider modules for external queries, asyncio for parallelism, and pytest for testing. Front uses Next.js pages for routing, TanStack Query for data fetching, and Jotai for state management. Code quality is enforced via Biome linting, type checking (TypeScript, Pyright, Go), and SonarCloud quality gates. Logging uses structured formats (JSON) for easy parsing. The monorepo structure simplifies version coordination and cross-service changes.