From 1e8506f146a7751f9a03a280b3732da2d91da924 Mon Sep 17 00:00:00 2001
From: Alexander
Date: Sat, 9 May 2026 22:09:17 +0200
Subject: [PATCH] Deduce bit depth, sample rate, and bitrate from file sizes; add comprehensive parser tests

---
Review notes (commentary placed after the three-dash line; it is not part of
the commit message and no diff line below has been altered):

internal/tracker/generic_parser.go
  * fillFromTorrent now calls p.deduceFromFileSize(r) immediately after
    p.fillFromTitle(r, info.Name), and its trace log gains the bit_depth,
    sample_rate and bitrate fields.
  * deduceFromFileSize(r) returns without changes when r.AudioFileCount or
    r.TotalAudioSize is 0. Otherwise it computes
    TotalAudioSize / int64(AudioFileCount) (integer division) and converts the
    result to MiB by dividing by 1024*1024.
      - r.Format.IsLossless(): returns early when BitDepth and SampleRate are
        both already > 0; otherwise the average is bucketed as
        >= 130 -> 24/192000, >= 65 -> 24/96000, >= 38 -> 24/48000,
        anything smaller -> 16/44100.
      - r.Format == release.FormatMP3: returns early when Bitrate is non-empty;
        otherwise >= 8.5 -> "320 kbps", >= 6.5 -> "256 kbps",
        >= 4.5 -> "192 kbps", anything smaller -> "128 kbps".
      - Any other format matches no case and the release is left untouched.
  * setIfMissing assigns BitDepth and SampleRate independently, each only when
    the field is 0. NOTE(review): when just one of the two is already set, the
    other is still taken from the size bucket, so pairs such as a preset
    16-bit depth with a deduced 192000 Hz rate are possible; confirm that this
    is intended.
  * NOTE(review): the thresholds follow the inline "~4 min track" size tables,
    so releases with much longer or shorter tracks will land in a different
    bucket; confirm that this is acceptable for the callers.

internal/tracker/generic_parser_test.go
  * buildTorrentData writes a bencoded torrent by hand. No files: single-file
    layout with length 0 and the given name. Exactly one file: single-file
    layout whose "name" is files[0].path (the name argument is not used in
    this branch). Two or more files: a "files" list with one-element path
    lists, followed by the given name.
  * TestGenericParser_ParseTorrent replaces the /tmp/metallica.torrent fixture
    with a table. Format, AudioFileCount, HasCoverArt, HasCueSheet and
    HasRipLog are compared for every case. wantSource, wantBitDepth,
    wantSampleRate, wantTrackNames, wantArtist, wantAlbum, wantYear, wantType,
    wantGenres and wantLabel are compared only when they differ from
    release.SourceUnknown, 0, empty, or release.TypeUnknown respectively.
    wantParseErrors only asserts that errors exist when it is true.
  * The "empty torrent data" and "invalid torrent data" inputs are selected by
    matching tt.name (nil data and a non-bencode byte string), so renaming
    either case changes what is tested.
  * The previous ParsedSuccessfully assertion is removed and nothing replaces
    it.
  * NOTE(review): "mixed format dominant wins" supplies 10 .flac and 2 .mp3
    files and expects AudioFileCount == 10; this relies on counting logic that
    is not part of this diff, so confirm it against the parser.
  * TestGenericParser_DeduceFromFileSize covers the four lossless buckets, the
    128 and 320 kbps MP3 buckets, preset-value overrides, the zero-audio early
    return and an AAC release. There are no cases for the 192 and 256 kbps
    buckets or for averages that sit exactly on a threshold.

 internal/tracker/generic_parser.go | 73 ++++
 internal/tracker/generic_parser_test.go | 537 +++++++++++++++++++++---
 2 files changed, 542 insertions(+), 68 deletions(-)

diff --git a/internal/tracker/generic_parser.go b/internal/tracker/generic_parser.go index ecb1d2f..324a1cd 100644 --- a/internal/tracker/generic_parser.go +++ b/internal/tracker/generic_parser.go @@ -196,6 +196,7 @@ func (p *GenericParser) fillFromTorrent(r *release.Release, torrentData []byte) } p.fillFromTitle(r, info.Name) + p.deduceFromFileSize(r) log.Trace(). Str("hash", r.InfoHash). @@ -205,6 +206,9 @@ func (p *GenericParser) fillFromTorrent(r *release.Release, torrentData []byte) Bool("cover", r.HasCoverArt). Bool("cue", r.HasCueSheet). Bool("log", r.HasRipLog). + Int("bit_depth", r.BitDepth). + Int("sample_rate", r.SampleRate). + Str("bitrate", r.Bitrate). Msg("filled from torrent") } @@ -267,6 +271,75 @@ func (p *GenericParser) fillFromTitle(r *release.Release, title string) { Msg("filled from title") } +func (p *GenericParser) deduceFromFileSize(r *release.Release) { + if r.AudioFileCount == 0 || r.TotalAudioSize == 0 { + return + } + + avgFileSize := r.TotalAudioSize / int64(r.AudioFileCount) + avgFileSizeMB := float64(avgFileSize) / (1024 * 1024) + + switch { + case r.Format.IsLossless(): + if r.BitDepth > 0 && r.SampleRate > 0 { + return + } + + // Average FLAC file size per ~4 min track: + // 16/44.1 ≈ 25-35 MB 24/48 ≈ 40-60 MB + // 24/96 ≈ 80-120 MB 24/192 ≈ 160-240 MB + switch { + case avgFileSizeMB >= 130: + p.setIfMissing(r, 24, 192000) + case avgFileSizeMB >= 65: + p.setIfMissing(r, 24, 96000) + case avgFileSizeMB >= 38: + p.setIfMissing(r, 24, 48000) + default: + p.setIfMissing(r, 16, 44100) + } + + log.Trace(). 
+ Float64("avg_file_mb", avgFileSizeMB). + Int("deduced_bit_depth", r.BitDepth). + Int("deduced_sample_rate", r.SampleRate). + Msg("deduced lossless quality from file size") + + case r.Format == release.FormatMP3: + if r.Bitrate != "" { + return + } + + // Average MP3 file size per ~4 min track: + // 128 kbps ≈ 3.5-4 MB 192 kbps ≈ 5-6 MB + // 256 kbps ≈ 7-8 MB 320 kbps ≈ 9-10 MB + switch { + case avgFileSizeMB >= 8.5: + r.Bitrate = "320 kbps" + case avgFileSizeMB >= 6.5: + r.Bitrate = "256 kbps" + case avgFileSizeMB >= 4.5: + r.Bitrate = "192 kbps" + default: + r.Bitrate = "128 kbps" + } + + log.Trace(). + Float64("avg_file_mb", avgFileSizeMB). + Str("deduced_bitrate", r.Bitrate). + Msg("deduced mp3 bitrate from file size") + } +} + +func (p *GenericParser) setIfMissing(r *release.Release, bitDepth int, sampleRate int) { + if r.BitDepth == 0 { + r.BitDepth = bitDepth + } + if r.SampleRate == 0 { + r.SampleRate = sampleRate + } +} + var trackNumberPrefix = regexp.MustCompile(`^\d{1,3}[\s.\-]+`) func cleanTrackName(name string) string { diff --git a/internal/tracker/generic_parser_test.go b/internal/tracker/generic_parser_test.go index 314090a..3ea7a56 100644 --- a/internal/tracker/generic_parser_test.go +++ b/internal/tracker/generic_parser_test.go @@ -1,13 +1,39 @@ package tracker import ( - "os" + "bytes" + "fmt" "testing" metadataPb "homelab.lan/music-agregator/gen/metadata/v1" "homelab.lan/music-agregator/internal/release" ) +type testFile struct { + path string + size int64 +} + +func buildTorrentData(name string, files []testFile) []byte { + var buf bytes.Buffer + buf.WriteString("d8:announce35:http://tracker.example.com/announce4:infod") + + if len(files) == 0 { + buf.WriteString(fmt.Sprintf("6:lengthi0e4:name%d:%s12:piece lengthi16384e6:pieces20:01234567890123456789", len(name), name)) + } else if len(files) == 1 { + buf.WriteString(fmt.Sprintf("6:lengthi%de4:name%d:%s12:piece lengthi16384e6:pieces20:01234567890123456789", files[0].size, len(files[0].path), 
files[0].path)) + } else { + buf.WriteString("5:filesl") + for _, f := range files { + buf.WriteString(fmt.Sprintf("d6:lengthi%de4:pathl%d:%see", f.size, len(f.path), f.path)) + } + buf.WriteString(fmt.Sprintf("e4:name%d:%s12:piece lengthi16384e6:pieces20:01234567890123456789", len(name), name)) + } + + buf.WriteString("ee") + return buf.Bytes() +} + func TestGenericParser_Parse(t *testing.T) { p := NewGenericParser() @@ -95,77 +121,452 @@ func TestGenericParser_Parse(t *testing.T) { } func TestGenericParser_ParseTorrent(t *testing.T) { - torrentData, err := os.ReadFile("/tmp/metallica.torrent") - if err != nil { - t.Skip("metallica.torrent not available") - } - - album := &metadataPb.Album{ - Title: "72 Seasons", - AlbumType: "Album", - ReleaseDate: "2023-04-14", - TotalTracks: 12, - TotalDiscs: 1, - Artists: []*metadataPb.ArtistCredit{ - {Artist: &metadataPb.Artist{Name: "Metallica"}}, - }, - Genres: []*metadataPb.Genre{ - {Name: "Thrash Metal"}, - {Name: "Heavy Metal"}, - }, - Label: &metadataPb.Label{Name: "Blackened Recordings"}, - } p := NewGenericParser() - r := p.ParseTorrent(torrentData, album) - t.Logf("Artist: %s", r.Artist) - t.Logf("Album: %s", r.Album) - t.Logf("Year: %d", r.Year) - t.Logf("Type: %s", r.Type) - t.Logf("Genres: %v", r.Genres) - t.Logf("Format: %s", r.Format) - t.Logf("Source: %s", r.Source) - t.Logf("Label: %s", r.Label) - t.Logf("InfoHash: %s", r.InfoHash) - t.Logf("TrackCount: %d", r.TrackCount) - t.Logf("AudioFiles: %d", r.AudioFileCount) - t.Logf("AudioSize: %d bytes", r.TotalAudioSize) - t.Logf("HasCover: %v", r.HasCoverArt) - t.Logf("HasCue: %v", r.HasCueSheet) - t.Logf("HasLog: %v", r.HasRipLog) - t.Logf("TrackNames: %v", r.TrackNames) - t.Logf("Parsed OK: %v", r.ParsedSuccessfully) - t.Logf("Errors: %v", r.ParseErrors) + makeFlacFiles := func(count int, sizeMB float64) []testFile { + files := make([]testFile, count) + for i := range files { + files[i] = testFile{ + path: fmt.Sprintf("%02d - Track %d.flac", i+1, i+1), + size: 
int64(sizeMB * 1024 * 1024), + } + } + return files + } - if r.Artist != "Metallica" { - t.Errorf("Artist = %q, want Metallica", r.Artist) + makeMp3Files := func(count int, sizeMB float64) []testFile { + files := make([]testFile, count) + for i := range files { + files[i] = testFile{ + path: fmt.Sprintf("%02d - Track %d.mp3", i+1, i+1), + size: int64(sizeMB * 1024 * 1024), + } + } + return files } - if r.Album != "72 Seasons" { - t.Errorf("Album = %q, want 72 Seasons", r.Album) + + tests := []struct { + name string + torrentName string + files []testFile + album *metadataPb.Album + wantFormat release.AudioFormat + wantAudioFileCount int + wantHasCoverArt bool + wantHasCueSheet bool + wantHasRipLog bool + wantSource release.Source + wantInfoHashEmpty bool + wantBitDepth int + wantSampleRate int + wantTrackNames []string + wantArtist string + wantAlbum string + wantYear int + wantType release.Type + wantGenres []string + wantLabel string + wantParseErrors bool + }{ + { + name: "flac album with cover cue log", + torrentName: "Test Artist - Test Album (2024) [FLAC]", + files: append(append(makeFlacFiles(12, 30), + testFile{path: "cover.jpg", size: 500000}, + testFile{path: "album.cue", size: 2000}), + testFile{path: "rip.log", size: 5000}), + album: &metadataPb.Album{ + Title: "Test Album", + AlbumType: "Album", + ReleaseDate: "2024-01-15", + TotalTracks: 12, + Artists: []*metadataPb.ArtistCredit{{Artist: &metadataPb.Artist{Name: "Test Artist"}}}, + Genres: []*metadataPb.Genre{{Name: "Rock"}}, + Label: &metadataPb.Label{Name: "Test Label"}, + }, + wantFormat: release.FormatFLAC, + wantAudioFileCount: 12, + wantHasCoverArt: true, + wantHasCueSheet: true, + wantHasRipLog: true, + wantSource: release.SourceCD, + wantArtist: "Test Artist", + wantAlbum: "Test Album", + wantYear: 2024, + wantType: release.TypeAlbum, + wantGenres: []string{"Rock"}, + wantLabel: "Test Label", + }, + { + name: "mp3 album with cover", + torrentName: "Artist - MP3 Album (2023)", + files: 
append(makeMp3Files(10, 10), + testFile{path: "cover.jpg", size: 300000}), + album: &metadataPb.Album{ + Title: "MP3 Album", + Artists: []*metadataPb.ArtistCredit{{Artist: &metadataPb.Artist{Name: "Artist"}}}, + ReleaseDate: "2023-05-20", + }, + wantFormat: release.FormatMP3, + wantAudioFileCount: 10, + wantHasCoverArt: true, + wantHasCueSheet: false, + wantHasRipLog: false, + wantArtist: "Artist", + wantAlbum: "MP3 Album", + wantYear: 2023, + }, + { + name: "mixed format dominant wins", + torrentName: "Mixed Format Album", + files: append(makeFlacFiles(10, 30), + testFile{path: "bonus1.mp3", size: 10485760}, + testFile{path: "bonus2.mp3", size: 10485760}), + album: &metadataPb.Album{ + Title: "Mixed Format Album", + Artists: []*metadataPb.ArtistCredit{{Artist: &metadataPb.Artist{Name: "Artist"}}}, + }, + wantFormat: release.FormatFLAC, + wantAudioFileCount: 10, + }, + { + name: "single file torrent flac", + torrentName: "Single Track.flac", + files: []testFile{{path: "Single Track.flac", size: 50 * 1024 * 1024}}, + album: &metadataPb.Album{ + Title: "Single Track", + Artists: []*metadataPb.ArtistCredit{{Artist: &metadataPb.Artist{Name: "Artist"}}}, + }, + wantFormat: release.FormatFLAC, + wantAudioFileCount: 1, + }, + { + name: "single file torrent mp3", + torrentName: "Single.mp3", + files: []testFile{{path: "Single.mp3", size: 10 * 1024 * 1024}}, + album: &metadataPb.Album{ + Title: "Single", + Artists: []*metadataPb.ArtistCredit{{Artist: &metadataPb.Artist{Name: "Artist"}}}, + }, + wantFormat: release.FormatMP3, + wantAudioFileCount: 1, + }, + { + name: "no audio files", + torrentName: "Not Music", + files: []testFile{ + {path: "readme.txt", size: 1000}, + {path: "image.jpg", size: 500000}, + }, + album: &metadataPb.Album{ + Title: "Not Music", + Artists: []*metadataPb.ArtistCredit{{Artist: &metadataPb.Artist{Name: "Someone"}}}, + }, + wantFormat: release.FormatUnknown, + wantAudioFileCount: 0, + wantHasCoverArt: true, + }, + { + name: "hires in title", + 
torrentName: "Artist - Album (2024) [24Bit-96kHz] FLAC", + files: makeFlacFiles(12, 100), + album: &metadataPb.Album{ + Title: "Album", + Artists: []*metadataPb.ArtistCredit{{Artist: &metadataPb.Artist{Name: "Artist"}}}, + }, + wantFormat: release.FormatFLAC, + wantAudioFileCount: 12, + wantBitDepth: 24, + wantSampleRate: 96000, + }, + { + name: "source from title", + torrentName: "Artist - Album [WEB] FLAC", + files: makeFlacFiles(10, 30), + album: &metadataPb.Album{ + Title: "Album", + Artists: []*metadataPb.ArtistCredit{{Artist: &metadataPb.Artist{Name: "Artist"}}}, + }, + wantFormat: release.FormatFLAC, + wantAudioFileCount: 10, + wantSource: release.SourceWEB, + }, + { + name: "track names cleaned", + torrentName: "Artist - Album", + files: []testFile{ + {path: "01 - First Track.flac", size: 30 * 1024 * 1024}, + {path: "02 - Second Track.flac", size: 30 * 1024 * 1024}, + }, + album: &metadataPb.Album{ + Title: "Album", + Artists: []*metadataPb.ArtistCredit{{Artist: &metadataPb.Artist{Name: "Artist"}}}, + }, + wantFormat: release.FormatFLAC, + wantAudioFileCount: 2, + wantTrackNames: []string{"First Track", "Second Track"}, + }, + { + name: "metadata fills release fields", + torrentName: "Test Torrent", + files: makeFlacFiles(8, 30), + album: &metadataPb.Album{ + Title: "Metadata Album", + AlbumType: "EP", + ReleaseDate: "2020-06-15", + TotalTracks: 8, + TotalDiscs: 1, + Artists: []*metadataPb.ArtistCredit{{Artist: &metadataPb.Artist{Name: "Metadata Artist"}}}, + Genres: []*metadataPb.Genre{{Name: "Electronic"}, {Name: "Ambient"}}, + Label: &metadataPb.Label{Name: "Metadata Label"}, + }, + wantFormat: release.FormatFLAC, + wantAudioFileCount: 8, + wantArtist: "Metadata Artist", + wantAlbum: "Metadata Album", + wantYear: 2020, + wantType: release.TypeEP, + wantGenres: []string{"Electronic", "Ambient"}, + wantLabel: "Metadata Label", + }, + { + name: "empty torrent data", + torrentName: "", + files: nil, + album: &metadataPb.Album{ + Title: "Album Only", + 
ReleaseDate: "2022-01-01", + Artists: []*metadataPb.ArtistCredit{{Artist: &metadataPb.Artist{Name: "Artist Only"}}}, + }, + wantFormat: release.FormatUnknown, + wantAudioFileCount: 0, + wantInfoHashEmpty: true, + wantArtist: "Artist Only", + wantAlbum: "Album Only", + wantYear: 2022, + }, + { + name: "invalid torrent data", + torrentName: "invalid", + files: nil, + album: &metadataPb.Album{Title: "Album", Artists: []*metadataPb.ArtistCredit{{Artist: &metadataPb.Artist{Name: "Artist"}}}}, + wantArtist: "Artist", + wantAlbum: "Album", + wantParseErrors: true, + }, } - if r.Year != 2023 { - t.Errorf("Year = %d, want 2023", r.Year) - } - if r.Format != release.FormatFLAC { - t.Errorf("Format = %v, want FLAC", r.Format) - } - if r.AudioFileCount != 12 { - t.Errorf("AudioFileCount = %d, want 12", r.AudioFileCount) - } - if !r.HasCoverArt { - t.Error("expected HasCoverArt") - } - if !r.HasCueSheet { - t.Error("expected HasCueSheet") - } - if !r.HasRipLog { - t.Error("expected HasRipLog") - } - if r.Source != release.SourceCD { - t.Errorf("Source = %v, want CD (inferred from log)", r.Source) - } - if !r.ParsedSuccessfully { - t.Errorf("ParsedSuccessfully = false, errors: %v", r.ParseErrors) + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var torrentData []byte + if tt.name == "empty torrent data" { + torrentData = nil + } else if tt.name == "invalid torrent data" { + torrentData = []byte("garbage data that is not valid bencode") + } else { + torrentData = buildTorrentData(tt.torrentName, tt.files) + } + + r := p.ParseTorrent(torrentData, tt.album) + + if r.Format != tt.wantFormat { + t.Errorf("Format = %v, want %v", r.Format, tt.wantFormat) + } + if r.AudioFileCount != tt.wantAudioFileCount { + t.Errorf("AudioFileCount = %d, want %d", r.AudioFileCount, tt.wantAudioFileCount) + } + if r.HasCoverArt != tt.wantHasCoverArt { + t.Errorf("HasCoverArt = %v, want %v", r.HasCoverArt, tt.wantHasCoverArt) + } + if r.HasCueSheet != tt.wantHasCueSheet { + 
t.Errorf("HasCueSheet = %v, want %v", r.HasCueSheet, tt.wantHasCueSheet) + } + if r.HasRipLog != tt.wantHasRipLog { + t.Errorf("HasRipLog = %v, want %v", r.HasRipLog, tt.wantHasRipLog) + } + if tt.wantSource != release.SourceUnknown && r.Source != tt.wantSource { + t.Errorf("Source = %v, want %v", r.Source, tt.wantSource) + } + if tt.wantInfoHashEmpty && r.InfoHash != "" { + t.Errorf("InfoHash = %q, want empty", r.InfoHash) + } + if !tt.wantInfoHashEmpty && tt.name != "invalid torrent data" && r.InfoHash == "" { + t.Error("InfoHash should not be empty") + } + if tt.wantBitDepth != 0 && r.BitDepth != tt.wantBitDepth { + t.Errorf("BitDepth = %d, want %d", r.BitDepth, tt.wantBitDepth) + } + if tt.wantSampleRate != 0 && r.SampleRate != tt.wantSampleRate { + t.Errorf("SampleRate = %d, want %d", r.SampleRate, tt.wantSampleRate) + } + if len(tt.wantTrackNames) > 0 { + if len(r.TrackNames) != len(tt.wantTrackNames) { + t.Errorf("TrackNames length = %d, want %d", len(r.TrackNames), len(tt.wantTrackNames)) + } else { + for i, name := range tt.wantTrackNames { + if r.TrackNames[i] != name { + t.Errorf("TrackNames[%d] = %q, want %q", i, r.TrackNames[i], name) + } + } + } + } + if tt.wantArtist != "" && r.Artist != tt.wantArtist { + t.Errorf("Artist = %q, want %q", r.Artist, tt.wantArtist) + } + if tt.wantAlbum != "" && r.Album != tt.wantAlbum { + t.Errorf("Album = %q, want %q", r.Album, tt.wantAlbum) + } + if tt.wantYear != 0 && r.Year != tt.wantYear { + t.Errorf("Year = %d, want %d", r.Year, tt.wantYear) + } + if tt.wantType != release.TypeUnknown && r.Type != tt.wantType { + t.Errorf("Type = %v, want %v", r.Type, tt.wantType) + } + if len(tt.wantGenres) > 0 { + if len(r.Genres) != len(tt.wantGenres) { + t.Errorf("Genres length = %d, want %d", len(r.Genres), len(tt.wantGenres)) + } else { + for i, g := range tt.wantGenres { + if r.Genres[i] != g { + t.Errorf("Genres[%d] = %q, want %q", i, r.Genres[i], g) + } + } + } + } + if tt.wantLabel != "" && r.Label != tt.wantLabel { + 
t.Errorf("Label = %q, want %q", r.Label, tt.wantLabel) + } + if tt.wantParseErrors && len(r.ParseErrors) == 0 { + t.Error("expected ParseErrors but got none") + } + }) + } +} + +func TestGenericParser_DeduceFromFileSize(t *testing.T) { + p := NewGenericParser() + + makeFlacRelease := func(count int, avgSizeMB float64) *release.Release { + return &release.Release{ + Format: release.FormatFLAC, + AudioFileCount: count, + TotalAudioSize: int64(float64(count) * avgSizeMB * 1024 * 1024), + } + } + + makeMp3Release := func(count int, avgSizeMB float64) *release.Release { + return &release.Release{ + Format: release.FormatMP3, + AudioFileCount: count, + TotalAudioSize: int64(float64(count) * avgSizeMB * 1024 * 1024), + } + } + + tests := []struct { + name string + release *release.Release + wantBitDepth int + wantSampleRate int + wantBitrate string + }{ + { + name: "flac 16/44.1 from small files", + release: makeFlacRelease(12, 30), + wantBitDepth: 16, + wantSampleRate: 44100, + }, + { + name: "flac 24/48 from medium files", + release: makeFlacRelease(12, 50), + wantBitDepth: 24, + wantSampleRate: 48000, + }, + { + name: "flac 24/96 from large files", + release: makeFlacRelease(12, 100), + wantBitDepth: 24, + wantSampleRate: 96000, + }, + { + name: "flac 24/192 from very large files", + release: makeFlacRelease(12, 200), + wantBitDepth: 24, + wantSampleRate: 192000, + }, + { + name: "title overrides heuristic", + release: &release.Release{ + Format: release.FormatFLAC, + AudioFileCount: 12, + TotalAudioSize: int64(12 * 30 * 1024 * 1024), + BitDepth: 24, + SampleRate: 48000, + }, + wantBitDepth: 24, + wantSampleRate: 48000, + }, + { + name: "mp3 320kbps from large files", + release: makeMp3Release(12, 10), + wantBitrate: "320 kbps", + }, + { + name: "mp3 128kbps from small files", + release: makeMp3Release(12, 3.5), + wantBitrate: "128 kbps", + }, + { + name: "mp3 title overrides", + release: &release.Release{ + Format: release.FormatMP3, + AudioFileCount: 12, + 
TotalAudioSize: int64(12 * 3.5 * 1024 * 1024), + Bitrate: "320 kbps", + }, + wantBitrate: "320 kbps", + }, + { + name: "no audio files skips deduction", + release: &release.Release{ + Format: release.FormatFLAC, + AudioFileCount: 0, + TotalAudioSize: 0, + }, + wantBitDepth: 0, + wantSampleRate: 0, + }, + { + name: "aac files no deduction", + release: &release.Release{ + Format: release.FormatAAC, + AudioFileCount: 12, + TotalAudioSize: int64(12 * 50 * 1024 * 1024), + }, + wantBitDepth: 0, + wantSampleRate: 0, + wantBitrate: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + p.deduceFromFileSize(tt.release) + + if tt.wantBitDepth != 0 && tt.release.BitDepth != tt.wantBitDepth { + t.Errorf("BitDepth = %d, want %d", tt.release.BitDepth, tt.wantBitDepth) + } + if tt.wantSampleRate != 0 && tt.release.SampleRate != tt.wantSampleRate { + t.Errorf("SampleRate = %d, want %d", tt.release.SampleRate, tt.wantSampleRate) + } + if tt.wantBitrate != "" && tt.release.Bitrate != tt.wantBitrate { + t.Errorf("Bitrate = %q, want %q", tt.release.Bitrate, tt.wantBitrate) + } + if tt.name == "no audio files skips deduction" || tt.name == "aac files no deduction" { + if tt.release.BitDepth != 0 || tt.release.SampleRate != 0 || tt.release.Bitrate != "" { + t.Errorf("expected no deduction, got BitDepth=%d, SampleRate=%d, Bitrate=%q", + tt.release.BitDepth, tt.release.SampleRate, tt.release.Bitrate) + } + } + }) } }