Deduce bit depth, sample rate, and bitrate from file sizes; add comprehensive parser tests

This commit is contained in:
Alexander
2026-05-09 22:09:17 +02:00
parent 7fa859e815
commit 1e8506f146
2 changed files with 542 additions and 68 deletions
+73
View File
@@ -196,6 +196,7 @@ func (p *GenericParser) fillFromTorrent(r *release.Release, torrentData []byte)
} }
p.fillFromTitle(r, info.Name) p.fillFromTitle(r, info.Name)
p.deduceFromFileSize(r)
log.Trace(). log.Trace().
Str("hash", r.InfoHash). Str("hash", r.InfoHash).
@@ -205,6 +206,9 @@ func (p *GenericParser) fillFromTorrent(r *release.Release, torrentData []byte)
Bool("cover", r.HasCoverArt). Bool("cover", r.HasCoverArt).
Bool("cue", r.HasCueSheet). Bool("cue", r.HasCueSheet).
Bool("log", r.HasRipLog). Bool("log", r.HasRipLog).
Int("bit_depth", r.BitDepth).
Int("sample_rate", r.SampleRate).
Str("bitrate", r.Bitrate).
Msg("filled from torrent") Msg("filled from torrent")
} }
@@ -267,6 +271,75 @@ func (p *GenericParser) fillFromTitle(r *release.Release, title string) {
Msg("filled from title") Msg("filled from title")
} }
// deduceFromFileSize guesses missing audio-quality fields from the average
// size of the release's audio files.
//
// For lossless formats it fills BitDepth and SampleRate (only whichever of the
// two is still zero); for MP3 it fills Bitrate when it is empty. Any other
// format is left untouched, and values that are already set are never
// overwritten.
//
// The size buckets assume tracks of roughly four minutes, so the result is a
// heuristic estimate, not a measurement.
func (p *GenericParser) deduceFromFileSize(r *release.Release) {
	// A positive count and a positive total are both required to form a
	// meaningful average. Non-positive values (for example a negative length
	// in malformed metadata) would otherwise produce a zero or negative
	// average and be silently classified as the lowest quality tier.
	if r.AudioFileCount <= 0 || r.TotalAudioSize <= 0 {
		return
	}

	avgFileSize := r.TotalAudioSize / int64(r.AudioFileCount)
	avgFileSizeMB := float64(avgFileSize) / (1024 * 1024)

	switch {
	case r.Format.IsLossless():
		// Both values already known: nothing left to deduce.
		if r.BitDepth > 0 && r.SampleRate > 0 {
			return
		}

		// Average FLAC file size per ~4 min track:
		//   16/44.1 ≈ 25-35 MB    24/48  ≈ 40-60 MB
		//   24/96   ≈ 80-120 MB   24/192 ≈ 160-240 MB
		// The thresholds sit between neighbouring ranges.
		switch {
		case avgFileSizeMB >= 130:
			p.setIfMissing(r, 24, 192000)
		case avgFileSizeMB >= 65:
			p.setIfMissing(r, 24, 96000)
		case avgFileSizeMB >= 38:
			p.setIfMissing(r, 24, 48000)
		default:
			p.setIfMissing(r, 16, 44100)
		}

		log.Trace().
			Float64("avg_file_mb", avgFileSizeMB).
			Int("deduced_bit_depth", r.BitDepth).
			Int("deduced_sample_rate", r.SampleRate).
			Msg("deduced lossless quality from file size")

	case r.Format == release.FormatMP3:
		// A bitrate that is already set takes precedence over the heuristic.
		if r.Bitrate != "" {
			return
		}

		// Average MP3 file size per ~4 min track:
		//   128 kbps ≈ 3.5-4 MB   192 kbps ≈ 5-6 MB
		//   256 kbps ≈ 7-8 MB     320 kbps ≈ 9-10 MB
		switch {
		case avgFileSizeMB >= 8.5:
			r.Bitrate = "320 kbps"
		case avgFileSizeMB >= 6.5:
			r.Bitrate = "256 kbps"
		case avgFileSizeMB >= 4.5:
			r.Bitrate = "192 kbps"
		default:
			r.Bitrate = "128 kbps"
		}

		log.Trace().
			Float64("avg_file_mb", avgFileSizeMB).
			Str("deduced_bitrate", r.Bitrate).
			Msg("deduced mp3 bitrate from file size")
	}
}
// setIfMissing stores bitDepth and sampleRate on r, but only into the fields
// that are still zero; a non-zero BitDepth or SampleRate is left as it is.
func (p *GenericParser) setIfMissing(r *release.Release, bitDepth int, sampleRate int) {
	// fillZero writes v through dst only when the target is still unset.
	fillZero := func(dst *int, v int) {
		if *dst != 0 {
			return
		}
		*dst = v
	}

	fillZero(&r.BitDepth, bitDepth)
	fillZero(&r.SampleRate, sampleRate)
}
var trackNumberPrefix = regexp.MustCompile(`^\d{1,3}[\s.\-]+`) var trackNumberPrefix = regexp.MustCompile(`^\d{1,3}[\s.\-]+`)
func cleanTrackName(name string) string { func cleanTrackName(name string) string {
+462 -61
View File
@@ -1,13 +1,39 @@
package tracker package tracker
import ( import (
"os" "bytes"
"fmt"
"testing" "testing"
metadataPb "homelab.lan/music-agregator/gen/metadata/v1" metadataPb "homelab.lan/music-agregator/gen/metadata/v1"
"homelab.lan/music-agregator/internal/release" "homelab.lan/music-agregator/internal/release"
) )
// testFile describes one entry of a synthetic torrent built for the tests.
type testFile struct {
	path string // file name written into the torrent as a single path element
	size int64  // file length in bytes
}

// buildTorrentData returns a minimal bencoded torrent for use in tests.
//
// The layout depends on the number of files:
//   - no files: a single-file torrent named name with length 0;
//   - one file: a single-file torrent whose name and length come from that
//     file (the name argument is not used in this case);
//   - several files: a multi-file torrent named name with one "files" entry
//     per element, each path written as a single component.
//
// The announce URL, piece length and the 20-byte pieces string are fixed
// placeholders.
func buildTorrentData(name string, files []testFile) []byte {
	var out bytes.Buffer
	out.WriteString("d8:announce35:http://tracker.example.com/announce4:infod")

	switch len(files) {
	case 0:
		fmt.Fprintf(&out, "6:lengthi0e4:name%d:%s12:piece lengthi16384e6:pieces20:01234567890123456789", len(name), name)
	case 1:
		only := files[0]
		fmt.Fprintf(&out, "6:lengthi%de4:name%d:%s12:piece lengthi16384e6:pieces20:01234567890123456789", only.size, len(only.path), only.path)
	default:
		out.WriteString("5:filesl")
		for i := range files {
			entry := files[i]
			fmt.Fprintf(&out, "d6:lengthi%de4:pathl%d:%see", entry.size, len(entry.path), entry.path)
		}
		fmt.Fprintf(&out, "e4:name%d:%s12:piece lengthi16384e6:pieces20:01234567890123456789", len(name), name)
	}

	// Close the info dictionary and the outer dictionary.
	out.WriteString("ee")
	return out.Bytes()
}
func TestGenericParser_Parse(t *testing.T) { func TestGenericParser_Parse(t *testing.T) {
p := NewGenericParser() p := NewGenericParser()
@@ -95,77 +121,452 @@ func TestGenericParser_Parse(t *testing.T) {
} }
// TestGenericParser_ParseTorrent is a table-driven test: each case builds a
// synthetic torrent with buildTorrentData (or passes nil / non-bencode bytes
// for the "empty" and "invalid" cases), runs ParseTorrent with the case's
// album metadata and compares the fields of the returned release.
// NOTE(review): this span is a side-by-side diff rendering; lines of the
// removed version (which read /tmp/metallica.torrent) are interleaved with the
// lines of the new table-driven version.
func TestGenericParser_ParseTorrent(t *testing.T) { func TestGenericParser_ParseTorrent(t *testing.T) {
torrentData, err := os.ReadFile("/tmp/metallica.torrent")
if err != nil {
t.Skip("metallica.torrent not available")
}
album := &metadataPb.Album{
Title: "72 Seasons",
AlbumType: "Album",
ReleaseDate: "2023-04-14",
TotalTracks: 12,
TotalDiscs: 1,
Artists: []*metadataPb.ArtistCredit{
{Artist: &metadataPb.Artist{Name: "Metallica"}},
},
Genres: []*metadataPb.Genre{
{Name: "Thrash Metal"},
{Name: "Heavy Metal"},
},
Label: &metadataPb.Label{Name: "Blackened Recordings"},
}
p := NewGenericParser() p := NewGenericParser()
r := p.ParseTorrent(torrentData, album)
t.Logf("Artist: %s", r.Artist) makeFlacFiles := func(count int, sizeMB float64) []testFile {
t.Logf("Album: %s", r.Album) files := make([]testFile, count)
t.Logf("Year: %d", r.Year) for i := range files {
t.Logf("Type: %s", r.Type) files[i] = testFile{
t.Logf("Genres: %v", r.Genres) path: fmt.Sprintf("%02d - Track %d.flac", i+1, i+1),
t.Logf("Format: %s", r.Format) size: int64(sizeMB * 1024 * 1024),
t.Logf("Source: %s", r.Source) }
t.Logf("Label: %s", r.Label) }
t.Logf("InfoHash: %s", r.InfoHash) return files
t.Logf("TrackCount: %d", r.TrackCount) }
t.Logf("AudioFiles: %d", r.AudioFileCount)
t.Logf("AudioSize: %d bytes", r.TotalAudioSize)
t.Logf("HasCover: %v", r.HasCoverArt)
t.Logf("HasCue: %v", r.HasCueSheet)
t.Logf("HasLog: %v", r.HasRipLog)
t.Logf("TrackNames: %v", r.TrackNames)
t.Logf("Parsed OK: %v", r.ParsedSuccessfully)
t.Logf("Errors: %v", r.ParseErrors)
if r.Artist != "Metallica" { makeMp3Files := func(count int, sizeMB float64) []testFile {
t.Errorf("Artist = %q, want Metallica", r.Artist) files := make([]testFile, count)
for i := range files {
files[i] = testFile{
path: fmt.Sprintf("%02d - Track %d.mp3", i+1, i+1),
size: int64(sizeMB * 1024 * 1024),
} }
if r.Album != "72 Seasons" {
t.Errorf("Album = %q, want 72 Seasons", r.Album)
} }
if r.Year != 2023 { return files
t.Errorf("Year = %d, want 2023", r.Year)
} }
if r.Format != release.FormatFLAC {
t.Errorf("Format = %v, want FLAC", r.Format) tests := []struct {
name string
torrentName string
files []testFile
album *metadataPb.Album
wantFormat release.AudioFormat
wantAudioFileCount int
wantHasCoverArt bool
wantHasCueSheet bool
wantHasRipLog bool
wantSource release.Source
wantInfoHashEmpty bool
wantBitDepth int
wantSampleRate int
wantTrackNames []string
wantArtist string
wantAlbum string
wantYear int
wantType release.Type
wantGenres []string
wantLabel string
wantParseErrors bool
}{
{
name: "flac album with cover cue log",
torrentName: "Test Artist - Test Album (2024) [FLAC]",
files: append(append(makeFlacFiles(12, 30),
testFile{path: "cover.jpg", size: 500000},
testFile{path: "album.cue", size: 2000}),
testFile{path: "rip.log", size: 5000}),
album: &metadataPb.Album{
Title: "Test Album",
AlbumType: "Album",
ReleaseDate: "2024-01-15",
TotalTracks: 12,
Artists: []*metadataPb.ArtistCredit{{Artist: &metadataPb.Artist{Name: "Test Artist"}}},
Genres: []*metadataPb.Genre{{Name: "Rock"}},
Label: &metadataPb.Label{Name: "Test Label"},
},
wantFormat: release.FormatFLAC,
wantAudioFileCount: 12,
wantHasCoverArt: true,
wantHasCueSheet: true,
wantHasRipLog: true,
wantSource: release.SourceCD,
wantArtist: "Test Artist",
wantAlbum: "Test Album",
wantYear: 2024,
wantType: release.TypeAlbum,
wantGenres: []string{"Rock"},
wantLabel: "Test Label",
},
{
name: "mp3 album with cover",
torrentName: "Artist - MP3 Album (2023)",
files: append(makeMp3Files(10, 10),
testFile{path: "cover.jpg", size: 300000}),
album: &metadataPb.Album{
Title: "MP3 Album",
Artists: []*metadataPb.ArtistCredit{{Artist: &metadataPb.Artist{Name: "Artist"}}},
ReleaseDate: "2023-05-20",
},
wantFormat: release.FormatMP3,
wantAudioFileCount: 10,
wantHasCoverArt: true,
wantHasCueSheet: false,
wantHasRipLog: false,
wantArtist: "Artist",
wantAlbum: "MP3 Album",
wantYear: 2023,
},
{
name: "mixed format dominant wins",
torrentName: "Mixed Format Album",
files: append(makeFlacFiles(10, 30),
testFile{path: "bonus1.mp3", size: 10485760},
testFile{path: "bonus2.mp3", size: 10485760}),
album: &metadataPb.Album{
Title: "Mixed Format Album",
Artists: []*metadataPb.ArtistCredit{{Artist: &metadataPb.Artist{Name: "Artist"}}},
},
wantFormat: release.FormatFLAC,
wantAudioFileCount: 10,
},
{
name: "single file torrent flac",
torrentName: "Single Track.flac",
files: []testFile{{path: "Single Track.flac", size: 50 * 1024 * 1024}},
album: &metadataPb.Album{
Title: "Single Track",
Artists: []*metadataPb.ArtistCredit{{Artist: &metadataPb.Artist{Name: "Artist"}}},
},
wantFormat: release.FormatFLAC,
wantAudioFileCount: 1,
},
{
name: "single file torrent mp3",
torrentName: "Single.mp3",
files: []testFile{{path: "Single.mp3", size: 10 * 1024 * 1024}},
album: &metadataPb.Album{
Title: "Single",
Artists: []*metadataPb.ArtistCredit{{Artist: &metadataPb.Artist{Name: "Artist"}}},
},
wantFormat: release.FormatMP3,
wantAudioFileCount: 1,
},
{
name: "no audio files",
torrentName: "Not Music",
files: []testFile{
{path: "readme.txt", size: 1000},
{path: "image.jpg", size: 500000},
},
album: &metadataPb.Album{
Title: "Not Music",
Artists: []*metadataPb.ArtistCredit{{Artist: &metadataPb.Artist{Name: "Someone"}}},
},
wantFormat: release.FormatUnknown,
wantAudioFileCount: 0,
wantHasCoverArt: true,
},
{
name: "hires in title",
torrentName: "Artist - Album (2024) [24Bit-96kHz] FLAC",
files: makeFlacFiles(12, 100),
album: &metadataPb.Album{
Title: "Album",
Artists: []*metadataPb.ArtistCredit{{Artist: &metadataPb.Artist{Name: "Artist"}}},
},
wantFormat: release.FormatFLAC,
wantAudioFileCount: 12,
wantBitDepth: 24,
wantSampleRate: 96000,
},
{
name: "source from title",
torrentName: "Artist - Album [WEB] FLAC",
files: makeFlacFiles(10, 30),
album: &metadataPb.Album{
Title: "Album",
Artists: []*metadataPb.ArtistCredit{{Artist: &metadataPb.Artist{Name: "Artist"}}},
},
wantFormat: release.FormatFLAC,
wantAudioFileCount: 10,
wantSource: release.SourceWEB,
},
{
name: "track names cleaned",
torrentName: "Artist - Album",
files: []testFile{
{path: "01 - First Track.flac", size: 30 * 1024 * 1024},
{path: "02 - Second Track.flac", size: 30 * 1024 * 1024},
},
album: &metadataPb.Album{
Title: "Album",
Artists: []*metadataPb.ArtistCredit{{Artist: &metadataPb.Artist{Name: "Artist"}}},
},
wantFormat: release.FormatFLAC,
wantAudioFileCount: 2,
wantTrackNames: []string{"First Track", "Second Track"},
},
{
name: "metadata fills release fields",
torrentName: "Test Torrent",
files: makeFlacFiles(8, 30),
album: &metadataPb.Album{
Title: "Metadata Album",
AlbumType: "EP",
ReleaseDate: "2020-06-15",
TotalTracks: 8,
TotalDiscs: 1,
Artists: []*metadataPb.ArtistCredit{{Artist: &metadataPb.Artist{Name: "Metadata Artist"}}},
Genres: []*metadataPb.Genre{{Name: "Electronic"}, {Name: "Ambient"}},
Label: &metadataPb.Label{Name: "Metadata Label"},
},
wantFormat: release.FormatFLAC,
wantAudioFileCount: 8,
wantArtist: "Metadata Artist",
wantAlbum: "Metadata Album",
wantYear: 2020,
wantType: release.TypeEP,
wantGenres: []string{"Electronic", "Ambient"},
wantLabel: "Metadata Label",
},
{
name: "empty torrent data",
torrentName: "",
files: nil,
album: &metadataPb.Album{
Title: "Album Only",
ReleaseDate: "2022-01-01",
Artists: []*metadataPb.ArtistCredit{{Artist: &metadataPb.Artist{Name: "Artist Only"}}},
},
wantFormat: release.FormatUnknown,
wantAudioFileCount: 0,
wantInfoHashEmpty: true,
wantArtist: "Artist Only",
wantAlbum: "Album Only",
wantYear: 2022,
},
{
name: "invalid torrent data",
torrentName: "invalid",
files: nil,
album: &metadataPb.Album{Title: "Album", Artists: []*metadataPb.ArtistCredit{{Artist: &metadataPb.Artist{Name: "Artist"}}}},
wantArtist: "Artist",
wantAlbum: "Album",
wantParseErrors: true,
},
} }
if r.AudioFileCount != 12 {
t.Errorf("AudioFileCount = %d, want 12", r.AudioFileCount) for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var torrentData []byte
if tt.name == "empty torrent data" {
torrentData = nil
} else if tt.name == "invalid torrent data" {
torrentData = []byte("garbage data that is not valid bencode")
} else {
torrentData = buildTorrentData(tt.torrentName, tt.files)
} }
if !r.HasCoverArt {
t.Error("expected HasCoverArt") r := p.ParseTorrent(torrentData, tt.album)
if r.Format != tt.wantFormat {
t.Errorf("Format = %v, want %v", r.Format, tt.wantFormat)
} }
if !r.HasCueSheet { if r.AudioFileCount != tt.wantAudioFileCount {
t.Error("expected HasCueSheet") t.Errorf("AudioFileCount = %d, want %d", r.AudioFileCount, tt.wantAudioFileCount)
} }
if !r.HasRipLog { if r.HasCoverArt != tt.wantHasCoverArt {
t.Error("expected HasRipLog") t.Errorf("HasCoverArt = %v, want %v", r.HasCoverArt, tt.wantHasCoverArt)
} }
if r.Source != release.SourceCD { if r.HasCueSheet != tt.wantHasCueSheet {
t.Errorf("Source = %v, want CD (inferred from log)", r.Source) t.Errorf("HasCueSheet = %v, want %v", r.HasCueSheet, tt.wantHasCueSheet)
} }
if !r.ParsedSuccessfully { if r.HasRipLog != tt.wantHasRipLog {
t.Errorf("ParsedSuccessfully = false, errors: %v", r.ParseErrors) t.Errorf("HasRipLog = %v, want %v", r.HasRipLog, tt.wantHasRipLog)
}
if tt.wantSource != release.SourceUnknown && r.Source != tt.wantSource {
t.Errorf("Source = %v, want %v", r.Source, tt.wantSource)
}
if tt.wantInfoHashEmpty && r.InfoHash != "" {
t.Errorf("InfoHash = %q, want empty", r.InfoHash)
}
if !tt.wantInfoHashEmpty && tt.name != "invalid torrent data" && r.InfoHash == "" {
t.Error("InfoHash should not be empty")
}
if tt.wantBitDepth != 0 && r.BitDepth != tt.wantBitDepth {
t.Errorf("BitDepth = %d, want %d", r.BitDepth, tt.wantBitDepth)
}
if tt.wantSampleRate != 0 && r.SampleRate != tt.wantSampleRate {
t.Errorf("SampleRate = %d, want %d", r.SampleRate, tt.wantSampleRate)
}
if len(tt.wantTrackNames) > 0 {
if len(r.TrackNames) != len(tt.wantTrackNames) {
t.Errorf("TrackNames length = %d, want %d", len(r.TrackNames), len(tt.wantTrackNames))
} else {
for i, name := range tt.wantTrackNames {
if r.TrackNames[i] != name {
t.Errorf("TrackNames[%d] = %q, want %q", i, r.TrackNames[i], name)
}
}
}
}
if tt.wantArtist != "" && r.Artist != tt.wantArtist {
t.Errorf("Artist = %q, want %q", r.Artist, tt.wantArtist)
}
if tt.wantAlbum != "" && r.Album != tt.wantAlbum {
t.Errorf("Album = %q, want %q", r.Album, tt.wantAlbum)
}
if tt.wantYear != 0 && r.Year != tt.wantYear {
t.Errorf("Year = %d, want %d", r.Year, tt.wantYear)
}
if tt.wantType != release.TypeUnknown && r.Type != tt.wantType {
t.Errorf("Type = %v, want %v", r.Type, tt.wantType)
}
if len(tt.wantGenres) > 0 {
if len(r.Genres) != len(tt.wantGenres) {
t.Errorf("Genres length = %d, want %d", len(r.Genres), len(tt.wantGenres))
} else {
for i, g := range tt.wantGenres {
if r.Genres[i] != g {
t.Errorf("Genres[%d] = %q, want %q", i, r.Genres[i], g)
}
}
}
}
if tt.wantLabel != "" && r.Label != tt.wantLabel {
t.Errorf("Label = %q, want %q", r.Label, tt.wantLabel)
}
if tt.wantParseErrors && len(r.ParseErrors) == 0 {
t.Error("expected ParseErrors but got none")
}
})
}
}
func TestGenericParser_DeduceFromFileSize(t *testing.T) {
p := NewGenericParser()
makeFlacRelease := func(count int, avgSizeMB float64) *release.Release {
return &release.Release{
Format: release.FormatFLAC,
AudioFileCount: count,
TotalAudioSize: int64(float64(count) * avgSizeMB * 1024 * 1024),
}
}
makeMp3Release := func(count int, avgSizeMB float64) *release.Release {
return &release.Release{
Format: release.FormatMP3,
AudioFileCount: count,
TotalAudioSize: int64(float64(count) * avgSizeMB * 1024 * 1024),
}
}
tests := []struct {
name string
release *release.Release
wantBitDepth int
wantSampleRate int
wantBitrate string
}{
{
name: "flac 16/44.1 from small files",
release: makeFlacRelease(12, 30),
wantBitDepth: 16,
wantSampleRate: 44100,
},
{
name: "flac 24/48 from medium files",
release: makeFlacRelease(12, 50),
wantBitDepth: 24,
wantSampleRate: 48000,
},
{
name: "flac 24/96 from large files",
release: makeFlacRelease(12, 100),
wantBitDepth: 24,
wantSampleRate: 96000,
},
{
name: "flac 24/192 from very large files",
release: makeFlacRelease(12, 200),
wantBitDepth: 24,
wantSampleRate: 192000,
},
{
name: "title overrides heuristic",
release: &release.Release{
Format: release.FormatFLAC,
AudioFileCount: 12,
TotalAudioSize: int64(12 * 30 * 1024 * 1024),
BitDepth: 24,
SampleRate: 48000,
},
wantBitDepth: 24,
wantSampleRate: 48000,
},
{
name: "mp3 320kbps from large files",
release: makeMp3Release(12, 10),
wantBitrate: "320 kbps",
},
{
name: "mp3 128kbps from small files",
release: makeMp3Release(12, 3.5),
wantBitrate: "128 kbps",
},
{
name: "mp3 title overrides",
release: &release.Release{
Format: release.FormatMP3,
AudioFileCount: 12,
TotalAudioSize: int64(12 * 3.5 * 1024 * 1024),
Bitrate: "320 kbps",
},
wantBitrate: "320 kbps",
},
{
name: "no audio files skips deduction",
release: &release.Release{
Format: release.FormatFLAC,
AudioFileCount: 0,
TotalAudioSize: 0,
},
wantBitDepth: 0,
wantSampleRate: 0,
},
{
name: "aac files no deduction",
release: &release.Release{
Format: release.FormatAAC,
AudioFileCount: 12,
TotalAudioSize: int64(12 * 50 * 1024 * 1024),
},
wantBitDepth: 0,
wantSampleRate: 0,
wantBitrate: "",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
p.deduceFromFileSize(tt.release)
if tt.wantBitDepth != 0 && tt.release.BitDepth != tt.wantBitDepth {
t.Errorf("BitDepth = %d, want %d", tt.release.BitDepth, tt.wantBitDepth)
}
if tt.wantSampleRate != 0 && tt.release.SampleRate != tt.wantSampleRate {
t.Errorf("SampleRate = %d, want %d", tt.release.SampleRate, tt.wantSampleRate)
}
if tt.wantBitrate != "" && tt.release.Bitrate != tt.wantBitrate {
t.Errorf("Bitrate = %q, want %q", tt.release.Bitrate, tt.wantBitrate)
}
if tt.name == "no audio files skips deduction" || tt.name == "aac files no deduction" {
if tt.release.BitDepth != 0 || tt.release.SampleRate != 0 || tt.release.Bitrate != "" {
t.Errorf("expected no deduction, got BitDepth=%d, SampleRate=%d, Bitrate=%q",
tt.release.BitDepth, tt.release.SampleRate, tt.release.Bitrate)
}
}
})
} }
} }