Files
music-agregator/internal/tracker/generic_parser.go
T

352 lines
8.4 KiB
Go

package tracker
import (
"path/filepath"
"regexp"
"strconv"
"strings"
"github.com/anacrolix/torrent/metainfo"
"github.com/rs/zerolog/log"
metadataPb "homelab.lan/music-agregator/gen/metadata/v1"
"homelab.lan/music-agregator/internal/release"
)
// Regular expressions used to pull quality and source hints out of a
// release/torrent title. All of them are case-insensitive.
var (
// bitratePattern captures a 2-3 digit number followed by "kbps",
// e.g. "320 kbps" or "192kbps".
bitratePattern = regexp.MustCompile(`(?i)(\d{2,3})\s*kbps`)
// hiResPatterns are tried in order by fillFromTitle. Capture group 1 is
// parsed as the bit depth and, where the pattern has one, group 2 as the
// sample rate in kHz.
hiResPatterns = []*regexp.Regexp{
// Bit depth and sample rate, e.g. "24bit/96kHz" or "24 Bit - 44.1 kHz".
regexp.MustCompile(`(?i)(\d{1,2})\s*[Bb]it\s*[-/]?\s*(\d{2,3}(?:\.\d)?)\s*[kK][Hh][Zz]`),
// "FLAC" followed by depth and rate, e.g. "[FLAC 24-96]".
regexp.MustCompile(`(?i)\[?\s*(?:FLAC|Flac)\s+(\d{1,2})\s*[-/]\s*(\d{2,3}(?:\.\d)?)\s*\]?`),
// Bit depth only, e.g. "[24bit]".
regexp.MustCompile(`(?i)\[?\s*(\d{1,2})\s*[Bb]it\s*\]?`),
}
// sourcePattern captures a bracketed media tag such as "[CD]" or "[WEB]".
sourcePattern = regexp.MustCompile(`(?i)\[(CD|WEB|Vinyl|LP|Cassette|MC|DVD|Blu-?Ray|SACD|DAT)\]`)
// ripTypePattern captures a rip descriptor anywhere in the title, such as
// "vinyl rip", "SACD-R", "HDCD", "DSD64" or "tape rip".
ripTypePattern = regexp.MustCompile(`(?i)(vinyl\s*rip|SACD[- ]?R|HDCD|DSD\d*|tape\s*rip)`)
)
// audioExtensions maps a file extension (lowercase, including the leading
// dot) to the audio format it is counted as. Callers lowercase the extension
// before the lookup; extensions not listed here are not treated as audio.
var audioExtensions = map[string]release.AudioFormat{
".flac": release.FormatFLAC,
".mp3": release.FormatMP3,
".aac": release.FormatAAC,
// NOTE(review): .m4a containers can also carry ALAC; they are always
// counted as AAC here — confirm this is intended.
".m4a": release.FormatAAC,
".ape": release.FormatAPE,
".wv": release.FormatWavPack,
".alac": release.FormatALAC,
".ogg": release.FormatOGG,
".wav": release.FormatWAV,
}
// GenericParser builds release.Release values from album metadata, raw
// torrent files and release titles. It carries no state.
type GenericParser struct{}

// NewGenericParser returns a ready-to-use, non-nil GenericParser.
func NewGenericParser() *GenericParser {
	parser := new(GenericParser)
	return parser
}
// ParseTorrent builds a Release from album metadata and the raw bytes of a
// .torrent file.
//
// Metadata is applied first, then the torrent contents. The result is marked
// as successfully parsed only when both Artist and Album ended up non-empty;
// otherwise "missing artist or album" is appended to ParseErrors. A non-nil
// Release is always returned.
func (p *GenericParser) ParseTorrent(torrentData []byte, album *metadataPb.Album) *release.Release {
	rel := new(release.Release)

	p.fillFromMetadata(rel, album)
	p.fillFromTorrent(rel, torrentData)

	if rel.Artist == "" || rel.Album == "" {
		rel.ParsedSuccessfully = false
		rel.ParseErrors = append(rel.ParseErrors, "missing artist or album")
		return rel
	}

	rel.ParsedSuccessfully = true
	return rel
}
// Parse builds a Release from a bare release title.
//
// The title is stored as RawTitle and scanned for quality/source hints. The
// result is marked as successfully parsed only when both Artist and Album are
// non-empty; otherwise "missing artist or album" is appended to ParseErrors.
//
// NOTE(review): fillFromTitle, as written in this file, does not set Artist
// or Album, so this method appears to always report a parse failure —
// confirm whether title-based artist/album extraction is expected here.
func (p *GenericParser) Parse(title string) *release.Release {
	rel := new(release.Release)
	rel.RawTitle = title

	p.fillFromTitle(rel, title)

	if rel.Artist == "" || rel.Album == "" {
		rel.ParsedSuccessfully = false
		rel.ParseErrors = append(rel.ParseErrors, "missing artist or album")
		return rel
	}

	rel.ParsedSuccessfully = true
	return rel
}
// fillFromMetadata copies the fields of an album metadata record onto r:
// title, first listed artist, release year, release type, genres, label,
// and track/disc totals. A nil album leaves r untouched.
func (p *GenericParser) fillFromMetadata(r *release.Release, album *metadataPb.Album) {
	if album == nil {
		return
	}

	r.Album = album.GetTitle()
	if artists := album.GetArtists(); len(artists) > 0 {
		// Only the first listed artist is used.
		r.Artist = artists[0].GetArtist().GetName()
	}

	// The year is read from the first four characters of the release date.
	// Dates shorter than that are skipped: slicing them with [:4] would
	// panic with an out-of-range error.
	if date := album.GetReleaseDate(); len(date) >= 4 {
		if year, err := strconv.Atoi(date[:4]); err == nil {
			r.Year = year
		}
	}

	// Unrecognized album types leave r.Type at its current value.
	switch strings.ToLower(album.GetAlbumType()) {
	case "album":
		r.Type = release.TypeAlbum
	case "ep":
		r.Type = release.TypeEP
	case "single":
		r.Type = release.TypeSingle
	case "compilation":
		r.Type = release.TypeCompilation
	case "soundtrack":
		r.Type = release.TypeSoundtrack
	case "live":
		r.Type = release.TypeLive
	}

	for _, g := range album.GetGenres() {
		r.Genres = append(r.Genres, g.GetName())
	}

	if label := album.GetLabel(); label != nil {
		r.Label = label.GetName()
	}

	r.TrackCount = int(album.GetTotalTracks())
	// NOTE(review): ReleaseCount is populated from the disc total — confirm
	// that this is the intended meaning of the field.
	r.ReleaseCount = int(album.GetTotalDiscs())

	log.Trace().
		Str("artist", r.Artist).
		Str("album", r.Album).
		Int("year", r.Year).
		Str("type", r.Type.String()).
		Msg("filled from metadata")
}
// fillFromTorrent decodes raw .torrent bytes and records on r what the
// torrent reveals: its name (as RawTitle), info hash, dominant audio format,
// audio file count and size, cleaned track names, and whether cover art, a
// cue sheet or a rip log is present. It then scans the torrent name for
// title hints and estimates missing quality fields from the average file
// size.
//
// Empty input is a no-op. Decode failures are logged and appended to
// r.ParseErrors, and the rest of r is left as it was.
func (p *GenericParser) fillFromTorrent(r *release.Release, torrentData []byte) {
	if len(torrentData) == 0 {
		return
	}

	mi, err := metainfo.Load(strings.NewReader(string(torrentData)))
	if err != nil {
		log.Error().Err(err).Msg("failed to parse torrent data")
		r.ParseErrors = append(r.ParseErrors, "failed to parse torrent: "+err.Error())
		return
	}
	info, err := mi.UnmarshalInfo()
	if err != nil {
		log.Error().Err(err).Msg("failed to unmarshal torrent info")
		r.ParseErrors = append(r.ParseErrors, "failed to unmarshal torrent info: "+err.Error())
		return
	}

	r.RawTitle = info.Name
	r.InfoHash = mi.HashInfoBytes().HexString()

	if len(info.Files) == 0 {
		// No file list: the torrent name itself is treated as the file name.
		ext := strings.ToLower(filepath.Ext(info.Name))
		if format, ok := audioExtensions[ext]; ok {
			r.Format = format
			r.AudioFileCount = 1
			r.TotalAudioSize = info.Length
		}
	} else {
		formatCounts := make(map[release.AudioFormat]int)
		formatSizes := make(map[release.AudioFormat]int64)

		for _, f := range info.Files {
			path := filepath.Join(f.Path...)
			ext := strings.ToLower(filepath.Ext(path))

			if format, ok := audioExtensions[ext]; ok {
				formatCounts[format]++
				formatSizes[format] += f.Length
				name := strings.TrimSuffix(filepath.Base(path), ext)
				r.TrackNames = append(r.TrackNames, cleanTrackName(name))
			}

			switch ext {
			case ".jpg", ".jpeg", ".png":
				r.HasCoverArt = true
			case ".cue":
				r.HasCueSheet = true
			case ".log":
				r.HasRipLog = true
			}
		}

		// Pick the format with the most files. Map iteration order is
		// random, so ties are broken explicitly (larger total size, then
		// format name) to keep the result stable across runs.
		var dominantFormat release.AudioFormat
		var maxCount int
		for format, count := range formatCounts {
			better := count > maxCount
			if !better && count == maxCount {
				size, bestSize := formatSizes[format], formatSizes[dominantFormat]
				better = size > bestSize ||
					(size == bestSize && format.String() < dominantFormat.String())
			}
			if better {
				maxCount = count
				dominantFormat = format
			}
		}
		r.Format = dominantFormat
		r.AudioFileCount = maxCount
		r.TotalAudioSize = formatSizes[dominantFormat]
	}

	// A .log file in the listing is taken as evidence of a CD rip. This runs
	// before the title scan, which only sets Source while it is unknown.
	if r.HasRipLog {
		r.Source = release.SourceCD
	}
	// Fall back to the audio file count when no track count is set yet.
	if r.TrackCount == 0 {
		r.TrackCount = r.AudioFileCount
	}

	p.fillFromTitle(r, info.Name)
	p.deduceFromFileSize(r)

	log.Trace().
		Str("hash", r.InfoHash).
		Str("format", r.Format.String()).
		Int("audio_files", r.AudioFileCount).
		Int64("audio_size", r.TotalAudioSize).
		Bool("cover", r.HasCoverArt).
		Bool("cue", r.HasCueSheet).
		Bool("log", r.HasRipLog).
		Int("bit_depth", r.BitDepth).
		Int("sample_rate", r.SampleRate).
		Str("bitrate", r.Bitrate).
		Msg("filled from torrent")
}
// fillFromTitle scans a release title for quality and source hints and
// stores what it finds on r: bitrate, bit depth, sample rate, media source
// and rip type. An empty title is a no-op.
//
// Bit depth, sample rate and source are only written when they are still at
// their zero/unknown value; bitrate and rip type are overwritten whenever
// the title contains a match.
func (p *GenericParser) fillFromTitle(r *release.Release, title string) {
	if title == "" {
		return
	}

	if groups := bitratePattern.FindStringSubmatch(title); len(groups) > 1 {
		r.Bitrate = groups[1] + " kbps"
	}

	// Try the hi-res patterns in order, stopping once a bit depth is known.
	for i := range hiResPatterns {
		groups := hiResPatterns[i].FindStringSubmatch(title)
		if len(groups) >= 2 {
			if r.BitDepth == 0 {
				depth, err := strconv.Atoi(groups[1])
				if err == nil {
					r.BitDepth = depth
				}
			}
			if len(groups) > 2 && r.SampleRate == 0 {
				// The title gives kHz; the field is filled in Hz.
				khz, err := strconv.ParseFloat(groups[2], 64)
				if err == nil {
					r.SampleRate = int(khz * 1000)
				}
			}
			if r.BitDepth > 0 {
				break
			}
		}
	}

	// NOTE(review): sourcePattern also matches "SACD" and "DAT", but no
	// case below handles them, so those tags leave Source unchanged.
	if r.Source == release.SourceUnknown {
		if groups := sourcePattern.FindStringSubmatch(title); len(groups) > 1 {
			tag := strings.ToUpper(groups[1])
			switch tag {
			case "CD":
				r.Source = release.SourceCD
			case "WEB":
				r.Source = release.SourceWEB
			case "VINYL", "LP":
				r.Source = release.SourceVinyl
			case "CASSETTE", "MC":
				r.Source = release.SourceCassette
			case "DVD":
				r.Source = release.SourceDVD
			case "BLU-RAY", "BLURAY":
				r.Source = release.SourceBluRay
			}
		}
	}

	if groups := ripTypePattern.FindStringSubmatch(title); len(groups) > 1 {
		r.RipType = groups[1]
	}

	log.Trace().
		Str("bitrate", r.Bitrate).
		Int("bit_depth", r.BitDepth).
		Int("sample_rate", r.SampleRate).
		Str("source", r.Source.String()).
		Str("rip_type", r.RipType).
		Msg("filled from title")
}
// deduceFromFileSize estimates missing quality fields from the average audio
// file size. Lossless formats get a bit depth / sample rate guess; MP3 gets a
// bitrate guess. Fields that already hold a value are never overwritten, and
// nothing happens when the audio file count or total size is zero.
//
// The thresholds assume tracks of roughly four minutes.
func (p *GenericParser) deduceFromFileSize(r *release.Release) {
	if r.AudioFileCount == 0 || r.TotalAudioSize == 0 {
		return
	}

	// Whole bytes per file first (integer division), then converted to MiB.
	perFileBytes := r.TotalAudioSize / int64(r.AudioFileCount)
	perFileMB := float64(perFileBytes) / (1024 * 1024)

	if r.Format.IsLossless() {
		if r.BitDepth > 0 && r.SampleRate > 0 {
			return
		}

		// Typical FLAC size per track:
		//   16/44.1 ≈ 25-35 MB    24/48  ≈ 40-60 MB
		//   24/96   ≈ 80-120 MB   24/192 ≈ 160-240 MB
		tiers := []struct {
			minMB       float64
			depth, rate int
		}{
			{130, 24, 192000},
			{65, 24, 96000},
			{38, 24, 48000},
		}
		depth, rate := 16, 44100
		for _, tier := range tiers {
			if perFileMB >= tier.minMB {
				depth, rate = tier.depth, tier.rate
				break
			}
		}
		p.setIfMissing(r, depth, rate)

		log.Trace().
			Float64("avg_file_mb", perFileMB).
			Int("deduced_bit_depth", r.BitDepth).
			Int("deduced_sample_rate", r.SampleRate).
			Msg("deduced lossless quality from file size")
		return
	}

	if r.Format != release.FormatMP3 || r.Bitrate != "" {
		return
	}

	// Typical MP3 size per track:
	//   128 kbps ≈ 3.5-4 MB   192 kbps ≈ 5-6 MB
	//   256 kbps ≈ 7-8 MB     320 kbps ≈ 9-10 MB
	steps := []struct {
		minMB   float64
		bitrate string
	}{
		{8.5, "320 kbps"},
		{6.5, "256 kbps"},
		{4.5, "192 kbps"},
	}
	guess := "128 kbps"
	for _, step := range steps {
		if perFileMB >= step.minMB {
			guess = step.bitrate
			break
		}
	}
	r.Bitrate = guess

	log.Trace().
		Float64("avg_file_mb", perFileMB).
		Str("deduced_bitrate", r.Bitrate).
		Msg("deduced mp3 bitrate from file size")
}
// setIfMissing writes bitDepth and sampleRate onto r, each one only when the
// corresponding field on r is still zero.
func (p *GenericParser) setIfMissing(r *release.Release, bitDepth int, sampleRate int) {
	fillZero := func(dst *int, value int) {
		if *dst == 0 {
			*dst = value
		}
	}
	fillZero(&r.BitDepth, bitDepth)
	fillZero(&r.SampleRate, sampleRate)
}
// trackNumberPrefix matches a leading 1-3 digit track number followed by at
// least one separator (whitespace, '.' or '-'), e.g. "01 - ", "7. " or "12-".
var trackNumberPrefix = regexp.MustCompile(`^\d{1,3}[\s.\-]+`)

// cleanTrackName strips surrounding whitespace and a leading track-number
// prefix from a file base name, e.g. "  01 - Intro" becomes "Intro".
//
// The name is trimmed before matching so that leading whitespace cannot hide
// the prefix. If removing the prefix would leave nothing (the name is only a
// track number), the trimmed name is returned instead, so a non-blank input
// never produces an empty result.
func cleanTrackName(name string) string {
	trimmed := strings.TrimSpace(name)
	cleaned := strings.TrimSpace(trackNumberPrefix.ReplaceAllString(trimmed, ""))
	if cleaned == "" {
		return trimmed
	}
	return cleaned
}