- gRPC service with MusicBrainz provider - PostgreSQL schema with migrations - Service layer with database-first caching - Repository pattern for data access - YAML configuration support - Research documentation for 17 music metadata projects
32 KiB
AcoustID Codebase Analysis
Configuration System
Configuration File Format
File: acoustid.conf (INI format)
Template: acoustid.conf.dist
Structure:
[database]
name = acoustid_app
user = acoustid
password_file = /run/secrets/db_password
host = postgres
port = 5432
pool_size = 20
pool_recycle = 3600
[database_fingerprint]
name = acoustid_fingerprint
user = acoustid
password_file = /run/secrets/db_password
host = postgres
port = 5432
[database_ingest]
name = acoustid_ingest
user = acoustid
password_file = /run/secrets/db_password
host = postgres
port = 5432
[database_musicbrainz]
name = musicbrainz_db
user = acoustid_readonly
password_file = /run/secrets/mb_password
host = musicbrainz-db
port = 5432
[redis]
host = redis
port = 6379
db = 0
password_file = /run/secrets/redis_password
[nats]
servers = nats://nats:4222
stream = acoustid_submissions
consumer = acoustid_worker
[fingerprint_index]
host = index
port = 6081
protocol = http
[cluster]
role = master
name = acoustid-prod
[cluster.rate_limiter]
global_limit = 3
ip_limit = 3
[sentry]
dsn = https://...@sentry.io/...
environment = production
traces_sample_rate = 0.1
[logging]
level = INFO
Environment Variable Overrides
Pattern: ACOUSTID_<SECTION>_<KEY>
Examples:
ACOUSTID_DATABASE_NAME=acoustid_app
ACOUSTID_DATABASE_PASSWORD=secret123
ACOUSTID_REDIS_HOST=redis.example.com
ACOUSTID_FINGERPRINT_INDEX_HOST=index.example.com
Secret Files (suffix _file):
ACOUSTID_DATABASE_PASSWORD_FILE=/run/secrets/db_password
ACOUSTID_REDIS_PASSWORD_FILE=/run/secrets/redis_password
Configuration Loading
File: acoustid/config.py
import os
import configparser
from typing import Any, Optional
class Config:
    """Configuration manager with environment variable overrides.

    Values are read from an optional INI file and then overridden by
    ``ACOUSTID_<SECTION>_<KEY>`` environment variables.  A key ending in
    ``_FILE`` names a file whose stripped contents become the value of the
    key without that suffix.
    """

    def __init__(self, config_file: Optional[str] = None):
        """Load ``config_file`` (if given) and apply environment overrides."""
        self.config = configparser.ConfigParser()
        # Load from file
        if config_file:
            self.config.read(config_file)
        # Apply environment variable overrides
        self._apply_env_overrides()

    @staticmethod
    def _split_env_name(name, known_sections):
        """Split ``section_option`` into ``(section, option)``.

        Sections already present in the loaded file are matched first
        (longest name first) so that sections containing underscores, such as
        ``fingerprint_index``, resolve correctly.  Otherwise the name is split
        at the first underscore.  Returns ``None`` when no option part exists.
        """
        for section in known_sections:
            head = section + '_'
            if name.startswith(head) and len(name) > len(head):
                return section, name[len(head):]
        section, sep, option = name.partition('_')
        if not sep or not section or not option:
            return None
        return section, option

    def _apply_env_overrides(self):
        """Apply ACOUSTID_* environment variables.

        Variables are processed in sorted order so the result does not depend
        on environment ordering; when both ``X`` and ``X_FILE`` are set, the
        ``_FILE`` variant is applied last and wins.

        Raises:
            OSError: if a ``*_FILE`` variable points at an unreadable file.
        """
        prefix = 'ACOUSTID_'
        # Snapshot sections from the file before overrides add new ones.
        known_sections = sorted(self.config.sections(), key=len, reverse=True)
        for key, value in sorted(os.environ.items()):
            if not key.startswith(prefix):
                continue
            # Parse ACOUSTID_SECTION_KEY
            parsed = self._split_env_name(key[len(prefix):].lower(), known_sections)
            if parsed is None:
                continue
            section, option = parsed
            # Handle _file suffix (read from file)
            if option.endswith('_file'):
                option = option[:-len('_file')]
                if not option:
                    continue
                with open(value) as f:
                    value = f.read().strip()
            # Set config value
            if not self.config.has_section(section):
                self.config.add_section(section)
            # Escape '%' so ConfigParser interpolation treats the override as
            # a literal; an unescaped '%' makes set() raise ValueError.
            self.config.set(section, option, value.replace('%', '%%'))

    def get(self, section: str, key: str, default: Any = None) -> Any:
        """Get configuration value, or ``default`` if section/key is missing."""
        try:
            return self.config.get(section, key)
        except (configparser.NoSectionError, configparser.NoOptionError):
            return default

    def getint(self, section: str, key: str, default: int = 0) -> int:
        """Get integer configuration value.

        Raises:
            ValueError: if the stored value is not a valid integer.
        """
        try:
            return self.config.getint(section, key)
        except (configparser.NoSectionError, configparser.NoOptionError):
            return default

    def getfloat(self, section: str, key: str, default: float = 0.0) -> float:
        """Get float configuration value.

        Raises:
            ValueError: if the stored value is not a valid float.
        """
        try:
            return self.config.getfloat(section, key)
        except (configparser.NoSectionError, configparser.NoOptionError):
            return default

    def getboolean(self, section: str, key: str, default: bool = False) -> bool:
        """Get boolean configuration value.

        Raises:
            ValueError: if the stored value is not a recognised boolean.
        """
        try:
            return self.config.getboolean(section, key)
        except (configparser.NoSectionError, configparser.NoOptionError):
            return default
# Global config instance
config = Config(os.environ.get('ACOUSTID_CONFIG', 'acoustid.conf'))
Configuration Access Patterns
Database Configuration:
from acoustid.config import config
db_config = {
'name': config.get('database', 'name'),
'user': config.get('database', 'user'),
'password': config.get('database', 'password'),
'host': config.get('database', 'host', 'localhost'),
'port': config.getint('database', 'port', 5432),
'pool_size': config.getint('database', 'pool_size', 20),
'pool_recycle': config.getint('database', 'pool_recycle', 3600)
}
Feature Flags:
# Check if feature is enabled
use_async_api = config.getboolean('features', 'async_api', False)
use_fpstore = config.getboolean('fingerprint_store', 'enabled', False)
Logging System
Logging Configuration
File: acoustid/logging.py
import logging
import sys
from acoustid.config import config
def setup_logging():
    """Configure logging for the application.

    Reads the root level from ``[logging] level`` (default ``INFO``) and
    optional per-module levels from ``[logging] level_<module>``.

    Raises:
        ValueError: if a configured level is not a standard logging level name.
    """
    def _resolve_level(name):
        # Normalise case so "debug" works; a bare getattr(logging, "debug")
        # would return the logging.debug function rather than a level number.
        value = getattr(logging, str(name).strip().upper(), None)
        if not isinstance(value, int):
            raise ValueError(f"Invalid log level: {name!r}")
        return value

    # Root logger level
    root_level = config.get('logging', 'level', 'INFO')
    logging.basicConfig(
        level=_resolve_level(root_level),
        format='%(asctime)s [%(process)d] [%(levelname)s] %(name)s: %(message)s',
        stream=sys.stdout
    )
    # Per-module log levels
    for module in ['acoustid', 'sqlalchemy', 'werkzeug', 'uvicorn']:
        level = config.get('logging', f'level_{module}')
        if level:
            logging.getLogger(module).setLevel(_resolve_level(level))
Environment Variable Log Levels
Pattern: ACOUSTID_LOGGING_LEVEL_<MODULE>
Examples:
ACOUSTID_LOGGING_LEVEL=INFO
ACOUSTID_LOGGING_LEVEL_ACOUSTID=DEBUG
ACOUSTID_LOGGING_LEVEL_SQLALCHEMY=WARNING
ACOUSTID_LOGGING_LEVEL_WERKZEUG=ERROR
Logger Usage
Module-Level Logger:
import logging
logger = logging.getLogger(__name__)
def process_submission(submission_id):
    """Process one submission, logging start, success and failure.

    Any exception raised by the processing step is logged with its traceback
    and not propagated.
    """
    logger.info("Processing submission %d", submission_id)
    try:
        # ... processing logic ...
        logger.debug("Submission %d processed successfully", submission_id)
    except Exception as exc:
        # logger.exception logs at ERROR level with the traceback attached.
        logger.exception(
            "Failed to process submission %d: %s", submission_id, exc
        )
Structured Logging (future):
import structlog
logger = structlog.get_logger()
logger.info("submission.processed",
submission_id=submission_id,
track_id=track_id,
duration_ms=duration)
Metrics and Monitoring
StatsD Metrics
File: acoustid/metrics.py
import statsd
from acoustid.config import config
# Initialize StatsD client
statsd_client = statsd.StatsClient(
host=config.get('statsd', 'host', 'localhost'),
port=config.getint('statsd', 'port', 8125),
prefix='acoustid'
)
def record_api_request(endpoint: str, method: str, status: int, duration: float):
    """Record one API request: a counter increment plus a timing sample.

    Args:
        endpoint: Endpoint name, embedded in the metric name.
        method: HTTP method, embedded in the metric name.
        status: Response status code, embedded in the counter name.
        duration: Request duration in seconds.
    """
    counter_name = f'api.requests_total.{endpoint}.{method}.{status}'
    timer_name = f'api.request_duration_seconds.{endpoint}.{method}'
    # Counter: total requests
    statsd_client.incr(counter_name)
    # NOTE(review): the metric name says "seconds" but the value sent is
    # milliseconds (duration * 1000).
    duration_ms = duration * 1000
    statsd_client.timing(timer_name, duration_ms)
def record_lookup_search(hit: bool):
    """Count a lookup search, and additionally a match when ``hit`` is true."""
    metric_names = ['api.lookup.searches.total']
    if hit:
        metric_names.append('api.lookup.matches.total')
    for metric_name in metric_names:
        statsd_client.incr(metric_name)
def record_submission():
    """Increment the ``new_submissions`` counter."""
    metric_name = 'new_submissions'
    statsd_client.incr(metric_name)
def record_error(error_code: int, handled: bool = True):
    """Count an error.

    Handled errors are counted per ``error_code``; unhandled errors share a
    single counter and ``error_code`` is not used for them.
    """
    metric_name = (
        f'api.handled_errors_total.{error_code}'
        if handled
        else 'api.unhandled_errors_total'
    )
    statsd_client.incr(metric_name)
Metrics Collection Points
API Request Handler:
from acoustid.metrics import record_api_request
import time
def handle_request(request):
    """Process ``request`` and record its request metrics exactly once.

    The status recorded is the response's ``status_code``, or 500 when
    ``process_request`` raises; the exception is propagated unchanged.
    """
    # Monotonic clock: wall-clock adjustments cannot skew or negate durations.
    start_time = time.monotonic()
    status = 500  # reported if process_request raises
    try:
        response = process_request(request)
        status = response.status_code
        return response
    finally:
        # Recording lives in one place, so a failure inside the metrics call
        # can no longer trigger a second (500) record for the same request.
        record_api_request(
            endpoint=request.endpoint,
            method=request.method,
            status=status,
            duration=time.monotonic() - start_time
        )
Lookup Handler:
from acoustid.metrics import record_lookup_search
def lookup_fingerprint(fingerprint):
    """Search the index for ``fingerprint`` and record whether anything matched."""
    matches = search_index(fingerprint)
    found_any = len(matches) > 0
    record_lookup_search(hit=found_any)
    return matches
Prometheus Metrics (Index)
File: src/metrics.zig (index)
const std = @import("std");
const prometheus = @import("metrics");
/// Collection of metrics for the fingerprint index: three duration
/// histograms (search, insert, merge) and three gauges (segment count,
/// memory segment size, file segment size).
pub const Metrics = struct {
search_duration: prometheus.Histogram,
insert_duration: prometheus.Histogram,
segment_count: prometheus.Gauge,
memory_segment_size: prometheus.Gauge,
file_segment_size: prometheus.Gauge,
merge_duration: prometheus.Histogram,
/// Creates every metric with its name and help text.
/// Returns an error if any individual metric init fails.
// NOTE(review): if a later init fails, metrics created earlier are not
// released here; confirm whether the prometheus types need cleanup.
pub fn init(allocator: std.mem.Allocator) !Metrics {
return Metrics{
.search_duration = try prometheus.Histogram.init(
allocator,
"fpindex_search_duration_seconds",
"Search operation duration",
// Bucket bounds from 0.001 to 1.0.
&[_]f64{0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0}
),
.insert_duration = try prometheus.Histogram.init(
allocator,
"fpindex_insert_duration_seconds",
"Insert operation duration",
// Same bucket bounds as the search histogram.
&[_]f64{0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0}
),
.segment_count = try prometheus.Gauge.init(
allocator,
"fpindex_segment_count",
"Number of segments"
),
.memory_segment_size = try prometheus.Gauge.init(
allocator,
"fpindex_memory_segment_size_bytes",
"Memory segment size in bytes"
),
.file_segment_size = try prometheus.Gauge.init(
allocator,
"fpindex_file_segment_size_bytes",
"File segment size in bytes"
),
.merge_duration = try prometheus.Histogram.init(
allocator,
"fpindex_merge_duration_seconds",
"Segment merge duration",
// Wider bucket bounds (0.1 to 60.0) than search/insert.
&[_]f64{0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0}
),
};
}
/// Adds one observation to the search duration histogram.
pub fn recordSearch(self: *Metrics, duration: f64) void {
self.search_duration.observe(duration);
}
/// Adds one observation to the insert duration histogram.
pub fn recordInsert(self: *Metrics, duration: f64) void {
self.insert_duration.observe(duration);
}
/// Sets the segment count gauge to `count`, converted to f64.
// NOTE(review): the two-argument @intToFloat form looks like the older
// builtin spelling; confirm against the Zig version this project targets.
pub fn updateSegmentCount(self: *Metrics, count: u64) void {
self.segment_count.set(@intToFloat(f64, count));
}
};
Health Check System
Health Check Endpoints
File: acoustid/api/health.py
from flask import Blueprint, jsonify
from acoustid.db import get_db_session
from acoustid.redis import get_redis_client
import logging
logger = logging.getLogger(__name__)
health_bp = Blueprint('health', __name__)
@health_bp.route('/_health')
def health_check():
    """Full health check with database write test.

    Runs a read, then creates, writes to and drops a temporary table, commits,
    and pings Redis.  Returns ``{'status': 'ok'}`` with 200 on success, or an
    error payload with 503 if any step raises.
    """
    db = None
    try:
        # Test database write
        db = get_db_session()
        db.execute("SELECT 1")
        db.execute("CREATE TEMP TABLE health_check (id INT)")
        db.execute("INSERT INTO health_check VALUES (1)")
        db.execute("DROP TABLE health_check")
        db.commit()
        # Test Redis
        redis = get_redis_client()
        redis.ping()
        return jsonify({'status': 'ok'}), 200
    except Exception as e:
        logger.error("Health check failed: %s", e, exc_info=True)
        # Roll back so a failed statement does not leave the session stuck in
        # an aborted transaction for whoever uses it next.
        if db is not None:
            try:
                db.rollback()
            except Exception:
                logger.warning("Health check rollback failed", exc_info=True)
        return jsonify({'status': 'error', 'message': str(e)}), 503
@health_bp.route('/_health_ro')
def health_check_readonly():
    """Read-only health check: one database read plus a Redis ping.

    Returns ``{'status': 'ok'}`` with 200 on success, or an error payload
    with 503 if either dependency raises.
    """
    try:
        # Database must answer a trivial read.
        session = get_db_session()
        session.execute("SELECT 1")
        # Redis must answer a ping.
        client = get_redis_client()
        client.ping()
        ok_payload = {'status': 'ok'}
        return jsonify(ok_payload), 200
    except Exception as exc:
        logger.error("Read-only health check failed: %s", exc, exc_info=True)
        error_payload = {'status': 'error', 'message': str(exc)}
        return jsonify(error_payload), 503
@health_bp.route('/_health_docker')
def health_check_docker():
    """Minimal health check that touches no external dependency."""
    payload = {'status': 'ok'}
    return jsonify(payload), 200
Health Check Usage
Docker Compose:
healthcheck:
test: ["CMD", "wget", "-q", "-O-", "http://localhost:5000/_health"]
interval: 30s
timeout: 10s
retries: 3
start_period: 40s
Kubernetes:
livenessProbe:
httpGet:
path: /_health_docker
port: 5000
initialDelaySeconds: 30
periodSeconds: 10
readinessProbe:
httpGet:
path: /_health_ro
port: 5000
initialDelaySeconds: 10
periodSeconds: 5
Error Tracking (Sentry)
Sentry Integration
File: acoustid/sentry.py
import sentry_sdk
from sentry_sdk.integrations.flask import FlaskIntegration
from sentry_sdk.integrations.sqlalchemy import SqlalchemyIntegration
from sentry_sdk.integrations.redis import RedisIntegration
from acoustid.config import config
def init_sentry():
    """Initialize Sentry error tracking.

    Does nothing when ``[sentry] dsn`` is not configured.

    Raises:
        ValueError: if ``traces_sample_rate`` is not a valid float.
    """
    dsn = config.get('sentry', 'dsn')
    if not dsn:
        return
    # The Config class shown in acoustid/config.py exposes get/getint/
    # getboolean only, so convert explicitly instead of calling getfloat().
    traces_sample_rate = float(config.get('sentry', 'traces_sample_rate', 0.1))
    sentry_sdk.init(
        dsn=dsn,
        environment=config.get('sentry', 'environment', 'production'),
        traces_sample_rate=traces_sample_rate,
        integrations=[
            FlaskIntegration(),
            SqlalchemyIntegration(),
            RedisIntegration(),
        ],
        before_send=before_send_filter,
    )
def before_send_filter(event, hint):
    """Filter and tag events before they are sent to Sentry.

    Returns ``None`` (dropping the event) when the hint carries a
    ``KeyboardInterrupt`` or ``SystemExit``; otherwise returns the event with
    a ``cluster`` tag taken from the ``[cluster] name`` setting.
    """
    ignored_types = (KeyboardInterrupt, SystemExit)
    if 'exc_info' in hint:
        _exc_type, exc_value, _traceback = hint['exc_info']
        if isinstance(exc_value, ignored_types):
            return None
    # Add custom context
    tags = event.setdefault('tags', {})
    tags['cluster'] = config.get('cluster', 'name', 'unknown')
    return event
Error Context
Adding Context:
from sentry_sdk import set_context, set_tag, set_user
def handle_submission(submission_id, user_id):
    """Attach Sentry user, tag and context data, then process the submission.

    Exceptions from ``process_submission`` propagate to the caller unchanged.
    """
    # Local import: this snippet's import list does not bring ``time`` in.
    import time

    # Set user context
    set_user({'id': user_id})
    # Set custom tags
    set_tag('submission_id', submission_id)
    # Set custom context
    set_context('submission', {
        'id': submission_id,
        'user_id': user_id,
        'timestamp': time.time()
    })
    # No try/except needed: the original handler only re-raised, so letting
    # the exception propagate directly is equivalent.
    process_submission(submission_id)
Authentication System
API Key Types
File: acoustid/auth.py
from acoustid.db import get_db_session
from acoustid.tables import Application, Account
from cachetools import TTLCache
import logging
logger = logging.getLogger(__name__)
# API key cache (1000 keys, 60 second TTL)
api_key_cache = TTLCache(maxsize=1000, ttl=60)
class APIKeyType:
    """String constants naming the kinds of API key."""

    # Key identifying a client application.
    APPLICATION = 'application'
    # Key identifying a user account.
    USER = 'user'
    # Demo key type.
    DEMO = 'demo'
DEMO_API_KEY = '8XaBELgH'
def validate_application_key(api_key: str) -> "Optional[Application]":
    """Validate application API key.

    Lookup order: demo key, in-process TTL cache, then the database (active
    applications only).  Only successful lookups are cached.

    Returns:
        Application object if valid, None otherwise
    """
    # The return annotation is a string because this module's import list
    # does not include typing.Optional.
    # Empty/missing keys can never be valid; skip the cache and database.
    if not api_key:
        return None
    # Check demo key
    if api_key == DEMO_API_KEY:
        return Application(
            id=0,
            name='Demo Application',
            apikey=DEMO_API_KEY,
            active=True,
            rate_limit=3
        )
    # Check cache with a single .get(): a separate "in" test followed by
    # indexing can raise KeyError if the entry expires in between.
    cached = api_key_cache.get(api_key)
    if cached is not None:
        return cached
    # Query database
    db = get_db_session()
    app = db.query(Application).filter_by(apikey=api_key, active=True).first()
    if app:
        # NOTE(review): the cached object may outlive the session that
        # loaded it; confirm callers only read already-loaded attributes.
        api_key_cache[api_key] = app
    return app
def validate_user_key(api_key: str) -> "Optional[Account]":
    """Validate user API key.

    Checks the in-process TTL cache (under a ``user:`` prefixed key) before
    querying the database.  Only successful lookups are cached.

    Returns:
        Account object if valid, None otherwise
    """
    # The return annotation is a string because this module's import list
    # does not include typing.Optional.
    # Empty/missing keys can never be valid; skip the cache and database.
    if not api_key:
        return None
    # Check cache with a single .get(): a separate "in" test followed by
    # indexing can raise KeyError if the entry expires in between.
    cache_key = f'user:{api_key}'
    cached = api_key_cache.get(cache_key)
    if cached is not None:
        return cached
    # Query database
    db = get_db_session()
    account = db.query(Account).filter_by(apikey=api_key).first()
    if account:
        api_key_cache[cache_key] = account
    return account
def require_api_key(key_type: str = APIKeyType.APPLICATION):
    """Decorator to require API key authentication.

    The key is read from the ``client`` request value for application keys
    and from ``user`` otherwise.  On success the validated entity is stored
    on the request as ``api_application`` or ``api_account``; a missing or
    invalid key produces a 401 JSON error response.

    Args:
        key_type: Type of API key required (application or user)
    """
    import functools

    def _error(message):
        # Shared 401 payload for missing and invalid keys.
        from flask import jsonify
        return jsonify({
            'status': 'error',
            'error': {
                'code': 1,
                'message': message
            }
        }), 401

    def decorator(func):
        # functools.wraps preserves __name__ (as before) plus __doc__,
        # __module__ and __wrapped__.
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            from flask import request
            is_application = key_type == APIKeyType.APPLICATION
            # Get API key from request
            api_key = request.values.get('client' if is_application else 'user')
            if not api_key:
                return _error(f'Missing {key_type} API key')
            # Validate API key
            if is_application:
                entity = validate_application_key(api_key)
            else:
                entity = validate_user_key(api_key)
            if not entity:
                return _error(f'Invalid {key_type} API key')
            # Store in request context.  Only the attribute for this key type
            # is assigned, so stacking the decorator for both key types no
            # longer resets the entity stored by the other one to None.
            if is_application:
                request.api_application = entity
            elif key_type == APIKeyType.USER:
                request.api_account = entity
            # Both attributes still always exist afterwards, as before.
            for attr in ('api_application', 'api_account'):
                if not hasattr(request, attr):
                    setattr(request, attr, None)
            return func(*args, **kwargs)
        return wrapper
    return decorator
Authentication Usage
Lookup Endpoint (application key only):
from acoustid.auth import require_api_key, APIKeyType
@app.route('/v2/lookup', methods=['GET', 'POST'])
@require_api_key(APIKeyType.APPLICATION)
def lookup():
    """Lookup endpoint; requires an application API key."""
    # The decorator has stored the validated application on the request.
    application = request.api_application
    # ... lookup logic ...
Submit Endpoint (application + user key):
@app.route('/v2/submit', methods=['POST'])
@require_api_key(APIKeyType.APPLICATION)
def submit():
    """Submit endpoint; requires an application key and a user key.

    The application key is checked by the decorator; the user key is read
    from the ``user`` request value and validated here.
    """
    from flask import request

    def _unauthorized(message):
        # 401 response in the same shape for both user-key failures.
        return jsonify({'status': 'error', 'error': {'code': 1, 'message': message}}), 401

    # Validate user key
    user_key = request.values.get('user')
    if not user_key:
        return _unauthorized('Missing user API key')
    account = validate_user_key(user_key)
    if not account:
        return _unauthorized('Invalid user API key')
    # Both application and user are authenticated
    application = request.api_application
    # ... submit logic ...
Rate Limiting
Rate Limiter Implementation
File: acoustid/api/ratelimit.py
from acoustid.redis import get_redis_client
from acoustid.config import config
from flask import request
import time
import logging
logger = logging.getLogger(__name__)
class RateLimiter:
    """Redis-based sliding window rate limiter.

    The window is split into fixed-size buckets; each request increments the
    current bucket and the request count is the sum over all buckets in the
    window.
    """

    def __init__(self):
        self.redis = get_redis_client()
        self.window_duration = 20  # seconds
        self.window_steps = 4
        self.bucket_duration = self.window_duration // self.window_steps
        self.ttl = self.window_duration + 5  # cleanup buffer

    def check_limit(self, scope: str, identifier: str, limit: int) -> tuple[bool, dict]:
        """Check if request is within rate limit.

        The current request is counted before the check, so rejected requests
        also consume quota.

        Args:
            scope: Rate limit scope (global, app, ip)
            identifier: Unique identifier for scope
            limit: Maximum requests per window

        Returns:
            Tuple of (allowed, info_dict) where info_dict has the keys
            ``limit``, ``remaining`` and ``reset``.
        """
        current_time = int(time.time())
        # Start of the current bucket, then one bucket per window step going
        # back in time (newest first).
        newest = (current_time // self.bucket_duration) * self.bucket_duration
        buckets = [newest - step * self.bucket_duration
                   for step in range(self.window_steps)]
        keys = [f"rl:bucket:{scope}:{identifier}:{bucket}" for bucket in buckets]
        # Send INCR + EXPIRE together so a failure between the two cannot
        # leave a bucket key without a TTL, and read all buckets in the same
        # round trip instead of one GET per bucket.
        pipe = self.redis.pipeline()
        pipe.incr(keys[0])
        pipe.expire(keys[0], self.ttl)
        pipe.mget(keys)
        _incremented, _expire_set, counts = pipe.execute()
        # Sum all buckets in window (missing buckets come back as None)
        total = sum(int(count) for count in counts if count)
        # Check limit
        allowed = total <= limit
        info = {
            'limit': limit,
            'remaining': max(0, limit - total),
            'reset': buckets[0] + self.window_duration
        }
        if not allowed:
            logger.warning("Rate limit exceeded: scope=%s, identifier=%s, total=%d, limit=%d",
                           scope, identifier, total, limit)
        return allowed, info
rate_limiter = RateLimiter()
def check_rate_limit(application=None):
    """Check rate limits for current request.

    Tiers are evaluated in order and evaluation stops at the first tier that
    rejects the request:
    1. Global limit (all requests)
    2. Application limit (per API key), only when ``application`` is given
    3. IP limit (per client IP)

    Returns:
        Tuple of (allowed, info_dict) from the rejecting tier, or from the IP
        tier when every tier allows the request.
    """
    section = 'cluster.rate_limiter'

    def _tiers():
        # Lazily yield (scope, identifier, limit) so later tiers are only
        # resolved once the earlier ones have passed.
        yield 'global', 'all', config.getint(section, 'global_limit', 3)
        if application:
            app_limit = application.rate_limit or config.getint(section, 'app_limit', 10)
            yield 'app', application.apikey, app_limit
        ip_limit = config.getint(section, 'ip_limit', 3)
        yield 'ip', request.remote_addr, ip_limit

    for scope, identifier, limit in _tiers():
        allowed, info = rate_limiter.check_limit(scope, identifier, limit)
        if not allowed:
            return False, info
    return allowed, info
Rate Limit Middleware
File: acoustid/api/middleware.py
from acoustid.api.ratelimit import check_rate_limit
from flask import request, jsonify
def rate_limit_middleware():
    """Flask before_request handler for rate limiting.

    Returns ``None`` to let the request continue (health-check paths, or a
    request within its limits), or a 429 JSON response carrying the
    rate-limit headers when a limit is exceeded.
    """
    # Health checks are never rate limited.
    if request.path.startswith('/_health'):
        return None
    # NOTE(review): ``api_application`` is set by the require_api_key
    # decorator; confirm it is already populated when this handler runs,
    # otherwise the per-application tier is skipped.
    application = getattr(request, 'api_application', None)
    allowed, info = check_rate_limit(application)
    headers = {
        'X-RateLimit-Limit': str(info['limit']),
        'X-RateLimit-Remaining': str(info['remaining']),
        'X-RateLimit-Reset': str(info['reset'])
    }
    if allowed:
        # Keep the headers on the request so they can be attached later.
        request.rate_limit_headers = headers
        return None
    body = {
        'status': 'error',
        'error': {
            'code': 5,
            'message': 'Rate limit exceeded'
        }
    }
    response = jsonify(body)
    response.status_code = 429
    for header_name, header_value in headers.items():
        response.headers[header_name] = header_value
    return response
Testing Framework
Test Configuration
File: tests/conftest.py
import pytest
from acoustid.db import create_engine, create_session, Base
from acoustid.config import Config
import tempfile
import os
@pytest.fixture(scope='session')
def test_config():
    """Create test configuration pointing at the local test database."""
    config = Config()
    # Config() applies ACOUSTID_* environment overrides and may already have
    # created this section; an unconditional add_section() would then raise
    # DuplicateSectionError.
    if not config.config.has_section('database'):
        config.config.add_section('database')
    config.config.set('database', 'name', 'acoustid_test')
    config.config.set('database', 'user', 'acoustid')
    config.config.set('database', 'password', 'acoustid')
    config.config.set('database', 'host', 'localhost')
    return config
@pytest.fixture
def with_database(test_config):
    """Yield a database session on freshly created tables.

    After the test, the session is rolled back and closed and all tables are
    dropped.
    """
    db_engine = create_engine(test_config)
    metadata = Base.metadata
    # Build the schema before handing out a session.
    metadata.create_all(db_engine)
    db_session = create_session(db_engine)
    yield db_session
    # Teardown: discard pending work, release the session, drop the schema.
    db_session.rollback()
    db_session.close()
    metadata.drop_all(db_engine)
@pytest.fixture
def with_script(test_config):
    """Yield a set-up ``Script`` named ``test``; tear it down afterwards."""
    from acoustid.script import Script

    test_script = Script('test', config=test_config)
    test_script.setup()
    yield test_script
    test_script.teardown()
@pytest.fixture
def fingerprint_fixture():
    """Predefined test fingerprint: 8 hash values repeated 30 times."""
    base_hashes = [
        123456789, 987654321, 456789123, 321987654,
        789123456, 654321987, 147258369, 963852741,
    ]
    repeats = 30  # 8 * 30 = 240 hashes in total
    return base_hashes * repeats
Test Decorators
File: tests/helpers.py
import functools
from tests.conftest import with_database, with_script
def requires_database(func):
    """Decorator that activates the ``with_database`` fixture for a test.

    The fixture is applied through ``pytest.mark.usefixtures``, so it is set
    up and torn down around the test but is not passed in as an argument.
    """
    # Local import: this module's import list does not include pytest.
    import pytest

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)
    return pytest.mark.usefixtures('with_database')(wrapper)
def requires_script(func):
    """Decorator that activates the ``with_script`` fixture for a test.

    The fixture is applied through ``pytest.mark.usefixtures``, so it is set
    up and torn down around the test but is not passed in as an argument.
    """
    # Local import: this module's import list does not include pytest.
    import pytest

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)
    return pytest.mark.usefixtures('with_script')(wrapper)
Example Tests
File: tests/test_api_lookup.py
import pytest
from acoustid.api.v2.lookup import LookupHandler
from tests.conftest import with_database, fingerprint_fixture
class TestLookupAPI:
    """Tests for the lookup API handler."""

    def test_lookup_with_fingerprint(self, with_database, fingerprint_fixture):
        """A stored fingerprint is found with a high score."""
        session = with_database
        track = create_test_track(session)
        create_test_fingerprint(session, track.id, fingerprint_fixture)

        handler = LookupHandler(session)
        results = handler.lookup(fingerprint=fingerprint_fixture, duration=240)

        assert len(results) > 0
        best = results[0]
        assert best['id'] == str(track.gid)
        assert best['score'] > 0.9

    def test_lookup_with_track_id(self, with_database):
        """Looking up by track ID returns exactly that track."""
        session = with_database
        track = create_test_track(session)
        expected_id = str(track.gid)

        handler = LookupHandler(session)
        results = handler.lookup(trackid=expected_id)

        assert len(results) == 1
        assert results[0]['id'] == expected_id

    def test_lookup_no_match(self, with_database, fingerprint_fixture):
        """An empty database yields no results."""
        handler = LookupHandler(with_database)
        results = handler.lookup(fingerprint=fingerprint_fixture, duration=240)
        assert len(results) == 0

    @pytest.mark.parametrize('duration', [0, -1, 10000])
    def test_lookup_invalid_duration(self, with_database, fingerprint_fixture, duration):
        """Out-of-range durations are rejected with ValueError."""
        handler = LookupHandler(with_database)
        with pytest.raises(ValueError):
            handler.lookup(fingerprint=fingerprint_fixture, duration=duration)
File: tests/test_fingerprint.py
import pytest
from acoustid.fingerprint import (
decode_fingerprint,
encode_fingerprint,
extract_query,
compare_fingerprints
)
from tests.conftest import fingerprint_fixture
class TestFingerprint:
    """Tests for the fingerprint utility functions."""

    def test_encode_decode(self, fingerprint_fixture):
        """Encoding then decoding returns the original fingerprint."""
        round_tripped = decode_fingerprint(encode_fingerprint(fingerprint_fixture))
        assert round_tripped == fingerprint_fixture

    def test_extract_query(self, fingerprint_fixture):
        """Extracted query terms are bounded and drawn from the fingerprint."""
        terms = extract_query(fingerprint_fixture, max_terms=50)
        assert len(terms) <= 50
        assert all(term in fingerprint_fixture for term in terms)

    def test_compare_identical(self, fingerprint_fixture):
        """A fingerprint compared with itself scores 1.0."""
        same_score = compare_fingerprints(fingerprint_fixture, fingerprint_fixture)
        assert same_score == 1.0

    def test_compare_different(self, fingerprint_fixture):
        """A fingerprint with every hash shifted by 1000 scores below 0.1."""
        shifted = [value + 1000 for value in fingerprint_fixture]
        different_score = compare_fingerprints(fingerprint_fixture, shifted)
        assert different_score < 0.1
Code Organization Patterns
Service Layer Pattern
File: acoustid/data/fingerprint.py
from acoustid.db import get_db_session
from acoustid.tables import Fingerprint, Track
from acoustid.fpstore import FingerprintIndexClient
import logging
logger = logging.getLogger(__name__)
class FingerprintService:
    """Service for fingerprint operations (search and insert)."""

    def __init__(self, db_session=None, index_client=None):
        """Use the given session/index client, or create defaults when None."""
        self.db = db_session if db_session is not None else get_db_session()
        self.index = index_client if index_client is not None else FingerprintIndexClient()

    def search(self, fingerprint, duration, limit=10):
        """Search for matching fingerprints.

        Queries the index for candidates, loads those within 5 of ``duration``
        in length from the database, scores them against ``fingerprint`` and
        returns at most ``limit`` ``(Fingerprint, score)`` tuples, best first.
        """
        # Local import: this module's import list does not bring these in.
        from acoustid.fingerprint import compare_fingerprints, extract_query

        # Extract query terms
        query_terms = extract_query(fingerprint)
        # Search index (over-fetch; the length filter below discards some)
        candidates = self.index.search(query_terms, limit=limit * 2)
        fp_ids = [c[0] for c in candidates]
        if not fp_ids:
            # Nothing to load; avoid a database query with an empty IN list.
            return []
        # Fetch from database
        fingerprints = self.db.query(Fingerprint).filter(
            Fingerprint.id.in_(fp_ids),
            Fingerprint.length.between(duration - 5, duration + 5)
        ).all()
        # Score and sort
        results = [
            (fp, compare_fingerprints(fingerprint, fp.fingerprint))
            for fp in fingerprints
        ]
        results.sort(key=lambda x: x[1], reverse=True)
        return results[:limit]

    def insert(self, track_id, fingerprint, duration, **metadata):
        """Insert new fingerprint into the database and the index.

        The session is rolled back and the exception re-raised if any step
        fails.  Returns the new ``Fingerprint`` record.
        """
        # Local import: this module's import list does not bring this in.
        from acoustid.fingerprint import extract_query

        # Create fingerprint record
        fp = Fingerprint(
            track_id=track_id,
            fingerprint=fingerprint,
            length=duration,
            **metadata
        )
        try:
            self.db.add(fp)
            self.db.flush()  # assigns fp.id, needed for the index entry
            # Update index
            query_terms = extract_query(fingerprint)
            self.index.insert(fp.id, query_terms)
            self.db.commit()
        except Exception:
            # Do not leave the session holding a half-applied insert.
            # NOTE(review): if commit fails after index.insert succeeded, the
            # index may still hold the entry; confirm how the index handles it.
            self.db.rollback()
            raise
        logger.info("Inserted fingerprint %d for track %d", fp.id, track_id)
        return fp
Repository Pattern
File: acoustid/data/track.py
from acoustid.tables import Track, TrackMBID
from sqlalchemy.orm import joinedload
class TrackRepository:
    """Data-access helper for ``Track`` rows and their MBID links."""

    def __init__(self, db_session):
        self.db = db_session

    def get_by_id(self, track_id):
        """Return the track with the given ID, or None."""
        matching = self.db.query(Track).filter_by(id=track_id)
        return matching.first()

    def get_by_gid(self, gid):
        """Return the track with the given public GID, or None."""
        matching = self.db.query(Track).filter_by(gid=gid)
        return matching.first()

    def get_with_mbids(self, track_id):
        """Return the track with its ``mbids`` relationship joined-loaded."""
        eager_mbids = joinedload(Track.mbids)
        matching = self.db.query(Track).options(eager_mbids).filter_by(id=track_id)
        return matching.first()

    def create(self):
        """Add a new track with a random GID and flush it to the session."""
        import uuid
        new_track = Track(gid=uuid.uuid4())
        self.db.add(new_track)
        self.db.flush()
        return new_track

    def link_mbid(self, track_id, mbid):
        """Add and flush a ``TrackMBID`` row linking the track to ``mbid``."""
        mbid_link = TrackMBID(track_id=track_id, mbid=mbid)
        self.db.add(mbid_link)
        self.db.flush()
        return mbid_link