a1f6701bac
- gRPC service with MusicBrainz provider - PostgreSQL schema with migrations - Service layer with database-first caching - Repository pattern for data access - YAML configuration support - Research documentation for 17 music metadata projects
1180 lines
28 KiB
Markdown
1180 lines
28 KiB
Markdown
# Lidarr Metadata API - Codebase Analysis
|
|
|
|
## Project Structure
|
|
|
|
```
|
|
LidarrAPI.Metadata/
|
|
├── lidarrmetadata/ # Main package
|
|
│ ├── __init__.py # Version and package metadata
|
|
│ ├── server.py # API server entry point
|
|
│ ├── crawler.py # Background crawler entry point
|
|
│ ├── app.py # Quart application factory + routes
|
|
│ ├── api.py # Business logic layer
|
|
│ ├── provider.py # Provider mixin architecture
|
|
│ ├── cache.py # Multi-tier cache implementation
|
|
│ ├── config.py # Configuration metaclass system
|
|
│ ├── util.py # Utility functions
|
|
│ ├── sql/ # MusicBrainz SQL queries
|
|
│ │ ├── artist.sql
|
|
│ │ ├── album.sql
|
|
│ │ ├── updated_artists.sql
|
|
│ │ └── updated_albums.sql
|
|
│ └── providers/ # Individual provider implementations
|
|
│ ├── __init__.py
|
|
│ ├── musicbrainz_db.py
|
|
│ ├── solr_search.py
|
|
│ ├── fanart.py
|
|
│ ├── theaudiodb.py
|
|
│ ├── wikipedia.py
|
|
│ └── spotify.py
|
|
├── tests/ # Test suite
|
|
│ ├── __init__.py
|
|
│ ├── test_config.py # Configuration tests (152 lines)
|
|
│ ├── test_provider.py # Provider tests
|
|
│ ├── test_cache.py # Cache tests
|
|
│ ├── test_api.py # API endpoint tests
|
|
│ ├── test_util.py # Utility function tests
|
|
│ └── test_app.py # Application tests
|
|
├── docker/ # Docker configuration
|
|
│ ├── Dockerfile
|
|
│ ├── docker-compose.yml
|
|
│ ├── docker-compose.dev.yml
|
|
│ ├── docker-compose.prod.yml
|
|
│ └── docker-compose.crawler.yml
|
|
├── scripts/ # Deployment scripts
|
|
│ ├── init-db.sh
|
|
│ ├── setup-amqp.sh
|
|
│ ├── create-indices.sql
|
|
│ └── backup.sh
|
|
├── pyproject.toml # Poetry dependencies
|
|
├── poetry.lock # Locked dependencies
|
|
├── azure-pipelines.yml # CI/CD configuration
|
|
├── sonar-project.properties # SonarCloud configuration
|
|
├── README.md # Project documentation
|
|
└── LICENSE # GPL-3.0 license
|
|
```
|
|
|
|
## Configuration System
|
|
|
|
### Metaclass-Based Configuration
|
|
|
|
**File**: `lidarrmetadata/config.py`
|
|
|
|
**Design pattern**: Metaclass with environment variable override support
|
|
|
|
**Implementation**:
|
|
|
|
```python
|
|
import os
|
|
from typing import Any
|
|
|
|
class ConfigMeta(type):
|
|
"""Metaclass that allows environment variable overrides"""
|
|
|
|
def __getattribute__(cls, name: str) -> Any:
|
|
# Skip special attributes
|
|
if name.startswith('_'):
|
|
return super().__getattribute__(name)
|
|
|
|
# Check for environment variable override
|
|
env_key = f"{cls.__name__.upper()}_{name.upper()}"
|
|
if env_key in os.environ:
|
|
value = os.environ[env_key]
|
|
|
|
# Type conversion
|
|
original = super().__getattribute__(name)
|
|
if isinstance(original, bool):
|
|
return value.lower() in ('true', '1', 'yes')
|
|
elif isinstance(original, int):
|
|
return int(value)
|
|
elif isinstance(original, float):
|
|
return float(value)
|
|
else:
|
|
return value
|
|
|
|
# Check for nested override (double underscore)
|
|
if '__' in name:
|
|
parts = name.split('__')
|
|
value = super().__getattribute__(parts[0])
|
|
for part in parts[1:]:
|
|
if isinstance(value, dict):
|
|
value = value[part]
|
|
else:
|
|
value = getattr(value, part)
|
|
return value
|
|
|
|
return super().__getattribute__(name)
|
|
```
|
|
|
|
### Configuration Classes
|
|
|
|
#### DefaultConfig
|
|
|
|
**Base configuration** with sensible defaults:
|
|
|
|
```python
|
|
class DefaultConfig(metaclass=ConfigMeta):
|
|
"""Default configuration"""
|
|
|
|
# Application
|
|
APPLICATION_ROOT = '/'
|
|
PORT = 5001
|
|
DEBUG = False
|
|
TESTING = False
|
|
|
|
# Database
|
|
DATABASE = {
|
|
'host': 'localhost',
|
|
'port': 5432,
|
|
'database': 'musicbrainz_db',
|
|
'user': 'abc',
|
|
'password': 'abc',
|
|
'min_pool_size': 10,
|
|
'max_pool_size': 50,
|
|
'command_timeout': 30
|
|
}
|
|
|
|
# Cache
|
|
CACHE = {
|
|
'redis_url': 'redis://localhost:6379/0',
|
|
'postgres_url': 'postgresql://abc:abc@localhost/lm_cache_db',
|
|
'namespace': 'lm3.7',
|
|
'default_ttl': 604800, # 7 days
|
|
'max_memory': '512mb'
|
|
}
|
|
|
|
# Solr
|
|
SOLR = {
|
|
'url': 'http://localhost:8983/solr',
|
|
'artist_core': 'artist',
|
|
'album_core': 'release-group',
|
|
'timeout': 5,
|
|
'rows': 10
|
|
}
|
|
|
|
# RabbitMQ
|
|
RABBITMQ = {
|
|
'host': 'localhost',
|
|
'port': 5672,
|
|
'user': 'abc',
|
|
'password': 'abc',
|
|
'exchange': 'search.index',
|
|
'artist_queue': 'search.index.artist',
|
|
'album_queue': 'search.index.album'
|
|
}
|
|
|
|
# External APIs
|
|
FANART_API_KEY = None
|
|
THEAUDIODB_API_KEY = '1'
|
|
SPOTIFY_CLIENT_ID = None
|
|
SPOTIFY_CLIENT_SECRET = None
|
|
LASTFM_API_KEY = None
|
|
LASTFM_API_SECRET = None
|
|
|
|
# Wikipedia
|
|
WIKIPEDIA = {
|
|
'timeout': 2,
|
|
'max_connections_per_host': 1,
|
|
'user_agent': 'LidarrMetadataAPI/10.0.0 (https://github.com/Lidarr/LidarrAPI.Metadata)',
|
|
'languages': ['en', 'fr', 'de', 'es', 'it', 'ja', 'zh', 'ru', 'pt', 'nl', 'sv', 'fi', 'no', 'da', 'pl', 'cs', 'hu', 'ro', 'tr', 'el', 'he', 'ar', 'fa', 'hi', 'th', 'ko', 'vi', 'id', 'ms', 'tl', 'bn', 'ta']
|
|
}
|
|
|
|
# Cloudflare
|
|
CLOUDFLARE_ZONE_ID = None
|
|
CLOUDFLARE_API_TOKEN = None
|
|
|
|
# Monitoring
|
|
SENTRY_DSN = None
|
|
SENTRY_ENVIRONMENT = 'development'
|
|
STATSD_HOST = None
|
|
STATSD_PORT = 8125
|
|
|
|
# Rate Limiting
|
|
RATE_LIMITER = 'null' # 'null', 'simple', 'redis'
|
|
RATE_LIMIT_REQUESTS = 100
|
|
RATE_LIMIT_WINDOW = 60
|
|
|
|
# Crawler
|
|
CRAWLER_INTERVAL = 3600 # 1 hour
|
|
CRAWLER_BATCH_SIZE = 100
|
|
CRAWLER_INVALIDATE_ONLY = False
|
|
|
|
# Security
|
|
INVALIDATE_APIKEY = 'replaceme'
|
|
CORS_ORIGINS = ['*']
|
|
```
|
|
|
|
#### DevelopmentConfig
|
|
|
|
**Development-specific overrides**:
|
|
|
|
```python
|
|
class DevelopmentConfig(DefaultConfig):
|
|
"""Development configuration"""
|
|
|
|
DEBUG = True
|
|
TESTING = False
|
|
|
|
# Disable Sentry in development
|
|
SENTRY_DSN = None
|
|
|
|
# Use null rate limiter
|
|
RATE_LIMITER = 'null'
|
|
|
|
# Shorter cache TTL for testing
|
|
CACHE = {
|
|
**DefaultConfig.CACHE,
|
|
'default_ttl': 300 # 5 minutes
|
|
}
|
|
```
|
|
|
|
#### TestConfig
|
|
|
|
**Test-specific configuration**:
|
|
|
|
```python
|
|
class TestConfig(DefaultConfig):
|
|
"""Test configuration"""
|
|
|
|
DEBUG = False
|
|
TESTING = True
|
|
|
|
# In-memory SQLite for cache
|
|
CACHE = {
|
|
'redis_url': 'redis://localhost:6379/15', # Separate DB
|
|
'postgres_url': 'sqlite:///:memory:',
|
|
'namespace': 'test',
|
|
'default_ttl': 60
|
|
}
|
|
|
|
# Mock external APIs
|
|
FANART_API_KEY = 'test-key'
|
|
SPOTIFY_CLIENT_ID = 'test-client-id'
|
|
SPOTIFY_CLIENT_SECRET = 'test-client-secret'
|
|
|
|
# Disable Sentry
|
|
SENTRY_DSN = None
|
|
```
|
|
|
|
#### ProductionConfig
|
|
|
|
**Production-specific configuration**:
|
|
|
|
```python
|
|
class ProductionConfig(DefaultConfig):
|
|
"""Production configuration"""
|
|
|
|
DEBUG = False
|
|
TESTING = False
|
|
|
|
# Use Redis rate limiter
|
|
RATE_LIMITER = 'redis'
|
|
|
|
# Longer cache TTL
|
|
CACHE = {
|
|
**DefaultConfig.CACHE,
|
|
'default_ttl': 2592000 # 30 days
|
|
}
|
|
|
|
# Enable Sentry
|
|
SENTRY_ENVIRONMENT = 'production'
|
|
```
|
|
|
|
### Configuration Selection
|
|
|
|
**Environment variable**:
|
|
```bash
|
|
export LIDARR_METADATA_CONFIG=lidarrmetadata.config.ProductionConfig
|
|
```
|
|
|
|
**Loading configuration**:
|
|
```python
|
|
import os
|
|
import importlib
|
|
|
|
def load_config():
|
|
"""Load configuration from environment variable"""
|
|
config_path = os.environ.get(
|
|
'LIDARR_METADATA_CONFIG',
|
|
'lidarrmetadata.config.DefaultConfig'
|
|
)
|
|
|
|
module_path, class_name = config_path.rsplit('.', 1)
|
|
module = importlib.import_module(module_path)
|
|
config_class = getattr(module, class_name)
|
|
|
|
return config_class
|
|
```
|
|
|
|
### Environment Variable Override Examples
|
|
|
|
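**Simple override** (the variable name is `<CLASSNAME>_<ATTRIBUTE>`, both upper-cased, so the prefix must match the name of the config class being read, e.g. `PRODUCTIONCONFIG_PORT` for `ProductionConfig`):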
|
|
```bash
|
|
export DEFAULTCONFIG_PORT=8080
|
|
export DEFAULTCONFIG_DEBUG=true
|
|
```
|
|
|
|
**Nested override** (double underscore):
|
|
```bash
|
|
export DEFAULTCONFIG_DATABASE__HOST=musicbrainz-db
|
|
export DEFAULTCONFIG_DATABASE__PORT=5433
|
|
export DEFAULTCONFIG_CACHE__REDIS_URL=redis://redis:6379/1
|
|
```
|
|
|
|
**Type conversion**:
|
|
- Booleans: `true`, `1`, `yes` (case-insensitive) → `True`; any other value → `False`
|
|
- Integers: `"5001"` → `5001`
|
|
- Floats: `"3.14"` → `3.14`
|
|
- Strings: No conversion
|
|
|
|
## Logging System
|
|
|
|
### Logger Configuration
|
|
|
|
**File**: `lidarrmetadata/util.py`
|
|
|
|
**Implementation**:
|
|
|
|
```python
|
|
import logging
|
|
import sys
|
|
|
|
def setup_logging(config):
|
|
"""Configure logging for application"""
|
|
|
|
# Root logger
|
|
root_logger = logging.getLogger()
|
|
root_logger.setLevel(logging.DEBUG if config.DEBUG else logging.INFO)
|
|
|
|
# Console handler
|
|
console_handler = logging.StreamHandler(sys.stdout)
|
|
console_handler.setLevel(logging.DEBUG if config.DEBUG else logging.INFO)
|
|
|
|
# Formatter
|
|
formatter = logging.Formatter(
|
|
'%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
|
datefmt='%Y-%m-%d %H:%M:%S'
|
|
)
|
|
console_handler.setFormatter(formatter)
|
|
|
|
root_logger.addHandler(console_handler)
|
|
|
|
# Suppress noisy loggers
|
|
logging.getLogger('asyncio').setLevel(logging.WARNING)
|
|
logging.getLogger('aiohttp').setLevel(logging.WARNING)
|
|
logging.getLogger('urllib3').setLevel(logging.WARNING)
|
|
```
|
|
|
|
### Per-Module Loggers
|
|
|
|
**Usage pattern**:
|
|
|
|
```python
|
|
import logging
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Debug logging
|
|
logger.debug(f"Fetching artist {mbid} from cache")
|
|
|
|
# Info logging
|
|
logger.info(f"Artist {mbid} cache miss, querying database")
|
|
|
|
# Warning logging
|
|
logger.warning(f"FanArt.tv timeout for artist {mbid}, using fallback")
|
|
|
|
# Error logging
|
|
logger.error(f"Failed to fetch artist {mbid}: {error}", exc_info=True)
|
|
```
|
|
|
|
### Log Levels
|
|
|
|
| Level | Usage | Example |
|-------|-------|---------|
| **DEBUG** | Detailed diagnostic info | Cache key generation, SQL queries |
| **INFO** | General informational messages | Request summaries, cache operations |
| **WARNING** | Warning messages | Provider timeouts, fallback usage |
| **ERROR** | Error messages | Unhandled exceptions, data inconsistencies |
| **CRITICAL** | Critical errors | Database connection failures |
|
|
|
|
## Sentry Integration
|
|
|
|
### Initialization
|
|
|
|
**File**: `lidarrmetadata/server.py`
|
|
|
|
**Implementation**:
|
|
|
|
```python
|
|
import sentry_sdk
|
|
from sentry_sdk.integrations.flask import FlaskIntegration
|
|
|
|
def init_sentry(config):
|
|
"""Initialize Sentry error tracking"""
|
|
|
|
if not config.SENTRY_DSN:
|
|
return
|
|
|
|
sentry_sdk.init(
|
|
dsn=config.SENTRY_DSN,
|
|
integrations=[FlaskIntegration()],
|
|
release=f"lidarr-metadata@{__version__}",
|
|
environment=config.SENTRY_ENVIRONMENT,
|
|
traces_sample_rate=0.1, # 10% transaction sampling
|
|
before_send=sentry_rate_limiter
|
|
)
|
|
```
|
|
|
|
### Redis-Based Rate Limiting
|
|
|
|
**Purpose**: Prevent duplicate error reports and alert fatigue
|
|
|
|
**Implementation**:
|
|
|
|
```python
|
|
import hashlib
|
|
import aioredis
|
|
|
|
redis = None # Global Redis connection
|
|
|
|
async def sentry_rate_limiter(event, hint):
|
|
"""Rate limit Sentry events using Redis"""
|
|
|
|
if not redis:
|
|
return event
|
|
|
|
# Generate error hash
|
|
error_type = event.get('exception', {}).get('type', 'unknown')
|
|
error_value = event.get('exception', {}).get('value', 'unknown')
|
|
error_hash = hashlib.md5(f"{error_type}:{error_value}".encode()).hexdigest()
|
|
|
|
key = f"lm3.7:sentry:{error_hash}"
|
|
|
|
# Check if error seen recently (1 hour window)
|
|
if await redis.exists(key):
|
|
logger.debug(f"Sentry event {error_hash} rate limited")
|
|
return None # Drop event
|
|
|
|
# Mark error as seen
|
|
await redis.setex(key, 3600, "1")
|
|
|
|
return event
|
|
```
|
|
|
|
### Error Context
|
|
|
|
**Adding context to Sentry events**:
|
|
|
|
```python
|
|
from sentry_sdk import configure_scope
|
|
|
|
async def get_artist(mbid):
|
|
"""Get artist metadata with Sentry context"""
|
|
|
|
with configure_scope() as scope:
|
|
scope.set_tag('entity_type', 'artist')
|
|
scope.set_tag('mbid', mbid)
|
|
scope.set_context('request', {
|
|
'mbid': mbid,
|
|
'endpoint': '/artist/{mbid}'
|
|
})
|
|
|
|
try:
|
|
artist = await fetch_artist(mbid)
|
|
return artist
|
|
except Exception as e:
|
|
scope.set_extra('error_details', str(e))
|
|
raise
|
|
```
|
|
|
|
### Release Tracking
|
|
|
|
**CI/CD integration**:
|
|
|
|
```bash
|
|
# Create Sentry release
|
|
sentry-cli releases new "lidarr-metadata@${GIT_SHA}"
|
|
|
|
# Associate commits
|
|
sentry-cli releases set-commits "lidarr-metadata@${GIT_SHA}" --auto
|
|
|
|
# Finalize release
|
|
sentry-cli releases finalize "lidarr-metadata@${GIT_SHA}"
|
|
```
|
|
|
|
## Telegraf Integration
|
|
|
|
### StatsD Client
|
|
|
|
**File**: `lidarrmetadata/util.py`
|
|
|
|
**Implementation**:
|
|
|
|
```python
|
|
import statsd
|
|
|
|
stats_client = None
|
|
|
|
def init_statsd(config):
|
|
"""Initialize StatsD client"""
|
|
|
|
global stats_client
|
|
|
|
if not config.STATSD_HOST:
|
|
stats_client = statsd.StatsClient(prefix='lidarr.metadata') # No-op client
|
|
return
|
|
|
|
stats_client = statsd.StatsClient(
|
|
host=config.STATSD_HOST,
|
|
port=config.STATSD_PORT,
|
|
prefix='lidarr.metadata'
|
|
)
|
|
```
|
|
|
|
### Metrics Collection
|
|
|
|
**Request counters**:
|
|
|
|
```python
|
|
# Increment request counter
|
|
stats_client.incr('requests.artist')
|
|
stats_client.incr('requests.album')
|
|
stats_client.incr('requests.search')
|
|
```
|
|
|
|
**Response time tracking**:
|
|
|
|
```python
|
|
# Timer context manager
|
|
with stats_client.timer('response_time.artist'):
|
|
artist = await get_artist(mbid)
|
|
|
|
# Manual timing
|
|
start = time.time()
|
|
artist = await get_artist(mbid)
|
|
duration = time.time() - start
|
|
stats_client.timing('response_time.artist', duration * 1000) # milliseconds
|
|
```
|
|
|
|
**Cache metrics**:
|
|
|
|
```python
|
|
# Cache hit
|
|
stats_client.incr('cache.hit')
|
|
stats_client.incr('cache.hit.redis')
|
|
|
|
# Cache miss
|
|
stats_client.incr('cache.miss')
|
|
stats_client.incr('cache.miss.redis')
|
|
|
|
# Cache size
|
|
stats_client.gauge('cache.size.redis', redis_key_count)
|
|
```
|
|
|
|
**Provider metrics**:
|
|
|
|
```python
|
|
# Provider request
|
|
stats_client.incr('provider.fanart.request')
|
|
stats_client.incr('provider.wikipedia.request')
|
|
|
|
# Provider error
|
|
stats_client.incr('provider.fanart.error')
|
|
|
|
# Provider timeout
|
|
stats_client.incr('provider.fanart.timeout')
|
|
```
|
|
|
|
## Health Checks
|
|
|
|
### Health Endpoint
|
|
|
|
**No dedicated health endpoint**: The root endpoint (`/`) serves as the health check
|
|
|
|
**Implementation**:
|
|
|
|
```python
|
|
@app.route('/')
|
|
async def root():
|
|
"""Health check and version info"""
|
|
return {
|
|
'version': __version__,
|
|
'status': 'healthy',
|
|
'timestamp': datetime.utcnow().isoformat()
|
|
}
|
|
```
|
|
|
|
### Component Health Checks
|
|
|
|
**Database health**:
|
|
|
|
```python
|
|
async def check_database_health():
|
|
"""Check MusicBrainz database connectivity"""
|
|
try:
|
|
async with db_pool.acquire() as conn:
|
|
await conn.fetchval('SELECT 1')
|
|
return True
|
|
except Exception as e:
|
|
logger.error(f"Database health check failed: {e}")
|
|
return False
|
|
```
|
|
|
|
**Redis health**:
|
|
|
|
```python
|
|
async def check_redis_health():
|
|
"""Check Redis connectivity"""
|
|
try:
|
|
await redis.ping()
|
|
return True
|
|
except Exception as e:
|
|
logger.error(f"Redis health check failed: {e}")
|
|
return False
|
|
```
|
|
|
|
**Solr health**:
|
|
|
|
```python
|
|
async def check_solr_health():
|
|
"""Check Solr connectivity"""
|
|
try:
|
|
async with aiohttp.ClientSession() as session:
|
|
url = f"{config.SOLR['url']}/admin/ping"
|
|
async with session.get(url, timeout=5) as response:
|
|
return response.status == 200
|
|
except Exception as e:
|
|
logger.error(f"Solr health check failed: {e}")
|
|
return False
|
|
```
|
|
|
|
## Authentication and Authorization
|
|
|
|
### API Key Authentication
|
|
|
|
**Implementation**: Single API key for invalidation endpoint only
|
|
|
|
**Configuration**:
|
|
```python
|
|
INVALIDATE_APIKEY = 'replaceme' # MUST change in production
|
|
```
|
|
|
|
**Middleware**:
|
|
|
|
```python
|
|
from quart import request, abort
|
|
|
|
async def require_api_key():
|
|
"""Require API key for protected endpoints"""
|
|
|
|
api_key = request.headers.get('X-Api-Key')
|
|
|
|
if not api_key:
|
|
abort(401, 'Missing API key')
|
|
|
|
if api_key != config.INVALIDATE_APIKEY:
|
|
abort(401, 'Invalid API key')
|
|
```
|
|
|
|
**Usage**:
|
|
|
|
```python
|
|
@app.route('/invalidate', methods=['POST'])
|
|
async def invalidate():
|
|
"""Invalidate cache (requires API key)"""
|
|
|
|
await require_api_key()
|
|
|
|
# Invalidation logic
|
|
...
|
|
```
|
|
|
|
### CORS Configuration
|
|
|
|
**Implementation**: Permissive CORS by default
|
|
|
|
**Configuration**:
|
|
```python
|
|
CORS_ORIGINS = ['*'] # Allow all origins
|
|
```
|
|
|
|
**Middleware**:
|
|
|
|
```python
|
|
from quart_cors import cors
|
|
|
|
app = cors(app, allow_origin=config.CORS_ORIGINS)
|
|
```
|
|
|
|
**Production recommendation**: Restrict to specific origins
|
|
|
|
```python
|
|
CORS_ORIGINS = ['https://lidarr.audio', 'https://app.lidarr.audio']
|
|
```
|
|
|
|
## Rate Limiting
|
|
|
|
### Rate Limiter Implementations
|
|
|
|
#### 1. NullRateLimiter (Default)
|
|
|
|
**Purpose**: No rate limiting, maximum throughput
|
|
|
|
**Implementation**:
|
|
|
|
```python
|
|
class NullRateLimiter:
|
|
"""No-op rate limiter"""
|
|
|
|
async def acquire(self, key: str):
|
|
"""Always allow request"""
|
|
pass
|
|
|
|
async def release(self, key: str):
|
|
"""No-op"""
|
|
pass
|
|
```
|
|
|
|
#### 2. SimpleRateLimiter
|
|
|
|
**Purpose**: In-memory rate limiting (single instance)
|
|
|
|
**Implementation**:
|
|
|
|
```python
|
|
import time
|
|
from collections import defaultdict
|
|
|
|
class SimpleRateLimiter:
|
|
"""In-memory rate limiter"""
|
|
|
|
def __init__(self, max_requests: int, window: int):
|
|
self.max_requests = max_requests
|
|
self.window = window
|
|
self.requests = defaultdict(list)
|
|
|
|
async def acquire(self, key: str):
|
|
"""Check and update rate limit"""
|
|
now = time.time()
|
|
|
|
# Remove old requests
|
|
self.requests[key] = [
|
|
req_time for req_time in self.requests[key]
|
|
if req_time > now - self.window
|
|
]
|
|
|
|
# Check limit
|
|
if len(self.requests[key]) >= self.max_requests:
|
|
raise RateLimitExceeded(
|
|
f"Rate limit exceeded: {len(self.requests[key])}/{self.max_requests}"
|
|
)
|
|
|
|
# Add current request
|
|
self.requests[key].append(now)
|
|
```
|
|
|
|
#### 3. RedisRateLimiter
|
|
|
|
**Purpose**: Distributed rate limiting (multiple instances)
|
|
|
|
**Implementation**:
|
|
|
|
```python
|
|
class RedisRateLimiter:
|
|
"""Redis-based distributed rate limiter"""
|
|
|
|
def __init__(self, redis, max_requests: int, window: int):
|
|
self.redis = redis
|
|
self.max_requests = max_requests
|
|
self.window = window
|
|
|
|
async def acquire(self, key: str):
|
|
"""Check and update rate limit using Redis"""
|
|
now = time.time()
|
|
window_key = f"lm3.7:ratelimit:{key}:{int(now / self.window)}"
|
|
|
|
# Increment counter
|
|
count = await self.redis.incr(window_key)
|
|
|
|
# Set expiration on first request
|
|
if count == 1:
|
|
await self.redis.expire(window_key, self.window)
|
|
|
|
# Check limit
|
|
if count > self.max_requests:
|
|
raise RateLimitExceeded(
|
|
f"Rate limit exceeded: {count}/{self.max_requests}"
|
|
)
|
|
```
|
|
|
|
### Rate Limiter Selection
|
|
|
|
**Configuration**:
|
|
```python
|
|
RATE_LIMITER = 'redis' # 'null', 'simple', 'redis'
|
|
RATE_LIMIT_REQUESTS = 100
|
|
RATE_LIMIT_WINDOW = 60
|
|
```
|
|
|
|
**Factory function**:
|
|
|
|
```python
|
|
def create_rate_limiter(config):
|
|
"""Create rate limiter based on configuration"""
|
|
|
|
if config.RATE_LIMITER == 'null':
|
|
return NullRateLimiter()
|
|
elif config.RATE_LIMITER == 'simple':
|
|
return SimpleRateLimiter(
|
|
max_requests=config.RATE_LIMIT_REQUESTS,
|
|
window=config.RATE_LIMIT_WINDOW
|
|
)
|
|
elif config.RATE_LIMITER == 'redis':
|
|
return RedisRateLimiter(
|
|
redis=redis,
|
|
max_requests=config.RATE_LIMIT_REQUESTS,
|
|
window=config.RATE_LIMIT_WINDOW
|
|
)
|
|
else:
|
|
raise ValueError(f"Unknown rate limiter: {config.RATE_LIMITER}")
|
|
```
|
|
|
|
## Testing
|
|
|
|
### Test Suite Structure
|
|
|
|
**Framework**: pytest with pytest-asyncio
|
|
|
|
**Test files**:
|
|
|
|
| File | Lines | Coverage | Description |
|------|-------|----------|-------------|
| `test_config.py` | 152 | High | Configuration system tests |
| `test_provider.py` | 98 | Medium | Provider mixin tests |
| `test_cache.py` | 87 | Medium | Cache layer tests |
| `test_api.py` | 76 | Low | API endpoint tests |
| `test_util.py` | 45 | High | Utility function tests |
| `test_app.py` | 34 | Low | Application initialization tests |
|
|
|
|
### Configuration Tests (Most Comprehensive)
|
|
|
|
**File**: `tests/test_config.py` (152 lines)
|
|
|
|
**Test cases**:
|
|
|
|
```python
|
|
import pytest
|
|
from lidarrmetadata.config import DefaultConfig, DevelopmentConfig, ProductionConfig
|
|
|
|
def test_default_config():
|
|
"""Test default configuration values"""
|
|
assert DefaultConfig.PORT == 5001
|
|
assert DefaultConfig.DEBUG is False
|
|
assert DefaultConfig.DATABASE['host'] == 'localhost'
|
|
|
|
def test_environment_override(monkeypatch):
|
|
"""Test environment variable override"""
|
|
monkeypatch.setenv('DEFAULTCONFIG_PORT', '8080')
|
|
assert DefaultConfig.PORT == 8080
|
|
|
|
def test_nested_override(monkeypatch):
|
|
"""Test nested environment variable override"""
|
|
monkeypatch.setenv('DEFAULTCONFIG_DATABASE__HOST', 'musicbrainz-db')
|
|
assert DefaultConfig.DATABASE__HOST == 'musicbrainz-db'
|
|
|
|
def test_boolean_conversion(monkeypatch):
|
|
"""Test boolean type conversion"""
|
|
monkeypatch.setenv('DEFAULTCONFIG_DEBUG', 'true')
|
|
assert DefaultConfig.DEBUG is True
|
|
|
|
monkeypatch.setenv('DEFAULTCONFIG_DEBUG', '1')
|
|
assert DefaultConfig.DEBUG is True
|
|
|
|
monkeypatch.setenv('DEFAULTCONFIG_DEBUG', 'false')
|
|
assert DefaultConfig.DEBUG is False
|
|
|
|
def test_integer_conversion(monkeypatch):
|
|
"""Test integer type conversion"""
|
|
monkeypatch.setenv('DEFAULTCONFIG_PORT', '9000')
|
|
assert DefaultConfig.PORT == 9000
|
|
assert isinstance(DefaultConfig.PORT, int)
|
|
|
|
def test_development_config():
|
|
"""Test development configuration"""
|
|
assert DevelopmentConfig.DEBUG is True
|
|
assert DevelopmentConfig.SENTRY_DSN is None
|
|
|
|
def test_production_config():
|
|
"""Test production configuration"""
|
|
assert ProductionConfig.DEBUG is False
|
|
assert ProductionConfig.RATE_LIMITER == 'redis'
|
|
```
|
|
|
|
### Provider Tests
|
|
|
|
**File**: `tests/test_provider.py`
|
|
|
|
**Test cases**:
|
|
|
|
```python
|
|
import pytest
|
|
from lidarrmetadata.provider import MusicbrainzDbProvider
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_get_artist_by_id():
|
|
"""Test artist lookup by MBID"""
|
|
provider = MusicbrainzDbProvider(config)
|
|
artist = await provider.get_artist_by_id('5b11f4ce-a62d-471e-81fc-a69a8278c7da')
|
|
|
|
assert artist is not None
|
|
assert artist['ArtistName'] == 'Nirvana'
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_get_artist_not_found():
|
|
"""Test artist lookup with invalid MBID"""
|
|
provider = MusicbrainzDbProvider(config)
|
|
artist = await provider.get_artist_by_id('00000000-0000-0000-0000-000000000000')
|
|
|
|
assert artist is None
|
|
```
|
|
|
|
### Cache Tests
|
|
|
|
**File**: `tests/test_cache.py`
|
|
|
|
**Test cases**:
|
|
|
|
```python
|
|
import pytest
|
|
from lidarrmetadata.cache import RedisCache, PostgresCache
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_redis_cache_set_get():
|
|
"""Test Redis cache set and get"""
|
|
cache = RedisCache(redis, namespace='test')
|
|
|
|
await cache.set('key1', {'value': 'test'}, ttl=60)
|
|
value = await cache.get('key1')
|
|
|
|
assert value == {'value': 'test'}
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_redis_cache_expiration():
|
|
"""Test Redis cache expiration"""
|
|
cache = RedisCache(redis, namespace='test')
|
|
|
|
await cache.set('key2', {'value': 'test'}, ttl=1)
|
|
await asyncio.sleep(2)
|
|
value = await cache.get('key2')
|
|
|
|
assert value is None
|
|
```
|
|
|
|
### Test Configuration
|
|
|
|
**pytest.ini**:
|
|
|
|
```ini
|
|
[pytest]
|
|
asyncio_mode = auto
|
|
testpaths = tests
|
|
python_files = test_*.py
|
|
python_classes = Test*
|
|
python_functions = test_*
|
|
addopts = -v --tb=short
|
|
```
|
|
|
|
**Test dependencies** (pyproject.toml):
|
|
|
|
```toml
|
|
[tool.poetry.dev-dependencies]
|
|
pytest = "^7.0"
|
|
pytest-asyncio = "^0.18"
|
|
pytest-cov = "^3.0"
|
|
pytest-mock = "^3.6"
|
|
```
|
|
|
|
### CI/CD Test Execution
|
|
|
|
**Azure Pipelines** (commented out):
|
|
|
|
```yaml
|
|
# - script: |
|
|
# poetry run pytest tests/ --cov=lidarrmetadata --cov-report=xml
|
|
# displayName: 'Run tests'
|
|
```
|
|
|
|
**Reason for disabling**: Tests require full infrastructure (MusicBrainz DB, Solr, Redis)
|
|
|
|
## Code Quality
|
|
|
|
### SonarCloud Integration
|
|
|
|
**Configuration**: `sonar-project.properties`
|
|
|
|
```properties
|
|
sonar.projectKey=Lidarr_LidarrAPI.Metadata
|
|
sonar.organization=lidarr
|
|
sonar.projectName=LidarrAPI.Metadata
|
|
sonar.projectVersion=10.0.0.0
|
|
|
|
sonar.sources=lidarrmetadata
|
|
sonar.tests=tests
|
|
sonar.python.version=3.9
|
|
|
|
sonar.coverage.exclusions=tests/**,lidarrmetadata/server.py,lidarrmetadata/crawler.py
|
|
```
|
|
|
|
### Code Formatting
|
|
|
|
**Tool**: Black (Python code formatter)
|
|
|
|
**Configuration**: `pyproject.toml`
|
|
|
|
```toml
|
|
[tool.black]
|
|
line-length = 100
|
|
target-version = ['py39']
|
|
include = '\.pyi?$'
|
|
exclude = '''
|
|
/(
|
|
\.git
|
|
| \.venv
|
|
| build
|
|
| dist
|
|
)/
|
|
'''
|
|
```
|
|
|
|
### Linting
|
|
|
|
**Tool**: Flake8
|
|
|
|
**Configuration**: `.flake8`
|
|
|
|
```ini
|
|
[flake8]
|
|
max-line-length = 100
|
|
exclude = .git,__pycache__,build,dist,.venv
|
|
ignore = E203,W503
|
|
```
|
|
|
|
## Security Considerations
|
|
|
|
### Hardcoded Credentials
|
|
|
|
**Problem**: Default credentials hardcoded throughout codebase
|
|
|
|
**Instances**:
|
|
- Database: `abc/abc`
|
|
- RabbitMQ: `abc/abc`
|
|
- Redis: No password
|
|
- API key: `replaceme`
|
|
|
|
**Recommendation**: Use environment variables or secrets management
|
|
|
|
### No Authentication on Read Endpoints
|
|
|
|
**Problem**: All read endpoints are publicly accessible
|
|
|
|
**Impact**: Anyone can query the API without authentication
|
|
|
|
**Recommendation**: Implement API key authentication or OAuth
|
|
|
|
### SQL Injection Protection
|
|
|
|
**Protection**: asyncpg parameterized queries
|
|
|
|
**Example**:
|
|
```python
|
|
# Safe (parameterized)
|
|
await conn.fetchrow("SELECT * FROM artist WHERE gid = $1", mbid)
|
|
|
|
# Unsafe (string interpolation) - NOT USED
|
|
# await conn.fetchrow(f"SELECT * FROM artist WHERE gid = '{mbid}'")
|
|
```
|
|
|
|
### Dependency Vulnerabilities
|
|
|
|
**Outdated dependencies**:
|
|
- Python 3.9 (EOL October 2025)
|
|
- aioredis 1.3.1 (deprecated)
|
|
- Quart 0.14.1 (5 years old)
|
|
- sentry-sdk 0.19.5 (major version behind)
|
|
|
|
**Recommendation**: Upgrade to latest versions
|
|
|
|
## Performance Optimizations
|
|
|
|
### Async/Await Throughout
|
|
|
|
**Pattern**: All I/O operations use async/await
|
|
|
|
**Benefits**:
|
|
- High concurrency
|
|
- Efficient resource usage
|
|
- Non-blocking operations
|
|
|
|
### Connection Pooling
|
|
|
|
**Database**:
|
|
```python
|
|
pool = await asyncpg.create_pool(
|
|
min_size=10,
|
|
max_size=50
|
|
)
|
|
```
|
|
|
|
**Redis**:
|
|
```python
|
|
redis = await aioredis.create_redis_pool(
|
|
minsize=5,
|
|
maxsize=20
|
|
)
|
|
```
|
|
|
|
### Parallel Fetching
|
|
|
|
**Pattern**: Fetch multiple data sources concurrently
|
|
|
|
**Example**:
|
|
```python
|
|
images_task = asyncio.create_task(get_images(mbid))
|
|
overview_task = asyncio.create_task(get_overview(mbid))
|
|
links_task = asyncio.create_task(get_links(mbid))
|
|
|
|
images = await images_task
|
|
overview = await overview_task
|
|
links = await links_task
|
|
```
|
|
|
|
### Compression
|
|
|
|
**Cache compression**: zlib compression of pickled objects
|
|
|
|
**Ratio**: Typically 10:1 for JSON metadata
|
|
|
|
**Trade-off**: CPU time for storage savings
|
|
|
|
## Conclusion
|
|
|
|
The codebase demonstrates several advanced patterns:
|
|
|
|
1. **Metaclass-based configuration**: Elegant environment variable override system
|
|
2. **Comprehensive logging**: Per-module loggers with appropriate levels
|
|
3. **Sentry integration**: Redis-based rate limiting prevents alert fatigue
|
|
4. **Telegraf metrics**: StatsD integration for operational visibility
|
|
5. **Multiple rate limiters**: Pluggable rate limiting strategies
|
|
6. **Async-first design**: All I/O operations use async/await
|
|
7. **Connection pooling**: Efficient resource management
|
|
|
|
Key weaknesses:
|
|
|
|
1. **Tests disabled in CI**: Infrastructure dependencies prevent automated testing
|
|
2. **Hardcoded credentials**: Insecure defaults throughout
|
|
3. **No read authentication**: Public API access
|
|
4. **Outdated dependencies**: Python 3.9, deprecated libraries
|
|
5. **Limited test coverage**: Only 6 test files, most of which are minimal
|
|
|
|
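The configuration system is particularly well-designed, allowing flexible deployment across environments: environment-variable overrides are coerced to the type of the default value (bool, int, float, or string). Note that the override values are not otherwise validated.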
|