a1f6701bac
- gRPC service with MusicBrainz provider - PostgreSQL schema with migrations - Service layer with database-first caching - Repository pattern for data access - YAML configuration support - Research documentation for 17 music metadata projects
737 lines
14 KiB
Markdown
737 lines
14 KiB
Markdown
# MusicBrainz Server Codebase
|
|
|
|
## Configuration System
|
|
|
|
### Two-Tier Architecture
|
|
|
|
**File:** `lib/DBDefs.pm`
|
|
|
|
**Structure:**
|
|
1. `lib/DBDefs/Default.pm` - Base defaults (in git)
|
|
2. `lib/DBDefs.pm` - Instance-specific overrides (not in git)
|
|
|
|
**Pattern:**
|
|
```perl
|
|
package DBDefs;
|
|
use parent 'DBDefs::Default';
|
|
|
|
# Override defaults for this instance
|
|
sub DB_SCHEMA_SEQUENCE { 28 }
|
|
sub DB_STAGING_SERVER { 0 }
|
|
sub REPLICATION_TYPE { RT_MASTER }
|
|
```
|
|
|
|
### Configuration Categories
|
|
|
|
**Database Configuration:**
|
|
```perl
|
|
# Primary database
|
|
sub READWRITE_DATABASE {
|
|
return {
|
|
database => 'musicbrainz_db',
|
|
host => 'localhost',
|
|
port => 5432,
|
|
username => 'musicbrainz',
|
|
password => 'musicbrainz',
|
|
};
|
|
}
|
|
|
|
# Read-only replica (optional)
|
|
sub READONLY_DATABASE { READWRITE_DATABASE }
|
|
|
|
# System user for maintenance
|
|
sub SYSTEM_USER { 'musicbrainz' }
|
|
|
|
# Schema version
|
|
sub DB_SCHEMA_SEQUENCE { 28 }
|
|
|
|
# Staging server flag
|
|
sub DB_STAGING_SERVER { 0 }
|
|
```
|
|
|
|
**Redis Configuration:**
|
|
```perl
|
|
# Redis server
|
|
sub REDIS_SERVER { 'localhost:6379' }
|
|
|
|
# Redis namespace (prefix for all keys)
|
|
sub REDIS_NAMESPACE { 'MB' }
|
|
|
|
# Redis databases (0-15)
|
|
sub REDIS_DATABASE_CACHE { 0 }
|
|
sub REDIS_DATABASE_SESSION { 1 }
|
|
sub REDIS_DATABASE_SEARCH { 2 }
|
|
sub REDIS_DATABASE_STATS { 3 }
|
|
```
|
|
|
|
**Solr Configuration:**
|
|
```perl
|
|
# Solr server
|
|
sub SOLR_SERVER { 'http://localhost:8983/solr' }
|
|
|
|
# Solr cores
|
|
sub SOLR_CORE_ARTIST { 'artist' }
|
|
sub SOLR_CORE_RELEASE { 'release' }
|
|
sub SOLR_CORE_RECORDING { 'recording' }
|
|
# ... (13 cores total)
|
|
```
|
|
|
|
**Web Server Configuration:**
|
|
```perl
|
|
# Server processes
|
|
sub WEB_SERVER_PROCESSES { 10 }
|
|
|
|
# Server host
|
|
sub WEB_SERVER_HOST { 'localhost' }
|
|
|
|
# Server port
|
|
sub WEB_SERVER_PORT { 5000 }
|
|
|
|
# Use reverse proxy
|
|
sub WEB_SERVER_USED_IN_REVERSE_PROXY { 1 }
|
|
```
|
|
|
|
**Mail Configuration:**
|
|
```perl
|
|
# SMTP server
|
|
sub SMTP_SERVER { 'localhost' }
|
|
|
|
# From address
|
|
sub EMAIL_SUPPORT_ADDRESS { 'support@musicbrainz.org' }
|
|
|
|
# Noreply address
|
|
sub EMAIL_NOREPLY_ADDRESS { 'noreply@musicbrainz.org' }
|
|
|
|
# Bugs address
|
|
sub EMAIL_BUGS_ADDRESS { 'bugs@musicbrainz.org' }
|
|
```
|
|
|
|
**External Service Configuration:**
|
|
```perl
|
|
# Cover Art Archive
|
|
sub COVER_ART_ARCHIVE_ACCESS_KEY { '' }
|
|
sub COVER_ART_ARCHIVE_SECRET_KEY { '' }
|
|
sub COVER_ART_ARCHIVE_UPLOAD_PREFIXER { 'MB' }
|
|
sub COVER_ART_ARCHIVE_DOWNLOAD_PREFIX { 'https://coverartarchive.org' }
|
|
|
|
# Wikipedia
|
|
sub WIKIPEDIA_CACHE_TIMEOUT { 259200 } # 3 days
|
|
|
|
# Discourse SSO
|
|
sub DISCOURSE_SSO_SECRET { '' }
|
|
sub DISCOURSE_SERVER { 'https://community.metabrainz.org' }
|
|
|
|
# MetaBrainz OAuth
|
|
sub OAUTH2_ENFORCE_TLS { 1 }
|
|
```
|
|
|
|
**Replication Configuration:**
|
|
```perl
|
|
# Replication type
|
|
sub REPLICATION_TYPE { RT_STANDALONE } # RT_MASTER, RT_MIRROR, RT_STANDALONE
|
|
|
|
# Replication access token
|
|
sub REPLICATION_ACCESS_TOKEN { '' }
|
|
|
|
# Replication URL
|
|
sub REPLICATION_URL { 'https://data.musicbrainz.org/replication' }
|
|
```
|
|
|
|
**Session Configuration:**
|
|
```perl
|
|
# Session expiry (10 hours)
|
|
sub SESSION_EXPIRE { 36000 }
|
|
|
|
# Session idle timeout (3 hours)
|
|
sub SESSION_IDLE_TIMEOUT { 10800 }
|
|
|
|
# Session cookie name
|
|
sub SESSION_COOKIE { 'AF_SID' }
|
|
|
|
# Session cookie domain
|
|
sub SESSION_DOMAIN { '.musicbrainz.org' }
|
|
```
|
|
|
|
**Feature Flags:**
|
|
```perl
|
|
# Enable beta features
|
|
sub BETA_FEATURES { 0 }
|
|
|
|
# Enable development mode
|
|
sub DEVELOPMENT_SERVER { 0 }
|
|
|
|
# Enable debug mode
|
|
sub DEBUG { 0 }
|
|
|
|
# Put the database in read-only mode
|
|
sub DB_READ_ONLY { 0 }
|
|
```
|
|
|
|
**Rate Limiting:**
|
|
```perl
|
|
# API rate limit (requests per second)
|
|
sub API_RATE_LIMIT { 1 }
|
|
|
|
# Web rate limit (requests per second)
|
|
sub WEB_RATE_LIMIT { 10 }
|
|
```
|
|
|
|
**Caching:**
|
|
```perl
|
|
# Cache TTL for entities (seconds)
|
|
sub CACHE_TTL_ENTITY { 3600 } # 1 hour
|
|
|
|
# Cache TTL for search results (seconds)
|
|
sub CACHE_TTL_SEARCH { 900 } # 15 minutes
|
|
|
|
# Cache TTL for statistics (seconds)
|
|
sub CACHE_TTL_STATS { 3600 } # 1 hour
|
|
```
|
|
|
|
## Logging System
|
|
|
|
### Log::Dispatch Framework
|
|
|
|
**Configuration:**
|
|
```perl
|
|
use Log::Dispatch;
|
|
|
|
my $log = Log::Dispatch->new(
|
|
outputs => [
|
|
[
|
|
'Screen',
|
|
min_level => 'debug',
|
|
stderr => 1,
|
|
newline => 1,
|
|
],
|
|
[
|
|
'File',
|
|
min_level => 'info',
|
|
filename => '/var/log/musicbrainz/server.log',
|
|
mode => 'append',
|
|
newline => 1,
|
|
],
|
|
],
|
|
);
|
|
```
|
|
|
|
### Log Levels
|
|
|
|
**DEBUG:** Verbose debugging information
|
|
```perl
|
|
$log->debug("Loading artist with GID: $gid");
|
|
```
|
|
|
|
**INFO:** Informational messages
|
|
```perl
|
|
$log->info("User $username logged in");
|
|
```
|
|
|
|
**WARN:** Warning messages
|
|
```perl
|
|
$log->warn("Cache miss for entity $gid");
|
|
```
|
|
|
|
**ERROR:** Error messages
|
|
```perl
|
|
$log->error("Failed to connect to database: $error");
|
|
```
|
|
|
|
**FATAL:** Fatal errors
|
|
```perl
|
|
$log->fatal("Database connection lost, shutting down");
|
|
```
|
|
|
|
### Message Limit
|
|
|
|
**Maximum Size:** 16KB per log message
|
|
|
|
**Truncation:** Messages exceeding 16KB are truncated with "..." suffix
|
|
|
|
**Rationale:** Prevent log flooding from large data dumps
|
|
|
|
### Lazy Evaluation
|
|
|
|
**Pattern:**
|
|
```perl
|
|
# Expensive operation only executed if debug level enabled
|
|
$log->debug(sub {
|
|
my $data = expensive_serialization($object);
|
|
return "Object data: $data";
|
|
});
|
|
```
|
|
|
|
**Benefits:**
|
|
- Avoid expensive operations when logging disabled
|
|
- Reduce CPU usage in production
|
|
|
|
### Stack Traces
|
|
|
|
**Automatic:** Stack traces included for ERROR and FATAL levels
|
|
|
|
**Format:**
|
|
```
|
|
ERROR: Failed to load artist
|
|
Stack trace:
|
|
at MusicBrainz::Server::Data::Artist::get_by_gid line 123
|
|
at MusicBrainz::Server::Controller::Artist::show line 45
|
|
at Catalyst::Action::execute line 67
|
|
```
|
|
|
|
### Log Rotation
|
|
|
|
**Tool:** logrotate
|
|
|
|
**Configuration:**
|
|
```
|
|
/var/log/musicbrainz/*.log {
|
|
daily
|
|
rotate 30
|
|
compress
|
|
delaycompress
|
|
notifempty
|
|
create 0640 musicbrainz musicbrainz
|
|
sharedscripts
|
|
postrotate
|
|
/usr/bin/killall -HUP starman
|
|
endscript
|
|
}
|
|
```
|
|
|
|
## Error Tracking (Sentry)
|
|
|
|
### Server-Side Integration
|
|
|
|
**Library:** Sentry::Raven (Perl SDK)
|
|
|
|
**Configuration:**
|
|
```perl
|
|
use Sentry::Raven;
|
|
|
|
my $raven = Sentry::Raven->new(
|
|
sentry_dsn => 'https://public_key@sentry.io/project_id',
|
|
environment => 'production',
|
|
release => '2024.01.15',
|
|
);
|
|
```
|
|
|
|
**Capture Exception:**
|
|
```perl
|
|
eval {
|
|
# Code that might fail
|
|
$c->model('Artist')->get_by_gid($gid);
|
|
};
|
|
if ($@) {
|
|
$raven->capture_exception($@, {
|
|
request => {
|
|
url => $c->req->uri,
|
|
method => $c->req->method,
|
|
headers => $c->req->headers,
|
|
},
|
|
user => {
|
|
id => $c->user->id,
|
|
username => $c->user->name,
|
|
},
|
|
extra => {
|
|
gid => $gid,
|
|
},
|
|
});
|
|
}
|
|
```
|
|
|
|
### Client-Side Integration
|
|
|
|
**Library:** @sentry/browser (JavaScript SDK)
|
|
|
|
**Configuration:**
|
|
```javascript
|
|
import * as Sentry from '@sentry/browser';
|
|
|
|
Sentry.init({
|
|
dsn: 'https://public_key@sentry.io/project_id',
|
|
environment: 'production',
|
|
release: '2024.01.15',
|
|
integrations: [
|
|
new Sentry.BrowserTracing(),
|
|
],
|
|
tracesSampleRate: 0.1,
|
|
});
|
|
```
|
|
|
|
**Capture Exception:**
|
|
```javascript
|
|
try {
|
|
// Code that might fail
|
|
loadArtist(gid);
|
|
} catch (error) {
|
|
Sentry.captureException(error, {
|
|
tags: {
|
|
component: 'ArtistPage',
|
|
},
|
|
extra: {
|
|
gid: gid,
|
|
},
|
|
});
|
|
}
|
|
```
|
|
|
|
### Context Enrichment
|
|
|
|
**Request Context:**
|
|
- URL
|
|
- HTTP method
|
|
- Headers
|
|
- Query parameters
|
|
- POST data (sanitized)
|
|
|
|
**User Context:**
|
|
- User ID
|
|
- Username
|
|
- Email (hashed)
|
|
- IP address (anonymized)
|
|
|
|
**Custom Context:**
|
|
- Entity GID
|
|
- Edit ID
|
|
- Search query
|
|
- API endpoint
|
|
|
|
## Monitoring
|
|
|
|
### Current State
|
|
|
|
**Metrics Endpoint:** None (no Prometheus exporter)
|
|
|
|
**Health Check Endpoint:** None (no `/health` endpoint)
|
|
|
|
**Workarounds:**
|
|
- Monitor HTTP 200 responses on `/`
|
|
- Parse logs for error rates
|
|
- Monitor database connection count
|
|
- Monitor Redis memory usage
|
|
|
|
### Planned Improvements
|
|
|
|
**Prometheus Exporter:**
|
|
- Request count by endpoint
|
|
- Request duration histogram
|
|
- Database query count
|
|
- Database query duration
|
|
- Cache hit/miss ratio
|
|
- Edit submission rate
|
|
- Vote count
|
|
|
|
**Health Check Endpoint:**
|
|
- Database connectivity
|
|
- Redis connectivity
|
|
- Solr connectivity
|
|
- Disk space
|
|
- Memory usage
|
|
|
|
## Session Management
|
|
|
|
### Redis-Backed Sessions
|
|
|
|
**Storage:** Redis database 1
|
|
|
|
**Session Key:** `session:{session_id}`
|
|
|
|
**Session Data:**
|
|
```json
|
|
{
|
|
"user_id": 12345,
|
|
"username": "user",
|
|
"csrf_token": "abc123...",
|
|
"last_activity": 1609459200,
|
|
"preferences": {
|
|
"language": "en",
|
|
"timezone": "UTC"
|
|
}
|
|
}
|
|
```
|
|
|
|
### Session Lifecycle
|
|
|
|
**Creation:**
|
|
```perl
|
|
my $session_id = generate_session_id(); # Random 32-byte hex
|
|
my $session_data = {
|
|
user_id => $user->id,
|
|
csrf_token => generate_csrf_token(),
|
|
last_activity => time(),
|
|
};
|
|
|
|
$redis->setex(
|
|
"session:$session_id",
|
|
36000, # 10 hours
|
|
encode_json($session_data)
|
|
);
|
|
|
|
$c->res->cookies->{AF_SID} = {
|
|
value => $session_id,
|
|
path => '/',
|
|
domain => '.musicbrainz.org',
|
|
secure => 1,
|
|
httponly => 1,
|
|
samesite => 'Lax',
|
|
};
|
|
```
|
|
|
|
**Validation:**
|
|
```perl
|
|
my $session_id = $c->req->cookies->{AF_SID};
|
|
my $session_json = $redis->get("session:$session_id");
|
|
|
|
if (!$session_json) {
|
|
# Session expired or invalid
|
|
return undef;
|
|
}
|
|
|
|
my $session_data = decode_json($session_json);
|
|
|
|
# Check idle timeout
|
|
my $idle_time = time() - $session_data->{last_activity};
|
|
if ($idle_time > 10800) { # 3 hours
|
|
$redis->del("session:$session_id");
|
|
return undef;
|
|
}
|
|
|
|
# Update last activity
|
|
$session_data->{last_activity} = time();
|
|
$redis->setex("session:$session_id", 36000, encode_json($session_data));
|
|
|
|
return $session_data;
|
|
```
|
|
|
|
**Destruction:**
|
|
```perl
|
|
$redis->del("session:$session_id");
|
|
$c->res->cookies->{AF_SID} = {
|
|
value => '',
|
|
expires => '-1d',
|
|
};
|
|
```
|
|
|
|
### Session Expiry
|
|
|
|
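**Absolute Expiry:** 10 hours (36,000 seconds) — this is the TTL of the Redis key. Because the validation code re-issues `SETEX` with the full TTL on every validated request, the 10 hours are effectively counted from the last validated request rather than from session creation.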
|
|
|
|
**Idle Timeout:** 3 hours (10,800 seconds)
|
|
|
|
**Sliding Window:** Last activity updated on each request
|
|
|
|
### Cookie Configuration
|
|
|
|
**Name:** `AF_SID`
|
|
|
|
**Attributes:**
|
|
- `Secure` - HTTPS only
|
|
- `HttpOnly` - Not accessible via JavaScript
|
|
- `SameSite=Lax` - CSRF protection
|
|
- `Domain=.musicbrainz.org` - Shared across subdomains
|
|
- `Path=/` - Available site-wide
|
|
|
|
## Security
|
|
|
|
### CSRF Protection
|
|
|
|
**Token Generation:**
|
|
```perl
|
|
use Digest::SHA qw(sha256_hex);
|
|
|
|
my $csrf_token = sha256_hex(
|
|
$session_id .
|
|
$user_id .
|
|
time() .
|
|
random_bytes(32)
|
|
);
|
|
```
|
|
|
|
**Token Storage:** Stored in session data
|
|
|
|
**Token Validation:**
|
|
```perl
|
|
sub validate_csrf_token {
|
|
my ($c, $submitted_token) = @_;
|
|
|
|
my $session_token = $c->session->{csrf_token};
|
|
|
|
if (!$session_token || $submitted_token ne $session_token) {
|
|
$c->detach('/error_403');
|
|
}
|
|
}
|
|
```
|
|
|
|
**Form Inclusion:**
|
|
```html
|
|
<form method="POST" action="/edit/artist/create">
|
|
<input type="hidden" name="csrf_token" value="[% csrf_token %]">
|
|
<!-- form fields -->
|
|
</form>
|
|
```
|
|
|
|
**AJAX Requests:**
|
|
```javascript
|
|
fetch('/api/endpoint', {
|
|
method: 'POST',
|
|
headers: {
|
|
'X-CSRF-Token': csrfToken,
|
|
'Content-Type': 'application/json',
|
|
},
|
|
body: JSON.stringify(data),
|
|
});
|
|
```
|
|
|
|
### Content Security Policy (CSP)
|
|
|
|
**Header:**
|
|
```
|
|
Content-Security-Policy:
|
|
default-src 'self';
|
|
script-src 'self' 'unsafe-inline' https://www.google-analytics.com;
|
|
style-src 'self' 'unsafe-inline';
|
|
img-src 'self' data: https:;
|
|
font-src 'self' data:;
|
|
connect-src 'self' https://sentry.io;
|
|
frame-ancestors 'none';
|
|
```
|
|
|
|
**Directives:**
|
|
- `default-src 'self'` - Only load resources from same origin
|
|
- `script-src` - Allow scripts from self, inline scripts (`'unsafe-inline'`), and Google Analytics
|
|
- `style-src` - Allow styles from self (inline allowed for legacy)
|
|
- `img-src` - Allow images from self, `data:` URIs, and any HTTPS origin (cover art, etc.)
|
|
- `connect-src` - Allow AJAX to self and Sentry
|
|
- `frame-ancestors 'none'` - Prevent clickjacking
|
|
|
|
### Authentication
|
|
|
|
**Realms:**
|
|
1. Session-based (cookie)
|
|
2. HTTP Digest (legacy)
|
|
3. OAuth2 Bearer token
|
|
|
|
**Session Authentication:**
|
|
```perl
|
|
sub authenticate_session {
|
|
my ($c) = @_;
|
|
|
|
my $session_id = $c->req->cookies->{AF_SID};
|
|
my $session = $c->model('Session')->load($session_id);
|
|
|
|
if ($session) {
|
|
my $user = $c->model('Editor')->get_by_id($session->{user_id});
|
|
$c->set_authenticated_user($user);
|
|
}
|
|
}
|
|
```
|
|
|
|
**OAuth2 Authentication:**
|
|
```perl
|
|
sub authenticate_oauth2 {
|
|
my ($c) = @_;
|
|
|
|
my $auth_header = $c->req->header('Authorization');
|
|
if ($auth_header =~ /^Bearer (.+)$/) {
|
|
my $token = $1;
|
|
my $token_info = $c->model('OAuth2')->introspect($token);
|
|
|
|
if ($token_info->{active}) {
|
|
my $user = $c->model('Editor')->get_by_id($token_info->{sub});
|
|
$c->set_authenticated_user($user);
|
|
}
|
|
}
|
|
}
|
|
```
|
|
|
|
### Password Hashing
|
|
|
|
**Algorithm:** Bcrypt
|
|
|
|
**Cost Factor:** 12 (2^12 = 4096 iterations)
|
|
|
|
**Hashing:**
|
|
```perl
|
|
use Crypt::Eksblowfish::Bcrypt qw(bcrypt en_base64);
|
|
|
|
sub hash_password {
|
|
my ($password) = @_;
|
|
|
|
my $salt = generate_salt(); # 16 random bytes
|
|
my $settings = '$2a$12$' . en_base64($salt);
|
|
|
|
return bcrypt($password, $settings);
|
|
}
|
|
```
|
|
|
|
**Verification:**
|
|
```perl
|
|
sub verify_password {
|
|
my ($password, $hash) = @_;
|
|
|
|
my $computed_hash = bcrypt($password, $hash);
|
|
|
|
return $computed_hash eq $hash;
|
|
}
|
|
```
|
|
|
|
**Password Requirements:**
|
|
- Minimum 8 characters
|
|
- No maximum length
|
|
- No complexity requirements (user choice)
|
|
|
|
### Editor Privileges
|
|
|
|
**Privilege Flags (Bitmask):**
|
|
|
|
| Flag | Value | Description |
|
|
|------|-------|-------------|
|
|
| `UNTRUSTED` | 1 | New user, limited privileges |
|
|
| `AUTOEDITOR` | 2 | Auto-editor, edits auto-approved |
|
|
| `BOT` | 4 | Bot account |
|
|
| `UNTRUSTED_BOT` | 5 | Untrusted bot (1 + 4) |
|
|
| `RELATIONSHIP_EDITOR` | 8 | Can edit relationships |
|
|
| `WIKI_TRANSCLUSION` | 16 | Can transclude wiki content |
|
|
| `MBID_SUBMITTER` | 32 | Can submit MBIDs |
|
|
| `ACCOUNT_ADMIN` | 64 | Can manage user accounts |
|
|
| `LOCATION_EDITOR` | 128 | Can edit locations |
|
|
| `BANNER_EDITOR` | 256 | Can edit site banners |
|
|
| `EDITING_DISABLED` | 512 | Editing disabled (banned) |
|
|
| `ADDING_NOTES_DISABLED` | 1024 | Cannot add edit notes |
|
|
| `SPAMMER` | 2048 | Marked as spammer |
|
|
| `AUTO_EDITOR_ELECTIONS` | 4096 | Can vote in auto-editor elections |
|
|
| `DONT_NAG` | 8192 | Don't show donation nag |
|
|
|
|
**Privilege Check:**
|
|
```perl
|
|
sub is_auto_editor {
|
|
my ($user) = @_;
|
|
return ($user->privs & 2) != 0;
|
|
}
|
|
|
|
sub can_edit_relationships {
|
|
my ($user) = @_;
|
|
return ($user->privs & 8) != 0;
|
|
}
|
|
```
|
|
|
|
### Auto-Editor Election System
|
|
|
|
**Eligibility:**
|
|
- 100+ accepted edits
|
|
- Member for 2+ weeks
|
|
- No recent failed votes
|
|
|
|
**Election Process:**
|
|
1. User nominates self or is nominated
|
|
2. 1-week voting period
|
|
3. Existing auto-editors vote
|
|
4. 75% approval required
|
|
5. Minimum 5 votes required
|
|
|
|
**Auto-Editor Benefits:**
|
|
- Edits auto-approved (no voting period)
|
|
- Can vote in elections
|
|
- Can approve/reject edits
|
|
- Higher trust level
|