- gRPC service with MusicBrainz provider
- PostgreSQL schema with migrations
- Service layer with database-first caching
- Repository pattern for data access
- YAML configuration support
- Research documentation for 17 music metadata projects
21 KiB
AcoustID Deployment
Deployment Overview
AcoustID supports multiple deployment models: production multi-server, Docker Compose for self-hosting, and local development. The system requires coordination between multiple services: PostgreSQL, Redis, NATS, the Python server, and the Zig index.
Docker Deployment
Server Docker Image
Dockerfile: docker/Dockerfile
Multi-Stage Build
Stage 1: Chromaprint Build
FROM ubuntu:24.04 AS chromaprint-build

# Toolchain plus the FFTW development package used for the build.
RUN apt-get update && apt-get install -y \
        git \
        cmake \
        build-essential \
        libfftw3-dev

WORKDIR /build

# Check out a pinned commit and build a Release library only
# (command-line tools and tests are switched off).
RUN git clone https://github.com/acoustid/chromaprint.git && \
    cd chromaprint && \
    git checkout 41a3e8fb && \
    cmake -DCMAKE_BUILD_TYPE=Release \
          -DBUILD_TOOLS=OFF \
          -DBUILD_TESTS=OFF . && \
    make -j$(nproc) && \
    make install
Stage 2: Base Image
FROM ubuntu:24.04 AS base

# Runtime packages only; the apt lists are removed in the same layer.
RUN apt-get update && apt-get install -y \
        python3.12 \
        python3-pip \
        libfftw3-3 \
        libpq5 \
    && rm -rf /var/lib/apt/lists/*

# Bring in the Chromaprint library and header produced by the build stage,
# then refresh the dynamic linker cache so the new .so is found.
COPY --from=chromaprint-build /usr/local/lib/libchromaprint.so* /usr/local/lib/
COPY --from=chromaprint-build /usr/local/include/chromaprint.h /usr/local/include/
RUN ldconfig
Stage 3: Builder
FROM base AS builder

# Compiler and development headers needed to build Python dependencies.
RUN apt-get update && apt-get install -y \
        build-essential \
        python3-dev \
        libpq-dev \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Install uv
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
# Current uv installers place the binary in ~/.local/bin; older releases used
# ~/.cargo/bin. Both are on PATH so the following `uv` steps work either way.
ENV PATH="/root/.local/bin:/root/.cargo/bin:$PATH"

WORKDIR /app

# Resolve dependencies from the lock file first so this layer is reused
# while only application sources change.
COPY pyproject.toml uv.lock ./
RUN uv sync --frozen --no-dev

COPY . .
RUN uv build
Stage 4: Final Image
FROM base AS final

# Create non-root user.
# ubuntu:24.04 ships a default "ubuntu" account that already owns UID 1000,
# which makes `useradd -u 1000` fail; drop it first (ignored when absent).
RUN (userdel -r ubuntu 2>/dev/null || true) && \
    useradd -m -u 1000 acoustid

WORKDIR /app

# Copy built wheel and dependencies
COPY --from=builder /app/.venv /app/.venv
COPY --from=builder /app/dist/*.whl /tmp/

# Install application
# NOTE(review): virtualenvs created by uv do not contain pip unless seeded;
# confirm /app/.venv/bin/pip exists in the builder stage.
RUN /app/.venv/bin/pip install /tmp/*.whl && rm /tmp/*.whl

# Copy configuration template
COPY acoustid.conf.dist /etc/acoustid/acoustid.conf.dist

USER acoustid
ENV PATH="/app/.venv/bin:$PATH"
ENV PYTHONUNBUFFERED=1

# Default invocation is `python manage.py run api`; compose services override
# the CMD part to select another mode.
ENTRYPOINT ["python", "manage.py"]
CMD ["run", "api"]
Image Size: ~400MB (compressed)
Base OS: Ubuntu 24.04
Python Version: 3.12
Index Docker Image
Dockerfile: docker/Dockerfile.index
FROM ubuntu:24.04 AS builder

# Tools needed to download and unpack the Zig toolchain.
RUN apt-get update && apt-get install -y \
        curl \
        xz-utils \
    && rm -rf /var/lib/apt/lists/*

# Install Zig
RUN curl -L https://ziglang.org/download/0.11.0/zig-linux-x86_64-0.11.0.tar.xz | \
        tar -xJ -C /usr/local && \
    ln -s /usr/local/zig-linux-x86_64-0.11.0/zig /usr/local/bin/zig

WORKDIR /build
COPY . .
RUN zig build -Doptimize=ReleaseFast

# Runtime stage: only the compiled binary is carried over.
FROM ubuntu:24.04

# ubuntu:24.04 ships a default "ubuntu" account that already owns UID 1000,
# which makes `useradd -u 1000` fail; drop it first (ignored when absent).
RUN (userdel -r ubuntu 2>/dev/null || true) && \
    useradd -m -u 1000 acoustid

WORKDIR /app
COPY --from=builder /build/zig-out/bin/fpindex /app/fpindex

# Data directory must be writable by the unprivileged user.
RUN mkdir -p /var/lib/acoustid-index && \
    chown acoustid:acoustid /var/lib/acoustid-index

USER acoustid
EXPOSE 6081
ENTRYPOINT ["/app/fpindex"]
CMD ["--dir", "/var/lib/acoustid-index", "--port", "6081"]
Image Size: ~50MB (compressed)
Base OS: Ubuntu 24.04
Binary: Single statically-linked executable
Docker Compose Configuration
File: docker-compose.yml
version: '3.8'

services:
  postgres:
    image: ghcr.io/acoustid/postgresql:17.4
    environment:
      POSTGRES_USER: acoustid
      POSTGRES_PASSWORD_FILE: /run/secrets/db_password
      POSTGRES_MULTIPLE_DATABASES: acoustid_app,acoustid_fingerprint,acoustid_ingest
    volumes:
      - postgres_data:/var/lib/postgresql/data
      - ./docker/init-db.sh:/docker-entrypoint-initdb.d/init-db.sh
    secrets:
      - db_password
    ports:
      - "5432:5432"
    healthcheck:
      # The first element must be NONE, CMD or CMD-SHELL; a single command
      # string needs the shell form.
      test: ["CMD-SHELL", "pg_isready -U acoustid"]
      interval: 10s
      timeout: 5s
      retries: 5

  redis:
    image: redis:7-alpine
    # redis-server cannot read its password from a file, so a shell wrapper
    # passes the secret's contents to --requirepass. Re-invoking the image
    # entrypoint keeps its usual startup handling. "$$" is Compose's escape
    # for a literal "$".
    command:
      - sh
      - -c
      - >-
        exec docker-entrypoint.sh redis-server
        --requirepass "$$(cat /run/secrets/redis_password)"
    volumes:
      - redis_data:/data
    secrets:
      - redis_password
    ports:
      - "6379:6379"
    healthcheck:
      # Authenticate before PING and require an actual PONG reply.
      test:
        - CMD-SHELL
        - >-
          redis-cli --no-auth-warning
          -a "$$(cat /run/secrets/redis_password)"
          ping | grep -q PONG
      interval: 10s
      timeout: 5s
      retries: 5

  nats:
    image: nats:2-alpine
    # Overriding the command drops the image's default config file, so the
    # HTTP monitoring port used by the healthcheck is enabled explicitly.
    command: -js -sd /data -m 8222
    volumes:
      - nats_data:/data
    ports:
      - "4222:4222"
      - "8222:8222"
    healthcheck:
      test: ["CMD", "wget", "-q", "-O-", "http://localhost:8222/healthz"]
      interval: 10s
      timeout: 5s
      retries: 5

  index:
    image: ghcr.io/acoustid/acoustid-index:latest
    command: >
      --dir /var/lib/acoustid-index
      --port 6081
      --threads 4
      --log-level info
    volumes:
      - index_data:/var/lib/acoustid-index
    ports:
      - "6081:6081"
    healthcheck:
      test: ["CMD", "wget", "-q", "-O-", "http://localhost:6081/_health"]
      interval: 10s
      timeout: 5s
      retries: 5
    profiles:
      - backend

  api:
    image: ghcr.io/acoustid/acoustid-server:latest
    command: run api
    environment:
      ACOUSTID_CONFIG: /etc/acoustid/acoustid.conf
    volumes:
      - ./acoustid.conf:/etc/acoustid/acoustid.conf:ro
    secrets:
      - db_password
      - redis_password
    ports:
      - "5000:5000"
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
      nats:
        condition: service_healthy
      index:
        condition: service_healthy
    healthcheck:
      test: ["CMD", "wget", "-q", "-O-", "http://localhost:5000/_health"]
      interval: 30s
      timeout: 10s
      retries: 3
    profiles:
      - frontend

  web:
    image: ghcr.io/acoustid/acoustid-server:latest
    command: run web
    environment:
      ACOUSTID_CONFIG: /etc/acoustid/acoustid.conf
    volumes:
      - ./acoustid.conf:/etc/acoustid/acoustid.conf:ro
    secrets:
      - db_password
      - redis_password
    ports:
      - "5001:5001"
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    healthcheck:
      test: ["CMD", "wget", "-q", "-O-", "http://localhost:5001/_health"]
      interval: 30s
      timeout: 10s
      retries: 3
    profiles:
      - frontend

  worker:
    image: ghcr.io/acoustid/acoustid-server:latest
    command: run worker
    environment:
      ACOUSTID_CONFIG: /etc/acoustid/acoustid.conf
    volumes:
      - ./acoustid.conf:/etc/acoustid/acoustid.conf:ro
    secrets:
      - db_password
      - redis_password
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
      nats:
        condition: service_healthy
      index:
        condition: service_healthy
    deploy:
      replicas: 2
    profiles:
      - backend

  cron:
    image: ghcr.io/acoustid/acoustid-server:latest
    command: run cron
    environment:
      ACOUSTID_CONFIG: /etc/acoustid/acoustid.conf
    volumes:
      - ./acoustid.conf:/etc/acoustid/acoustid.conf:ro
    secrets:
      - db_password
      - redis_password
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    profiles:
      - backend

volumes:
  postgres_data:
  redis_data:
  nats_data:
  index_data:

secrets:
  db_password:
    file: ./secrets/db_password.txt
  redis_password:
    file: ./secrets/redis_password.txt
Docker Compose Profiles
Frontend Profile (public-facing services):
docker compose --profile frontend up
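Services: api, web. Note: api declares depends_on: index, and index belongs to the backend profile, so the backend profile must be enabled as well (see Full Stack below); Compose rejects a project in which an enabled service depends on a service from a disabled profile.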
Backend Profile (background services):
docker compose --profile backend up
Services: index, worker, cron
Full Stack:
docker compose --profile frontend --profile backend up
Tools Profile (one-off commands):
docker compose run --rm tools python manage.py <command>
PostgreSQL Setup
Custom PostgreSQL Image
Image: ghcr.io/acoustid/postgresql:17.4
Base: postgres:17.4
Dockerfile: docker/Dockerfile.postgres
FROM postgres:17.4

# Build prerequisites for the custom acoustid extension.
# intarray, pgcrypto and cube are PostgreSQL contrib modules that come with
# the server packages in the base image; there are no separate
# postgresql-17-<name> apt packages for them, so requesting those would make
# apt-get fail. They only need CREATE EXTENSION at database init time.
RUN apt-get update && apt-get install -y \
        build-essential \
        postgresql-server-dev-17 \
    && rm -rf /var/lib/apt/lists/*

# Build acoustid extension
COPY extensions/acoustid /build/acoustid
WORKDIR /build/acoustid
RUN make && make install

# Copy initialization scripts
COPY docker/init-db.sh /docker-entrypoint-initdb.d/
Database Initialization
Script: docker/init-db.sh
#!/bin/bash
# Database bootstrap script: creates the three AcoustID databases, enables
# their extensions, then runs the schema migrations.
# Abort on the first failing command.
set -e
# Create multiple databases, enabling pgcrypto in each one.
# NOTE(review): `<<-` strips leading tabs only, so the EOSQL terminator must
# stay unindented or be indented with tabs, never spaces.
for db in acoustid_app acoustid_fingerprint acoustid_ingest; do
psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" <<-EOSQL
CREATE DATABASE $db;
\c $db
CREATE EXTENSION IF NOT EXISTS pgcrypto;
EOSQL
done
# Install extensions for fingerprint database
psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" -d acoustid_fingerprint <<-EOSQL
CREATE EXTENSION IF NOT EXISTS intarray;
CREATE EXTENSION IF NOT EXISTS cube;
CREATE EXTENSION IF NOT EXISTS acoustid;
EOSQL
# Run migrations
# NOTE(review): this assumes /app and a Python interpreter exist in the
# container that executes this script - confirm for the PostgreSQL image,
# otherwise `set -e` aborts initialization here.
cd /app
python manage.py db upgrade
Database Configuration
postgresql.conf (custom settings):
# Connection settings
max_connections = 200
shared_buffers = 4GB
effective_cache_size = 12GB
# Write-ahead log
wal_level = replica
max_wal_size = 2GB
min_wal_size = 1GB
# Query planner
random_page_cost = 1.1 # SSD
effective_io_concurrency = 200
# Parallel query
max_parallel_workers_per_gather = 4
max_parallel_workers = 8
# Logging
log_min_duration_statement = 1000 # Log slow queries (>1s)
log_line_prefix = '%t [%p]: [%l-1] user=%u,db=%d,app=%a,client=%h '
# Autovacuum
autovacuum_max_workers = 4
autovacuum_naptime = 10s
CI/CD Pipeline
GitHub Actions Workflows
File: .github/workflows/ci.yml
name: CI

on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main]

jobs:
  # Static checks: import order, formatting, style and typing.
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install uv
        run: curl -LsSf https://astral.sh/uv/install.sh | sh

      - name: Install dependencies
        run: uv sync

      - name: Run isort
        run: uv run isort --check-only acoustid/

      - name: Run black
        run: uv run black --check acoustid/

      - name: Run flake8
        run: uv run flake8 acoustid/

      - name: Run mypy
        run: uv run mypy acoustid/

  # Test suite, run against PostgreSQL, Redis and NATS service containers.
  test:
    runs-on: ubuntu-latest
    services:
      postgres:
        image: ghcr.io/acoustid/postgresql:17.4
        env:
          POSTGRES_USER: acoustid
          POSTGRES_PASSWORD: acoustid
          POSTGRES_DB: acoustid_test
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          - 5432:5432

      redis:
        image: redis:7-alpine
        options: >-
          --health-cmd "redis-cli ping"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          - 6379:6379

      nats:
        image: nats:2-alpine
        options: >-
          --health-cmd "wget -q -O- http://localhost:8222/healthz"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          - 4222:4222

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install uv
        run: curl -LsSf https://astral.sh/uv/install.sh | sh

      - name: Install dependencies
        run: uv sync

      - name: Run migrations
        run: uv run python manage.py db upgrade
        env:
          ACOUSTID_DATABASE_NAME: acoustid_test
          ACOUSTID_DATABASE_USER: acoustid
          ACOUSTID_DATABASE_PASSWORD: acoustid
          ACOUSTID_DATABASE_HOST: localhost

      - name: Run tests
        run: uv run pytest -v --cov=acoustid --cov-report=xml
        env:
          ACOUSTID_DATABASE_NAME: acoustid_test
          ACOUSTID_DATABASE_USER: acoustid
          ACOUSTID_DATABASE_PASSWORD: acoustid
          ACOUSTID_DATABASE_HOST: localhost
          ACOUSTID_REDIS_HOST: localhost
          ACOUSTID_NATS_SERVERS: nats://localhost:4222

      - name: Upload coverage
        uses: codecov/codecov-action@v4
        with:
          file: ./coverage.xml

  # Image publishing: push events only, after lint and test have passed.
  build:
    runs-on: ubuntu-latest
    needs: [lint, test]
    if: github.event_name == 'push'
    steps:
      - uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and push server image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: docker/Dockerfile
          push: true
          tags: |
            ghcr.io/acoustid/acoustid-server:latest
            ghcr.io/acoustid/acoustid-server:${{ github.sha }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Build and push index image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: docker/Dockerfile.index
          push: true
          tags: |
            ghcr.io/acoustid/acoustid-index:latest
            ghcr.io/acoustid/acoustid-index:${{ github.sha }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
Linting Tools
isort (import sorting):
# pyproject.toml
[tool.isort]
profile = "black"
line_length = 100
black (code formatting):
# pyproject.toml
[tool.black]
line-length = 100
target-version = ['py312']
flake8 (style checking):
# .flake8
[flake8]
max-line-length = 100
extend-ignore = E203, W503
exclude = .git,__pycache__,build,dist,.venv
mypy (type checking):
# pyproject.toml
[tool.mypy]
python_version = "3.12"
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = true
Testing
pytest configuration:
# pyproject.toml
[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
addopts = "-v --strict-markers --tb=short"
markers = [
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
"integration: marks tests as integration tests",
]
Test Files (24 total):
tests/
├── test_api_lookup.py
├── test_api_submit.py
├── test_fingerprint.py
├── test_indexclient.py
├── test_fpstore.py
├── test_data_account.py
├── test_data_fingerprint.py
├── test_data_track.py
├── test_data_musicbrainz.py
├── test_worker.py
├── test_cron.py
├── test_ratelimit.py
├── test_db.py
├── test_config.py
└── ...
Test Fixtures:
# tests/conftest.py
import pytest
from acoustid.db import create_engine, create_session
@pytest.fixture
def with_database():
    """Provide a test database session.

    Creates an engine for ``'acoustid_test'`` and a session on top of it.

    Yields:
        The session returned by ``create_session``. Once the test is done the
        session is rolled back and then closed.
    """
    engine = create_engine('acoustid_test')
    session = create_session(engine)
    try:
        yield session
    finally:
        # Close the session even when rollback() itself raises, so a failed
        # rollback cannot leak the session.
        try:
            session.rollback()
        finally:
            session.close()
@pytest.fixture
def with_script():
    """Yield a ``Script('test')`` after ``setup()``; call ``teardown()`` afterwards."""
    from acoustid.script import Script

    test_script = Script('test')
    test_script.setup()
    yield test_script
    test_script.teardown()
@pytest.fixture
def fingerprint_fixture():
    """Return the predefined fingerprint list used by tests."""
    # NOTE(review): the trailing `...` looks like an elision placeholder from
    # the listing; as written it is the Ellipsis object and is kept unchanged.
    sample_fingerprint = [123456789, 987654321, 456789123, ...]
    return sample_fingerprint
Infrastructure Requirements
Minimum Requirements (Self-Hosted)
| Component | CPU | RAM | Disk | Notes |
|---|---|---|---|---|
| PostgreSQL | 2 cores | 4 GB | 100 GB SSD | For small dataset |
| Redis | 1 core | 1 GB | 10 GB | Mostly in-memory |
| NATS | 1 core | 512 MB | 10 GB | JetStream storage |
| Index | 2 cores | 2 GB | 50 GB SSD | Depends on dataset size |
| API | 2 cores | 2 GB | 10 GB | Per instance |
| Worker | 2 cores | 2 GB | 10 GB | Per instance |
| Total | 10 cores | 11.5 GB | 190 GB | Single-host deployment |
Production Requirements (acoustid.org scale)
| Component | CPU | RAM | Disk | Instances | Notes |
|---|---|---|---|---|---|
| PostgreSQL | 16 cores | 64 GB | 2 TB NVMe | 1 primary + 2 replicas | High IOPS required |
| Redis | 4 cores | 16 GB | 100 GB SSD | 3 (cluster) | Persistence enabled |
| NATS | 4 cores | 8 GB | 500 GB SSD | 3 (cluster) | JetStream storage |
| Index | 8 cores | 16 GB | 1 TB NVMe | 4+ | Sharded by fingerprint ID |
| API | 4 cores | 8 GB | 50 GB | 4+ | Behind load balancer |
| Web | 2 cores | 4 GB | 50 GB | 2+ | Behind load balancer |
| Worker | 4 cores | 8 GB | 50 GB | 8+ | Auto-scaling |
| Cron | 2 cores | 4 GB | 50 GB | 1 | Leader election |
Network Requirements
Bandwidth:
- API: 100 Mbps per instance (burst to 1 Gbps)
- Index: 1 Gbps (internal network)
- Database: 1 Gbps (internal network)
Latency:
- API to Index: <5ms
- API to Database: <5ms
- API to Redis: <1ms
Monitoring and Observability
Health Checks
Endpoints:
- /_health: Full health check (database write test)
- /_health_ro: Read-only health check
- /_health_docker: Minimal health check for Docker
Kubernetes Probes:
livenessProbe:
  # Uses the minimal endpoint.
  httpGet:
    path: /_health_docker
    port: 5000
  initialDelaySeconds: 30
  periodSeconds: 10
  timeoutSeconds: 5
  failureThreshold: 3

readinessProbe:
  # Uses the read-only endpoint.
  httpGet:
    path: /_health_ro
    port: 5000
  initialDelaySeconds: 10
  periodSeconds: 5
  timeoutSeconds: 3
  failureThreshold: 2
Metrics
StatsD Metrics (server):
- api.requests_total{endpoint,method,status}
- api.request_duration_seconds{endpoint,method}
- api.handled_errors_total{error_code}
- api.unhandled_errors_total
- api.lookup.searches.total
- api.lookup.matches.total
- new_submissions
Prometheus Metrics (index):
- fpindex_search_duration_seconds
- fpindex_insert_duration_seconds
- fpindex_segment_count
- fpindex_memory_segment_size_bytes
- fpindex_file_segment_size_bytes
- fpindex_merge_duration_seconds
Logging
Log Levels:
- DEBUG: Detailed diagnostic information
- INFO: General informational messages
- WARNING: Warning messages
- ERROR: Error messages
- CRITICAL: Critical errors
Log Format:
%(asctime)s [%(process)d] [%(levelname)s] %(name)s: %(message)s
Environment Variables:
ACOUSTID_LOGGING_LEVEL=INFO
ACOUSTID_LOGGING_LEVEL_ACOUSTID=DEBUG
ACOUSTID_LOGGING_LEVEL_SQLALCHEMY=WARNING
Error Tracking
Sentry Integration:
# acoustid.conf
[sentry]
dsn = https://...@sentry.io/...
environment = production
traces_sample_rate = 0.1
Configuration:
import sentry_sdk
from sentry_sdk.integrations.flask import FlaskIntegration
# Start the Sentry SDK with the values held in config.sentry and enable the
# Flask integration.
sentry_sdk.init(
    integrations=[FlaskIntegration()],
    traces_sample_rate=config.sentry.traces_sample_rate,
    environment=config.sentry.environment,
    dsn=config.sentry.dsn,
)
Scaling Strategies
Horizontal Scaling
API/Web:
- Add more instances behind load balancer
- No shared state (stateless)
- Session data in Redis if needed
Workers:
- Add more instances
- NATS distributes work automatically
- No coordination required
Index:
- Shard by fingerprint ID
- Consistent hashing for distribution
- NATS for cluster coordination
Vertical Scaling
Database:
- Increase shared_buffers (25% of RAM)
- Increase effective_cache_size (50-75% of RAM)
- Add more CPU for parallel queries
Index:
- Increase thread count
- Larger memory segment
- Faster disk (NVMe)
Caching
Application-Level:
- API key cache (in-memory, 60s TTL)
- Format lookup cache (permanent)
- MBID existence cache (Redis, 1h TTL)
Database-Level:
- Connection pooling
- Query result caching
- Materialized views
Backup and Disaster Recovery
Backup Strategy
PostgreSQL:
# Daily full backup of every AcoustID database (custom-format dumps)
for db in acoustid_app acoustid_fingerprint acoustid_ingest; do
    pg_dump -Fc "$db" > "${db}_$(date +%Y%m%d).dump"
done
# Continuous WAL archiving (postgresql.conf setting; also needs archive_mode = on).
# The `test ! -f` guard refuses to overwrite a segment that is already archived.
archive_command = 'test ! -f /backup/wal/%f && cp %p /backup/wal/%f'
Index:
# Daily snapshot
curl -X GET http://index:6081/fingerprints/_snapshot
# Backup segment files
rsync -av /var/lib/acoustid-index/ /backup/index/
Redis:
# RDB snapshot (automatic)
save 900 1
save 300 10
save 60 10000
# AOF (append-only file)
appendonly yes
appendfsync everysec
Disaster Recovery
Recovery Time Objective (RTO): 1 hour
Recovery Point Objective (RPO): 5 minutes
Recovery Steps:
- Restore PostgreSQL from latest backup
- Replay WAL to point-in-time
- Restore Redis from RDB/AOF
- Restore index from snapshot
- Rebuild index from database if needed
- Restart all services
- Verify health checks