diff --git a/config.example.yaml b/config.example.yaml index 797e639..a7b0121 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -1,11 +1,19 @@ port: 8082 # telemetry: -# endpoint: "localhost:4317" # OTLP gRPC endpoint (omit to disable export) -# insecure: true # disable TLS for local dev # service_name: "anthropic-proxy" -# headers: # optional auth headers (e.g. Grafana Cloud) -# Authorization: "Basic ..." +# export: +# endpoint: "localhost:4317" # OTLP gRPC endpoint (omit to disable export) +# insecure: true # disable TLS for local dev +# headers: # optional auth headers (e.g. Grafana Cloud) +# Authorization: "Basic ..." +# embedded: +# enabled: true # start embedded Perses dashboard + VictoriaMetrics +# port: 8080 # Perses dashboard port +# vm_port: 8428 # VictoriaMetrics listen port +# bin_dir: "" # download dir (default: ~/.cache/anthropic-proxy/bin) +# perses_binary: "" # custom path to perses binary (default: auto-download) +# vm_binary: "" # custom path to victoria-metrics binary (default: auto-download) logging: level: debug @@ -43,21 +51,21 @@ sanitize: - match: "Workspace root folder" replace: "Working directory" body: - - match: "anomalyco/opencode" + - match: "anthropics/claude-code" replace: "anthropics/claude-code" - - match: "anomalyco" + - match: "anthropic" replace: "anthropic" - - match: "oh-my-opencode" + - match: "system-directive" replace: "system-directive" - - match: "ohmyopencode" + - match: "claude-code" replace: "claude-code" - - match: "oh-my-openagent" + - match: "claude-agent" replace: "claude-agent" - - match: "omo_internal_initiator" + - match: "system_initiator" replace: "system_initiator" - - match: "call_omo_agent" + - match: "call_agent" replace: "call_agent" - - match: "opencode.ai" + - match: "claude.ai" replace: "claude.ai" - - match: "opencode" + - match: "agent" replace: "agent" diff --git a/examples/perses/dashboard.json b/examples/perses/dashboard.json new file mode 100644 index 0000000..f389f7b --- /dev/null 
+++ b/examples/perses/dashboard.json @@ -0,0 +1,450 @@ +{ + "kind": "Dashboard", + "metadata": { + "name": "proxy", + "createdAt": "2026-04-14T19:47:48.013238204Z", + "updatedAt": "2026-04-14T19:49:30.874125459Z", + "version": 1, + "project": "anthropic-proxy" + }, + "spec": { + "display": { + "name": "Anthropic Proxy" + }, + "datasources": { + "vm": { + "default": true, + "plugin": { + "kind": "PrometheusDatasource", + "spec": { + "directUrl": "http://localhost:9428" + } + } + } + }, + "panels": { + "latency": { + "kind": "Panel", + "spec": { + "display": { + "name": "Latency" + }, + "plugin": { + "kind": "TimeSeriesChart", + "spec": { + "legend": { + "position": "bottom" + }, + "yAxis": { + "format": { + "unit": "milliseconds" + } + } + } + }, + "queries": [ + { + "kind": "TimeSeriesQuery", + "spec": { + "plugin": { + "kind": "PrometheusTimeSeriesQuery", + "spec": { + "datasource": { + "kind": "PrometheusDatasource", + "name": "vm" + }, + "query": "histogram_quantile(0.50, rate(proxy_request_duration_ms_milliseconds_bucket[5m]))", + "seriesNameFormat": "p50" + } + } + } + }, + { + "kind": "TimeSeriesQuery", + "spec": { + "plugin": { + "kind": "PrometheusTimeSeriesQuery", + "spec": { + "datasource": { + "kind": "PrometheusDatasource", + "name": "vm" + }, + "query": "histogram_quantile(0.95, rate(proxy_request_duration_ms_milliseconds_bucket[5m]))", + "seriesNameFormat": "p95" + } + } + } + }, + { + "kind": "TimeSeriesQuery", + "spec": { + "plugin": { + "kind": "PrometheusTimeSeriesQuery", + "spec": { + "datasource": { + "kind": "PrometheusDatasource", + "name": "vm" + }, + "query": "histogram_quantile(0.99, rate(proxy_request_duration_ms_milliseconds_bucket[5m]))", + "seriesNameFormat": "p99" + } + } + } + } + ] + } + }, + "request_rate": { + "kind": "Panel", + "spec": { + "display": { + "name": "Request Rate" + }, + "plugin": { + "kind": "TimeSeriesChart", + "spec": { + "legend": { + "position": "bottom" + } + } + }, + "queries": [ + { + "kind": 
"TimeSeriesQuery", + "spec": { + "plugin": { + "kind": "PrometheusTimeSeriesQuery", + "spec": { + "datasource": { + "kind": "PrometheusDatasource", + "name": "vm" + }, + "query": "rate(proxy_request_count_total[5m])", + "seriesNameFormat": "req/s" + } + } + } + } + ] + } + }, + "token_rate": { + "kind": "Panel", + "spec": { + "display": { + "name": "Token Rate" + }, + "plugin": { + "kind": "TimeSeriesChart", + "spec": { + "legend": { + "position": "bottom" + } + } + }, + "queries": [ + { + "kind": "TimeSeriesQuery", + "spec": { + "plugin": { + "kind": "PrometheusTimeSeriesQuery", + "spec": { + "datasource": { + "kind": "PrometheusDatasource", + "name": "vm" + }, + "query": "rate(proxy_tokens_input_total[5m]) * 60", + "seriesNameFormat": "input/min" + } + } + } + }, + { + "kind": "TimeSeriesQuery", + "spec": { + "plugin": { + "kind": "PrometheusTimeSeriesQuery", + "spec": { + "datasource": { + "kind": "PrometheusDatasource", + "name": "vm" + }, + "query": "rate(proxy_tokens_output_total[5m]) * 60", + "seriesNameFormat": "output/min" + } + } + } + } + ] + } + }, + "tokens_5h": { + "kind": "Panel", + "spec": { + "display": { + "name": "5h Tokens" + }, + "plugin": { + "kind": "StatChart", + "spec": { + "calculation": "last", + "format": { + "unit": "decimal" + }, + "sparkline": {} + } + }, + "queries": [ + { + "kind": "TimeSeriesQuery", + "spec": { + "plugin": { + "kind": "PrometheusTimeSeriesQuery", + "spec": { + "datasource": { + "kind": "PrometheusDatasource", + "name": "vm" + }, + "query": "increase(proxy_tokens_output_total[3h])" + } + } + } + } + ] + } + }, + "tokens_7d": { + "kind": "Panel", + "spec": { + "display": { + "name": "7d Tokens" + }, + "plugin": { + "kind": "StatChart", + "spec": { + "calculation": "last", + "format": { + "unit": "decimal" + }, + "sparkline": {} + } + }, + "queries": [ + { + "kind": "TimeSeriesQuery", + "spec": { + "plugin": { + "kind": "PrometheusTimeSeriesQuery", + "spec": { + "datasource": { + "kind": "PrometheusDatasource", + 
"name": "vm" + }, + "query": "increase(proxy_tokens_output_total[9h])" + } + } + } + } + ] + } + }, + "util_5h": { + "kind": "Panel", + "spec": { + "display": { + "name": "5h Utilization" + }, + "plugin": { + "kind": "GaugeChart", + "spec": { + "calculation": "last", + "format": { + "unit": "percent" + }, + "thresholds": { + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 90 + } + ] + } + } + }, + "queries": [ + { + "kind": "TimeSeriesQuery", + "spec": { + "plugin": { + "kind": "PrometheusTimeSeriesQuery", + "spec": { + "datasource": { + "kind": "PrometheusDatasource", + "name": "vm" + }, + "query": "proxy_usage_utilization{window=\"5h\"}" + } + } + } + } + ] + } + }, + "util_7d": { + "kind": "Panel", + "spec": { + "display": { + "name": "7d Utilization" + }, + "plugin": { + "kind": "GaugeChart", + "spec": { + "calculation": "last", + "format": { + "unit": "percent" + }, + "thresholds": { + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 90 + } + ] + } + } + }, + "queries": [ + { + "kind": "TimeSeriesQuery", + "spec": { + "plugin": { + "kind": "PrometheusTimeSeriesQuery", + "spec": { + "datasource": { + "kind": "PrometheusDatasource", + "name": "vm" + }, + "query": "proxy_usage_utilization{window=\"7d\"}" + } + } + } + } + ] + } + } + }, + "layouts": [ + { + "kind": "Grid", + "spec": { + "display": { + "title": "Utilization" + }, + "items": [ + { + "x": 0, + "y": 0, + "width": 6, + "height": 5, + "content": { + "$ref": "#/spec/panels/util_5h" + } + }, + { + "x": 6, + "y": 0, + "width": 6, + "height": 5, + "content": { + "$ref": "#/spec/panels/util_7d" + } + }, + { + "x": 12, + "y": 0, + "width": 6, + "height": 5, + "content": { + "$ref": "#/spec/panels/tokens_5h" + } + }, + { + "x": 18, + "y": 0, + "width": 6, + "height": 5, + "content": { + "$ref": "#/spec/panels/tokens_7d" + } + } + ] + } + }, + { + 
"kind": "Grid", + "spec": { + "display": { + "title": "Traffic" + }, + "items": [ + { + "x": 0, + "y": 0, + "width": 12, + "height": 8, + "content": { + "$ref": "#/spec/panels/request_rate" + } + }, + { + "x": 12, + "y": 0, + "width": 12, + "height": 8, + "content": { + "$ref": "#/spec/panels/latency" + } + } + ] + } + }, + { + "kind": "Grid", + "spec": { + "display": { + "title": "Tokens" + }, + "items": [ + { + "x": 0, + "y": 0, + "width": 24, + "height": 8, + "content": { + "$ref": "#/spec/panels/token_rate" + } + } + ] + } + } + ], + "duration": "1h", + "refreshInterval": "10s" + } +} diff --git a/go.mod b/go.mod index e76d812..992ea04 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.26 require ( github.com/gin-gonic/gin v1.12.0 github.com/google/uuid v1.6.0 + github.com/prometheus/client_golang v1.23.2 github.com/refraction-networking/utls v1.8.2 github.com/rs/zerolog v1.35.0 github.com/tidwall/gjson v1.18.0 @@ -14,13 +15,13 @@ require ( go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.19.0 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.43.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0 + go.opentelemetry.io/otel/exporters/prometheus v0.65.0 go.opentelemetry.io/otel/log v0.19.0 go.opentelemetry.io/otel/metric v1.43.0 go.opentelemetry.io/otel/sdk v1.43.0 go.opentelemetry.io/otel/sdk/log v0.19.0 go.opentelemetry.io/otel/sdk/metric v1.43.0 golang.org/x/net v0.52.0 - google.golang.org/grpc v1.80.0 gopkg.in/lumberjack.v2 v2.0.0 gopkg.in/yaml.v3 v3.0.1 ) @@ -28,6 +29,7 @@ require ( require ( github.com/BurntSushi/toml v1.6.0 // indirect github.com/andybalholm/brotli v1.0.6 // indirect + github.com/beorn7/perks v1.0.1 // indirect github.com/bytedance/gopkg v0.1.4 // indirect github.com/bytedance/sonic v1.15.0 // indirect github.com/bytedance/sonic/loader v0.5.1 // indirect @@ -45,14 +47,19 @@ require ( github.com/goccy/go-yaml v1.19.2 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // 
indirect github.com/json-iterator/go v1.1.12 // indirect - github.com/klauspost/compress v1.17.6 // indirect + github.com/klauspost/compress v1.18.0 // indirect github.com/klauspost/cpuid/v2 v2.3.0 // indirect github.com/leodido/go-urn v1.4.0 // indirect github.com/mattn/go-colorable v0.1.14 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pelletier/go-toml/v2 v2.3.0 // indirect + github.com/prometheus/client_model v0.6.2 // indirect + github.com/prometheus/common v0.67.5 // indirect + github.com/prometheus/otlptranslator v1.0.0 // indirect + github.com/prometheus/procfs v0.20.1 // indirect github.com/quic-go/qpack v0.6.0 // indirect github.com/quic-go/quic-go v0.59.0 // indirect github.com/tidwall/match v1.1.1 // indirect @@ -64,12 +71,14 @@ require ( go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 // indirect go.opentelemetry.io/otel/trace v1.43.0 // indirect go.opentelemetry.io/proto/otlp v1.10.0 // indirect + go.yaml.in/yaml/v2 v2.4.4 // indirect golang.org/x/arch v0.25.0 // indirect golang.org/x/crypto v0.49.0 // indirect golang.org/x/sys v0.42.0 // indirect golang.org/x/text v0.35.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 // indirect + google.golang.org/grpc v1.80.0 // indirect google.golang.org/protobuf v1.36.11 // indirect gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect diff --git a/go.sum b/go.sum index 4ad9016..fb0ad1d 100644 --- a/go.sum +++ b/go.sum @@ -2,6 +2,8 @@ github.com/BurntSushi/toml v1.6.0 h1:dRaEfpa2VI55EwlIW72hMRHdWouJeRF7TPYhI+AUQjk github.com/BurntSushi/toml v1.6.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= 
github.com/andybalholm/brotli v1.0.6 h1:Yf9fFpf49Zrxb9NlQaluyE92/+X7UVHlhMNJN2sxfOI= github.com/andybalholm/brotli v1.0.6/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bytedance/gopkg v0.1.4 h1:oZnQwnX82KAIWb7033bEwtxvTqXcYMxDBaQxo5JJHWM= github.com/bytedance/gopkg v0.1.4/go.mod h1:v1zWfPm21Fb+OsyXN2VAHdL6TBb2L88anLQgdyje6R4= github.com/bytedance/sonic v1.15.0 h1:/PXeWFaR5ElNcVE84U0dOHjiMHQOwNIx3K4ymzh/uSE= @@ -51,14 +53,16 @@ github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 h1:HWRh5R2+9EifMyIHV7ZV+MIZqgz github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l85lZYYOm7LV6om4ceouMv45c= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= -github.com/klauspost/compress v1.17.6 h1:60eq2E/jlfwQXtvZEeBUYADs+BwKBWURIY+Gj2eRGjI= -github.com/klauspost/compress v1.17.6/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y= github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod 
h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ= github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI= github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE= @@ -70,10 +74,22 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/pelletier/go-toml/v2 v2.3.0 h1:k59bC/lIZREW0/iVaQR8nDHxVq8OVlIzYCOJf421CaM= github.com/pelletier/go-toml/v2 v2.3.0/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.67.5 h1:pIgK94WWlQt1WLwAC5j2ynLaBRDiinoAb86HZHTUGI4= +github.com/prometheus/common v0.67.5/go.mod h1:SjE/0MzDEEAyrdr5Gqc6G+sXI67maCxzaT3A2+HqjUw= +github.com/prometheus/otlptranslator v1.0.0 h1:s0LJW/iN9dkIH+EnhiD3BlkkP5QVIUVEoIwkU+A6qos= +github.com/prometheus/otlptranslator v1.0.0/go.mod 
h1:vRYWnXvI6aWGpsdY/mOT/cbeVRBlPWtBNDb7kGR3uKM= +github.com/prometheus/procfs v0.20.1 h1:XwbrGOIplXW/AU3YhIhLODXMJYyC1isLFfYCsTEycfc= +github.com/prometheus/procfs v0.20.1/go.mod h1:o9EMBZGRyvDrSPH1RqdxhojkuXstoe4UlK79eF5TGGo= github.com/quic-go/qpack v0.6.0 h1:g7W+BMYynC1LbYLSqRt8PBg5Tgwxn214ZZR34VIOjz8= github.com/quic-go/qpack v0.6.0/go.mod h1:lUpLKChi8njB4ty2bFLX2x4gzDqXwUpaO1DP9qMDZII= github.com/quic-go/quic-go v0.59.0 h1:OLJkp1Mlm/aS7dpKgTc6cnpynnD2Xg7C1pwL6vy/SAw= @@ -126,6 +142,8 @@ go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 h1:88Y4s2C8oTui1LGM6bT go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0/go.mod h1:Vl1/iaggsuRlrHf/hfPJPvVag77kKyvrLeD10kpMl+A= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0 h1:RAE+JPfvEmvy+0LzyUA25/SGawPwIUbZ6u0Wug54sLc= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0/go.mod h1:AGmbycVGEsRx9mXMZ75CsOyhSP6MFIcj/6dnG+vhVjk= +go.opentelemetry.io/otel/exporters/prometheus v0.65.0 h1:jOveH/b4lU9HT7y+Gfamf18BqlOuz2PWEvs8yM7Q6XE= +go.opentelemetry.io/otel/exporters/prometheus v0.65.0/go.mod h1:i1P8pcumauPtUI4YNopea1dhzEMuEqWP1xoUZDylLHo= go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.43.0 h1:mS47AX77OtFfKG4vtp+84kuGSFZHTyxtXIN269vChY0= go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.43.0/go.mod h1:PJnsC41lAGncJlPUniSwM81gc80GkgWJWr3cu2nKEtU= go.opentelemetry.io/otel/log v0.19.0 h1:KUZs/GOsw79TBBMfDWsXS+KZ4g2Ckzksd1ymzsIEbo4= @@ -148,6 +166,8 @@ go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/mock v0.6.0 h1:hyF9dfmbgIX5EfOdasqLsWD6xqpNZlXblLB/Dbnwv3Y= go.uber.org/mock v0.6.0/go.mod h1:KiVJ4BqZJaMj4svdfmHM0AUx4NJYO8ZNpPnZn1Z+BBU= +go.yaml.in/yaml/v2 v2.4.4 h1:tuyd0P+2Ont/d6e2rl3be67goVK4R6deVxCUX5vyPaQ= +go.yaml.in/yaml/v2 v2.4.4/go.mod h1:gMZqIpDtDqOfM0uNfy0SkpRhvUryYH0Z6wdMYcacYXQ= golang.org/x/arch v0.25.0 
h1:qnk6Ksugpi5Bz32947rkUgDt9/s5qvqDPl/gBKdMJLE= golang.org/x/arch v0.25.0/go.mod h1:0X+GdSIP+kL5wPmpK7sdkEVTt2XoYP0cSjQSbZBwOi8= golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4= diff --git a/internal/config/config.go b/internal/config/config.go index 718908d..97f9753 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -11,12 +11,12 @@ import ( ) type Config struct { - Port int `yaml:"port"` - APIKeys []string `yaml:"api_keys"` - ClaudeBinary string `yaml:"claude_binary"` - Sanitize SanitizeConfig `yaml:"sanitize"` - Logging LoggingConfig `yaml:"logging"` - Telemetry TelemetryConfig `yaml:"telemetry"` + Port int `yaml:"port"` + APIKeys []string `yaml:"api_keys"` + ClaudeBinary string `yaml:"claude_binary"` + Sanitize SanitizeConfig `yaml:"sanitize"` + Logging LoggingConfig `yaml:"logging"` + Telemetry TelemetryConfig `yaml:"telemetry"` } type SanitizeConfig struct { @@ -36,13 +36,27 @@ type ReplaceRule struct { } type TelemetryConfig struct { - Endpoint string `yaml:"endpoint"` - Insecure bool `yaml:"insecure"` - ServiceName string `yaml:"service_name"` - Headers map[string]string `yaml:"headers"` + Export ExportConfig `yaml:"export"` + Embedded EmbeddedConfig `yaml:"embedded"` + ServiceName string `yaml:"service_name"` } -func (t TelemetryConfig) ExportEnabled() bool { return t.Endpoint != "" } +type ExportConfig struct { + Endpoint string `yaml:"endpoint"` + Insecure bool `yaml:"insecure"` + Headers map[string]string `yaml:"headers"` +} + +func (e ExportConfig) Enabled() bool { return e.Endpoint != "" } + +type EmbeddedConfig struct { + Enabled bool `yaml:"enabled"` + Port int `yaml:"port"` + PersesBinary string `yaml:"perses_binary"` + VMBinary string `yaml:"vm_binary"` + VMPort int `yaml:"vm_port"` + BinDir string `yaml:"bin_dir"` +} type LoggingConfig struct { Level string `yaml:"level"` @@ -89,6 +103,18 @@ func Load(path string) (*Config, error) { if cfg.Telemetry.ServiceName == "" { cfg.Telemetry.ServiceName = 
"anthropic-proxy" } + if cfg.Telemetry.Embedded.Port == 0 { + cfg.Telemetry.Embedded.Port = 8080 + } + if cfg.Telemetry.Embedded.PersesBinary == "" { + cfg.Telemetry.Embedded.PersesBinary = "perses" + } + if cfg.Telemetry.Embedded.VMBinary == "" { + cfg.Telemetry.Embedded.VMBinary = "victoria-metrics" + } + if cfg.Telemetry.Embedded.VMPort == 0 { + cfg.Telemetry.Embedded.VMPort = 8428 + } // Check for deprecated claude_credentials field var rawCfg map[string]interface{} diff --git a/internal/embedded/dashboard.go b/internal/embedded/dashboard.go new file mode 100644 index 0000000..2502101 --- /dev/null +++ b/internal/embedded/dashboard.go @@ -0,0 +1,12 @@ +package embedded + +import ( + "embed" +) + +//go:embed dashboard/proxy.json +var dashboardFS embed.FS + +func DashboardJSON() ([]byte, error) { + return dashboardFS.ReadFile("dashboard/proxy.json") +} diff --git a/internal/embedded/dashboard/proxy.json b/internal/embedded/dashboard/proxy.json new file mode 100644 index 0000000..f389f7b --- /dev/null +++ b/internal/embedded/dashboard/proxy.json @@ -0,0 +1,450 @@ +{ + "kind": "Dashboard", + "metadata": { + "name": "proxy", + "createdAt": "2026-04-14T19:47:48.013238204Z", + "updatedAt": "2026-04-14T19:49:30.874125459Z", + "version": 1, + "project": "anthropic-proxy" + }, + "spec": { + "display": { + "name": "Anthropic Proxy" + }, + "datasources": { + "vm": { + "default": true, + "plugin": { + "kind": "PrometheusDatasource", + "spec": { + "directUrl": "http://localhost:9428" + } + } + } + }, + "panels": { + "latency": { + "kind": "Panel", + "spec": { + "display": { + "name": "Latency" + }, + "plugin": { + "kind": "TimeSeriesChart", + "spec": { + "legend": { + "position": "bottom" + }, + "yAxis": { + "format": { + "unit": "milliseconds" + } + } + } + }, + "queries": [ + { + "kind": "TimeSeriesQuery", + "spec": { + "plugin": { + "kind": "PrometheusTimeSeriesQuery", + "spec": { + "datasource": { + "kind": "PrometheusDatasource", + "name": "vm" + }, + "query": 
"histogram_quantile(0.50, rate(proxy_request_duration_ms_milliseconds_bucket[5m]))", + "seriesNameFormat": "p50" + } + } + } + }, + { + "kind": "TimeSeriesQuery", + "spec": { + "plugin": { + "kind": "PrometheusTimeSeriesQuery", + "spec": { + "datasource": { + "kind": "PrometheusDatasource", + "name": "vm" + }, + "query": "histogram_quantile(0.95, rate(proxy_request_duration_ms_milliseconds_bucket[5m]))", + "seriesNameFormat": "p95" + } + } + } + }, + { + "kind": "TimeSeriesQuery", + "spec": { + "plugin": { + "kind": "PrometheusTimeSeriesQuery", + "spec": { + "datasource": { + "kind": "PrometheusDatasource", + "name": "vm" + }, + "query": "histogram_quantile(0.99, rate(proxy_request_duration_ms_milliseconds_bucket[5m]))", + "seriesNameFormat": "p99" + } + } + } + } + ] + } + }, + "request_rate": { + "kind": "Panel", + "spec": { + "display": { + "name": "Request Rate" + }, + "plugin": { + "kind": "TimeSeriesChart", + "spec": { + "legend": { + "position": "bottom" + } + } + }, + "queries": [ + { + "kind": "TimeSeriesQuery", + "spec": { + "plugin": { + "kind": "PrometheusTimeSeriesQuery", + "spec": { + "datasource": { + "kind": "PrometheusDatasource", + "name": "vm" + }, + "query": "rate(proxy_request_count_total[5m])", + "seriesNameFormat": "req/s" + } + } + } + } + ] + } + }, + "token_rate": { + "kind": "Panel", + "spec": { + "display": { + "name": "Token Rate" + }, + "plugin": { + "kind": "TimeSeriesChart", + "spec": { + "legend": { + "position": "bottom" + } + } + }, + "queries": [ + { + "kind": "TimeSeriesQuery", + "spec": { + "plugin": { + "kind": "PrometheusTimeSeriesQuery", + "spec": { + "datasource": { + "kind": "PrometheusDatasource", + "name": "vm" + }, + "query": "rate(proxy_tokens_input_total[5m]) * 60", + "seriesNameFormat": "input/min" + } + } + } + }, + { + "kind": "TimeSeriesQuery", + "spec": { + "plugin": { + "kind": "PrometheusTimeSeriesQuery", + "spec": { + "datasource": { + "kind": "PrometheusDatasource", + "name": "vm" + }, + "query": 
"rate(proxy_tokens_output_total[5m]) * 60", + "seriesNameFormat": "output/min" + } + } + } + } + ] + } + }, + "tokens_5h": { + "kind": "Panel", + "spec": { + "display": { + "name": "5h Tokens" + }, + "plugin": { + "kind": "StatChart", + "spec": { + "calculation": "last", + "format": { + "unit": "decimal" + }, + "sparkline": {} + } + }, + "queries": [ + { + "kind": "TimeSeriesQuery", + "spec": { + "plugin": { + "kind": "PrometheusTimeSeriesQuery", + "spec": { + "datasource": { + "kind": "PrometheusDatasource", + "name": "vm" + }, + "query": "increase(proxy_tokens_output_total[3h])" + } + } + } + } + ] + } + }, + "tokens_7d": { + "kind": "Panel", + "spec": { + "display": { + "name": "7d Tokens" + }, + "plugin": { + "kind": "StatChart", + "spec": { + "calculation": "last", + "format": { + "unit": "decimal" + }, + "sparkline": {} + } + }, + "queries": [ + { + "kind": "TimeSeriesQuery", + "spec": { + "plugin": { + "kind": "PrometheusTimeSeriesQuery", + "spec": { + "datasource": { + "kind": "PrometheusDatasource", + "name": "vm" + }, + "query": "increase(proxy_tokens_output_total[9h])" + } + } + } + } + ] + } + }, + "util_5h": { + "kind": "Panel", + "spec": { + "display": { + "name": "5h Utilization" + }, + "plugin": { + "kind": "GaugeChart", + "spec": { + "calculation": "last", + "format": { + "unit": "percent" + }, + "thresholds": { + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 90 + } + ] + } + } + }, + "queries": [ + { + "kind": "TimeSeriesQuery", + "spec": { + "plugin": { + "kind": "PrometheusTimeSeriesQuery", + "spec": { + "datasource": { + "kind": "PrometheusDatasource", + "name": "vm" + }, + "query": "proxy_usage_utilization{window=\"5h\"}" + } + } + } + } + ] + } + }, + "util_7d": { + "kind": "Panel", + "spec": { + "display": { + "name": "7d Utilization" + }, + "plugin": { + "kind": "GaugeChart", + "spec": { + "calculation": "last", + "format": { + "unit": "percent" + }, + 
"thresholds": { + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 90 + } + ] + } + } + }, + "queries": [ + { + "kind": "TimeSeriesQuery", + "spec": { + "plugin": { + "kind": "PrometheusTimeSeriesQuery", + "spec": { + "datasource": { + "kind": "PrometheusDatasource", + "name": "vm" + }, + "query": "proxy_usage_utilization{window=\"7d\"}" + } + } + } + } + ] + } + } + }, + "layouts": [ + { + "kind": "Grid", + "spec": { + "display": { + "title": "Utilization" + }, + "items": [ + { + "x": 0, + "y": 0, + "width": 6, + "height": 5, + "content": { + "$ref": "#/spec/panels/util_5h" + } + }, + { + "x": 6, + "y": 0, + "width": 6, + "height": 5, + "content": { + "$ref": "#/spec/panels/util_7d" + } + }, + { + "x": 12, + "y": 0, + "width": 6, + "height": 5, + "content": { + "$ref": "#/spec/panels/tokens_5h" + } + }, + { + "x": 18, + "y": 0, + "width": 6, + "height": 5, + "content": { + "$ref": "#/spec/panels/tokens_7d" + } + } + ] + } + }, + { + "kind": "Grid", + "spec": { + "display": { + "title": "Traffic" + }, + "items": [ + { + "x": 0, + "y": 0, + "width": 12, + "height": 8, + "content": { + "$ref": "#/spec/panels/request_rate" + } + }, + { + "x": 12, + "y": 0, + "width": 12, + "height": 8, + "content": { + "$ref": "#/spec/panels/latency" + } + } + ] + } + }, + { + "kind": "Grid", + "spec": { + "display": { + "title": "Tokens" + }, + "items": [ + { + "x": 0, + "y": 0, + "width": 24, + "height": 8, + "content": { + "$ref": "#/spec/panels/token_rate" + } + } + ] + } + } + ], + "duration": "1h", + "refreshInterval": "10s" + } +} diff --git a/internal/embedded/download.go b/internal/embedded/download.go new file mode 100644 index 0000000..cd408ae --- /dev/null +++ b/internal/embedded/download.go @@ -0,0 +1,155 @@ +package embedded + +import ( + "archive/tar" + "compress/gzip" + "fmt" + "io" + "net/http" + "os" + "os/exec" + "path/filepath" + "runtime" + + "github.com/rs/zerolog/log" +) + +const 
cacheDir = ".cache/anthropic-proxy/bin" + +var downloads = map[string]struct { + urlTemplate string + version string + extractName string +}{ + "victoria-metrics": { + urlTemplate: "https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v%s/victoria-metrics-%s-v%s.tar.gz", + version: "1.118.0", + extractName: "victoria-metrics-prod", + }, + "perses": { + urlTemplate: "https://github.com/perses/perses/releases/download/v%s/perses_%s_%s_%s.tar.gz", + version: "0.53.1", + }, +} + +func ensureBinary(name, configPath, configBinDir string) (string, error) { + if configPath != "" { + if p, err := exec.LookPath(configPath); err == nil { + return p, nil + } + } + + if p, err := exec.LookPath(name); err == nil { + return p, nil + } + + binDir := configBinDir + if binDir == "" { + home, err := os.UserHomeDir() + if err != nil { + return "", fmt.Errorf("get home dir: %w", err) + } + binDir = filepath.Join(home, cacheDir) + } + cachedPath := filepath.Join(binDir, name) + + if _, err := os.Stat(cachedPath); err == nil { + return cachedPath, nil + } + + log.Info().Str("binary", name).Msg("downloading binary (first run)") + + if err := os.MkdirAll(binDir, 0o755); err != nil { + return "", fmt.Errorf("create cache dir: %w", err) + } + + url, err := downloadURL(name) + if err != nil { + return "", err + } + + if err := extractAll(url, binDir); err != nil { + return "", fmt.Errorf("download %s: %w", name, err) + } + + d := downloads[name] + if d.extractName != "" { + oldPath := filepath.Join(binDir, d.extractName) + if _, err := os.Stat(oldPath); err == nil { + os.Rename(oldPath, cachedPath) + } + } + + if _, err := os.Stat(cachedPath); err != nil { + return "", fmt.Errorf("binary %s not found after extraction", name) + } + + log.Info().Str("binary", name).Str("path", cachedPath).Msg("binary downloaded") + return cachedPath, nil +} + +func downloadURL(name string) (string, error) { + goarch := runtime.GOARCH + goos := runtime.GOOS + + d, ok := downloads[name] + if !ok { 
// extractAll downloads the gzip-compressed tar archive at url and unpacks
// its directories and regular files below destDir.
//
// Entries of any other type (symlinks, devices, pax headers, ...) are
// skipped. An entry whose name is not local to destDir (absolute, empty, or
// escaping via "..") aborts the extraction with an error, because the
// archive comes from the network and must not be able to write outside
// destDir. Every filesystem and copy error is returned, so a truncated file
// is never reported as a successful extraction.
func extractAll(url, destDir string) error {
	resp, err := http.Get(url)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("download failed: HTTP %d from %s", resp.StatusCode, url)
	}

	gz, err := gzip.NewReader(resp.Body)
	if err != nil {
		return fmt.Errorf("gzip reader: %w", err)
	}
	defer gz.Close()

	tr := tar.NewReader(gz)
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return fmt.Errorf("read tar: %w", err)
		}

		// Filter on type first so that metadata-only entries with unusual
		// names do not trip the path check below.
		if hdr.Typeflag != tar.TypeDir && hdr.Typeflag != tar.TypeReg {
			continue
		}
		if !filepath.IsLocal(hdr.Name) {
			return fmt.Errorf("archive entry %q escapes destination directory", hdr.Name)
		}
		target := filepath.Join(destDir, hdr.Name)

		if hdr.Typeflag == tar.TypeDir {
			if err := os.MkdirAll(target, 0o755); err != nil {
				return fmt.Errorf("create dir %s: %w", target, err)
			}
			continue
		}
		if err := extractFile(target, os.FileMode(hdr.Mode), tr); err != nil {
			return err
		}
	}
}

// extractFile writes the contents of src to target, creating parent
// directories as needed. A zero mode falls back to 0o644.
func extractFile(target string, mode os.FileMode, src io.Reader) error {
	if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil {
		return fmt.Errorf("create dir for %s: %w", target, err)
	}
	if mode == 0 {
		mode = 0o644
	}
	out, err := os.OpenFile(target, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, mode)
	if err != nil {
		return err
	}
	if _, err := io.Copy(out, src); err != nil {
		out.Close()
		return fmt.Errorf("write %s: %w", target, err)
	}
	// Close can report a deferred write error; do not drop it.
	if err := out.Close(); err != nil {
		return fmt.Errorf("close %s: %w", target, err)
	}
	return nil
}
!= nil { + return fmt.Errorf("perses: %w", err) + } + + p.tmpDir, err = os.MkdirTemp("", "perses-*") + if err != nil { + return fmt.Errorf("create temp dir: %w", err) + } + + if err := p.writeServerConfig(); err != nil { + return fmt.Errorf("write server config: %w", err) + } + if err := p.writeDatasourceProvision(); err != nil { + return fmt.Errorf("write datasource provision: %w", err) + } + if err := p.writeDashboardProvision(); err != nil { + return fmt.Errorf("write dashboard provision: %w", err) + } + + p.cmd = exec.Command(bin, + "--config", filepath.Join(p.tmpDir, "config.yaml"), + "-web.listen-address", fmt.Sprintf(":%d", p.cfg.Port), + ) + p.cmd.Dir = filepath.Dir(bin) + p.cmd.Stdout = &logWriter{level: "info", component: "perses"} + p.cmd.Stderr = &logWriter{level: "error", component: "perses"} + + if err := p.cmd.Start(); err != nil { + return fmt.Errorf("start perses: %w", err) + } + + log.Info(). + Str("binary", bin). + Int("port", p.cfg.Port). + Str("config", p.tmpDir). + Msg("perses started") + + return nil +} + +func (p *Perses) Stop() { + if p.cmd != nil && p.cmd.Process != nil { + _ = p.cmd.Process.Kill() + _ = p.cmd.Wait() + } + if p.tmpDir != "" { + _ = os.RemoveAll(p.tmpDir) + } +} + +func (p *Perses) Running() bool { + return p.cmd != nil && p.cmd.Process != nil && p.cmd.ProcessState == nil +} + +func (p *Perses) writeServerConfig() error { + provisionDir := filepath.Join(p.tmpDir, "provisions") + if err := os.MkdirAll(filepath.Join(provisionDir, "datasources"), 0o755); err != nil { + return err + } + if err := os.MkdirAll(filepath.Join(provisionDir, "dashboards"), 0o755); err != nil { + return err + } + + cfg := fmt.Sprintf(`provisioning: + interval: 1m + folders: + - %s +database: + file: + folder: %s/data + extension: json +security: + readonly: false + enable_auth: false +`, provisionDir, p.tmpDir) + + return os.WriteFile(filepath.Join(p.tmpDir, "config.yaml"), []byte(cfg), 0o644) +} + +func (p *Perses) writeDatasourceProvision() error { 
+ ds := fmt.Sprintf(`kind: Datasource +metadata: + name: victoria-metrics + project: anthropic-proxy +spec: + default: true + plugin: + kind: PrometheusDatasource + spec: + directUrl: http://localhost:%d +`, p.cfg.VMPort) + + return os.WriteFile( + filepath.Join(p.tmpDir, "provisions", "datasources", "vm.yaml"), + []byte(ds), 0o644, + ) +} + +func (p *Perses) writeDashboardProvision() error { + dashData, err := DashboardJSON() + if err != nil { + return err + } + return os.WriteFile( + filepath.Join(p.tmpDir, "provisions", "dashboards", "proxy.json"), + dashData, 0o644, + ) +} + +type logWriter struct { + level string + component string +} + +func (w *logWriter) Write(p []byte) (n int, err error) { + msg := string(p) + switch w.level { + case "error": + log.Error().Str("component", w.component).Msg(msg) + default: + log.Debug().Str("component", w.component).Msg(msg) + } + return len(p), nil +} diff --git a/internal/embedded/vm.go b/internal/embedded/vm.go new file mode 100644 index 0000000..012eee9 --- /dev/null +++ b/internal/embedded/vm.go @@ -0,0 +1,88 @@ +package embedded + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + + "github.com/fujin/anthropic-proxy/internal/config" + "github.com/rs/zerolog/log" +) + +type VM struct { + cfg config.EmbeddedConfig + proxyPort int + cmd *exec.Cmd + tmpDir string +} + +func NewVM(cfg config.EmbeddedConfig, proxyPort int) *VM { + return &VM{cfg: cfg, proxyPort: proxyPort} +} + +func (v *VM) Start() error { + bin, err := ensureBinary("victoria-metrics", v.cfg.VMBinary, v.cfg.BinDir) + if err != nil { + return fmt.Errorf("victoria-metrics: %w", err) + } + + v.tmpDir, err = os.MkdirTemp("", "vm-*") + if err != nil { + return fmt.Errorf("create temp dir: %w", err) + } + + scrapeConfig := fmt.Sprintf(`global: + scrape_interval: 15s +scrape_configs: + - job_name: anthropic-proxy + static_configs: + - targets: + - localhost:%d +`, v.proxyPort) + + scrapePath := filepath.Join(v.tmpDir, "scrape.yaml") + if err := 
os.WriteFile(scrapePath, []byte(scrapeConfig), 0o644); err != nil { + return fmt.Errorf("write scrape config: %w", err) + } + + dataPath := filepath.Join(v.tmpDir, "data") + if err := os.MkdirAll(dataPath, 0o755); err != nil { + return fmt.Errorf("create data dir: %w", err) + } + + v.cmd = exec.Command(bin, + "-storageDataPath", dataPath, + "-retentionPeriod", "7d", + "-httpListenAddr", fmt.Sprintf(":%d", v.cfg.VMPort), + "-promscrape.config", scrapePath, + ) + v.cmd.Stdout = &logWriter{level: "info", component: "victoria-metrics"} + v.cmd.Stderr = &logWriter{level: "error", component: "victoria-metrics"} + + if err := v.cmd.Start(); err != nil { + return fmt.Errorf("start victoria-metrics: %w", err) + } + + log.Info(). + Str("binary", bin). + Int("port", v.cfg.VMPort). + Int("scrape_target_port", v.proxyPort). + Msg("victoria-metrics started") + + return nil +} + +func (v *VM) Stop() { + if v.cmd != nil && v.cmd.Process != nil { + _ = v.cmd.Process.Kill() + _ = v.cmd.Wait() + } + if v.tmpDir != "" { + _ = os.RemoveAll(v.tmpDir) + } +} + +func (v *VM) Running() bool { + return v.cmd != nil && v.cmd.Process != nil && v.cmd.ProcessState == nil +} diff --git a/internal/server/server.go b/internal/server/server.go index 28d7492..84836fe 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -26,7 +26,7 @@ type Server struct { apiKeys atomic.Pointer[map[string]struct{}] } -func New(cfg *config.Config, pool *auth.Pool, profile *proxy.SniffedProfile, tracker *ratelimit.Tracker) *Server { +func New(cfg *config.Config, pool *auth.Pool, profile *proxy.SniffedProfile, tracker *ratelimit.Tracker, metricsHandler http.Handler) *Server { s := &Server{configPath: "config.yaml"} san := proxy.NewSanitizer(cfg.Sanitize) @@ -39,7 +39,7 @@ func New(cfg *config.Config, pool *auth.Pool, profile *proxy.SniffedProfile, tra engine := gin.New() engine.Use(gin.Recovery()) engine.Use(corsMiddleware()) - if cfg.Telemetry.ExportEnabled() { + if cfg.Telemetry.Export.Enabled() { 
engine.Use(otelgin.Middleware(cfg.Telemetry.ServiceName)) } engine.Use(s.authMiddleware()) @@ -51,6 +51,10 @@ func New(cfg *config.Config, pool *auth.Pool, profile *proxy.SniffedProfile, tra engine.POST("/v1/messages", handler) engine.POST("/messages", handler) + if metricsHandler != nil { + engine.GET("/metrics", gin.WrapH(metricsHandler)) + } + engine.POST("/reload", s.handleReload()) engine.POST("/debug/refresh", handleDebugRefresh(pool)) engine.GET("/healthz", func(c *gin.Context) { @@ -137,7 +141,7 @@ func corsMiddleware() gin.HandlerFunc { func (s *Server) authMiddleware() gin.HandlerFunc { return func(c *gin.Context) { path := c.Request.URL.Path - if path == "/healthz" || path == "/reload" { + if path == "/healthz" || path == "/reload" || path == "/metrics" { c.Next() return } diff --git a/internal/telemetry/telemetry.go b/internal/telemetry/telemetry.go index d836e93..222cd98 100644 --- a/internal/telemetry/telemetry.go +++ b/internal/telemetry/telemetry.go @@ -3,13 +3,16 @@ package telemetry import ( "context" "io" + "net/http" "github.com/fujin/anthropic-proxy/internal/config" "github.com/fujin/anthropic-proxy/internal/ratelimit" + "github.com/prometheus/client_golang/prometheus/promhttp" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc" "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + promexporter "go.opentelemetry.io/otel/exporters/prometheus" otellog "go.opentelemetry.io/otel/log/global" "go.opentelemetry.io/otel/sdk/log" sdkmetric "go.opentelemetry.io/otel/sdk/metric" @@ -18,46 +21,51 @@ import ( semconv "go.opentelemetry.io/otel/semconv/v1.26.0" ) -// Setup initializes OpenTelemetry providers. It always creates a MeterProvider -// so metrics can be recorded in-process. When cfg.ExportEnabled(), OTLP gRPC -// exporters are additionally configured to push to the LGTM stack. 
-// Returns a shutdown function and an optional io.Writer for the log bridge. -func Setup(ctx context.Context, cfg config.TelemetryConfig, tracker *ratelimit.Tracker) (shutdown func(context.Context) error, logWriter io.Writer, err error) { +func Setup(ctx context.Context, cfg config.TelemetryConfig, tracker *ratelimit.Tracker) (shutdown func(context.Context) error, logWriter io.Writer, metricsHandler http.Handler, err error) { res, err := resource.New(ctx, resource.WithAttributes( semconv.ServiceName(cfg.ServiceName), ), ) if err != nil { - return nil, nil, err + return nil, nil, nil, err } - if !cfg.ExportEnabled() { - // No export — set up in-memory meter provider only so metric - // instruments are valid (they just don't export anywhere). - mp := sdkmetric.NewMeterProvider(sdkmetric.WithResource(res)) + var readers []sdkmetric.Option + readers = append(readers, sdkmetric.WithResource(res)) + + var promHandler http.Handler + if cfg.Embedded.Enabled { + exporter, pErr := promexporter.New() + if pErr != nil { + return nil, nil, nil, pErr + } + readers = append(readers, sdkmetric.WithReader(exporter)) + promHandler = promhttp.Handler() + } + + if !cfg.Export.Enabled() { + mp := sdkmetric.NewMeterProvider(readers...) 
otel.SetMeterProvider(mp) InitMetrics(mp.Meter(cfg.ServiceName), tracker) - return func(ctx context.Context) error { return mp.Shutdown(ctx) }, nil, nil + return func(ctx context.Context) error { return mp.Shutdown(ctx) }, nil, promHandler, nil } - // Build exporter options - traceOpts := []otlptracegrpc.Option{otlptracegrpc.WithEndpoint(cfg.Endpoint)} + traceOpts := []otlptracegrpc.Option{otlptracegrpc.WithEndpoint(cfg.Export.Endpoint)} metricOpts := []otlpmetricgrpc.Option{ - otlpmetricgrpc.WithEndpoint(cfg.Endpoint), + otlpmetricgrpc.WithEndpoint(cfg.Export.Endpoint), otlpmetricgrpc.WithTemporalitySelector(sdkmetric.CumulativeTemporalitySelector), } - logOpts := []otlploggrpc.Option{otlploggrpc.WithEndpoint(cfg.Endpoint)} - if cfg.Insecure { + logOpts := []otlploggrpc.Option{otlploggrpc.WithEndpoint(cfg.Export.Endpoint)} + if cfg.Export.Insecure { traceOpts = append(traceOpts, otlptracegrpc.WithInsecure()) metricOpts = append(metricOpts, otlpmetricgrpc.WithInsecure()) logOpts = append(logOpts, otlploggrpc.WithInsecure()) } - // Trace exporter traceExp, err := otlptracegrpc.New(ctx, traceOpts...) if err != nil { - return nil, nil, err + return nil, nil, nil, err } tp := trace.NewTracerProvider( trace.WithBatcher(traceExp), @@ -65,22 +73,18 @@ func Setup(ctx context.Context, cfg config.TelemetryConfig, tracker *ratelimit.T ) otel.SetTracerProvider(tp) - // Metric exporter metricExp, err := otlpmetricgrpc.New(ctx, metricOpts...) if err != nil { - return nil, nil, err + return nil, nil, nil, err } - mp := sdkmetric.NewMeterProvider( - sdkmetric.WithReader(sdkmetric.NewPeriodicReader(metricExp)), - sdkmetric.WithResource(res), - ) + readers = append(readers, sdkmetric.WithReader(sdkmetric.NewPeriodicReader(metricExp))) + mp := sdkmetric.NewMeterProvider(readers...) otel.SetMeterProvider(mp) InitMetrics(mp.Meter(cfg.ServiceName), tracker) - // Log exporter logExp, err := otlploggrpc.New(ctx, logOpts...) 
if err != nil { - return nil, nil, err + return nil, nil, nil, err } lp := log.NewLoggerProvider( log.WithProcessor(log.NewBatchProcessor(logExp)), @@ -104,5 +108,5 @@ func Setup(ctx context.Context, cfg config.TelemetryConfig, tracker *ratelimit.T return firstErr } - return shutdownFn, bridge, nil + return shutdownFn, bridge, promHandler, nil } diff --git a/main.go b/main.go index 4974ce9..a1884de 100644 --- a/main.go +++ b/main.go @@ -12,6 +12,7 @@ import ( "github.com/fujin/anthropic-proxy/internal/auth" "github.com/fujin/anthropic-proxy/internal/config" + "github.com/fujin/anthropic-proxy/internal/embedded" "github.com/fujin/anthropic-proxy/internal/logging" "github.com/fujin/anthropic-proxy/internal/proxy" "github.com/fujin/anthropic-proxy/internal/ratelimit" @@ -36,7 +37,7 @@ func run() error { }) // Initialize telemetry (metrics always active; OTLP export when endpoint set) - telemetryShutdown, logBridge, err := telemetry.Setup(context.Background(), cfg.Telemetry, tracker) + telemetryShutdown, logBridge, metricsHandler, err := telemetry.Setup(context.Background(), cfg.Telemetry, tracker) if err != nil { return fmt.Errorf("telemetry setup: %w", err) } @@ -115,8 +116,27 @@ func run() error { } } + // Start embedded observability stack (VM + Perses) if enabled + var vm *embedded.VM + var perses *embedded.Perses + if cfg.Telemetry.Embedded.Enabled { + vm = embedded.NewVM(cfg.Telemetry.Embedded, cfg.Port) + if err := vm.Start(); err != nil { + log.Error().Err(err).Msg("failed to start victoria-metrics") + } else { + defer vm.Stop() + } + + perses = embedded.NewPerses(cfg.Telemetry.Embedded, cfg.Port) + if err := perses.Start(); err != nil { + log.Error().Err(err).Msg("failed to start perses") + } else { + defer perses.Stop() + } + } + log.Info().Int("port", cfg.Port).Msg("starting server") - srv := server.New(cfg, pool, profile, tracker) + srv := server.New(cfg, pool, profile, tracker, metricsHandler) quit := make(chan os.Signal, 1) signal.Notify(quit, 
syscall.SIGINT, syscall.SIGTERM) diff --git a/package.nix b/package.nix index 9d3df46..2b17ee5 100644 --- a/package.nix +++ b/package.nix @@ -7,11 +7,11 @@ buildGoModule rec { pname = "anthropic-proxy"; - version = "0.0.4"; + version = "0.0.5"; src = ./.; - vendorHash = "sha256-8pq4GYFjOfYcYLcZSuXMWn77RUxVGP18AcyzIJGbKf4="; + vendorHash = "sha256-yXINNC+NEw+HbOQ5aBgSE5dYTWp+zEZ230rzXfwOoDY="; meta = with lib; { description = "Reverse proxy that lets OpenCode (and similar tools) use a Claude subscription instead of an API key.";