feat(ratelimit): track per-window token usage and utilization
Poll /api/oauth/usage every 5 minutes, and extract utilization from /v1/messages response headers for real-time updates. Track the proxy's input and output tokens per rate-limit window (5h/7d), resetting the counts when the window changes. Expose these values as OTel observable gauges for Grafana dashboards.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,7 +1,12 @@
|
||||
package telemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
|
||||
"github.com/fujin/anthropic-proxy/internal/ratelimit"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -17,7 +22,8 @@ var (
|
||||
)
|
||||
|
||||
// InitMetrics creates all metric instruments from the given meter.
|
||||
func InitMetrics(meter metric.Meter) {
|
||||
// If tracker is non-nil, registers observable gauges for per-window usage.
|
||||
func InitMetrics(meter metric.Meter, tracker *ratelimit.Tracker) {
|
||||
RequestCounter, _ = meter.Int64Counter("proxy.request.count",
|
||||
metric.WithDescription("Total proxy requests"),
|
||||
)
|
||||
@@ -47,4 +53,50 @@ func InitMetrics(meter metric.Meter) {
|
||||
StreamRequests, _ = meter.Int64Counter("proxy.stream.requests",
|
||||
metric.WithDescription("Streaming request count"),
|
||||
)
|
||||
|
||||
if tracker == nil {
|
||||
return
|
||||
}
|
||||
|
||||
attr5h := attribute.String("window", "5h")
|
||||
attr7d := attribute.String("window", "7d")
|
||||
attrSonnet := attribute.String("window", "7d_sonnet")
|
||||
|
||||
meter.Float64ObservableGauge("proxy.usage.utilization",
|
||||
metric.WithDescription("Current utilization % from API"),
|
||||
metric.WithFloat64Callback(func(_ context.Context, o metric.Float64Observer) error {
|
||||
o.Observe(tracker.FiveHour().Utilization, metric.WithAttributes(attr5h))
|
||||
o.Observe(tracker.SevenDay().Utilization, metric.WithAttributes(attr7d))
|
||||
o.Observe(tracker.Sonnet().Utilization, metric.WithAttributes(attrSonnet))
|
||||
return nil
|
||||
}),
|
||||
)
|
||||
|
||||
meter.Int64ObservableGauge("proxy.usage.resets_at",
|
||||
metric.WithDescription("Unix seconds when window resets"),
|
||||
metric.WithInt64Callback(func(_ context.Context, o metric.Int64Observer) error {
|
||||
o.Observe(tracker.FiveHour().ResetsAt.Unix(), metric.WithAttributes(attr5h))
|
||||
o.Observe(tracker.SevenDay().ResetsAt.Unix(), metric.WithAttributes(attr7d))
|
||||
o.Observe(tracker.Sonnet().ResetsAt.Unix(), metric.WithAttributes(attrSonnet))
|
||||
return nil
|
||||
}),
|
||||
)
|
||||
|
||||
meter.Int64ObservableGauge("proxy.usage.window_tokens.input",
|
||||
metric.WithDescription("Proxy input tokens in current window"),
|
||||
metric.WithInt64Callback(func(_ context.Context, o metric.Int64Observer) error {
|
||||
o.Observe(tracker.FiveHour().TokensIn, metric.WithAttributes(attr5h))
|
||||
o.Observe(tracker.SevenDay().TokensIn, metric.WithAttributes(attr7d))
|
||||
return nil
|
||||
}),
|
||||
)
|
||||
|
||||
meter.Int64ObservableGauge("proxy.usage.window_tokens.output",
|
||||
metric.WithDescription("Proxy output tokens in current window"),
|
||||
metric.WithInt64Callback(func(_ context.Context, o metric.Int64Observer) error {
|
||||
o.Observe(tracker.FiveHour().TokensOut, metric.WithAttributes(attr5h))
|
||||
o.Observe(tracker.SevenDay().TokensOut, metric.WithAttributes(attr7d))
|
||||
return nil
|
||||
}),
|
||||
)
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"io"
|
||||
|
||||
"github.com/fujin/anthropic-proxy/internal/config"
|
||||
"github.com/fujin/anthropic-proxy/internal/ratelimit"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
|
||||
@@ -21,7 +22,7 @@ import (
|
||||
// so metrics can be recorded in-process. When cfg.ExportEnabled(), OTLP gRPC
|
||||
// exporters are additionally configured to push to the LGTM stack.
|
||||
// Returns a shutdown function and an optional io.Writer for the log bridge.
|
||||
func Setup(ctx context.Context, cfg config.TelemetryConfig) (shutdown func(context.Context) error, logWriter io.Writer, err error) {
|
||||
func Setup(ctx context.Context, cfg config.TelemetryConfig, tracker *ratelimit.Tracker) (shutdown func(context.Context) error, logWriter io.Writer, err error) {
|
||||
res, err := resource.New(ctx,
|
||||
resource.WithAttributes(
|
||||
semconv.ServiceName(cfg.ServiceName),
|
||||
@@ -36,7 +37,7 @@ func Setup(ctx context.Context, cfg config.TelemetryConfig) (shutdown func(conte
|
||||
// instruments are valid (they just don't export anywhere).
|
||||
mp := sdkmetric.NewMeterProvider(sdkmetric.WithResource(res))
|
||||
otel.SetMeterProvider(mp)
|
||||
InitMetrics(mp.Meter(cfg.ServiceName))
|
||||
InitMetrics(mp.Meter(cfg.ServiceName), tracker)
|
||||
return func(ctx context.Context) error { return mp.Shutdown(ctx) }, nil, nil
|
||||
}
|
||||
|
||||
@@ -74,7 +75,7 @@ func Setup(ctx context.Context, cfg config.TelemetryConfig) (shutdown func(conte
|
||||
sdkmetric.WithResource(res),
|
||||
)
|
||||
otel.SetMeterProvider(mp)
|
||||
InitMetrics(mp.Meter(cfg.ServiceName))
|
||||
InitMetrics(mp.Meter(cfg.ServiceName), tracker)
|
||||
|
||||
// Log exporter
|
||||
logExp, err := otlploggrpc.New(ctx, logOpts...)
|
||||
|
||||
Reference in New Issue
Block a user