feat(ratelimit): track per-window token usage and utilization

Poll /api/oauth/usage every 5 minutes, and extract utilization from
/v1/messages response headers for real-time updates. Track proxy
tokens in/out per rate-limit window (5h/7d), resetting the counters
when the window changes. Expose these values as OTel observable gauges
for Grafana dashboards.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alexander
2026-04-14 12:51:31 +02:00
parent 76aeeb6be1
commit fac9578975
7 changed files with 364 additions and 13 deletions
+14 -5
View File
@@ -14,10 +14,11 @@ import (
"github.com/fujin/anthropic-proxy/internal/auth"
"github.com/fujin/anthropic-proxy/internal/logging"
"github.com/fujin/anthropic-proxy/internal/ratelimit"
"github.com/fujin/anthropic-proxy/internal/telemetry"
)
func HandleMessages(pool *auth.Pool, profile *SniffedProfile, getSanitizer func() *Sanitizer) gin.HandlerFunc {
func HandleMessages(pool *auth.Pool, profile *SniffedProfile, getSanitizer func() *Sanitizer, tracker *ratelimit.Tracker) gin.HandlerFunc {
upstream := NewUpstreamClient(profile)
return func(c *gin.Context) {
@@ -49,14 +50,14 @@ func HandleMessages(pool *auth.Pool, profile *SniffedProfile, getSanitizer func(
isStream := gjson.GetBytes(body, "stream").Bool()
if isStream {
handleStream(c, upstream, san, pool, cred, body, originalBody)
handleStream(c, upstream, san, pool, cred, body, originalBody, tracker)
} else {
handleNonStream(c, upstream, san, pool, cred, body, originalBody)
handleNonStream(c, upstream, san, pool, cred, body, originalBody, tracker)
}
}
}
func handleNonStream(c *gin.Context, upstream *UpstreamClient, san *Sanitizer, pool *auth.Pool, cred *auth.Credential, body []byte, originalBody []byte) {
func handleNonStream(c *gin.Context, upstream *UpstreamClient, san *Sanitizer, pool *auth.Pool, cred *auth.Credential, body []byte, originalBody []byte, tracker *ratelimit.Tracker) {
startTime := time.Now()
model := gjson.GetBytes(body, "model").String()
ctx := c.Request.Context()
@@ -147,6 +148,10 @@ func handleNonStream(c *gin.Context, upstream *UpstreamClient, san *Sanitizer, p
)
telemetry.TokensInput.Add(ctx, inputTokens, tokenAttrs)
telemetry.TokensOutput.Add(ctx, outputTokens, tokenAttrs)
if tracker != nil {
tracker.RecordTokens(inputTokens, outputTokens)
tracker.UpdateFromHeaders(headers)
}
log.Info().
Int("status", statusCode).
@@ -166,7 +171,7 @@ func handleNonStream(c *gin.Context, upstream *UpstreamClient, san *Sanitizer, p
c.Data(statusCode, headers.Get("Content-Type"), respBody)
}
func handleStream(c *gin.Context, upstream *UpstreamClient, san *Sanitizer, pool *auth.Pool, cred *auth.Credential, body []byte, originalBody []byte) {
func handleStream(c *gin.Context, upstream *UpstreamClient, san *Sanitizer, pool *auth.Pool, cred *auth.Credential, body []byte, originalBody []byte, tracker *ratelimit.Tracker) {
startTime := time.Now()
model := gjson.GetBytes(body, "model").String()
ctx := c.Request.Context()
@@ -299,6 +304,10 @@ func handleStream(c *gin.Context, upstream *UpstreamClient, san *Sanitizer, pool
)
telemetry.TokensInput.Add(ctx, inputTokens, tokenAttrs)
telemetry.TokensOutput.Add(ctx, outputTokens, tokenAttrs)
if tracker != nil {
tracker.RecordTokens(inputTokens, outputTokens)
tracker.UpdateFromHeaders(resp.Header)
}
}
log.Info().