feat(ratelimit): track per-window token usage and utilization
Poll /api/oauth/usage every 5 minutes, and extract utilization from /v1/messages response headers for real-time updates. Track proxy input/output tokens per rate-limit window (5h/7d), resetting the counts when the window changes. Expose these values as OTel observable gauges for Grafana dashboards.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -14,10 +14,11 @@ import (
|
||||
|
||||
"github.com/fujin/anthropic-proxy/internal/auth"
|
||||
"github.com/fujin/anthropic-proxy/internal/logging"
|
||||
"github.com/fujin/anthropic-proxy/internal/ratelimit"
|
||||
"github.com/fujin/anthropic-proxy/internal/telemetry"
|
||||
)
|
||||
|
||||
func HandleMessages(pool *auth.Pool, profile *SniffedProfile, getSanitizer func() *Sanitizer) gin.HandlerFunc {
|
||||
func HandleMessages(pool *auth.Pool, profile *SniffedProfile, getSanitizer func() *Sanitizer, tracker *ratelimit.Tracker) gin.HandlerFunc {
|
||||
upstream := NewUpstreamClient(profile)
|
||||
|
||||
return func(c *gin.Context) {
|
||||
@@ -49,14 +50,14 @@ func HandleMessages(pool *auth.Pool, profile *SniffedProfile, getSanitizer func(
|
||||
isStream := gjson.GetBytes(body, "stream").Bool()
|
||||
|
||||
if isStream {
|
||||
handleStream(c, upstream, san, pool, cred, body, originalBody)
|
||||
handleStream(c, upstream, san, pool, cred, body, originalBody, tracker)
|
||||
} else {
|
||||
handleNonStream(c, upstream, san, pool, cred, body, originalBody)
|
||||
handleNonStream(c, upstream, san, pool, cred, body, originalBody, tracker)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func handleNonStream(c *gin.Context, upstream *UpstreamClient, san *Sanitizer, pool *auth.Pool, cred *auth.Credential, body []byte, originalBody []byte) {
|
||||
func handleNonStream(c *gin.Context, upstream *UpstreamClient, san *Sanitizer, pool *auth.Pool, cred *auth.Credential, body []byte, originalBody []byte, tracker *ratelimit.Tracker) {
|
||||
startTime := time.Now()
|
||||
model := gjson.GetBytes(body, "model").String()
|
||||
ctx := c.Request.Context()
|
||||
@@ -147,6 +148,10 @@ func handleNonStream(c *gin.Context, upstream *UpstreamClient, san *Sanitizer, p
|
||||
)
|
||||
telemetry.TokensInput.Add(ctx, inputTokens, tokenAttrs)
|
||||
telemetry.TokensOutput.Add(ctx, outputTokens, tokenAttrs)
|
||||
if tracker != nil {
|
||||
tracker.RecordTokens(inputTokens, outputTokens)
|
||||
tracker.UpdateFromHeaders(headers)
|
||||
}
|
||||
|
||||
log.Info().
|
||||
Int("status", statusCode).
|
||||
@@ -166,7 +171,7 @@ func handleNonStream(c *gin.Context, upstream *UpstreamClient, san *Sanitizer, p
|
||||
c.Data(statusCode, headers.Get("Content-Type"), respBody)
|
||||
}
|
||||
|
||||
func handleStream(c *gin.Context, upstream *UpstreamClient, san *Sanitizer, pool *auth.Pool, cred *auth.Credential, body []byte, originalBody []byte) {
|
||||
func handleStream(c *gin.Context, upstream *UpstreamClient, san *Sanitizer, pool *auth.Pool, cred *auth.Credential, body []byte, originalBody []byte, tracker *ratelimit.Tracker) {
|
||||
startTime := time.Now()
|
||||
model := gjson.GetBytes(body, "model").String()
|
||||
ctx := c.Request.Context()
|
||||
@@ -299,6 +304,10 @@ func handleStream(c *gin.Context, upstream *UpstreamClient, san *Sanitizer, pool
|
||||
)
|
||||
telemetry.TokensInput.Add(ctx, inputTokens, tokenAttrs)
|
||||
telemetry.TokensOutput.Add(ctx, outputTokens, tokenAttrs)
|
||||
if tracker != nil {
|
||||
tracker.RecordTokens(inputTokens, outputTokens)
|
||||
tracker.UpdateFromHeaders(resp.Header)
|
||||
}
|
||||
}
|
||||
|
||||
log.Info().
|
||||
|
||||
Reference in New Issue
Block a user