refactor(ratelimit): remove in-memory per-window token tracking
Token counts per rate-limit window are now derived in Grafana by applying increase() to the existing cumulative OTel counters over each window length, i.e. increase(counter[5h]) and increase(counter[168h]). This removes TokensIn/Out from Window, along with RecordTokens, setResetTime, and the window_tokens observable gauges.
This commit is contained in:
@@ -149,7 +149,6 @@ func handleNonStream(c *gin.Context, upstream *UpstreamClient, san *Sanitizer, p
|
||||
telemetry.TokensInput.Add(ctx, inputTokens, tokenAttrs)
|
||||
telemetry.TokensOutput.Add(ctx, outputTokens, tokenAttrs)
|
||||
if tracker != nil {
|
||||
tracker.RecordTokens(inputTokens, outputTokens)
|
||||
tracker.UpdateFromHeaders(headers)
|
||||
}
|
||||
|
||||
@@ -305,7 +304,6 @@ func handleStream(c *gin.Context, upstream *UpstreamClient, san *Sanitizer, pool
|
||||
telemetry.TokensInput.Add(ctx, inputTokens, tokenAttrs)
|
||||
telemetry.TokensOutput.Add(ctx, outputTokens, tokenAttrs)
|
||||
if tracker != nil {
|
||||
tracker.RecordTokens(inputTokens, outputTokens)
|
||||
tracker.UpdateFromHeaders(resp.Header)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user