refactor(ratelimit): remove in-memory per-window token tracking

Token counts per rate-limit window are now derived in Grafana via
increase(counter[5h]) and increase(counter[168h]) on the existing
cumulative OTel counters.
Removes TokensIn/Out from Window and drops RecordTokens, setResetTime,
and the window_tokens observable gauges.
This commit is contained in:
Alexander
2026-04-14 13:49:05 +02:00
parent 744abc1d24
commit eda66ff7d4
3 changed files with 7 additions and 62 deletions
-2
View File
@@ -149,7 +149,6 @@ func handleNonStream(c *gin.Context, upstream *UpstreamClient, san *Sanitizer, p
telemetry.TokensInput.Add(ctx, inputTokens, tokenAttrs)
telemetry.TokensOutput.Add(ctx, outputTokens, tokenAttrs)
if tracker != nil {
-tracker.RecordTokens(inputTokens, outputTokens)
tracker.UpdateFromHeaders(headers)
}
@@ -305,7 +304,6 @@ func handleStream(c *gin.Context, upstream *UpstreamClient, san *Sanitizer, pool
telemetry.TokensInput.Add(ctx, inputTokens, tokenAttrs)
telemetry.TokensOutput.Add(ctx, outputTokens, tokenAttrs)
if tracker != nil {
-tracker.RecordTokens(inputTokens, outputTokens)
tracker.UpdateFromHeaders(resp.Header)
}
}