feat(ratelimit): track per-window token usage and utilization

Poll /api/oauth/usage every 5 min and extract utilization from
/v1/messages response headers for real-time updates. Track proxy
tokens in/out per rate limit window (5h/7d), resetting on window
change. Expose as OTel observable gauges for Grafana dashboards.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alexander
2026-04-14 12:51:31 +02:00
parent 76aeeb6be1
commit fac9578975
7 changed files with 364 additions and 13 deletions
+3 -2
View File
@@ -14,6 +14,7 @@ import (
"github.com/fujin/anthropic-proxy/internal/config"
"github.com/fujin/anthropic-proxy/internal/logging"
"github.com/fujin/anthropic-proxy/internal/proxy"
"github.com/fujin/anthropic-proxy/internal/ratelimit"
"go.opentelemetry.io/contrib/instrumentation/github.com/gin-gonic/gin/otelgin"
)
@@ -25,7 +26,7 @@ type Server struct {
apiKeys atomic.Pointer[map[string]struct{}]
}
func New(cfg *config.Config, pool *auth.Pool, profile *proxy.SniffedProfile) *Server {
func New(cfg *config.Config, pool *auth.Pool, profile *proxy.SniffedProfile, tracker *ratelimit.Tracker) *Server {
s := &Server{configPath: "config.yaml"}
san := proxy.NewSanitizer(cfg.Sanitize)
@@ -46,7 +47,7 @@ func New(cfg *config.Config, pool *auth.Pool, profile *proxy.SniffedProfile) *Se
handler := proxy.HandleMessages(pool, profile, func() *proxy.Sanitizer {
return s.sanitizer.Load()
})
}, tracker)
engine.POST("/v1/messages", handler)
engine.POST("/messages", handler)