chore: initial snapshot for gitea/github upload

This commit is contained in:
Your Name
2026-03-26 16:04:46 +08:00
commit a699a1ac98
3497 changed files with 1586237 additions and 0 deletions

View File

@@ -0,0 +1,84 @@
"""
Tracks the number of HTTP requests currently in-flight on this uvicorn worker.
Used by /health/backlog to expose per-pod queue depth, and emitted as the
Prometheus gauge `litellm_in_flight_requests`.
"""
import os
from typing import Any, Optional
from starlette.types import ASGIApp, Receive, Scope, Send
class InFlightRequestsMiddleware:
    """
    Pure-ASGI middleware that keeps a running count of HTTP requests being
    handled.

    For every ``http`` scope the count is bumped before the wrapped app is
    invoked and lowered again in a ``finally`` clause, so it is restored
    whether the app returns normally or raises. Non-HTTP scopes are forwarded
    untouched and never counted.

    The count lives on the class itself (not on instances), so every
    middleware instance in the same process shares it.

    When ``prometheus_client`` can be imported, the same value is mirrored
    into the ``litellm_in_flight_requests`` gauge. The gauge is built lazily
    on the first HTTP request and construction is attempted exactly once:
    if it fails for any reason the class records ``None`` and never retries.
    """

    # Shared, class-level state (see class docstring).
    _in_flight: int = 0
    _gauge: Optional[Any] = None
    _gauge_init_attempted: bool = False

    def __init__(self, app: ASGIApp) -> None:
        self.app = app

    async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
        """Forward the call to the wrapped app, counting it if it is HTTP."""
        tracker = InFlightRequestsMiddleware

        if scope["type"] == "http":
            tracker._in_flight += 1
            metric = tracker._get_gauge()
            if metric is not None:
                metric.inc()  # type: ignore
            try:
                await self.app(scope, receive, send)
            finally:
                # Always undo the increment, even when the app raised.
                tracker._in_flight -= 1
                if metric is not None:
                    metric.dec()  # type: ignore
        else:
            await self.app(scope, receive, send)

    @staticmethod
    def get_count() -> int:
        """Return how many HTTP requests are in-flight right now."""
        return InFlightRequestsMiddleware._in_flight

    @staticmethod
    def _get_gauge() -> Optional[Any]:
        """
        Return the shared Prometheus gauge, creating it on the first call.

        Returns ``None`` when the gauge could not be created; that outcome is
        cached, so later calls return ``None`` without retrying.
        """
        tracker = InFlightRequestsMiddleware

        if not tracker._gauge_init_attempted:
            # Flip the flag first so a failure below is never retried.
            tracker._gauge_init_attempted = True
            try:
                from prometheus_client import Gauge

                gauge_options = {}
                if "PROMETHEUS_MULTIPROC_DIR" in os.environ:
                    # livesum aggregates across all worker processes in the
                    # scrape response
                    gauge_options["multiprocess_mode"] = "livesum"

                tracker._gauge = Gauge(
                    "litellm_in_flight_requests",
                    "Number of HTTP requests currently in-flight on this uvicorn worker",
                    **gauge_options,
                )
            except Exception:
                # Best-effort: a missing or failing prometheus_client simply
                # disables the gauge.
                tracker._gauge = None

        return tracker._gauge
def get_in_flight_requests() -> int:
    """Return the current in-flight HTTP request count.

    Module-level shortcut for ``InFlightRequestsMiddleware.get_count()``,
    intended for the /health/backlog endpoint.
    """
    current_count: int = InFlightRequestsMiddleware.get_count()
    return current_count

View File

@@ -0,0 +1,73 @@
"""
Prometheus Auth Middleware - Pure ASGI implementation
"""
import json
from fastapi import Request
from starlette.types import ASGIApp, Receive, Scope, Send
import litellm
from litellm.proxy._types import SpecialHeaders
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
# Cache the header name at module level to avoid repeated enum attribute access
_AUTHORIZATION_HEADER = SpecialHeaders.openai_authorization.value # "Authorization"
class PrometheusAuthMiddleware:
    """
    Pure-ASGI middleware that can require authentication on the metrics
    endpoint.

    Auth only runs for HTTP requests whose path contains ``/metrics`` and
    only when ``litellm.require_auth_for_metrics_endpoint`` is ``True``;
    every other call is handed straight to the wrapped app.

    Enable it in proxy_config.yaml:
    ```yaml
    litellm_settings:
      require_auth_for_metrics_endpoint: true
    ```
    """

    def __init__(self, app: ASGIApp) -> None:
        self.app = app

    async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
        """Authenticate metrics requests when configured, then delegate."""
        # Short-circuit order matters: the path is only inspected for HTTP
        # scopes, and the litellm setting is only read for metrics requests.
        must_authenticate = (
            scope["type"] == "http"
            and "/metrics" in scope.get("path", "")
            and litellm.require_auth_for_metrics_endpoint is True
        )

        if must_authenticate:
            # The Request wrapper is built only when auth is actually needed.
            request = Request(scope, receive)
            api_key = request.headers.get(_AUTHORIZATION_HEADER) or ""
            try:
                await user_api_key_auth(request=request, api_key=api_key)
            except Exception as auth_error:
                await self._send_unauthorized(send, auth_error)
                return

        # Unauthenticated-by-design or successfully authenticated: continue.
        await self.app(scope, receive, send)

    @staticmethod
    async def _send_unauthorized(send: Send, auth_error: Exception) -> None:
        """Write a 401 JSON response for *auth_error* directly over ASGI."""
        reason = getattr(auth_error, "message", str(auth_error))
        payload = json.dumps(
            f"Unauthorized access to metrics endpoint: {reason}"
        ).encode("utf-8")

        response_start = {
            "type": "http.response.start",
            "status": 401,
            "headers": [
                [b"content-type", b"application/json"],
                [b"content-length", str(len(payload)).encode("ascii")],
            ],
        }
        response_body = {
            "type": "http.response.body",
            "body": payload,
        }

        await send(response_start)
        await send(response_body)