chore: initial snapshot for gitea/github upload
This commit is contained in:
@@ -0,0 +1,9 @@
|
||||
"""
|
||||
Usage endpoints package.
|
||||
|
||||
Re-exports the router from endpoints module.
|
||||
"""
|
||||
|
||||
from litellm.proxy.management_endpoints.usage_endpoints.endpoints import ( # noqa: F401
|
||||
router,
|
||||
)
|
||||
@@ -0,0 +1,578 @@
|
||||
"""
|
||||
AI Usage Chat - uses LLM tool calling to answer questions about
|
||||
usage/spend data by querying the aggregated daily activity endpoints.
|
||||
"""
|
||||
|
||||
import json
from datetime import date
from typing import (
    Any,
    AsyncIterator,
    Callable,
    Dict,
    List,
    Literal,
    Optional,
    Union,
    cast,
)

from typing_extensions import TypedDict

import litellm
from litellm._logging import verbose_proxy_logger
from litellm.constants import DEFAULT_COMPETITOR_DISCOVERY_MODEL
from litellm.types.proxy.management_endpoints.common_daily_activity import (
    SpendAnalyticsPaginatedResponse,
)
|
||||
|
||||
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

# Sampling temperature for every usage-chat completion; kept low so answers
# stay deterministic and grounded in the fetched data.
USAGE_AI_TEMPERATURE = 0.2

# Daily-spend table names passed to the common daily-activity query layer.
TABLE_DAILY_USER_SPEND = "litellm_dailyuserspend"
TABLE_DAILY_TEAM_SPEND = "litellm_dailyteamspend"
TABLE_DAILY_TAG_SPEND = "litellm_dailytagspend"

# Entity-id column used to filter each of the tables above.
ENTITY_FIELD_USER = "user_id"
ENTITY_FIELD_TEAM = "team_id"
ENTITY_FIELD_TAG = "tag"

# Page size for the paginated (non-aggregated) daily-activity query.
PAGINATED_PAGE_SIZE = 200
# Chat history sent to the LLM is truncated to this many trailing messages.
MAX_CHAT_MESSAGES = 20
# Truncation limits for the ranked breakdown summaries shown to the LLM.
TOP_N_MODELS = 15
TOP_N_PROVIDERS = 10
TOP_N_KEYS = 10  # NOTE(review): not referenced in this module — confirm intended use
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Types
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class SSEStatusEvent(TypedDict):
    """SSE event announcing a high-level progress status to the client."""

    type: Literal["status"]
    message: str
|
||||
|
||||
|
||||
class SSEToolCallEvent(TypedDict, total=False):
    """SSE event describing the lifecycle of a single tool invocation.

    ``total=False`` because not every key is present on every emission
    (e.g. ``error`` is only set on failures).
    """

    type: Literal["tool_call"]
    tool_name: str
    tool_label: str  # human-readable label for display
    arguments: Dict[str, str]
    status: Literal["running", "complete", "error"]
    error: str
|
||||
|
||||
|
||||
class SSEChunkEvent(TypedDict):
    """SSE event carrying one streamed piece of assistant text."""

    type: Literal["chunk"]
    content: str
|
||||
|
||||
|
||||
class SSEDoneEvent(TypedDict):
    """SSE event marking the end of a successful stream."""

    type: Literal["done"]
|
||||
|
||||
|
||||
class SSEErrorEvent(TypedDict):
    """SSE event reporting a fatal error; terminates the stream."""

    type: Literal["error"]
    message: str
|
||||
|
||||
|
||||
# Union of every event shape emitted over the SSE stream.
# Spelled with typing.Union (not PEP 604 "X | Y") so this module-level alias
# evaluates at import time on Python < 3.10, matching the pre-3.10 typing
# style (Dict/List/Optional) used throughout this file.
SSEEvent = Union[
    SSEStatusEvent, SSEToolCallEvent, SSEChunkEvent, SSEDoneEvent, SSEErrorEvent
]
|
||||
|
||||
|
||||
class ToolHandler(TypedDict):
    """Registry entry pairing a data fetcher with its text summariser."""

    fetch: Callable[..., Any]  # async fetcher returning the raw JSON dict
    summarise: Callable[[Dict[str, Any]], str]  # raw dict -> LLM-readable text
    label: str  # human-readable label used in SSE events and error messages
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool definitions (OpenAI function-calling schema)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Date-range parameters shared by every tool schema below.
_DATE_PARAMS = {
    "start_date": {"type": "string", "description": "Start date in YYYY-MM-DD format"},
    "end_date": {"type": "string", "description": "End date in YYYY-MM-DD format"},
}

# Global/user-level usage tool — included for every caller role.
_TOOL_USAGE = {
    "type": "function",
    "function": {
        "name": "get_usage_data",
        "description": (
            "Fetch aggregated global usage/spend data. Returns daily spend, "
            "token counts, request counts, and breakdowns by model, provider, "
            "and API key. Use for overall spend, top models, top providers."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                **_DATE_PARAMS,
                "user_id": {
                    "type": "string",
                    "description": "Optional user ID filter. Omit for global view.",
                },
            },
            "required": ["start_date", "end_date"],
        },
    },
}

# Team breakdown tool — only present in TOOLS_ADMIN below.
_TOOL_TEAM = {
    "type": "function",
    "function": {
        "name": "get_team_usage_data",
        "description": (
            "Fetch usage/spend data broken down by team. Use for questions "
            "like 'which team spends the most' or 'show me team X usage'."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                **_DATE_PARAMS,
                "team_ids": {
                    "type": "string",
                    "description": "Optional comma-separated team IDs. Omit for all teams.",
                },
            },
            "required": ["start_date", "end_date"],
        },
    },
}

# Tag breakdown tool — only present in TOOLS_ADMIN below.
_TOOL_TAG = {
    "type": "function",
    "function": {
        "name": "get_tag_usage_data",
        "description": (
            "Fetch usage/spend data broken down by tag. Tags are labels "
            "attached to requests (features, environments, credentials)."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                **_DATE_PARAMS,
                "tags": {
                    "type": "string",
                    "description": "Optional comma-separated tag names. Omit for all tags.",
                },
            },
            "required": ["start_date", "end_date"],
        },
    },
}

# Tool sets by role: non-admins only get the scoped global/user tool.
TOOLS_BASE = [_TOOL_USAGE]
TOOLS_ADMIN = [_TOOL_USAGE, _TOOL_TEAM, _TOOL_TAG]
|
||||
|
||||
|
||||
def get_tools_for_role(is_admin: bool) -> List[Dict[str, Any]]:
    """Return the tool list appropriate for the user's role.

    Admins get the full set (global + team + tag); everyone else only the
    global/user-scoped tool.
    """
    if is_admin:
        return TOOLS_ADMIN
    return TOOLS_BASE
|
||||
|
||||
|
||||
# Base system prompt shared by both roles; the per-role tool description and
# today's date are appended by _build_system_prompt.
_SYSTEM_PROMPT_BASE = (
    "You are an AI assistant embedded in the LiteLLM Usage dashboard. "
    "You help users understand their LLM API spend and usage data.\n\n"
    "ALWAYS call the appropriate tool(s) first to fetch data before answering. "
    "You may call multiple tools if the question spans different dimensions.\n\n"
    "Guidelines:\n"
    "- Be concise and specific. Use exact numbers from the data.\n"
    "- Format costs as dollar amounts (e.g. $12.34).\n"
    "- When comparing entities, show a ranked list.\n"
    "- If data is empty or no results found, say so clearly.\n"
    "- Do not hallucinate data — only use what the tools return.\n"
    "- Today's date will be provided below. Use it to interpret relative dates "
    "like 'this week', 'this month', 'last 7 days', etc."
)

# Tool summary appended for admin callers (all three tools available).
_TOOL_DESCRIPTIONS_ADMIN = (
    "You have access to these tools:\n"
    "- `get_usage_data`: Global/user-level usage (spend, models, providers, API keys)\n"
    "- `get_team_usage_data`: Team-level usage breakdown\n"
    "- `get_tag_usage_data`: Tag-level usage breakdown\n\n"
)

# Tool summary appended for non-admin callers (single scoped tool).
_TOOL_DESCRIPTIONS_BASE = (
    "You have access to this tool:\n"
    "- `get_usage_data`: Your usage data (spend, models, providers, API keys)\n\n"
)
|
||||
|
||||
|
||||
def _build_system_prompt(is_admin: bool) -> str:
    """Build role-appropriate system prompt with today's date appended."""
    if is_admin:
        tool_desc = _TOOL_DESCRIPTIONS_ADMIN
    else:
        tool_desc = _TOOL_DESCRIPTIONS_BASE
    today = date.today().isoformat()
    return f"{_SYSTEM_PROMPT_BASE}\n\n{tool_desc}Today's date: {today}"
|
||||
|
||||
|
||||
# keep a public reference for test assertions
# (the date-stamped prompt actually sent is assembled by _build_system_prompt)
SYSTEM_PROMPT = _SYSTEM_PROMPT_BASE
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Data fetchers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _parse_csv_ids(raw: Optional[str]) -> Optional[List[str]]:
|
||||
if not raw:
|
||||
return None
|
||||
return [t.strip() for t in raw.split(",") if t.strip()]
|
||||
|
||||
|
||||
async def _query_activity(
    table_name: str,
    entity_id_field: str,
    entity_id: Optional[Any],
    start_date: str,
    end_date: str,
    *,
    use_aggregated: bool = False,
) -> SpendAnalyticsPaginatedResponse:
    """Shared helper that calls the daily activity query layer.

    Args:
        table_name: daily-spend table to query (one of the TABLE_DAILY_* constants).
        entity_id_field: filter column (user_id / team_id / tag).
        entity_id: a single id, a list of ids, or None for no filter.
        start_date: window start, YYYY-MM-DD (per the tool schemas).
        end_date: window end, YYYY-MM-DD.
        use_aggregated: when True use the aggregated query; otherwise the
            paginated one (first page only, capped at PAGINATED_PAGE_SIZE).

    Returns:
        SpendAnalyticsPaginatedResponse from the common daily-activity layer.
    """
    # NOTE(review): imports are deferred to call time — presumably to avoid a
    # circular import with proxy_server at module load; confirm before moving.
    from litellm.proxy.management_endpoints.common_daily_activity import (
        get_daily_activity,
        get_daily_activity_aggregated,
    )
    from litellm.proxy.proxy_server import prisma_client

    if use_aggregated:
        return await get_daily_activity_aggregated(
            prisma_client=prisma_client,
            table_name=table_name,
            entity_id_field=entity_id_field,
            entity_id=entity_id,
            entity_metadata_field=None,
            start_date=start_date,
            end_date=end_date,
            model=None,
            api_key=None,
        )
    # Paginated variant: only the first page is fetched.
    return await get_daily_activity(
        prisma_client=prisma_client,
        table_name=table_name,
        entity_id_field=entity_id_field,
        entity_id=entity_id,
        entity_metadata_field=None,
        start_date=start_date,
        end_date=end_date,
        model=None,
        api_key=None,
        page=1,
        page_size=PAGINATED_PAGE_SIZE,
    )
|
||||
|
||||
|
||||
async def _fetch_usage_data(
    start_date: str, end_date: str, user_id: Optional[str] = None
) -> Dict[str, Any]:
    """Fetch aggregated global (or single-user) usage as a plain JSON dict."""
    response = await _query_activity(
        table_name=TABLE_DAILY_USER_SPEND,
        entity_id_field=ENTITY_FIELD_USER,
        entity_id=user_id,
        start_date=start_date,
        end_date=end_date,
        use_aggregated=True,
    )
    return response.model_dump(mode="json")
|
||||
|
||||
|
||||
async def _fetch_team_usage_data(
    start_date: str, end_date: str, team_ids: Optional[str] = None
) -> Dict[str, Any]:
    """Fetch per-team daily usage (paginated) as a plain JSON dict.

    ``team_ids`` is an optional comma-separated filter; None means all teams.
    """
    response = await _query_activity(
        table_name=TABLE_DAILY_TEAM_SPEND,
        entity_id_field=ENTITY_FIELD_TEAM,
        entity_id=_parse_csv_ids(team_ids),
        start_date=start_date,
        end_date=end_date,
    )
    return response.model_dump(mode="json")
|
||||
|
||||
|
||||
async def _fetch_tag_usage_data(
    start_date: str, end_date: str, tags: Optional[str] = None
) -> Dict[str, Any]:
    """Fetch per-tag daily usage (paginated) as a plain JSON dict.

    ``tags`` is an optional comma-separated filter; None means all tags.
    """
    response = await _query_activity(
        table_name=TABLE_DAILY_TAG_SPEND,
        entity_id_field=ENTITY_FIELD_TAG,
        entity_id=_parse_csv_ids(tags),
        start_date=start_date,
        end_date=end_date,
    )
    return response.model_dump(mode="json")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Summarisers — convert raw JSON to concise text the LLM can reason over
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _accumulate_breakdown(
|
||||
results: List[Dict[str, Any]], dimension: str, fields: List[str]
|
||||
) -> Dict[str, Dict[str, float]]:
|
||||
"""Aggregate a single breakdown dimension across days."""
|
||||
totals: Dict[str, Dict[str, float]] = {}
|
||||
for day in results:
|
||||
for key, entry in day.get("breakdown", {}).get(dimension, {}).items():
|
||||
if key not in totals:
|
||||
totals[key] = {f: 0.0 for f in fields}
|
||||
m = entry.get("metrics", {})
|
||||
for f in fields:
|
||||
totals[key][f] += m.get(f, 0)
|
||||
return totals
|
||||
|
||||
|
||||
def _ranked_lines(
|
||||
totals: Dict[str, Dict[str, float]],
|
||||
fmt: Callable[[str, Dict[str, float]], str],
|
||||
limit: int,
|
||||
) -> List[str]:
|
||||
"""Sort by spend descending, format each entry, and truncate."""
|
||||
return [
|
||||
fmt(name, vals)
|
||||
for name, vals in sorted(totals.items(), key=lambda x: -x[1].get("spend", 0))[
|
||||
:limit
|
||||
]
|
||||
]
|
||||
|
||||
|
||||
def _summarise_usage_data(data: Dict[str, Any]) -> str:
    """Render the aggregated usage payload as concise text for the LLM.

    Emits overall totals followed by ranked model and provider breakdowns.
    """
    meta = data.get("metadata", {})
    daily_rows = data.get("results", [])

    # Overall totals come straight from the response metadata.
    header = (
        f"Total Spend: ${meta.get('total_spend', 0):.4f}\n"
        f"Total Requests: {meta.get('total_api_requests', 0)}\n"
        f"Successful: {meta.get('total_successful_requests', 0)} | "
        f"Failed: {meta.get('total_failed_requests', 0)}\n"
        f"Total Tokens: {meta.get('total_tokens', 0)}"
    )

    model_totals = _accumulate_breakdown(
        daily_rows, "models", ["spend", "api_requests", "total_tokens"]
    )
    provider_totals = _accumulate_breakdown(
        daily_rows, "providers", ["spend", "api_requests"]
    )

    model_lines = _ranked_lines(
        model_totals,
        lambda n, d: f" - {n}: ${d['spend']:.4f} ({int(d['api_requests'])} reqs, {int(d['total_tokens'])} tokens)",
        TOP_N_MODELS,
    )
    provider_lines = _ranked_lines(
        provider_totals,
        lambda n, d: f" - {n}: ${d['spend']:.4f} ({int(d['api_requests'])} reqs)",
        TOP_N_PROVIDERS,
    )

    sections: List[str] = [header, "", "Top Models by Spend:"]
    sections.extend(model_lines or [" (no data)"])
    sections.append("")
    sections.append("Top Providers by Spend:")
    sections.extend(provider_lines or [" (no data)"])
    return "\n".join(sections)
|
||||
|
||||
|
||||
def _summarise_entity_data(data: Dict[str, Any], entity_label: str) -> str:
|
||||
"""Summarise team/tag entity usage data."""
|
||||
results = data.get("results", [])
|
||||
if not results:
|
||||
return f"No {entity_label} usage data found for the given date range."
|
||||
|
||||
totals: Dict[str, Dict[str, Any]] = {}
|
||||
for day in results:
|
||||
for eid, entry in day.get("breakdown", {}).get("entities", {}).items():
|
||||
if eid not in totals:
|
||||
alias = entry.get("metadata", {}).get("alias", eid)
|
||||
totals[eid] = {"alias": alias, "spend": 0.0, "requests": 0, "tokens": 0}
|
||||
m = entry.get("metrics", {})
|
||||
totals[eid]["spend"] += m.get("spend", 0)
|
||||
totals[eid]["requests"] += m.get("api_requests", 0)
|
||||
totals[eid]["tokens"] += m.get("total_tokens", 0)
|
||||
|
||||
lines = [f"{entity_label} Usage ({len(totals)} {entity_label.lower()}s):", ""]
|
||||
for eid, d in sorted(totals.items(), key=lambda x: -x[1]["spend"]):
|
||||
label = d["alias"] if d["alias"] != eid else eid
|
||||
lines.append(
|
||||
f"- {label} (ID: {eid}): ${d['spend']:.4f} | "
|
||||
f"{int(d['requests'])} reqs | {int(d['tokens'])} tokens"
|
||||
)
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool dispatch registry
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Dispatch registry: maps each tool name (as declared in the function-calling
# schemas above) to its async fetcher + summariser pair. `label` is surfaced
# in SSE tool_call events and error messages.
TOOL_HANDLERS: Dict[str, ToolHandler] = {
    "get_usage_data": ToolHandler(
        fetch=_fetch_usage_data,
        summarise=_summarise_usage_data,
        label="global usage data",
    ),
    "get_team_usage_data": ToolHandler(
        fetch=_fetch_team_usage_data,
        summarise=lambda data: _summarise_entity_data(data, "Team"),
        label="team usage data",
    ),
    "get_tag_usage_data": ToolHandler(
        fetch=_fetch_tag_usage_data,
        summarise=lambda data: _summarise_entity_data(data, "Tag"),
        label="tag usage data",
    ),
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SSE streaming
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _sse(event: SSEEvent) -> str:
    """Serialise an event dict as a single SSE `data:` frame."""
    payload = json.dumps(event)
    return "data: " + payload + "\n\n"
|
||||
|
||||
|
||||
def _resolve_fetch_kwargs(
|
||||
fn_name: str,
|
||||
fn_args: Dict[str, str],
|
||||
user_id: Optional[str],
|
||||
is_admin: bool,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build keyword arguments for a tool's fetch function."""
|
||||
start_date = fn_args.get("start_date", "")
|
||||
end_date = fn_args.get("end_date", "")
|
||||
if not start_date or not end_date:
|
||||
raise ValueError("Missing required start_date or end_date from tool arguments")
|
||||
kwargs: Dict[str, Any] = {"start_date": start_date, "end_date": end_date}
|
||||
if fn_name == "get_usage_data":
|
||||
if not is_admin:
|
||||
kwargs["user_id"] = user_id
|
||||
elif fn_args.get("user_id"):
|
||||
kwargs["user_id"] = fn_args["user_id"]
|
||||
elif fn_name == "get_team_usage_data" and fn_args.get("team_ids"):
|
||||
kwargs["team_ids"] = fn_args["team_ids"]
|
||||
elif fn_name == "get_tag_usage_data" and fn_args.get("tags"):
|
||||
kwargs["tags"] = fn_args["tags"]
|
||||
return kwargs
|
||||
|
||||
|
||||
async def _execute_tool_call(
    handler: ToolHandler,
    fn_name: str,
    fn_args: Dict[str, str],
    user_id: Optional[str],
    is_admin: bool,
) -> str:
    """Run a single tool and return the summarised result text."""
    fetch_kwargs = _resolve_fetch_kwargs(fn_name, fn_args, user_id, is_admin)
    fetched = await handler["fetch"](**fetch_kwargs)
    summarise = handler["summarise"]
    return summarise(fetched)
|
||||
|
||||
|
||||
async def _process_tool_call(
    tc: Any,
    chat_messages: List[Dict[str, Any]],
    user_id: Optional[str],
    is_admin: bool,
) -> AsyncIterator[str]:
    """Execute a single tool call, yielding SSE events for status.

    Always appends a `role: tool` reply for ``tc.id`` to ``chat_messages`` —
    even on failure — so the follow-up completion request stays consistent
    with the tool_calls message the model emitted.
    """
    fn_name = tc.function.name
    try:
        fn_args = json.loads(tc.function.arguments)
    except (json.JSONDecodeError, TypeError) as e:
        # Models can emit syntactically invalid JSON arguments; fail just this
        # tool call instead of letting the exception abort the whole stream.
        verbose_proxy_logger.error(
            "Tool %s received invalid arguments: %s", fn_name, e
        )
        chat_messages.append(
            {
                "role": "tool",
                "tool_call_id": tc.id,
                "content": f"Invalid tool arguments for {fn_name}",
            }
        )
        return

    # Role-based allow-list: non-admins only see the base tool set.
    allowed_names = {t["function"]["name"] for t in get_tools_for_role(is_admin)}
    handler = TOOL_HANDLERS.get(fn_name)

    if fn_name not in allowed_names or not handler:
        chat_messages.append(
            {
                "role": "tool",
                "tool_call_id": tc.id,
                "content": f"Tool not available: {fn_name}",
            }
        )
        return

    tool_event_base = {
        "type": "tool_call",
        "tool_name": fn_name,
        "tool_label": handler["label"],
        "arguments": fn_args,
    }
    yield _sse(cast(SSEToolCallEvent, {**tool_event_base, "status": "running"}))

    try:
        tool_result = await _execute_tool_call(
            handler, fn_name, fn_args, user_id, is_admin
        )
        yield _sse(cast(SSEToolCallEvent, {**tool_event_base, "status": "complete"}))
    except Exception as e:
        # Best-effort: report the failure to the model/client and continue.
        verbose_proxy_logger.error("Tool %s failed: %s", fn_name, e)
        tool_result = f"Error fetching {handler['label']}. Please try again."
        yield _sse(cast(SSEToolCallEvent, {**tool_event_base, "status": "error"}))

    chat_messages.append(
        {"role": "tool", "tool_call_id": tc.id, "content": tool_result}
    )
|
||||
|
||||
|
||||
async def _stream_final_response(
    model: str, chat_messages: List[Dict[str, Any]]
) -> AsyncIterator[str]:
    """Stream the final LLM response after tool results are appended."""
    yield _sse({"type": "status", "message": "Analyzing results..."})

    completion_stream = await litellm.acompletion(
        model=model,
        messages=chat_messages,
        stream=True,
        temperature=USAGE_AI_TEMPERATURE,
    )
    async for piece in completion_stream:
        text = piece.choices[0].delta.content
        if text:
            yield _sse({"type": "chunk", "content": text})
|
||||
|
||||
|
||||
async def stream_usage_ai_chat(
    messages: List[Dict[str, str]],
    model: Optional[str] = None,
    user_id: Optional[str] = None,
    is_admin: bool = False,
) -> AsyncIterator[str]:
    """Stream SSE events: status → tool_call → chunk → done.

    Args:
        messages: user/assistant chat history (the system prompt is prepended
            here; callers should not include one).
        model: optional model override; blank/None falls back to
            DEFAULT_COMPETITOR_DISCOVERY_MODEL.
        user_id: requesting user's id; non-admin tool calls are scoped to it.
        is_admin: enables the team/tag tools and unscoped user queries.

    Any exception is caught and surfaced as a single SSE "error" event so the
    HTTP stream always terminates cleanly.
    """
    resolved_model = (model or "").strip() or DEFAULT_COMPETITOR_DISCOVERY_MODEL
    # Cap the history so the prompt cannot grow without bound.
    truncated = (
        messages[-MAX_CHAT_MESSAGES:] if len(messages) > MAX_CHAT_MESSAGES else messages
    )
    chat_messages: List[Dict[str, Any]] = [
        {"role": "system", "content": _build_system_prompt(is_admin)},
        *truncated,
    ]

    try:
        yield _sse({"type": "status", "message": "Thinking..."})
        tools = get_tools_for_role(is_admin)
        # First pass: let the model decide which tool(s) to call.
        response = await litellm.acompletion(
            model=resolved_model,
            messages=chat_messages,
            tools=tools,
            temperature=USAGE_AI_TEMPERATURE,
        )
        choice = response.choices[0]  # type: ignore

        if not choice.message.tool_calls:
            # Model answered directly without tools — relay and finish.
            if choice.message.content:
                yield _sse({"type": "chunk", "content": choice.message.content})
            yield _sse({"type": "done"})
            return

        # Echo the assistant tool-call message before appending tool results,
        # as the chat-completions protocol requires.
        chat_messages.append(choice.message.model_dump())
        for tc in choice.message.tool_calls:
            async for event in _process_tool_call(tc, chat_messages, user_id, is_admin):
                yield event
        # Second pass: stream the answer grounded in the tool output.
        async for event in _stream_final_response(resolved_model, chat_messages):
            yield event
        yield _sse({"type": "done"})

    except Exception as e:
        verbose_proxy_logger.error("AI usage chat failed: %s", e)
        # Generic message only — internal details stay in the server log.
        yield _sse(
            {
                "type": "error",
                "message": "An internal error occurred. Please try again.",
            }
        )
|
||||
@@ -0,0 +1,65 @@
|
||||
"""
|
||||
USAGE AI CHAT ENDPOINTS
|
||||
|
||||
/usage/ai/chat - Stream AI chat responses about usage data
|
||||
"""
|
||||
|
||||
from typing import List, Literal, Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, Request
|
||||
from fastapi.responses import StreamingResponse
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||
|
||||
# Registered on the proxy app; re-exported by the package __init__.
router = APIRouter()
|
||||
|
||||
|
||||
class ChatMessage(BaseModel):
    """A single chat turn; only user/assistant roles are accepted from clients."""

    role: Literal["user", "assistant"]
    content: str
|
||||
|
||||
|
||||
class UsageAIChatRequest(BaseModel):
    """Request body for POST /usage/ai/chat."""

    messages: List[ChatMessage] = Field(
        ..., description="Chat messages (user/assistant history)"
    )
    model: Optional[str] = Field(default=None, description="Model to use for AI chat")
|
||||
|
||||
|
||||
@router.post(
    "/usage/ai/chat",
    tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
)
async def usage_ai_chat(
    data: UsageAIChatRequest,
    request: Request,  # NOTE(review): unused in the body — confirm it can be dropped
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    AI chat about usage data. Streams SSE events with the AI response.

    The AI agent has access to tools that query aggregated daily activity data.
    Admin callers additionally get the team/tag breakdown tools; non-admin
    callers are scoped to their own user_id.
    """
    # NOTE(review): imports are local — presumably to avoid import cycles at
    # module load; confirm before hoisting.
    from litellm.proxy.management_endpoints.common_utils import (
        _user_has_admin_view,
    )
    from litellm.proxy.management_endpoints.usage_endpoints.ai_usage_chat import (
        stream_usage_ai_chat,
    )

    is_admin = _user_has_admin_view(user_api_key_dict)
    user_id = user_api_key_dict.user_id
    # Pydantic models -> plain dicts for the LLM client.
    messages = [{"role": m.role, "content": m.content} for m in data.messages]

    return StreamingResponse(
        stream_usage_ai_chat(
            messages=messages,
            model=data.model,
            user_id=user_id,
            is_admin=is_admin,
        ),
        media_type="text/event-stream",
        # Disable response buffering so SSE chunks flush immediately.
        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
    )
|
||||
Reference in New Issue
Block a user