84 lines
3.1 KiB
Python
"""
|
|
Helper util for handling XAI-specific cost calculation
|
|
- Uses the generic cost calculator which already handles tiered pricing correctly
|
|
- Handles XAI-specific reasoning token billing (billed as part of completion tokens)
|
|
"""
|
|
|
|
from typing import TYPE_CHECKING, Tuple
|
|
|
|
from litellm.types.utils import Usage
|
|
from litellm.litellm_core_utils.llm_cost_calc.utils import generic_cost_per_token
|
|
|
|
if TYPE_CHECKING:
|
|
from litellm.types.utils import ModelInfo
|
|
|
|
|
|
def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
    """
    Compute the prompt and completion cost in USD for an XAI model.

    XAI bills reasoning tokens as part of completion tokens, so any
    reported reasoning-token count is folded into the completion total
    before all pricing logic is delegated to the generic calculator
    (which already handles tiered pricing correctly).

    Args:
        model: Model name without the provider prefix.
        usage: LiteLLM Usage block containing XAI usage information.

    Returns:
        Tuple[float, float]: (prompt_cost_in_usd, completion_cost_in_usd).
    """
    # Visible completion tokens; treat missing/falsy values as zero.
    visible_completion = int(getattr(usage, "completion_tokens", 0) or 0)

    # Reasoning tokens, when reported, live inside completion_tokens_details.
    details = getattr(usage, "completion_tokens_details", None)
    reasoning = int(getattr(details, "reasoning_tokens", 0) or 0) if details else 0

    # Rebuild the usage block with reasoning tokens folded into completion
    # tokens. completion_tokens_details is cleared so the generic calculator
    # cannot price the reasoning tokens a second time.
    billed_usage = Usage(
        prompt_tokens=usage.prompt_tokens,
        completion_tokens=visible_completion + reasoning,
        total_tokens=usage.total_tokens,
        prompt_tokens_details=usage.prompt_tokens_details,
        completion_tokens_details=None,
    )

    return generic_cost_per_token(
        model=model, usage=billed_usage, custom_llm_provider="xai"
    )
|
|
|
|
|
|
def cost_per_web_search_request(usage: "Usage", model_info: "ModelInfo") -> float:
    """
    Calculate the cost of web search requests for X.AI models.

    X.AI Live Search costs $25 per 1,000 sources used, i.e. $0.025 per
    source. The number of sources is read primarily from
    ``prompt_tokens_details.web_search_requests`` (populated by the
    transformation layer for compatibility with the existing detection
    system), falling back to a directly set ``usage.num_sources_used``.
    """
    # $25 per 1,000 sources = $0.025 per source.
    cost_per_source = 25.0 / 1000.0

    num_sources_used = 0

    details = getattr(usage, "prompt_tokens_details", None)
    if details is not None and getattr(details, "web_search_requests", None) is not None:
        # Primary location, set by the transformation layer.
        num_sources_used = int(details.web_search_requests)
    elif getattr(usage, "num_sources_used", None) is not None:
        # Fallback: the count was set directly on the usage object.
        num_sources_used = int(usage.num_sources_used)

    return cost_per_source * num_sources_used
|