lijiaoqiao/llm-gateway-competitors/litellm-wheel-src/litellm/llms/perplexity/cost_calculator.py

"""
Helper util for handling perplexity-specific cost calculation
- e.g.: citation tokens, search queries
"""

from typing import Tuple, Union

from litellm.types.utils import Usage
from litellm.utils import get_model_info


def _coerce_float(value: object) -> Union[float, None]:
    """Return ``value`` as a float, or ``None`` if it is missing or not numeric."""
    if value is None:
        return None
    try:
        return float(value)  # type: ignore
    except (ValueError, TypeError):
        return None


def _safe_float_cast(
    value: Union[str, int, float, None, object], default: float = 0.0
) -> float:
    """Safely cast a value to float, returning ``default`` when it cannot be cast."""
    parsed = _coerce_float(value)
    return default if parsed is None else parsed


def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
    """
    Calculates the prompt and completion cost (in USD) for a Perplexity request.

    If ``usage.cost`` is a dict with a numeric ``total_cost``, that value is
    returned as-is. Otherwise the cost is derived from the model's pricing info
    and the token / search-query counts found on ``usage``.

    Input:
        - model: str, the model name without provider prefix
        - usage: LiteLLM Usage block, containing perplexity-specific usage information

    Returns:
        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
        When the pre-calculated cost is used, the tuple is ``(0.0, total_cost)``.
    """
    ## USE PRE-CALCULATED COST FROM PERPLEXITY IF AVAILABLE
    ## Perplexity returns accurate cost in usage.cost.total_cost including request fees
    cost_info = getattr(usage, "cost", None)
    if isinstance(cost_info, dict):
        # A missing or non-numeric total_cost must not abort cost tracking;
        # in that case fall through to the manual calculation below.
        total_cost = _coerce_float(cost_info.get("total_cost"))
        if total_cost is not None:
            # Return total cost as completion_cost (prompt_cost=0) since Perplexity
            # doesn't break down by input/output in their cost object
            return (0.0, total_cost)

    ## FALLBACK: Calculate cost manually if Perplexity doesn't provide it
    ## GET MODEL INFO
    model_info = get_model_info(model=model, custom_llm_provider="perplexity")

    ## CALCULATE INPUT COST
    input_cost_per_token = _safe_float_cast(model_info.get("input_cost_per_token"))
    prompt_cost: float = (usage.prompt_tokens or 0) * input_cost_per_token

    ## ADD CITATION TOKENS COST (if present)
    citation_tokens = getattr(usage, "citation_tokens", 0) or 0
    citation_cost_value = model_info.get("citation_cost_per_token")
    if citation_tokens > 0 and citation_cost_value is not None:
        prompt_cost += citation_tokens * _safe_float_cast(citation_cost_value)

    ## CALCULATE OUTPUT COST
    output_cost_per_token = _safe_float_cast(model_info.get("output_cost_per_token"))
    completion_cost: float = (usage.completion_tokens or 0) * output_cost_per_token

    ## ADD REASONING TOKENS COST (if present)
    reasoning_tokens = getattr(usage, "reasoning_tokens", 0) or 0
    if reasoning_tokens == 0:
        # Also check completion_tokens_details if reasoning_tokens is not
        # directly available on the usage object
        completion_details = getattr(usage, "completion_tokens_details", None)
        if completion_details:
            reasoning_tokens = getattr(completion_details, "reasoning_tokens", 0) or 0

    reasoning_cost_value = model_info.get("output_cost_per_reasoning_token")
    if reasoning_tokens > 0 and reasoning_cost_value is not None:
        completion_cost += reasoning_tokens * _safe_float_cast(reasoning_cost_value)

    ## ADD SEARCH QUERIES COST (if present)
    num_search_queries = 0
    prompt_details = getattr(usage, "prompt_tokens_details", None)
    if prompt_details:
        num_search_queries = getattr(prompt_details, "web_search_requests", 0) or 0

    # Check both possible keys for search cost (legacy and current).
    # A falsy legacy value (missing, 0, empty) defers to the current key.
    search_cost_value = model_info.get(
        "search_queries_cost_per_query"
    ) or model_info.get("search_context_cost_per_query")
    if num_search_queries > 0 and search_cost_value is not None:
        # Handle both dict and float formats
        if isinstance(search_cost_value, dict):
            # Use the "low" size as default - tests expect 0.005 / 1000
            # NOTE(review): only this branch divides by 1000; the scalar branch
            # uses the value as-is - confirm the intended units of each format.
            search_cost_per_query = (
                _safe_float_cast(search_cost_value.get("search_context_size_low", 0))
                / 1000
            )
        else:
            search_cost_per_query = _safe_float_cast(search_cost_value)
        # Add search cost to completion cost (similar to how other providers handle it)
        completion_cost += num_search_queries * search_cost_per_query

    return prompt_cost, completion_cost