lijiaoqiao/llm-gateway-competitors/litellm-wheel-src/litellm/llms/anthropic/cost_calculation.py

"""
Helper util for handling anthropic-specific cost calculation
- e.g.: prompt caching
"""

from typing import TYPE_CHECKING, Optional, Tuple

from litellm.litellm_core_utils.llm_cost_calc.utils import (
    _get_token_base_cost,
    _parse_prompt_tokens_details,
    calculate_cache_writing_cost,
    generic_cost_per_token,
)

if TYPE_CHECKING:
    from litellm.types.utils import ModelInfo, Usage
import litellm


def _compute_cache_only_cost(model_info: "ModelInfo", usage: "Usage") -> float:
    """
    Compute the part of the prompt cost that comes from prompt caching.

    The result is the cache-read cost plus, when the usage block reports any
    cache creation, the cache-write cost. Callers keep this amount out of
    geo/speed multiplier scaling: the old explicit ``fast/`` model entries
    kept cache rates unchanged and multiplied only the regular input/output
    token costs.

    Returns 0.0 when ``usage.prompt_tokens_details`` is None.
    """
    if usage.prompt_tokens_details is None:
        return 0.0

    details = _parse_prompt_tokens_details(usage)
    # Only the last three rates of the base-cost tuple are cache related.
    _, _, write_rate, write_rate_above_1hr, read_rate = _get_token_base_cost(
        model_info=model_info, usage=usage
    )

    read_cost = float(details["cache_hit_tokens"]) * read_rate

    creation_tokens = details["cache_creation_tokens"]
    creation_token_details = details["cache_creation_token_details"]

    # No cache writes reported at all -> the read cost is the whole answer.
    if not creation_tokens and creation_token_details is None:
        return read_cost

    write_cost = calculate_cache_writing_cost(
        cache_creation_tokens=creation_tokens,
        cache_creation_token_details=creation_token_details,
        cache_creation_cost_above_1hr=write_rate_above_1hr,
        cache_creation_cost=write_rate,
    )
    return read_cost + write_cost


def cost_per_token(model: str, usage: "Usage") -> Tuple[float, float]:
    """
    Calculates the prompt cost and completion cost of an Anthropic request.

    The base costs come from ``generic_cost_per_token``. If the model's
    ``provider_specific_entry`` defines a multiplier for the request's
    ``inference_geo`` and/or for ``speed == "fast"``, the combined multiplier
    is applied to the completion cost and to the non-cache part of the prompt
    cost (cache read/write costs are left unscaled).

    Applying the multiplier is best-effort: if anything in that step fails,
    the unscaled base costs are returned and the failure is logged at debug
    level.

    Input:
        - model: str, the model name without provider prefix
        - usage: LiteLLM Usage block, containing anthropic caching information

    Returns:
        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
    """
    prompt_cost, completion_cost = generic_cost_per_token(
        model=model, usage=usage, custom_llm_provider="anthropic"
    )

    # Apply provider_specific_entry multipliers for geo/speed routing
    try:
        model_info = litellm.get_model_info(
            model=model, custom_llm_provider="anthropic"
        )
        provider_specific_entry: dict = model_info.get("provider_specific_entry") or {}

        multiplier = 1.0

        # "global" / "not_available" mean no geo-specific pricing applies.
        inference_geo = getattr(usage, "inference_geo", None)
        if inference_geo:
            geo_key = inference_geo.lower()
            if geo_key not in ("global", "not_available"):
                multiplier *= provider_specific_entry.get(geo_key, 1.0)

        if getattr(usage, "speed", None) == "fast":
            multiplier *= provider_specific_entry.get("fast", 1.0)

        if multiplier != 1.0:
            cache_cost = _compute_cache_only_cost(model_info=model_info, usage=usage)
            # Compute both adjusted values before assigning so that a failure
            # can never leave one cost scaled and the other unscaled.
            adjusted_prompt_cost = (prompt_cost - cache_cost) * multiplier + cache_cost
            adjusted_completion_cost = completion_cost * multiplier
            prompt_cost, completion_cost = (
                adjusted_prompt_cost,
                adjusted_completion_cost,
            )
    except Exception as e:
        # Best-effort: keep the base costs, but leave a trace instead of
        # silently discarding the error.
        from litellm._logging import verbose_logger

        verbose_logger.debug(
            "anthropic cost_per_token: skipping geo/speed multiplier for model=%s: %s",
            model,
            e,
        )

    return prompt_cost, completion_cost


def get_cost_for_anthropic_web_search(
    model_info: Optional["ModelInfo"] = None,
    usage: Optional["Usage"] = None,
) -> float:
    """
    Get the cost of Anthropic web search tool requests recorded in ``usage``.

    The cost is the model's ``search_context_size_medium`` price (taken from
    ``search_context_cost_per_query`` in ``model_info``) multiplied by
    ``usage.server_tool_use.web_search_requests``.

    Returns 0.0 when ``model_info`` or ``usage`` is None, when the usage block
    carries no web search request count, or when no non-zero price is
    configured.
    """
    from litellm.types.utils import SearchContextCostPerQuery

    ## Nothing to price without model info or a usage block
    if model_info is None or usage is None:
        return 0.0

    ## Check if web search requests are in the usage object
    server_tool_use = usage.server_tool_use
    if server_tool_use is None:
        return 0.0

    request_count = server_tool_use.web_search_requests
    if request_count is None:
        return 0.0

    ## Get the cost per web search request
    pricing: SearchContextCostPerQuery = (
        model_info.get("search_context_cost_per_query") or SearchContextCostPerQuery()
    )
    price_per_request = pricing.get("search_context_size_medium", 0.0)
    if price_per_request is None or price_per_request == 0.0:
        return 0.0

    ## Calculate the total cost
    return price_per_request * request_count