# litellm/llms/anthropic/count_tokens/handler.py
"""
Anthropic CountTokens API handler.
Uses httpx for HTTP requests instead of the Anthropic SDK.
"""
from typing import Any, Dict, List, Optional, Union
import httpx
import litellm
from litellm._logging import verbose_logger
from litellm.llms.anthropic.common_utils import AnthropicError
from litellm.llms.anthropic.count_tokens.transformation import (
AnthropicCountTokensConfig,
)
from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
class AnthropicCountTokensHandler(AnthropicCountTokensConfig):
    """
    Handler for Anthropic CountTokens API requests.

    Uses httpx for HTTP requests, following the same pattern as
    BedrockCountTokensHandler.
    """

    async def handle_count_tokens_request(
        self,
        model: str,
        messages: List[Dict[str, Any]],
        api_key: str,
        api_base: Optional[str] = None,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        tools: Optional[List[Dict[str, Any]]] = None,
        system: Optional[Any] = None,
    ) -> Dict[str, Any]:
        """
        Handle a CountTokens request using httpx.

        Args:
            model: The model identifier (e.g., "claude-3-5-sonnet-20241022")
            messages: The messages to count tokens for
            api_key: The Anthropic API key
            api_base: Optional custom API base URL
            timeout: Optional timeout for the request (defaults to
                litellm.request_timeout)
            tools: Optional tool definitions forwarded into the count request
            system: Optional system prompt forwarded into the count request

        Returns:
            Dictionary containing the token count response (returned directly
            from the Anthropic API, no transformation applied).

        Raises:
            AnthropicError: If validation fails or the API request fails.
        """
        try:
            # Validate the request before building the payload.
            self.validate_request(model, messages)

            # Lazy %-style args: avoids formatting large payloads when
            # verbose logging is disabled.
            verbose_logger.debug(
                "Processing Anthropic CountTokens request for model: %s", model
            )

            # Transform request to Anthropic count_tokens format.
            request_body = self.transform_request_to_count_tokens(
                model=model,
                messages=messages,
                tools=tools,
                system=system,
            )
            verbose_logger.debug("Transformed request: %s", request_body)

            # Custom api_base (if provided) takes precedence over the default
            # Anthropic count_tokens endpoint.
            endpoint_url = api_base or self.get_anthropic_count_tokens_endpoint()
            verbose_logger.debug("Making request to: %s", endpoint_url)

            headers = self.get_required_headers(api_key)

            # Use LiteLLM's shared async httpx client for the Anthropic provider.
            async_client = get_async_httpx_client(
                llm_provider=litellm.LlmProviders.ANTHROPIC
            )

            # Use provided timeout or fall back to litellm.request_timeout.
            request_timeout = (
                timeout if timeout is not None else litellm.request_timeout
            )

            response = await async_client.post(
                endpoint_url,
                headers=headers,
                json=request_body,
                timeout=request_timeout,
            )
            verbose_logger.debug("Response status: %s", response.status_code)

            if response.status_code != 200:
                error_text = response.text
                verbose_logger.error("Anthropic API error: %s", error_text)
                raise AnthropicError(
                    status_code=response.status_code,
                    message=error_text,
                )

            anthropic_response = response.json()
            verbose_logger.debug("Anthropic response: %s", anthropic_response)

            # Return Anthropic response directly - no transformation needed.
            return anthropic_response
        except AnthropicError:
            # Re-raise Anthropic exceptions as-is.
            raise
        except httpx.HTTPStatusError as e:
            # HTTP errors - preserve the actual status code and chain the
            # original exception for debuggability.
            verbose_logger.error("HTTP error in CountTokens handler: %s", str(e))
            raise AnthropicError(
                status_code=e.response.status_code,
                message=e.response.text,
            ) from e
        except Exception as e:
            # Boundary handler: surface any unexpected failure as a 500-style
            # AnthropicError, preserving the original cause.
            verbose_logger.error("Error in CountTokens handler: %s", str(e))
            raise AnthropicError(
                status_code=500,
                message=f"CountTokens processing error: {str(e)}",
            ) from e