Files
lijiaoqiao/llm-gateway-competitors/litellm-wheel-src/litellm/llms/custom_httpx/http_handler.py

1304 lines
47 KiB
Python
Raw Normal View History

import asyncio
import os
import ssl
import sys
import time
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
List,
Mapping,
Optional,
Tuple,
Union,
)
import certifi
import httpx
from aiohttp import ClientSession, TCPConnector
from httpx import USE_CLIENT_DEFAULT, AsyncHTTPTransport, HTTPTransport
from httpx._types import RequestFiles
import litellm
from litellm._logging import verbose_logger
from litellm.constants import (
_DEFAULT_TTL_FOR_HTTPX_CLIENTS,
AIOHTTP_CONNECTOR_LIMIT,
AIOHTTP_CONNECTOR_LIMIT_PER_HOST,
AIOHTTP_KEEPALIVE_TIMEOUT,
AIOHTTP_NEEDS_CLEANUP_CLOSED,
AIOHTTP_TTL_DNS_CACHE,
DEFAULT_SSL_CIPHERS,
)
from litellm.litellm_core_utils.logging_utils import track_llm_api_timing
from litellm.types.llms.custom_http import *
if TYPE_CHECKING:
    # Imported for static analysis only; the runtime branch below binds the
    # same names to ``Any`` so annotations in this module still resolve.
    from litellm import LlmProviders
    from litellm.litellm_core_utils.litellm_logging import (
        Logging as LiteLLMLoggingObject,
    )
    from litellm.llms.custom_httpx.aiohttp_transport import LiteLLMAiohttpTransport
else:
    LlmProviders = LiteLLMLoggingObject = LiteLLMAiohttpTransport = Any

# Package version used in the default User-Agent; fall back to a placeholder
# when the version module cannot be imported.
try:
    from litellm._version import version
except Exception:
    version = "0.0.0"
def get_default_headers() -> dict:
    """
    Build the default headers sent with HTTP requests.

    - Default: `User-Agent: litellm/{version}`
    - Override: when the `LITELLM_USER_AGENT` environment variable is set
      (even to an empty string) its value replaces the header value entirely.

    Returns:
        dict: A single-entry mapping containing the `User-Agent` header.
    """
    override = os.environ.get("LITELLM_USER_AGENT")
    agent = f"litellm/{version}" if override is None else override
    return {"User-Agent": agent}
# Module-level default headers (User-Agent), computed once at import time.
headers = get_default_headers()
# Fallback timeout used when a handler is constructed without an explicit
# timeout: 5 seconds by default and 5 seconds for connecting.
# See https://www.python-httpx.org/advanced/timeouts
_DEFAULT_TIMEOUT = httpx.Timeout(timeout=5.0, connect=5.0)
def _prepare_request_data_and_content(
    data: Optional[Union[dict, str, bytes]] = None,
    content: Any = None,
) -> Tuple[Optional[Union[dict, Mapping]], Any]:
    """
    Route `data` / `content` to the httpx parameter each one belongs in.

    Background:
    - httpx emits a DeprecationWarning when bytes/str are passed via `data=`;
      it expects raw payloads in `content=` instead.
    - Per the original author's note, that warning leaks memory when it is
      triggered repeatedly, so it is avoided up front.

    Routing rules:
    - bytes/str given as `data` are moved to `content` (unless `content` was
      already supplied, in which case the explicit `content` wins and the raw
      `data` is dropped).
    - Any other non-None `data` (dict/Mapping) stays in `data`.

    Args:
        data: Request data (dict, str, or bytes).
        content: Raw request content.

    Returns:
        Tuple of (request_data, request_content) ready for `build_request`.
    """
    if data is None:
        return None, content
    if not isinstance(data, (bytes, str)):
        # Form-style payloads remain in data=.
        return data, content
    # Raw payload: goes to content= only when the caller did not set one.
    return None, (data if content is None else content)
# Cache of SSL contexts, so that requests for the same configuration reuse one
# context instead of building a duplicate.
# Key: tuple of (cafile, ssl_security_level, ssl_ecdh_curve)
# Value: the ssl.SSLContext built for that configuration
# Entries are only ever added (by get_ssl_configuration); nothing in this
# module evicts them.
_ssl_context_cache: Dict[
    Tuple[Optional[str], Optional[str], Optional[str]], ssl.SSLContext
] = {}
def _create_ssl_context(
    cafile: Optional[str],
    ssl_security_level: Optional[str],
    ssl_ecdh_curve: Optional[str],
) -> ssl.SSLContext:
    """
    Build a new SSL context for the given configuration.

    Kept separate from get_ssl_configuration so the result can be cached there.

    Args:
        cafile: Path to the CA bundle handed to `ssl.create_default_context`.
        ssl_security_level: Optional cipher string; when it is a non-empty
            string it is passed to `set_ciphers`, otherwise
            `DEFAULT_SSL_CIPHERS` is used.
        ssl_ecdh_curve: Optional ECDH curve name (e.g. X25519, prime256v1,
            secp384r1, secp521r1). Unsupported or invalid values are logged
            as warnings and the default curves are kept.

    Returns:
        ssl.SSLContext: The configured context (minimum TLS version 1.2).
    """
    context = ssl.create_default_context(cafile=cafile)
    # Refuse anything older than TLS 1.2.
    context.minimum_version = ssl.TLSVersion.TLSv1_2

    # A user-supplied cipher string (e.g. from SSL_SECURITY_LEVEL) takes
    # precedence over the library's default cipher list.
    use_custom_ciphers = bool(ssl_security_level) and isinstance(
        ssl_security_level, str
    )
    context.set_ciphers(
        ssl_security_level if use_custom_ciphers else DEFAULT_SSL_CIPHERS
    )

    # No curve requested: nothing more to configure.
    if not (ssl_ecdh_curve and isinstance(ssl_ecdh_curve, str)):
        return context

    # Optional key-exchange curve, e.g. 'X25519' to disable PQC.
    try:
        context.set_ecdh_curve(ssl_ecdh_curve)
        verbose_logger.debug(f"SSL ECDH curve set to: {ssl_ecdh_curve}")
    except AttributeError:
        # set_ecdh_curve is unavailable in this Python/OpenSSL build.
        verbose_logger.warning(
            f"SSL ECDH curve configuration not supported. "
            f"Python version: {sys.version.split()[0]}, OpenSSL version: {ssl.OPENSSL_VERSION}. "
            f"Requested curve: {ssl_ecdh_curve}. Continuing with default curves."
        )
    except ValueError as err:
        # The curve name was rejected.
        verbose_logger.warning(
            f"Invalid SSL ECDH curve name: '{ssl_ecdh_curve}'. {err}. "
            f"Common valid curves: X25519, prime256v1, secp384r1, secp521r1. "
            f"Continuing with default curves (including PQC)."
        )
    return context
def get_ssl_verify(
    ssl_verify: Optional[Union[bool, str]] = None,
) -> Union[bool, str]:
    """
    Resolve the effective SSL verification setting.

    Resolution order:
    1. The `ssl_verify` argument, when not None
    2. os.environ["SSL_VERIFY"]
    3. litellm.ssl_verify
    4. os.environ["SSL_CERT_FILE"], consulted only when the resolved value is
       True and the file exists

    A string value that names an existing path is returned unchanged (CA
    bundle path); otherwise it is converted with `str_to_bool` when that
    yields a boolean.

    Returns:
        Union[bool, str]: The resolved setting (bool or path to CA bundle).
        Falls back to True when nothing is configured.
    """
    from litellm.secret_managers.main import str_to_bool

    resolved = ssl_verify
    if resolved is None:
        resolved = os.getenv("SSL_VERIFY", litellm.ssl_verify)

    if isinstance(resolved, str):
        # An existing file path is treated as a CA bundle and wins outright.
        if os.path.exists(resolved):
            return resolved
        # Otherwise try to read it as "True"/"False"; keep the raw string
        # if it is not a recognised boolean.
        parsed = str_to_bool(resolved)
        resolved = resolved if parsed is None else parsed

    if resolved is True:
        # Verification enabled: allow SSL_CERT_FILE to supply the CA bundle.
        cert_file = os.getenv("SSL_CERT_FILE")
        if cert_file and os.path.exists(cert_file):
            return cert_file

    if resolved is None:
        return True
    return resolved
def get_ssl_configuration(
    ssl_verify: Optional[VerifyTypes] = None,
) -> Union[bool, str, ssl.SSLContext]:
    """
    Produce the SSL configuration to hand to an HTTP client.

    Behaviour:
    1. An `ssl.SSLContext` passed as `ssl_verify` is returned unchanged.
    2. Otherwise the setting is resolved through `get_ssl_verify`.
    3. If the resolved value is False, False is returned (verification off).
    4. In every other case a cached `ssl.SSLContext` is returned. Its CA
       bundle is, in order: the resolved value when it is an existing path,
       the SSL_CERT_FILE environment variable when that file exists, or the
       certifi bundle.

    The cipher string (SSL_SECURITY_LEVEL / litellm.ssl_security_level) and
    ECDH curve (SSL_ECDH_CURVE / litellm.ssl_ecdh_curve) are applied to the
    context and are part of the cache key, so each distinct configuration is
    built only once.

    Args:
        ssl_verify: SSL verification setting. Can be:
            - None: use the environment / litellm defaults
            - False: disable SSL verification
            - True: enable SSL verification
            - str: path to a CA bundle file
            - ssl.SSLContext: a ready-made context

    Returns:
        Union[bool, str, ssl.SSLContext]: Appropriate SSL configuration.
    """
    if isinstance(ssl_verify, ssl.SSLContext):
        # Caller supplied a finished context; nothing to resolve.
        return ssl_verify

    resolved = get_ssl_verify(ssl_verify=ssl_verify)
    security_level = os.getenv("SSL_SECURITY_LEVEL", litellm.ssl_security_level)
    ecdh_curve = os.getenv("SSL_ECDH_CURVE", litellm.ssl_ecdh_curve)

    # Pick the CA bundle: explicit path, then SSL_CERT_FILE, then certifi.
    ca_bundle = (
        resolved if isinstance(resolved, str) and os.path.exists(resolved) else None
    )
    if not ca_bundle:
        env_bundle = os.getenv("SSL_CERT_FILE")
        if env_bundle and os.path.exists(env_bundle):
            ca_bundle = env_bundle
        else:
            ca_bundle = certifi.where()

    if resolved is False:
        # Verification explicitly disabled.
        return resolved

    # Reuse a context already built for this exact configuration.
    key = (ca_bundle, security_level, ecdh_curve)
    context = _ssl_context_cache.get(key)
    if context is None:
        context = _create_ssl_context(
            cafile=ca_bundle,
            ssl_security_level=security_level,
            ssl_ecdh_curve=ecdh_curve,
        )
        _ssl_context_cache[key] = context
    return context
# Lazily populated by get_shared_realtime_ssl_context(); None until first use.
_shared_realtime_ssl_context: Optional[Union[bool, str, ssl.SSLContext]] = None


def get_shared_realtime_ssl_context() -> Union[bool, str, ssl.SSLContext]:
    """
    Return the SSL configuration shared by realtime websocket clients.

    The value is created on first call via `get_ssl_configuration()` and then
    reused, which avoids import-order cycles during startup while keeping a
    single shared configuration.
    """
    global _shared_realtime_ssl_context
    if _shared_realtime_ssl_context is not None:
        return _shared_realtime_ssl_context
    _shared_realtime_ssl_context = get_ssl_configuration()
    return _shared_realtime_ssl_context
def mask_sensitive_info(error_message):
    """
    Redact API keys that appear as `key=<value>` inside a message.

    Every occurrence of `key=` is rewritten to `key=[REDACTED_API_KEY]`; the
    redacted value runs up to the next `&` or to the end of the message.
    Both `str` and `bytes` messages are handled and the input type is
    preserved. Any other input (e.g. None) is returned unchanged.

    Args:
        error_message: The text (str or bytes) that may embed an API key.

    Returns:
        The message with all `key=` values redacted, or the original object
        when it is neither str nor bytes.
    """
    import re

    if isinstance(error_message, str):
        return re.sub(r"key=[^&]*", "key=[REDACTED_API_KEY]", error_message)
    if isinstance(error_message, bytes):
        # Streaming error bodies arrive as raw bytes; mask them as well.
        return re.sub(rb"key=[^&]*", b"key=[REDACTED_API_KEY]", error_message)
    return error_message
class MaskedHTTPStatusError(httpx.HTTPStatusError):
    """
    Copy of an `httpx.HTTPStatusError` whose request URL has its API key masked.

    The request and response are rebuilt from the original error (method,
    headers, content, status code) with only the URL passed through
    `mask_sensitive_info`.

    Attributes:
        message: The `message` argument given to the constructor (may be None).
        text: The `text` argument given to the constructor (may be None).
    """

    def __init__(
        self, original_error, message: Optional[str] = None, text: Optional[str] = None
    ):
        """
        Args:
            original_error: The error to copy; must expose `request` and
                `response`.
            message: Value stored on `self.message` after construction.
            text: Value stored on `self.text` after construction.
        """
        source_request = original_error.request
        source_response = original_error.response
        masked_url = mask_sensitive_info(str(source_request.url))
        # `message` is not a standard attribute of httpx errors; it is only
        # present when a handler attached it. Fall back to the exception text
        # so that wrapping a plain HTTPStatusError does not raise AttributeError.
        base_message = getattr(original_error, "message", str(original_error))
        super().__init__(
            message=base_message,
            request=httpx.Request(
                method=source_request.method,
                url=masked_url,
                headers=source_request.headers,
                content=source_request.content,
            ),
            response=httpx.Response(
                status_code=source_response.status_code,
                content=source_response.content,
                headers=source_response.headers,
            ),
        )
        self.message = message
        self.text = text
class AsyncHTTPHandler:
def __init__(
    self,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    event_hooks: Optional[Mapping[str, List[Callable[..., Any]]]] = None,
    concurrent_limit=None,  # Kept for backward compatibility, but ignored (no limits)
    client_alias: Optional[str] = None,  # name for client in logs
    ssl_verify: Optional[VerifyTypes] = None,
    shared_session: Optional["ClientSession"] = None,
):
    """
    Create the handler and its underlying `httpx.AsyncClient`.

    Args:
        timeout: Default timeout; stored as-is and also passed to `create_client`.
        event_hooks: httpx event hooks forwarded to the client.
        concurrent_limit: Accepted for backward compatibility; unused.
        client_alias: Name for this client in logs.
        ssl_verify: SSL verification setting forwarded to `create_client`.
        shared_session: Optional aiohttp session forwarded to `create_client`.
    """
    self.timeout = timeout
    self.event_hooks = event_hooks
    self.client_alias = client_alias
    self.client = self.create_client(
        timeout=timeout,
        event_hooks=event_hooks,
        ssl_verify=ssl_verify,
        shared_session=shared_session,
    )
def create_client(
    self,
    timeout: Optional[Union[float, httpx.Timeout]],
    event_hooks: Optional[Mapping[str, List[Callable[..., Any]]]],
    ssl_verify: Optional[VerifyTypes] = None,
    shared_session: Optional["ClientSession"] = None,
) -> httpx.AsyncClient:
    """
    Build an `httpx.AsyncClient` configured with litellm's SSL and header defaults.

    Args:
        timeout: Client timeout; `_DEFAULT_TIMEOUT` is used when None.
        event_hooks: httpx event hooks for the client.
        ssl_verify: SSL verification setting, resolved via `get_ssl_configuration`.
        shared_session: Optional aiohttp session handed to the transport factory.

    Returns:
        httpx.AsyncClient: A client that follows redirects and sends the
        default User-Agent header.
    """
    ssl_config = get_ssl_configuration(ssl_verify)

    # Client-side certificate (e.g. /path/to/client.pem) the requested host
    # uses to authenticate this client.
    cert = os.getenv("SSL_CERTIFICATE", litellm.ssl_certificate)

    effective_timeout = _DEFAULT_TIMEOUT if timeout is None else timeout

    # The transport factory takes the context and the boolean flag
    # separately; only the one matching ssl_config's type is forwarded.
    context_for_transport = (
        ssl_config if isinstance(ssl_config, ssl.SSLContext) else None
    )
    verify_flag = ssl_config if isinstance(ssl_config, bool) else None
    transport = AsyncHTTPHandler._create_async_transport(
        ssl_context=context_for_transport,
        ssl_verify=verify_flag,
        shared_session=shared_session,
    )

    return httpx.AsyncClient(
        transport=transport,
        event_hooks=event_hooks,
        timeout=effective_timeout,
        verify=ssl_config,
        cert=cert,
        # User-Agent, overridable via LITELLM_USER_AGENT
        headers=get_default_headers(),
        follow_redirects=True,
    )
async def close(self):
    """Close the underlying async client; call this when done with the handler."""
    await self.client.aclose()
async def __aenter__(self):
    """Enter the async context; yields the underlying client, not the handler."""
    return self.client
async def __aexit__(self, exc_type=None, exc_value=None, traceback=None):
    """
    Leave the async context and close the underlying client.

    The async context-manager protocol calls this with three exception
    arguments; they are accepted (and default to None so a bare call still
    works) but not inspected. Returns None, so any exception raised inside
    the `async with` body propagates.
    """
    await self.client.aclose()
async def get(
    self,
    url: str,
    params: Optional[dict] = None,
    headers: Optional[dict] = None,
    follow_redirects: Optional[bool] = None,
):
    """
    Send a GET request through the shared async client.

    Query parameters already present in `url` are merged into `params`
    (values from the URL win on key collisions). The caller's `params` dict
    is not modified.

    Args:
        url: Target URL, optionally containing a query string.
        params: Extra query parameters.
        headers: Extra request headers.
        follow_redirects: Per-request redirect policy; None defers to the
            client's default.

    Returns:
        The `httpx.Response`; no status check is performed here.
    """
    # None means "use whatever the client was configured with".
    _follow_redirects = (
        follow_redirects if follow_redirects is not None else USE_CLIENT_DEFAULT
    )
    # Build a fresh dict so the caller's mapping is never mutated.
    merged_params = {**(params or {}), **HTTPHandler.extract_query_params(url)}
    response = await self.client.get(
        url, params=merged_params, headers=headers, follow_redirects=_follow_redirects  # type: ignore
    )
    return response
@track_llm_api_timing()
async def post(
    self,
    url: str,
    data: Optional[Union[dict, str, bytes]] = None,  # type: ignore
    json: Optional[dict] = None,
    params: Optional[dict] = None,
    headers: Optional[dict] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    stream: bool = False,
    logging_obj: Optional[LiteLLMLoggingObject] = None,
    files: Optional[RequestFiles] = None,
    content: Any = None,
):
    """
    Send a POST request and return the response after a status check.

    On `httpx.RemoteProtocolError` / `httpx.ConnectError` the request is
    retried once on a freshly created client, which is closed afterwards.

    Args:
        url: Target URL.
        data: Form data (dict) or a raw str/bytes payload.
        json: JSON body.
        params: Query parameters.
        headers: Request headers.
        timeout: Per-request timeout; falls back to `self.timeout` when None.
        stream: Whether to stream the response body.
        logging_obj: Not read in this body (presumably consumed by the
            timing decorator - TODO confirm).
        files: Multipart files.
        content: Raw request content.

    Returns:
        The successful `httpx.Response`.

    Raises:
        litellm.Timeout: When httpx reports a timeout.
        httpx.HTTPStatusError: On a non-success status; the error carries
            `message`, `text` (both masked) and `status_code` attributes.
    """
    start_time = time.time()
    try:
        if timeout is None:
            timeout = self.timeout
        # Route data/content correctly to avoid httpx's DeprecationWarning
        # (memory leak fix).
        request_data, request_content = _prepare_request_data_and_content(
            data, content
        )
        req = self.client.build_request(
            "POST",
            url,
            data=request_data,
            json=json,
            params=params,
            headers=headers,
            timeout=timeout,
            files=files,
            content=request_content,
        )
        response = await self.client.send(req, stream=stream)
        response.raise_for_status()
        return response
    except (httpx.RemoteProtocolError, httpx.ConnectError):
        # Retry once on a brand-new client when the connection itself failed.
        new_client = self.create_client(
            timeout=timeout, event_hooks=self.event_hooks
        )
        try:
            return await self.single_connection_post_request(
                url=url,
                client=new_client,
                data=data,
                json=json,
                params=params,
                headers=headers,
                stream=stream,
                # Forward raw content too, so the retry carries the same body.
                content=content,
            )
        finally:
            # NOTE(review): with stream=True the retry client is closed before
            # the caller reads the body - confirm this is acceptable.
            await new_client.aclose()
    except httpx.TimeoutException as e:
        time_delta = round(time.time() - start_time, 3)
        # Surface any response headers attached to the timeout error.
        timeout_headers = {}
        error_response = getattr(e, "response", None)
        if error_response is not None:
            for key, value in error_response.headers.items():
                timeout_headers["response_headers-{}".format(key)] = value
        raise litellm.Timeout(
            message=f"Connection timed out. Timeout passed={timeout}, time taken={time_delta} seconds",
            model="default-model-name",
            llm_provider="litellm-httpx-handler",
            headers=timeout_headers,
        ) from e
    except httpx.HTTPStatusError as e:
        # Read the body once; streamed bodies must be fetched explicitly.
        if stream is True:
            error_body = mask_sensitive_info(await e.response.aread())
        else:
            error_body = mask_sensitive_info(e.response.text)
        setattr(e, "message", error_body)
        setattr(e, "text", error_body)
        setattr(e, "status_code", e.response.status_code)
        raise
async def put(
    self,
    url: str,
    data: Optional[Union[dict, str, bytes]] = None,  # type: ignore
    json: Optional[dict] = None,
    params: Optional[dict] = None,
    headers: Optional[dict] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    stream: bool = False,
    content: Any = None,
):
    """
    Send a PUT request and return the response after a status check.

    On `httpx.RemoteProtocolError` / `httpx.ConnectError` the same PUT is
    retried once on a freshly created client, which is closed afterwards.

    Args:
        url: Target URL.
        data: Form data (dict) or a raw str/bytes payload.
        json: JSON body.
        params: Query parameters.
        headers: Request headers.
        timeout: Per-request timeout; falls back to `self.timeout` when None.
        stream: Only consulted when reading an error body; the request
            itself is sent without streaming.
        content: Raw request content.

    Returns:
        The successful `httpx.Response`.

    Raises:
        litellm.Timeout: When httpx reports a timeout.
        httpx.HTTPStatusError: On a non-success status; the error carries
            `status_code` and `message` attributes.
    """
    try:
        if timeout is None:
            timeout = self.timeout
        # Route data/content correctly to avoid httpx's DeprecationWarning
        # (memory leak fix).
        request_data, request_content = _prepare_request_data_and_content(
            data, content
        )
        req = self.client.build_request(
            "PUT", url, data=request_data, json=json, params=params, headers=headers, timeout=timeout, content=request_content  # type: ignore
        )
        response = await self.client.send(req)
        response.raise_for_status()
        return response
    except (httpx.RemoteProtocolError, httpx.ConnectError):
        # Retry once on a brand-new client. The retry must remain a PUT and
        # carry the same body; the client's own timeout applies.
        new_client = self.create_client(
            timeout=timeout, event_hooks=self.event_hooks
        )
        try:
            retry_data, retry_content = _prepare_request_data_and_content(
                data, content
            )
            retry_req = new_client.build_request(
                "PUT", url, data=retry_data, json=json, params=params, headers=headers, content=retry_content  # type: ignore
            )
            retry_response = await new_client.send(retry_req)
            retry_response.raise_for_status()
            return retry_response
        finally:
            await new_client.aclose()
    except httpx.TimeoutException as e:
        # Surface any response headers attached to the timeout error.
        timeout_headers = {}
        error_response = getattr(e, "response", None)
        if error_response is not None:
            for key, value in error_response.headers.items():
                timeout_headers["response_headers-{}".format(key)] = value
        raise litellm.Timeout(
            message=f"Connection timed out after {timeout} seconds.",
            model="default-model-name",
            llm_provider="litellm-httpx-handler",
            headers=timeout_headers,
        ) from e
    except httpx.HTTPStatusError as e:
        setattr(e, "status_code", e.response.status_code)
        if stream is True:
            setattr(e, "message", await e.response.aread())
        else:
            setattr(e, "message", e.response.text)
        raise
async def patch(
    self,
    url: str,
    data: Optional[Union[dict, str, bytes]] = None,  # type: ignore
    json: Optional[dict] = None,
    params: Optional[dict] = None,
    headers: Optional[dict] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    stream: bool = False,
    content: Any = None,
):
    """
    Send a PATCH request and return the response after a status check.

    On `httpx.RemoteProtocolError` / `httpx.ConnectError` the same PATCH is
    retried once on a freshly created client, which is closed afterwards.

    Args:
        url: Target URL.
        data: Form data (dict) or a raw str/bytes payload.
        json: JSON body.
        params: Query parameters.
        headers: Request headers.
        timeout: Per-request timeout; falls back to `self.timeout` when None.
        stream: Only consulted when reading an error body; the request
            itself is sent without streaming.
        content: Raw request content.

    Returns:
        The successful `httpx.Response`.

    Raises:
        litellm.Timeout: When httpx reports a timeout.
        httpx.HTTPStatusError: On a non-success status; the error carries
            `status_code` and `message` attributes.
    """
    try:
        if timeout is None:
            timeout = self.timeout
        # Route data/content correctly to avoid httpx's DeprecationWarning
        # (memory leak fix).
        request_data, request_content = _prepare_request_data_and_content(
            data, content
        )
        req = self.client.build_request(
            "PATCH", url, data=request_data, json=json, params=params, headers=headers, timeout=timeout, content=request_content  # type: ignore
        )
        response = await self.client.send(req)
        response.raise_for_status()
        return response
    except (httpx.RemoteProtocolError, httpx.ConnectError):
        # Retry once on a brand-new client. The retry must remain a PATCH and
        # carry the same body; the client's own timeout applies.
        new_client = self.create_client(
            timeout=timeout, event_hooks=self.event_hooks
        )
        try:
            retry_data, retry_content = _prepare_request_data_and_content(
                data, content
            )
            retry_req = new_client.build_request(
                "PATCH", url, data=retry_data, json=json, params=params, headers=headers, content=retry_content  # type: ignore
            )
            retry_response = await new_client.send(retry_req)
            retry_response.raise_for_status()
            return retry_response
        finally:
            await new_client.aclose()
    except httpx.TimeoutException as e:
        # Surface any response headers attached to the timeout error.
        timeout_headers = {}
        error_response = getattr(e, "response", None)
        if error_response is not None:
            for key, value in error_response.headers.items():
                timeout_headers["response_headers-{}".format(key)] = value
        raise litellm.Timeout(
            message=f"Connection timed out after {timeout} seconds.",
            model="default-model-name",
            llm_provider="litellm-httpx-handler",
            headers=timeout_headers,
        ) from e
    except httpx.HTTPStatusError as e:
        setattr(e, "status_code", e.response.status_code)
        if stream is True:
            setattr(e, "message", await e.response.aread())
        else:
            setattr(e, "message", e.response.text)
        raise
async def delete(
    self,
    url: str,
    data: Optional[Union[dict, str, bytes]] = None,  # type: ignore
    json: Optional[dict] = None,
    params: Optional[dict] = None,
    headers: Optional[dict] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    stream: bool = False,
    content: Any = None,
):
    """
    Send a DELETE request and return the response after a status check.

    On `httpx.RemoteProtocolError` / `httpx.ConnectError` the same DELETE is
    retried once on a freshly created client, which is closed afterwards.
    Unlike the other verbs, httpx timeouts are not translated here and
    propagate as raised by httpx.

    Args:
        url: Target URL.
        data: Form data (dict) or a raw str/bytes payload.
        json: JSON body.
        params: Query parameters.
        headers: Request headers.
        timeout: Per-request timeout; falls back to `self.timeout` when None.
        stream: Whether to stream the response body.
        content: Raw request content.

    Returns:
        The successful `httpx.Response`.

    Raises:
        httpx.HTTPStatusError: On a non-success status; the error carries
            `status_code` and `message` attributes.
    """
    try:
        if timeout is None:
            timeout = self.timeout
        # Route data/content correctly to avoid httpx's DeprecationWarning
        # (memory leak fix).
        request_data, request_content = _prepare_request_data_and_content(
            data, content
        )
        req = self.client.build_request(
            "DELETE", url, data=request_data, json=json, params=params, headers=headers, timeout=timeout, content=request_content  # type: ignore
        )
        response = await self.client.send(req, stream=stream)
        response.raise_for_status()
        return response
    except (httpx.RemoteProtocolError, httpx.ConnectError):
        # Retry once on a brand-new client. The retry must remain a DELETE
        # (never a POST) and carry the same body; the client's own timeout
        # applies.
        new_client = self.create_client(
            timeout=timeout, event_hooks=self.event_hooks
        )
        try:
            retry_data, retry_content = _prepare_request_data_and_content(
                data, content
            )
            retry_req = new_client.build_request(
                "DELETE", url, data=retry_data, json=json, params=params, headers=headers, content=retry_content  # type: ignore
            )
            retry_response = await new_client.send(retry_req, stream=stream)
            retry_response.raise_for_status()
            return retry_response
        finally:
            await new_client.aclose()
    except httpx.HTTPStatusError as e:
        setattr(e, "status_code", e.response.status_code)
        if stream is True:
            setattr(e, "message", await e.response.aread())
        else:
            setattr(e, "message", e.response.text)
        raise
async def single_connection_post_request(
    self,
    url: str,
    client: httpx.AsyncClient,
    data: Optional[Union[dict, str, bytes]] = None,  # type: ignore
    json: Optional[dict] = None,
    params: Optional[dict] = None,
    headers: Optional[dict] = None,
    stream: bool = False,
    content: Any = None,
):
    """
    Issue one POST on the supplied client.

    Used to retry a request after a connection-level error. No per-request
    timeout is passed, so the supplied client's own timeout applies.

    Args:
        url: Target URL.
        client: The client to send the request on (not `self.client`).
        data: Form data (dict) or a raw str/bytes payload.
        json: JSON body.
        params: Query parameters.
        headers: Request headers.
        stream: Whether to stream the response body.
        content: Raw request content.

    Returns:
        The successful `httpx.Response`.

    Raises:
        httpx.HTTPStatusError: On a non-success status.
    """
    # Route data/content correctly to avoid httpx's DeprecationWarning
    # (memory leak fix).
    form_data, raw_content = _prepare_request_data_and_content(data, content)
    request = client.build_request(
        "POST", url, data=form_data, json=json, params=params, headers=headers, content=raw_content  # type: ignore
    )
    result = await client.send(request, stream=stream)
    result.raise_for_status()
    return result
def __del__(self) -> None:
    """
    Best-effort cleanup: schedule `close()` on the running event loop.

    Any failure (for example, no event loop is running) is deliberately
    ignored so that garbage collection never raises.
    """
    try:
        loop = asyncio.get_running_loop()
        loop.create_task(self.close())
    except Exception:
        pass
@staticmethod
def _create_async_transport(
    ssl_context: Optional[ssl.SSLContext] = None,
    ssl_verify: Optional[bool] = None,
    shared_session: Optional["ClientSession"] = None,
) -> Optional[Union[LiteLLMAiohttpTransport, AsyncHTTPTransport]]:
    """
    Choose and build the transport for `httpx.AsyncClient`.

    - When `_should_use_aiohttp_transport()` is true, an aiohttp-based
      transport is built from the given SSL settings and optional shared
      session.
    - Otherwise the result of `_create_httpx_transport()` is returned.

    Args:
        ssl_context: Custom SSL context for the aiohttp transport.
        ssl_verify: Boolean verification flag for the aiohttp transport.
        shared_session: Existing aiohttp session to reuse, if any.

    Returns:
        The selected transport, or None when the httpx path yields none.
    """
    if not AsyncHTTPHandler._should_use_aiohttp_transport():
        # aiohttp transport opted out: fall back to the httpx transport.
        return AsyncHTTPHandler._create_httpx_transport()

    return AsyncHTTPHandler._create_aiohttp_transport(
        ssl_context=ssl_context,
        ssl_verify=ssl_verify,
        shared_session=shared_session,
    )
@staticmethod
def _should_use_aiohttp_transport() -> bool:
    """
    Decide whether the aiohttp transport should be used.

    It is used unless the user opts out through either of:
    - litellm.disable_aiohttp_transport = True
    - os.getenv("DISABLE_AIOHTTP_TRANSPORT") = "True"

    Returns:
        bool: True to use the aiohttp transport, False to use httpx.
    """
    from litellm.secret_managers.main import str_to_bool

    disabled_in_code = litellm.disable_aiohttp_transport is True
    if (
        disabled_in_code
        or str_to_bool(os.getenv("DISABLE_AIOHTTP_TRANSPORT", "False")) is True
    ):
        return False

    # No opt-out found: aiohttp is the transport.
    verbose_logger.debug("Using AiohttpTransport...")
    return True
@staticmethod
def _get_ssl_connector_kwargs(
    ssl_verify: Optional[bool] = None,
    ssl_context: Optional[ssl.SSLContext] = None,
) -> Dict[str, Any]:
    """
    Build the SSL/bind-address keyword arguments for an aiohttp TCPConnector.

    SSL selection:
    1. A provided `ssl_context` is used as-is.
    2. Otherwise `ssl_verify is False` disables verification (ssl=False).
    3. Otherwise no `ssl` key is set.

    `local_addr` is always present: ("0.0.0.0", 0) when litellm.force_ipv4
    is truthy, else None.

    Returns:
        Dict of keyword arguments for TCPConnector.
    """
    bind_address = ("0.0.0.0", 0) if litellm.force_ipv4 else None
    connector_kwargs: Dict[str, Any] = {"local_addr": bind_address}

    if ssl_context is None:
        if ssl_verify is False:
            # Explicitly disable SSL verification.
            connector_kwargs["ssl"] = False
    else:
        # A custom SSL context takes priority.
        connector_kwargs["ssl"] = ssl_context

    return connector_kwargs
@staticmethod
def _create_aiohttp_transport(
    ssl_verify: Optional[bool] = None,
    ssl_context: Optional[ssl.SSLContext] = None,
    shared_session: Optional["ClientSession"] = None,
) -> LiteLLMAiohttpTransport:
    """
    Build a `LiteLLMAiohttpTransport`.

    If `shared_session` is given and still open it is reused (the transport
    does not own it). Otherwise the transport receives a factory that creates
    a new `ClientSession` with a `TCPConnector` configured from the
    AIOHTTP_* constants.

    Note: aiohttp's TCPConnector `ssl` parameter accepts:
    - SSLContext: custom SSL context
    - False: disable SSL verification

    Args:
        ssl_verify: Boolean verification flag; only False has an effect.
        ssl_context: Custom SSL context; takes priority over `ssl_verify`.
        shared_session: Existing aiohttp session to reuse, if any.

    Returns:
        LiteLLMAiohttpTransport: The configured transport.
    """
    from litellm.llms.custom_httpx.aiohttp_transport import LiteLLMAiohttpTransport
    from litellm.secret_managers.main import str_to_bool

    connector_kwargs = AsyncHTTPHandler._get_ssl_connector_kwargs(
        ssl_verify=ssl_verify, ssl_context=ssl_context
    )

    # trust_env lets aiohttp honour HTTP_PROXY, HTTPS_PROXY, etc. It is on
    # when either the litellm setting or the env var enables it.
    trust_env: bool = litellm.aiohttp_trust_env
    env_opt_in = str_to_bool(os.getenv("AIOHTTP_TRUST_ENV", "False"))
    if env_opt_in is True:
        trust_env = True

    # SSL setting passed to the transport as a per-request override, so
    # ssl_verify is honoured even on a shared session.
    ssl_for_transport: Optional[Union[bool, ssl.SSLContext]]
    if ssl_context is not None:
        ssl_for_transport = ssl_context
    elif ssl_verify is False:
        ssl_for_transport = False
    else:
        ssl_for_transport = None

    verbose_logger.debug("Creating AiohttpTransport...")

    session_is_usable = shared_session is not None and not shared_session.closed
    if session_is_usable:
        verbose_logger.debug(
            f"SHARED SESSION: Reusing existing ClientSession (ID: {id(shared_session)})"
        )
        return LiteLLMAiohttpTransport(
            client=shared_session,
            ssl_verify=ssl_for_transport,
            owns_session=False,
        )

    # No session given, or the given one is already closed.
    verbose_logger.debug(
        "NEW SESSION: Creating new ClientSession (no shared session provided)"
    )

    transport_connector_kwargs = dict(
        keepalive_timeout=AIOHTTP_KEEPALIVE_TIMEOUT,
        ttl_dns_cache=AIOHTTP_TTL_DNS_CACHE,
    )
    transport_connector_kwargs.update(connector_kwargs)
    if AIOHTTP_NEEDS_CLEANUP_CLOSED:
        transport_connector_kwargs["enable_cleanup_closed"] = True
    # Limits are applied only when configured to a positive value.
    if AIOHTTP_CONNECTOR_LIMIT > 0:
        transport_connector_kwargs["limit"] = AIOHTTP_CONNECTOR_LIMIT
    if AIOHTTP_CONNECTOR_LIMIT_PER_HOST > 0:
        transport_connector_kwargs["limit_per_host"] = AIOHTTP_CONNECTOR_LIMIT_PER_HOST

    def _new_session():
        # Called by the transport whenever it needs to create its session.
        return ClientSession(
            connector=TCPConnector(**transport_connector_kwargs),
            trust_env=trust_env,
        )

    return LiteLLMAiohttpTransport(
        client=_new_session,
        ssl_verify=ssl_for_transport,
    )
@staticmethod
def _create_httpx_transport() -> Optional[AsyncHTTPTransport]:
    """
    Build the httpx transport, if one is needed.

    - If litellm.force_ipv4 is truthy, returns an AsyncHTTPTransport with
      local_address set to "0.0.0.0".
    - [Default] Otherwise returns None.
    """
    if not litellm.force_ipv4:
        return None
    return AsyncHTTPTransport(local_address="0.0.0.0")
class HTTPHandler:
def __init__(
    self,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    concurrent_limit=None,  # Kept for backward compatibility, but ignored (no limits)
    client: Optional[httpx.Client] = None,
    ssl_verify: Optional[Union[bool, str]] = None,
    disable_default_headers: Optional[
        bool
    ] = False,  # arize phoenix returns different API responses when user agent header in request
):
    """
    Create the synchronous handler.

    Args:
        timeout: Client timeout; `_DEFAULT_TIMEOUT` is used when None.
        concurrent_limit: Accepted for backward compatibility; unused.
        client: Pre-built `httpx.Client` to use instead of creating one. When
            given, the other settings are not applied to it.
        ssl_verify: SSL verification setting, resolved via `get_ssl_configuration`.
        disable_default_headers: When truthy, the default User-Agent header
            is not set on a newly created client.
    """
    effective_timeout = _DEFAULT_TIMEOUT if timeout is None else timeout

    ssl_config = get_ssl_configuration(ssl_verify)

    # Client-side certificate (e.g. /path/to/client.pem) the requested host
    # uses to authenticate this client.
    cert = os.getenv("SSL_CERTIFICATE", litellm.ssl_certificate)

    # User-Agent, overridable via LITELLM_USER_AGENT
    default_headers = None if disable_default_headers else get_default_headers()

    if client is not None:
        self.client = client
        return

    # No client supplied: build one with a connection pool.
    self.client = httpx.Client(
        transport=self._create_sync_transport(),
        timeout=effective_timeout,
        verify=ssl_config,
        cert=cert,
        headers=default_headers,
        follow_redirects=True,
    )
def close(self):
    """Close the underlying client; call this when done with the handler."""
    self.client.close()
def get(
    self,
    url: str,
    params: Optional[dict] = None,
    headers: Optional[dict] = None,
    follow_redirects: Optional[bool] = None,
):
    """
    Send a GET request through the shared client.

    Query parameters already present in `url` are merged into `params`
    (values from the URL win on key collisions). The caller's `params` dict
    is not modified.

    Args:
        url: Target URL, optionally containing a query string.
        params: Extra query parameters.
        headers: Extra request headers.
        follow_redirects: Per-request redirect policy; None defers to the
            client's default.

    Returns:
        The `httpx.Response`; no status check is performed here.
    """
    # None means "use whatever the client was configured with".
    _follow_redirects = (
        follow_redirects if follow_redirects is not None else USE_CLIENT_DEFAULT
    )
    # Build a fresh dict so the caller's mapping is never mutated.
    merged_params = {**(params or {}), **self.extract_query_params(url)}
    response = self.client.get(
        url,
        params=merged_params,
        headers=headers,
        # Previously computed but never forwarded, so the argument was ignored.
        follow_redirects=_follow_redirects,  # type: ignore
    )
    return response
@staticmethod
def extract_query_params(url: str) -> Dict[str, str]:
"""
Parse a URLs query-string into a dict.
:param url: full URL, e.g. "https://.../path?foo=1&bar=2"
:return: {"foo": "1", "bar": "2"}
"""
from urllib.parse import parse_qsl, urlsplit
parts = urlsplit(url)
return dict(parse_qsl(parts.query))
def post(
    self,
    url: str,
    data: Optional[Union[dict, str, bytes]] = None,
    json: Optional[Union[dict, str, List]] = None,
    params: Optional[dict] = None,
    headers: Optional[dict] = None,
    stream: bool = False,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    files: Optional[Union[dict, RequestFiles]] = None,
    content: Any = None,
    logging_obj: Optional[LiteLLMLoggingObject] = None,
):
    """
    Send a POST request and return the response after a status check.

    Args:
        url: Target URL.
        data: Form data (dict) or a raw str/bytes payload.
        json: JSON body.
        params: Query parameters.
        headers: Request headers.
        stream: Whether to stream the response body.
        timeout: Per-request timeout; when None the client's own timeout applies.
        files: Multipart files.
        content: Raw request content.
        logging_obj: Accepted but not read by this method.

    Returns:
        The successful `httpx.Response`.

    Raises:
        litellm.Timeout: When httpx reports a timeout.
        httpx.HTTPStatusError: On a non-success status; the error carries
            `message`, `text` (both passed through `mask_sensitive_info`)
            and `status_code` attributes.
    """
    try:
        # Route data/content correctly to avoid httpx's DeprecationWarning
        # (memory leak fix).
        request_data, request_content = _prepare_request_data_and_content(
            data, content
        )
        build_kwargs: Dict[str, Any] = {
            "data": request_data,
            "json": json,
            "params": params,
            "headers": headers,
            "files": files,
            "content": request_content,
        }
        # Pass timeout only when given; omitting it keeps the client default.
        if timeout is not None:
            build_kwargs["timeout"] = timeout
        req = self.client.build_request("POST", url, **build_kwargs)  # type: ignore
        response = self.client.send(req, stream=stream)
        response.raise_for_status()
        return response
    except httpx.TimeoutException:
        raise litellm.Timeout(
            message=f"Connection timed out after {timeout} seconds.",
            model="default-model-name",
            llm_provider="litellm-httpx-handler",
        )
    except httpx.HTTPStatusError as e:
        # Streamed bodies must be read explicitly before they can be inspected.
        raw_body = e.response.read() if stream is True else e.response.text
        error_text = mask_sensitive_info(raw_body)
        setattr(e, "message", error_text)
        setattr(e, "text", error_text)
        setattr(e, "status_code", e.response.status_code)
        raise
def patch(
    self,
    url: str,
    data: Optional[Union[dict, str, bytes]] = None,
    json: Optional[Union[dict, str]] = None,
    params: Optional[dict] = None,
    headers: Optional[dict] = None,
    stream: bool = False,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    content: Any = None,
):
    """
    Send a PATCH request and return the response after a status check.

    Args:
        url: Target URL.
        data: Form data (dict) or a raw str/bytes payload.
        json: JSON body.
        params: Query parameters.
        headers: Request headers.
        stream: Whether to stream the response body.
        timeout: Per-request timeout; when None the client's own timeout applies.
        content: Raw request content.

    Returns:
        The successful `httpx.Response`.

    Raises:
        litellm.Timeout: When httpx reports a timeout.
        httpx.HTTPStatusError: On a non-success status; the error carries
            `message`, `text` (both passed through `mask_sensitive_info`)
            and `status_code` attributes.
    """
    try:
        # Route data/content correctly to avoid httpx's DeprecationWarning
        # (memory leak fix).
        request_data, request_content = _prepare_request_data_and_content(
            data, content
        )
        build_kwargs: Dict[str, Any] = {
            "data": request_data,
            "json": json,
            "params": params,
            "headers": headers,
            "content": request_content,
        }
        # Pass timeout only when given; omitting it keeps the client default.
        if timeout is not None:
            build_kwargs["timeout"] = timeout
        req = self.client.build_request("PATCH", url, **build_kwargs)  # type: ignore
        response = self.client.send(req, stream=stream)
        response.raise_for_status()
        return response
    except httpx.TimeoutException:
        raise litellm.Timeout(
            message=f"Connection timed out after {timeout} seconds.",
            model="default-model-name",
            llm_provider="litellm-httpx-handler",
        )
    except httpx.HTTPStatusError as e:
        # Streamed bodies must be read explicitly before they can be inspected.
        raw_body = e.response.read() if stream is True else e.response.text
        error_text = mask_sensitive_info(raw_body)
        setattr(e, "message", error_text)
        setattr(e, "text", error_text)
        setattr(e, "status_code", e.response.status_code)
        raise
def put(
    self,
    url: str,
    data: Optional[Union[dict, str, bytes]] = None,
    json: Optional[Union[dict, str]] = None,
    params: Optional[dict] = None,
    headers: Optional[dict] = None,
    stream: bool = False,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    content: Any = None,
):
    """
    Send a PUT request with the wrapped client and return the response.

    `data` and `content` are first routed through
    `_prepare_request_data_and_content`; `timeout` is only forwarded to
    `build_request` when it is not None.

    No `raise_for_status()` call is made here: the response is returned
    whatever its status code, so callers have to inspect it themselves.

    Raises:
        litellm.Timeout: when httpx raises a `TimeoutException`.
    """
    try:
        # Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
        body_data, body_content = _prepare_request_data_and_content(data, content)
        request_kwargs: Dict[str, Any] = dict(
            data=body_data,
            json=json,
            params=params,
            headers=headers,
            content=body_content,
        )
        if timeout is not None:
            request_kwargs["timeout"] = timeout
        prepared = self.client.build_request("PUT", url, **request_kwargs)  # type: ignore
        return self.client.send(prepared, stream=stream)
    except httpx.TimeoutException:
        raise litellm.Timeout(
            message=f"Connection timed out after {timeout} seconds.",
            model="default-model-name",
            llm_provider="litellm-httpx-handler",
        )
def delete(
    self,
    url: str,
    data: Optional[Union[dict, str, bytes]] = None,  # type: ignore
    json: Optional[dict] = None,
    params: Optional[dict] = None,
    headers: Optional[dict] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    stream: bool = False,
    content: Any = None,
):
    """
    Send a DELETE request with the wrapped client and return the response.

    Args:
        url: Target URL.
        data: Request body; routed together with `content` through
            `_prepare_request_data_and_content` before being handed to httpx.
        json: JSON payload forwarded to `build_request`.
        params: Query parameters forwarded to `build_request`.
        headers: Request headers forwarded to `build_request`.
        timeout: Per-request timeout. When None the argument is not passed
            to `build_request` at all.
        stream: Passed to `client.send`; also decides how the error body is
            obtained when the response has an error status.
        content: Raw request content, see `data`.

    Returns:
        The response object returned by `client.send`, after
        `raise_for_status()` succeeded.

    Raises:
        litellm.Timeout: when httpx raises a `TimeoutException`; the httpx
            exception is attached as the cause.
        httpx.HTTPStatusError: re-raised after `message`, `text` (both the
            masked response body) and `status_code` were set on it.
    """
    try:
        # Prepare data/content parameters to prevent httpx DeprecationWarning (memory leak fix)
        request_data, request_content = _prepare_request_data_and_content(
            data, content
        )
        build_kwargs: Dict[str, Any] = {
            "data": request_data,
            "json": json,
            "params": params,
            "headers": headers,
            "content": request_content,
        }
        # Only forward `timeout` when the caller supplied one; passing an
        # explicit None is not the same as leaving the argument out.
        if timeout is not None:
            build_kwargs["timeout"] = timeout
        req = self.client.build_request("DELETE", url, **build_kwargs)  # type: ignore
        response = self.client.send(req, stream=stream)
        response.raise_for_status()
        return response
    except httpx.TimeoutException as e:
        raise litellm.Timeout(
            message=f"Connection timed out after {timeout} seconds.",
            model="default-model-name",
            llm_provider="litellm-httpx-handler",
        ) from e
    except httpx.HTTPStatusError as e:
        # A streamed response has no body loaded yet, so it must be read
        # explicitly. Read and mask once, then reuse for both attributes.
        if stream is True:
            error_text = mask_sensitive_info(e.response.read())
        else:
            error_text = mask_sensitive_info(e.response.text)
        setattr(e, "message", error_text)
        setattr(e, "text", error_text)
        setattr(e, "status_code", e.response.status_code)
        raise
def __del__(self) -> None:
    """
    Finalizer: try to close the handler and ignore any failure.

    Any `Exception` raised by `self.close()` is swallowed on purpose so
    that nothing propagates out of object finalization.
    """
    try:
        self.close()
    except Exception:
        # Best-effort cleanup only; there is nobody to report the error to.
        return None
def _create_sync_transport(self) -> Optional[HTTPTransport]:
    """
    Select the transport for the synchronous client.

    - `litellm.force_ipv4` truthy: return a new `HTTPTransport` bound to the
      local address "0.0.0.0", i.e. IPv4 only. Some users have seen httpx
      ConnectionError when using IPv6; forcing IPv4 resolved it for them.
    - otherwise: return `litellm.sync_transport` when that attribute exists,
      else None.
    """
    if not litellm.force_ipv4:
        return getattr(litellm, "sync_transport", None)
    return HTTPTransport(local_address="0.0.0.0")
def get_async_httpx_client(
    llm_provider: Union[LlmProviders, httpxSpecialProvider],
    params: Optional[dict] = None,
    shared_session: Optional["ClientSession"] = None,
) -> AsyncHTTPHandler:
    """
    Return a cached `AsyncHTTPHandler`, creating and caching one on a miss.

    The cache key is "async_httpx_client", followed by every
    `{key}_{value}` pair of `params` (in iteration order), followed by
    `llm_provider`. `shared_session` is not part of the key; it is only
    used when a new handler has to be constructed.

    New handlers are built from `params` (minus the cache-key-only entry
    "disable_aiohttp_transport") or, when `params` is None, with a default
    timeout of 600s total / 5s connect. They are stored with a TTL of
    `_DEFAULT_TTL_FOR_HTTPX_CLIENTS`.
    """
    key_fragments: List[str] = []
    if params is not None:
        for param_name, param_value in params.items():
            try:
                key_fragments.append(f"{param_name}_{param_value}")
            except Exception:
                # Entries that cannot be formatted are left out of the key.
                continue
    cache_key = "async_httpx_client" + "".join(key_fragments) + llm_provider

    # Lazily initialize the global in-memory client cache to avoid relying on
    # litellm globals being fully populated during import time.
    client_cache = getattr(litellm, "in_memory_llm_clients_cache", None)
    if client_cache is None:
        from litellm.caching.llm_caching_handler import LLMClientCache

        client_cache = LLMClientCache()
        setattr(litellm, "in_memory_llm_clients_cache", client_cache)

    existing_client = client_cache.get_cache(cache_key)
    if existing_client:
        return existing_client

    if params is None:
        fresh_client = AsyncHTTPHandler(
            timeout=httpx.Timeout(timeout=600.0, connect=5.0),
            shared_session=shared_session,
        )
    else:
        # Drop params that are only used for the cache key, not for
        # AsyncHTTPHandler.__init__
        init_kwargs = dict(params)
        init_kwargs.pop("disable_aiohttp_transport", None)
        init_kwargs["shared_session"] = shared_session
        fresh_client = AsyncHTTPHandler(**init_kwargs)

    client_cache.set_cache(
        key=cache_key,
        value=fresh_client,
        ttl=_DEFAULT_TTL_FOR_HTTPX_CLIENTS,
    )
    return fresh_client
def _get_httpx_client(params: Optional[dict] = None) -> HTTPHandler:
    """
    Return a cached synchronous `HTTPHandler`, creating and caching one on a miss.

    The cache key is "httpx_client" followed by every `{key}_{value}` pair
    of `params` (in iteration order).

    New handlers are built from `params` (minus the cache-key-only entry
    "disable_aiohttp_transport") or, when `params` is None, with a default
    timeout of 600s total / 5s connect. They are stored with a TTL of
    `_DEFAULT_TTL_FOR_HTTPX_CLIENTS`.
    """
    key_fragments: List[str] = []
    if params is not None:
        for param_name, param_value in params.items():
            try:
                key_fragments.append(f"{param_name}_{param_value}")
            except Exception:
                # Entries that cannot be formatted are left out of the key.
                continue
    cache_key = "httpx_client" + "".join(key_fragments)

    # Lazily initialize the global in-memory client cache to avoid relying on
    # litellm globals being fully populated during import time.
    client_cache = getattr(litellm, "in_memory_llm_clients_cache", None)
    if client_cache is None:
        from litellm.caching.llm_caching_handler import LLMClientCache

        client_cache = LLMClientCache()
        setattr(litellm, "in_memory_llm_clients_cache", client_cache)

    existing_client = client_cache.get_cache(cache_key)
    if existing_client:
        return existing_client

    if params is None:
        fresh_client = HTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))
    else:
        # Drop params that are only used for the cache key, not for
        # HTTPHandler.__init__
        init_kwargs = dict(params)
        init_kwargs.pop("disable_aiohttp_transport", None)
        fresh_client = HTTPHandler(**init_kwargs)

    client_cache.set_cache(
        key=cache_key,
        value=fresh_client,
        ttl=_DEFAULT_TTL_FOR_HTTPX_CLIENTS,
    )
    return fresh_client