# lijiaoqiao/llm-gateway-competitors/litellm-wheel-src/litellm/__init__.py
### Hide pydantic namespace conflict warnings globally ###
from __future__ import annotations
import warnings
warnings.filterwarnings("ignore", message=".*conflict with protected namespace.*")
# Suppress Pydantic 2.11+ deprecation warning about accessing model_fields on instances
# This warning can accumulate during streaming and cause memory leaks
warnings.filterwarnings(
"ignore", message=".*Accessing the.*attribute on the instance is deprecated.*"
)
### INIT VARIABLES #########################
import threading
import os
# Load .env before any other litellm imports so env vars (e.g. LITELLM_UI_SESSION_DURATION) are available
import dotenv as _dotenv
if os.getenv("LITELLM_MODE", "DEV") == "DEV":
_dotenv.load_dotenv()
from typing import (
Callable,
List,
Optional,
Dict,
Union,
Any,
Literal,
get_args,
TYPE_CHECKING,
Tuple,
overload,
Type,
)
from litellm.types.integrations.datadog import DatadogInitParams
from litellm._logging import (
set_verbose,
_turn_on_debug,
verbose_logger,
json_logs,
_turn_on_json,
log_level,
)
import re
from litellm.constants import (
DEFAULT_BATCH_SIZE,
DEFAULT_FLUSH_INTERVAL_SECONDS,
ROUTER_MAX_FALLBACKS,
DEFAULT_MAX_RETRIES,
DEFAULT_REPLICATE_POLLING_RETRIES,
DEFAULT_REPLICATE_POLLING_DELAY_SECONDS,
LITELLM_CHAT_PROVIDERS,
HUMANLOOP_PROMPT_CACHE_TTL_SECONDS,
OPENAI_CHAT_COMPLETION_PARAMS,
OPENAI_CHAT_COMPLETION_PARAMS as _openai_completion_params, # backwards compatibility
OPENAI_FINISH_REASONS,
OPENAI_FINISH_REASONS as _openai_finish_reasons, # backwards compatibility
openai_compatible_endpoints,
openai_compatible_providers,
openai_text_completion_compatible_providers,
_openai_like_providers,
replicate_models,
clarifai_models,
huggingface_models,
empower_models,
together_ai_models,
baseten_models,
WANDB_MODELS,
REPEATED_STREAMING_CHUNK_LIMIT,
request_timeout,
open_ai_embedding_models,
cohere_embedding_models,
bedrock_embedding_models,
known_tokenizer_config,
BEDROCK_INVOKE_PROVIDERS_LITERAL,
BEDROCK_EMBEDDING_PROVIDERS_LITERAL,
BEDROCK_CONVERSE_MODELS,
DEFAULT_MAX_TOKENS,
DEFAULT_SOFT_BUDGET,
DEFAULT_ALLOWED_FAILS,
)
import httpx
# register_async_client_cleanup is lazy-loaded and called on first access
litellm_mode = os.getenv("LITELLM_MODE", "DEV") # "PRODUCTION", "DEV"
####################################################
if set_verbose:
_turn_on_debug()
####################################################
### Callbacks /Logging / Success / Failure Handlers #####
CALLBACK_TYPES = Union[str, Callable, "CustomLogger"] # CustomLogger is lazy-loaded
input_callback: List[CALLBACK_TYPES] = []
success_callback: List[CALLBACK_TYPES] = []
failure_callback: List[CALLBACK_TYPES] = []
service_callback: List[CALLBACK_TYPES] = []
# logging_callback_manager is lazy-loaded via __getattr__
_custom_logger_compatible_callbacks_literal = Literal[
"lago",
"openmeter",
"logfire",
"literalai",
"litellm_agent",
"dynamic_rate_limiter",
"dynamic_rate_limiter_v3",
"langsmith",
"prometheus",
"otel",
"datadog",
"datadog_metrics",
"datadog_llm_observability",
"galileo",
"braintrust",
"arize",
"arize_phoenix",
"langtrace",
"gcs_bucket",
"azure_storage",
"opik",
"argilla",
"mlflow",
"langfuse",
"langfuse_otel",
"weave_otel",
"pagerduty",
"humanloop",
"azure_sentinel",
"gcs_pubsub",
"agentops",
"anthropic_cache_control_hook",
"generic_api",
"resend_email",
"sendgrid_email",
"smtp_email",
"deepeval",
"s3_v2",
"aws_sqs",
"vector_store_pre_call_hook",
"dotprompt",
"bitbucket",
"gitlab",
"cloudzero",
"focus",
"posthog",
"levo",
]
cold_storage_custom_logger: Optional[_custom_logger_compatible_callbacks_literal] = None
logged_real_time_event_types: Optional[Union[List[str], Literal["*"]]] = None
_known_custom_logger_compatible_callbacks: List = list(
get_args(_custom_logger_compatible_callbacks_literal)
)
callbacks: List[
Union[
Callable, _custom_logger_compatible_callbacks_literal, "CustomLogger"
] # CustomLogger is lazy-loaded
] = []
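# Minimal usage sketch for the callback lists above ("langfuse" is one of the
# literals in _custom_logger_compatible_callbacks_literal; the assignment pattern
# follows the LiteLLM docs):
#   import litellm
#   litellm.success_callback = ["langfuse"]  # log successful calls
#   litellm.failure_callback = ["langfuse"]  # log failures too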
callback_settings: Dict[str, Dict[str, Any]] = {}
initialized_langfuse_clients: int = 0
langfuse_default_tags: Optional[List[str]] = None
langsmith_batch_size: Optional[int] = None
prometheus_initialize_budget_metrics: Optional[bool] = False
require_auth_for_metrics_endpoint: Optional[bool] = False
argilla_batch_size: Optional[int] = None
datadog_use_v1: Optional[bool] = False # if you want to use v1 datadog logged payload.
gcs_pub_sub_use_v1: Optional[
bool
] = False # if you want to use v1 gcs pubsub logged payload
generic_api_use_v1: Optional[
bool
] = False # if you want to use v1 generic api logged payload
argilla_transformation_object: Optional[Dict[str, Any]] = None
_async_input_callback: List[
Union[str, Callable, "CustomLogger"]
] = ( # CustomLogger is lazy-loaded
[]
) # internal variable - async custom callbacks are routed here.
_async_success_callback: List[
Union[str, Callable, "CustomLogger"]
] = ( # CustomLogger is lazy-loaded
[]
) # internal variable - async custom callbacks are routed here.
_async_failure_callback: List[
Union[str, Callable, "CustomLogger"]
] = ( # CustomLogger is lazy-loaded
[]
) # internal variable - async custom callbacks are routed here.
pre_call_rules: List[Callable] = []
post_call_rules: List[Callable] = []
turn_off_message_logging: Optional[bool] = False
standard_logging_payload_excluded_fields: Optional[
List[str]
] = None # Fields to exclude from StandardLoggingPayload before callbacks receive it
log_raw_request_response: bool = False
redact_messages_in_exceptions: Optional[bool] = False
redact_user_api_key_info: Optional[bool] = False
filter_invalid_headers: Optional[bool] = False
add_user_information_to_llm_headers: Optional[
bool
] = None # adds user_id, team_id, token hash (params from StandardLoggingMetadata) to request headers
store_audit_logs = False # Enterprise feature, allow users to see audit logs
### end of callbacks #############
email: Optional[
str
] = None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
token: Optional[
str
] = None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
telemetry = True
max_tokens: int = DEFAULT_MAX_TOKENS # OpenAI Defaults
drop_params = bool(os.getenv("LITELLM_DROP_PARAMS", False))
modify_params = bool(os.getenv("LITELLM_MODIFY_PARAMS", False))
use_chat_completions_url_for_anthropic_messages: bool = bool(
os.getenv("LITELLM_USE_CHAT_COMPLETIONS_URL_FOR_ANTHROPIC_MESSAGES", False)
) # When True, routes OpenAI /v1/messages requests to chat/completions instead of the Responses API
retry = True
### AUTH ###
api_key: Optional[str] = None
openai_key: Optional[str] = None
groq_key: Optional[str] = None
gigachat_key: Optional[str] = None
databricks_key: Optional[str] = None
openai_like_key: Optional[str] = None
azure_key: Optional[str] = None
anthropic_key: Optional[str] = None
replicate_key: Optional[str] = None
bytez_key: Optional[str] = None
cohere_key: Optional[str] = None
infinity_key: Optional[str] = None
clarifai_key: Optional[str] = None
maritalk_key: Optional[str] = None
ai21_key: Optional[str] = None
ollama_key: Optional[str] = None
openrouter_key: Optional[str] = None
datarobot_key: Optional[str] = None
predibase_key: Optional[str] = None
huggingface_key: Optional[str] = None
vertex_project: Optional[str] = None
vertex_location: Optional[str] = None
predibase_tenant_id: Optional[str] = None
togetherai_api_key: Optional[str] = None
cloudflare_api_key: Optional[str] = None
vercel_ai_gateway_key: Optional[str] = None
baseten_key: Optional[str] = None
llama_api_key: Optional[str] = None
aleph_alpha_key: Optional[str] = None
nlp_cloud_key: Optional[str] = None
novita_api_key: Optional[str] = None
snowflake_key: Optional[str] = None
gradient_ai_api_key: Optional[str] = None
nebius_key: Optional[str] = None
wandb_key: Optional[str] = None
heroku_key: Optional[str] = None
cometapi_key: Optional[str] = None
ovhcloud_key: Optional[str] = None
lemonade_key: Optional[str] = None
sap_service_key: Optional[str] = None
amazon_nova_api_key: Optional[str] = None
common_cloud_provider_auth_params: dict = {
"params": ["project", "region_name", "token"],
"providers": ["vertex_ai", "bedrock", "watsonx", "azure", "vertex_ai_beta"],
}
use_litellm_proxy: bool = (
False # when True, requests will be sent to the specified litellm proxy endpoint
)
use_client: bool = False
ssl_verify: Union[str, bool] = True
ssl_security_level: Optional[str] = None
ssl_certificate: Optional[str] = None
ssl_ecdh_curve: Optional[
str
] = None # Set to 'X25519' to disable PQC and improve performance
disable_streaming_logging: bool = False
disable_token_counter: bool = False
disable_add_transform_inline_image_block: bool = False
disable_add_user_agent_to_request_tags: bool = False
disable_anthropic_gemini_context_caching_transform: bool = False
extra_spend_tag_headers: Optional[List[str]] = None
in_memory_llm_clients_cache: "LLMClientCache"
safe_memory_mode: bool = False
enable_azure_ad_token_refresh: Optional[bool] = False
# Proxy Authentication - auto-obtain/refresh OAuth2/JWT tokens for LiteLLM Proxy
proxy_auth: Optional[Any] = None
### DEFAULT AZURE API VERSION ###
AZURE_DEFAULT_API_VERSION = "2025-02-01-preview"  # keep this updated to the latest preview API version
### DEFAULT WATSONX API VERSION ###
WATSONX_DEFAULT_API_VERSION = "2024-03-13"
### COHERE EMBEDDINGS DEFAULT TYPE ###
COHERE_DEFAULT_EMBEDDING_INPUT_TYPE: "COHERE_EMBEDDING_INPUT_TYPES" = "search_document"
### CREDENTIALS ###
credential_list: List["CredentialItem"] = []
### GUARDRAILS ###
llamaguard_model_name: Optional[str] = None
openai_moderations_model_name: Optional[str] = None
presidio_ad_hoc_recognizers: Optional[str] = None
google_moderation_confidence_threshold: Optional[float] = None
llamaguard_unsafe_content_categories: Optional[str] = None
blocked_user_list: Optional[Union[str, List]] = None
banned_keywords_list: Optional[Union[str, List]] = None
llm_guard_mode: Literal["all", "key-specific", "request-specific"] = "all"
guardrail_name_config_map: Dict[str, GuardrailItem] = {}
include_cost_in_streaming_usage: bool = False
reasoning_auto_summary: bool = False
### PROMPTS ####
from litellm.types.prompts.init_prompts import PromptSpec
prompt_name_config_map: Dict[str, PromptSpec] = {}
##################
### PREVIEW FEATURES ###
enable_preview_features: bool = False
return_response_headers: bool = (
    False  # get response headers from LLM API providers - e.g. x-remaining-requests
)
enable_json_schema_validation: bool = False
enable_key_alias_format_validation: bool = (
False # opt-in validation of key_alias format on /key/generate and /key/update
)
####################
logging: bool = True
enable_loadbalancing_on_batch_endpoints: Optional[bool] = None
enable_caching_on_provider_specific_optional_params: bool = (
False # feature-flag for caching on optional params - e.g. 'top_k'
)
caching: bool = False # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
caching_with_models: bool = False  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
cache: Optional[
"Cache"
] = None # cache object <- use this - https://docs.litellm.ai/docs/caching
default_in_memory_ttl: Optional[float] = None
default_redis_ttl: Optional[float] = None
default_redis_batch_cache_expiry: Optional[float] = None
model_alias_map: Dict[str, str] = {}
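# Sketch of the alias map (illustrative names, not defaults): callers may use the
# alias and LiteLLM resolves it to the real model name.
#   model_alias_map = {"gpt-4-fast": "gpt-4-1106-preview"}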
model_group_settings: Optional["ModelGroupSettings"] = None
max_budget: float = 0.0 # set the max budget across all providers
budget_duration: Optional[
str
] = None # proxy only - resets budget after fixed duration. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
default_soft_budget: float = (
DEFAULT_SOFT_BUDGET # by default all litellm proxy keys have a soft budget of 50.0
)
forward_traceparent_to_llm_provider: bool = False
_current_cost = 0.0 # private variable, used if max budget is set
error_logs: Dict = {}
add_function_to_prompt: bool = False # if function calling not supported by api, append function call details to system prompt
client_session: Optional[httpx.Client] = None
aclient_session: Optional[httpx.AsyncClient] = None
model_fallbacks: Optional[List] = None # Deprecated for 'litellm.fallbacks'
model_cost_map_url: str = os.getenv(
"LITELLM_MODEL_COST_MAP_URL",
"https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json",
)
blog_posts_url: str = os.getenv(
"LITELLM_BLOG_POSTS_URL",
"https://raw.githubusercontent.com/BerriAI/litellm/main/litellm/blog_posts.json",
)
anthropic_beta_headers_url: str = os.getenv(
"LITELLM_ANTHROPIC_BETA_HEADERS_URL",
"https://raw.githubusercontent.com/BerriAI/litellm/main/litellm/anthropic_beta_headers_config.json",
)
suppress_debug_info = False
dynamodb_table_name: Optional[str] = None
s3_callback_params: Optional[Dict] = None
datadog_llm_observability_params: Optional[Union[DatadogLLMObsInitParams, Dict]] = None
datadog_params: Optional[Union[DatadogInitParams, Dict]] = None
aws_sqs_callback_params: Optional[Dict] = None
generic_logger_headers: Optional[Dict] = None
default_key_generate_params: Optional[Dict] = None
upperbound_key_generate_params: Optional[LiteLLM_UpperboundKeyGenerateParams] = None
key_generation_settings: Optional["StandardKeyGenerationConfig"] = None
default_internal_user_params: Optional[Dict] = None
default_team_params: Optional[Union[DefaultTeamSSOParams, Dict]] = None
default_team_settings: Optional[List] = None
max_user_budget: Optional[float] = None
default_max_internal_user_budget: Optional[float] = None
max_internal_user_budget: Optional[float] = None
max_ui_session_budget: Optional[float] = 0.25 # $0.25 USD budgets for UI Chat sessions
internal_user_budget_duration: Optional[str] = None
tag_budget_config: Optional[Dict[str, "BudgetConfig"]] = None
max_end_user_budget: Optional[float] = None
max_end_user_budget_id: Optional[str] = None
disable_end_user_cost_tracking: Optional[bool] = None
disable_end_user_cost_tracking_prometheus_only: Optional[bool] = None
enable_end_user_cost_tracking_prometheus_only: Optional[bool] = None
custom_prometheus_metadata_labels: List[str] = []
custom_prometheus_tags: List[str] = []
prometheus_metrics_config: Optional[List] = None
prometheus_emit_stream_label: bool = False
disable_add_prefix_to_prompt: bool = (
False # used by anthropic, to disable adding prefix to prompt
)
disable_copilot_system_to_assistant: bool = False # If false (default), converts all 'system' role messages to 'assistant' for GitHub Copilot compatibility. Set to true to disable this behavior.
public_mcp_servers: Optional[List[str]] = None
public_model_groups: Optional[List[str]] = None
public_agent_groups: Optional[List[str]] = None
# Supports both old format (Dict[str, str]) and new format (Dict[str, Dict[str, Any]])
# New format: { "displayName": { "url": "...", "index": 0 } }
# Old format: { "displayName": "url" } (for backward compatibility)
public_model_groups_links: Dict[str, Union[str, Dict[str, Any]]] = {}
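# Illustrative sketch mixing both supported formats (display names and URLs are
# hypothetical):
#   public_model_groups_links = {
#       "Docs": "https://example.com/docs",                       # old format
#       "Chat": {"url": "https://example.com/chat", "index": 0},  # new format
#   }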
#### REQUEST PRIORITIZATION #######
priority_reservation: Optional[
Dict[str, Union[float, "PriorityReservationDict"]]
] = None
# priority_reservation_settings is lazy-loaded via __getattr__
# Only declare for type checking - at runtime __getattr__ handles it
if TYPE_CHECKING:
priority_reservation_settings: Optional["PriorityReservationSettings"] = None
######## Networking Settings ########
use_aiohttp_transport: bool = True  # Deprecated: aiohttp is now the default; use disable_aiohttp_transport instead.
aiohttp_trust_env: bool = False  # set to True to honor HTTP(S)_PROXY env settings
disable_aiohttp_transport: bool = False  # Set this to True to use httpx instead
disable_aiohttp_trust_env: bool = (
    False  # When True, aiohttp will not respect HTTP(S)_PROXY env vars, even if aiohttp_trust_env is set
)
force_ipv4: bool = False # when True, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6.
network_mock: bool = False  # When True, use a mock transport; no real network calls are made
####### STOP SEQUENCE LIMIT #######
disable_stop_sequence_limit: bool = False # when True, stop sequence limit is disabled
#### RETRIES ####
num_retries: Optional[int] = None # per model endpoint
max_fallbacks: Optional[int] = None
default_fallbacks: Optional[List] = None
fallbacks: Optional[List] = None
context_window_fallbacks: Optional[List] = None
content_policy_fallbacks: Optional[List] = None
allowed_fails: int = 3
allow_dynamic_callback_disabling: bool = True
num_retries_per_request: Optional[
int
] = None # for the request overall (incl. fallbacks + model retries)
####### SECRET MANAGERS #####################
secret_manager_client: Optional[
    Any
] = None  # instantiated key management client - e.g. azure kv, infisical, etc.
_google_kms_resource_name: Optional[str] = None
_key_management_system: Optional["KeyManagementSystem"] = None
# Note: KeyManagementSettings must be eagerly imported because _key_management_settings
# is accessed during import time in secret_managers/main.py
# We'll import it after the lazy import system is set up
# We can't define it here because KeyManagementSettings is lazy-loaded
#### PII MASKING ####
output_parse_pii: bool = False
#############################################
from litellm.litellm_core_utils.get_model_cost_map import get_model_cost_map
model_cost = get_model_cost_map(url=model_cost_map_url)
cost_discount_config: Dict[
str, float
] = {} # Provider-specific cost discounts {"vertex_ai": 0.05} = 5% discount
cost_margin_config: Dict[
str, Union[float, Dict[str, float]]
] = {} # Provider-specific or global cost margins. Examples:
# Percentage: {"openai": 0.10} = 10% margin
# Fixed: {"openai": {"fixed_amount": 0.001}} = $0.001 per request
# Global: {"global": 0.05} = 5% global margin on all providers
# Combined: {"vertex_ai": {"percentage": 0.08, "fixed_amount": 0.0005}}
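# Worked example, assuming the semantics described above: with
# cost_margin_config = {"openai": {"percentage": 0.10, "fixed_amount": 0.001}},
# a request with a $0.002 base cost is billed 0.002 * (1 + 0.10) + 0.001 = $0.0032.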
custom_prompt_dict: Dict[str, dict] = {}
check_provider_endpoint = False
####### THREAD-SPECIFIC DATA ####################
class MyLocal(threading.local):
def __init__(self):
self.user = "Hello World"
_thread_context = MyLocal()
def identify(event_details):
# Store user in thread local data
if "user" in event_details:
_thread_context.user = event_details["user"]
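# Sketch usage of identify() ("user-1234" is an illustrative value): the user is
# stored on the current thread only.
#   identify({"user": "user-1234"})
#   _thread_context.user  # -> "user-1234" on this thread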
####### ADDITIONAL PARAMS ###################
# configurable params if you use proxy models like Helicone - e.g. map spend to an org id
api_base: Optional[str] = None
headers = None
api_version: Optional[str] = None
organization = None
project = None
config_path = None
vertex_ai_safety_settings: Optional[dict] = None
####### COMPLETION MODELS ###################
from typing import Set
open_ai_chat_completion_models: Set = set()
open_ai_text_completion_models: Set = set()
cohere_models: Set = set()
cohere_chat_models: Set = set()
mistral_chat_models: Set = set()
text_completion_codestral_models: Set = set()
anthropic_models: Set = set()
openrouter_models: Set = set()
datarobot_models: Set = set()
vertex_language_models: Set = set()
vertex_vision_models: Set = set()
vertex_chat_models: Set = set()
vertex_code_chat_models: Set = set()
vertex_ai_image_models: Set = set()
vertex_ai_video_models: Set = set()
vertex_text_models: Set = set()
vertex_code_text_models: Set = set()
vertex_embedding_models: Set = set()
vertex_anthropic_models: Set = set()
vertex_llama3_models: Set = set()
vertex_deepseek_models: Set = set()
vertex_ai_ai21_models: Set = set()
vertex_mistral_models: Set = set()
vertex_openai_models: Set = set()
vertex_minimax_models: Set = set()
vertex_moonshot_models: Set = set()
vertex_zai_models: Set = set()
ai21_models: Set = set()
ai21_chat_models: Set = set()
nlp_cloud_models: Set = set()
aleph_alpha_models: Set = set()
bedrock_models: Set = set()
bedrock_converse_models: Set = set(BEDROCK_CONVERSE_MODELS)
fal_ai_models: Set = set()
fireworks_ai_models: Set = set()
fireworks_ai_embedding_models: Set = set()
deepinfra_models: Set = set()
perplexity_models: Set = set()
watsonx_models: Set = set()
gemini_models: Set = set()
xai_models: Set = set()
zai_models: Set = set()
deepseek_models: Set = set()
runwayml_models: Set = set()
azure_ai_models: Set = set()
jina_ai_models: Set = set()
voyage_models: Set = set()
infinity_models: Set = set()
heroku_models: Set = set()
databricks_models: Set = set()
cloudflare_models: Set = set()
codestral_models: Set = set()
friendliai_models: Set = set()
featherless_ai_models: Set = set()
palm_models: Set = set()
groq_models: Set = set()
azure_models: Set = set()
azure_anthropic_models: Set = set()
azure_text_models: Set = set()
anyscale_models: Set = set()
cerebras_models: Set = set()
galadriel_models: Set = set()
nvidia_nim_models: Set = set()
sambanova_models: Set = set()
sambanova_embedding_models: Set = set()
novita_models: Set = set()
assemblyai_models: Set = set()
snowflake_models: Set = set()
gradient_ai_models: Set = set()
llama_models: Set = set()
nscale_models: Set = set()
nebius_models: Set = set()
nebius_embedding_models: Set = set()
aiml_models: Set = set()
deepgram_models: Set = set()
elevenlabs_models: Set = set()
dashscope_models: Set = set()
moonshot_models: Set = set()
publicai_models: Set = set()
v0_models: Set = set()
morph_models: Set = set()
lambda_ai_models: Set = set()
hyperbolic_models: Set = set()
black_forest_labs_models: Set = set()
recraft_models: Set = set()
cometapi_models: Set = set()
oci_models: Set = set()
vercel_ai_gateway_models: Set = set()
volcengine_models: Set = set()
wandb_models: Set = set(WANDB_MODELS)
ovhcloud_models: Set = set()
ovhcloud_embedding_models: Set = set()
lemonade_models: Set = set()
docker_model_runner_models: Set = set()
amazon_nova_models: Set = set()
stability_models: Set = set()
github_copilot_models: Set = set()
chatgpt_models: Set = set()
minimax_models: Set = set()
aws_polly_models: Set = set()
gigachat_models: Set = set()
llamagate_models: Set = set()
bedrock_mantle_models: Set = set()
def is_bedrock_pricing_only_model(key: str) -> bool:
"""
Excludes keys with the pattern 'bedrock/<region>/<model>'. These are in the model_prices_and_context_window.json file for pricing purposes only.
Args:
key (str): A key to filter.
Returns:
bool: True if the key matches the Bedrock pattern, False otherwise.
"""
# Regex to match 'bedrock/<region>/<model>'
bedrock_pattern = re.compile(r"^bedrock/[a-zA-Z0-9_-]+/.+$")
if "month-commitment" in key:
return True
is_match = bedrock_pattern.match(key)
return is_match is not None
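# Examples, per the checks above:
#   is_bedrock_pricing_only_model("bedrock/us-east-1/anthropic.claude-v2")  # True (region-prefixed)
#   is_bedrock_pricing_only_model("anthropic.claude-v2")                    # False
#   is_bedrock_pricing_only_model("claude-v2:1-month-commitment")           # True (commitment pricing)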
def is_openai_finetune_model(key: str) -> bool:
"""
Excludes model cost keys with the pattern 'ft:<model>'. These are in the model_prices_and_context_window.json file for pricing purposes only.
Args:
key (str): A key to filter.
Returns:
bool: True if the key matches the OpenAI finetune pattern, False otherwise.
"""
return key.startswith("ft:") and not key.count(":") > 1
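# Examples: a pricing-only finetune key has exactly one colon.
#   is_openai_finetune_model("ft:gpt-3.5-turbo")                 # True
#   is_openai_finetune_model("ft:gpt-3.5-turbo:my-org::model1")  # False (> 1 colon)
#   is_openai_finetune_model("gpt-3.5-turbo")                    # False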
def add_known_models(model_cost_map: Optional[Dict] = None):
_map = model_cost_map if model_cost_map is not None else model_cost
for key, value in _map.items():
if value.get("litellm_provider") == "openai" and not is_openai_finetune_model(
key
):
open_ai_chat_completion_models.add(key)
elif value.get("litellm_provider") == "text-completion-openai":
open_ai_text_completion_models.add(key)
elif value.get("litellm_provider") == "azure_text":
azure_text_models.add(key)
elif value.get("litellm_provider") == "cohere":
cohere_models.add(key)
elif value.get("litellm_provider") == "cohere_chat":
cohere_chat_models.add(key)
elif value.get("litellm_provider") == "mistral":
mistral_chat_models.add(key)
elif value.get("litellm_provider") == "anthropic":
anthropic_models.add(key)
elif value.get("litellm_provider") == "empower":
empower_models.add(key)
elif value.get("litellm_provider") == "openrouter":
openrouter_models.add(key)
elif value.get("litellm_provider") == "vercel_ai_gateway":
vercel_ai_gateway_models.add(key)
elif value.get("litellm_provider") == "datarobot":
datarobot_models.add(key)
elif value.get("litellm_provider") == "vertex_ai-text-models":
vertex_text_models.add(key)
elif value.get("litellm_provider") == "vertex_ai-code-text-models":
vertex_code_text_models.add(key)
elif value.get("litellm_provider") == "vertex_ai-language-models":
vertex_language_models.add(key)
elif value.get("litellm_provider") == "vertex_ai-vision-models":
vertex_vision_models.add(key)
elif value.get("litellm_provider") == "vertex_ai-chat-models":
vertex_chat_models.add(key)
elif value.get("litellm_provider") == "vertex_ai-code-chat-models":
vertex_code_chat_models.add(key)
elif value.get("litellm_provider") == "vertex_ai-embedding-models":
vertex_embedding_models.add(key)
elif value.get("litellm_provider") == "vertex_ai-anthropic_models":
key = key.replace("vertex_ai/", "")
vertex_anthropic_models.add(key)
elif value.get("litellm_provider") == "vertex_ai-llama_models":
key = key.replace("vertex_ai/", "")
vertex_llama3_models.add(key)
elif value.get("litellm_provider") == "vertex_ai-deepseek_models":
key = key.replace("vertex_ai/", "")
vertex_deepseek_models.add(key)
elif value.get("litellm_provider") == "vertex_ai-mistral_models":
key = key.replace("vertex_ai/", "")
vertex_mistral_models.add(key)
elif value.get("litellm_provider") == "vertex_ai-ai21_models":
key = key.replace("vertex_ai/", "")
vertex_ai_ai21_models.add(key)
elif value.get("litellm_provider") == "vertex_ai-image-models":
key = key.replace("vertex_ai/", "")
vertex_ai_image_models.add(key)
elif value.get("litellm_provider") == "vertex_ai-video-models":
key = key.replace("vertex_ai/", "")
vertex_ai_video_models.add(key)
elif value.get("litellm_provider") == "vertex_ai-openai_models":
key = key.replace("vertex_ai/", "")
vertex_openai_models.add(key)
elif value.get("litellm_provider") == "vertex_ai-minimax_models":
key = key.replace("vertex_ai/", "")
vertex_minimax_models.add(key)
elif value.get("litellm_provider") == "vertex_ai-moonshot_models":
key = key.replace("vertex_ai/", "")
vertex_moonshot_models.add(key)
elif value.get("litellm_provider") == "vertex_ai-zai_models":
key = key.replace("vertex_ai/", "")
vertex_zai_models.add(key)
elif value.get("litellm_provider") == "ai21":
if value.get("mode") == "chat":
ai21_chat_models.add(key)
else:
ai21_models.add(key)
elif value.get("litellm_provider") == "nlp_cloud":
nlp_cloud_models.add(key)
elif value.get("litellm_provider") == "aleph_alpha":
aleph_alpha_models.add(key)
elif value.get(
"litellm_provider"
) == "bedrock" and not is_bedrock_pricing_only_model(key):
bedrock_models.add(key)
elif value.get("litellm_provider") == "bedrock_converse":
bedrock_converse_models.add(key)
elif value.get("litellm_provider") == "deepinfra":
deepinfra_models.add(key)
elif value.get("litellm_provider") == "perplexity":
perplexity_models.add(key)
elif value.get("litellm_provider") == "watsonx":
watsonx_models.add(key)
elif value.get("litellm_provider") == "gemini":
gemini_models.add(key)
elif value.get("litellm_provider") == "fireworks_ai":
# ignore the 'up-to', '-to-' model names -> not real models. just for cost tracking based on model params.
if "-to-" not in key and "fireworks-ai-default" not in key:
fireworks_ai_models.add(key)
elif value.get("litellm_provider") == "fireworks_ai-embedding-models":
# ignore the 'up-to', '-to-' model names -> not real models. just for cost tracking based on model params.
if "-to-" not in key:
fireworks_ai_embedding_models.add(key)
elif value.get("litellm_provider") == "text-completion-codestral":
text_completion_codestral_models.add(key)
elif value.get("litellm_provider") == "xai":
xai_models.add(key)
elif value.get("litellm_provider") == "zai":
zai_models.add(key)
elif value.get("litellm_provider") == "fal_ai":
fal_ai_models.add(key)
elif value.get("litellm_provider") == "deepseek":
deepseek_models.add(key)
elif value.get("litellm_provider") == "runwayml":
runwayml_models.add(key)
elif value.get("litellm_provider") == "meta_llama":
llama_models.add(key)
elif value.get("litellm_provider") == "nscale":
nscale_models.add(key)
elif value.get("litellm_provider") == "azure_ai":
azure_ai_models.add(key)
elif value.get("litellm_provider") == "voyage":
voyage_models.add(key)
elif value.get("litellm_provider") == "infinity":
infinity_models.add(key)
elif value.get("litellm_provider") == "databricks":
databricks_models.add(key)
elif value.get("litellm_provider") == "cloudflare":
cloudflare_models.add(key)
elif value.get("litellm_provider") == "codestral":
codestral_models.add(key)
elif value.get("litellm_provider") == "friendliai":
friendliai_models.add(key)
elif value.get("litellm_provider") == "palm":
palm_models.add(key)
elif value.get("litellm_provider") == "groq":
groq_models.add(key)
elif value.get("litellm_provider") == "azure":
azure_models.add(key)
elif value.get("litellm_provider") == "azure_anthropic":
azure_anthropic_models.add(key)
elif value.get("litellm_provider") == "anyscale":
anyscale_models.add(key)
elif value.get("litellm_provider") == "cerebras":
cerebras_models.add(key)
elif value.get("litellm_provider") == "galadriel":
galadriel_models.add(key)
elif value.get("litellm_provider") == "nvidia_nim":
nvidia_nim_models.add(key)
elif value.get("litellm_provider") == "sambanova":
sambanova_models.add(key)
elif value.get("litellm_provider") == "sambanova-embedding-models":
sambanova_embedding_models.add(key)
elif value.get("litellm_provider") == "novita":
novita_models.add(key)
elif value.get("litellm_provider") == "nebius-chat-models":
nebius_models.add(key)
elif value.get("litellm_provider") == "nebius-embedding-models":
nebius_embedding_models.add(key)
elif value.get("litellm_provider") == "aiml":
aiml_models.add(key)
elif value.get("litellm_provider") == "assemblyai":
assemblyai_models.add(key)
elif value.get("litellm_provider") == "jina_ai":
jina_ai_models.add(key)
elif value.get("litellm_provider") == "snowflake":
snowflake_models.add(key)
elif value.get("litellm_provider") == "gradient_ai":
gradient_ai_models.add(key)
elif value.get("litellm_provider") == "featherless_ai":
featherless_ai_models.add(key)
elif value.get("litellm_provider") == "deepgram":
deepgram_models.add(key)
elif value.get("litellm_provider") == "elevenlabs":
elevenlabs_models.add(key)
elif value.get("litellm_provider") == "heroku":
heroku_models.add(key)
elif value.get("litellm_provider") == "dashscope":
dashscope_models.add(key)
elif value.get("litellm_provider") == "moonshot":
moonshot_models.add(key)
elif value.get("litellm_provider") == "publicai":
publicai_models.add(key)
elif value.get("litellm_provider") == "v0":
v0_models.add(key)
elif value.get("litellm_provider") == "morph":
morph_models.add(key)
elif value.get("litellm_provider") == "lambda_ai":
lambda_ai_models.add(key)
elif value.get("litellm_provider") == "hyperbolic":
hyperbolic_models.add(key)
elif value.get("litellm_provider") == "black_forest_labs":
black_forest_labs_models.add(key)
elif value.get("litellm_provider") == "recraft":
recraft_models.add(key)
elif value.get("litellm_provider") == "cometapi":
cometapi_models.add(key)
elif value.get("litellm_provider") == "oci":
oci_models.add(key)
elif value.get("litellm_provider") == "volcengine":
volcengine_models.add(key)
elif value.get("litellm_provider") == "wandb":
wandb_models.add(key)
elif value.get("litellm_provider") == "ovhcloud":
ovhcloud_models.add(key)
elif value.get("litellm_provider") == "ovhcloud-embedding-models":
ovhcloud_embedding_models.add(key)
elif value.get("litellm_provider") == "lemonade":
lemonade_models.add(key)
elif value.get("litellm_provider") == "docker_model_runner":
docker_model_runner_models.add(key)
elif value.get("litellm_provider") == "amazon_nova":
amazon_nova_models.add(key)
elif value.get("litellm_provider") == "stability":
stability_models.add(key)
elif value.get("litellm_provider") == "github_copilot":
github_copilot_models.add(key)
elif value.get("litellm_provider") == "chatgpt":
chatgpt_models.add(key)
elif value.get("litellm_provider") == "minimax":
minimax_models.add(key)
elif value.get("litellm_provider") == "aws_polly":
aws_polly_models.add(key)
elif value.get("litellm_provider") == "gigachat":
gigachat_models.add(key)
elif value.get("litellm_provider") == "llamagate":
llamagate_models.add(key)
elif value.get("litellm_provider") == "bedrock_mantle":
bedrock_mantle_models.add(key)
add_known_models()
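# Sketch: the helper can also re-populate the sets from a custom cost map
# ("my-model" is a hypothetical entry):
#   add_known_models({"my-model": {"litellm_provider": "openai", "mode": "chat"}})
#   "my-model" in open_ai_chat_completion_models  # -> True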
# known openai compatible endpoints - we'll eventually move this list to the model_prices_and_context_window.json dictionary
# this is maintained for Exception Mapping
# used for Cost Tracking & Token counting
# https://azure.microsoft.com/en-in/pricing/details/cognitive-services/openai-service/
# Azure returns gpt-35-turbo in their responses; we map this to azure/gpt-35-turbo for token counting
azure_llms = {
"gpt-35-turbo": "azure/gpt-35-turbo",
"gpt-35-turbo-16k": "azure/gpt-35-turbo-16k",
"gpt-35-turbo-instruct": "azure/gpt-35-turbo-instruct",
"azure/gpt-41": "gpt-4.1",
"azure/gpt-41-mini": "gpt-4.1-mini",
"azure/gpt-41-nano": "gpt-4.1-nano",
}
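# Sketch: token counting resolves the Azure response name via this map, e.g.
#   azure_llms["gpt-35-turbo"]  # -> "azure/gpt-35-turbo"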
azure_embedding_models = {
"ada": "azure/ada",
}
petals_models = [
"petals-team/StableBeluga2",
]
ollama_models = ["llama2"]
maritalk_models = ["maritalk"]
model_list = list(
open_ai_chat_completion_models
| open_ai_text_completion_models
| cohere_models
| cohere_chat_models
| anthropic_models
| set(replicate_models)
| openrouter_models
| datarobot_models
| set(huggingface_models)
| vertex_chat_models
| vertex_text_models
| ai21_models
| ai21_chat_models
| set(together_ai_models)
| set(baseten_models)
| aleph_alpha_models
| nlp_cloud_models
| set(ollama_models)
| bedrock_models
| deepinfra_models
| perplexity_models
| set(maritalk_models)
| runwayml_models
| vertex_language_models
| watsonx_models
| gemini_models
| text_completion_codestral_models
| xai_models
| zai_models
| fal_ai_models
| deepseek_models
| azure_ai_models
| voyage_models
| infinity_models
| databricks_models
| cloudflare_models
| codestral_models
| friendliai_models
| palm_models
| groq_models
| azure_models
| azure_anthropic_models
| anyscale_models
| cerebras_models
| galadriel_models
| nvidia_nim_models
| sambanova_models
| azure_text_models
| novita_models
| assemblyai_models
| jina_ai_models
| snowflake_models
| gradient_ai_models
| llama_models
| featherless_ai_models
| nscale_models
| deepgram_models
| elevenlabs_models
| dashscope_models
| moonshot_models
| publicai_models
| v0_models
| morph_models
| lambda_ai_models
| black_forest_labs_models
| recraft_models
| cometapi_models
| oci_models
| heroku_models
| vercel_ai_gateway_models
| volcengine_models
| wandb_models
| ovhcloud_models
| lemonade_models
| docker_model_runner_models
| bedrock_mantle_models
| set(clarifai_models)
)
model_list_set = set(model_list)
# provider_list is lazy-loaded via __getattr__ to avoid importing LlmProviders at import time
models_by_provider: dict = {
"openai": open_ai_chat_completion_models | open_ai_text_completion_models,
"text-completion-openai": open_ai_text_completion_models,
"cohere": cohere_models | cohere_chat_models,
"cohere_chat": cohere_chat_models,
"anthropic": anthropic_models,
"replicate": replicate_models,
"huggingface": huggingface_models,
"together_ai": together_ai_models,
"baseten": baseten_models,
"openrouter": openrouter_models,
"vercel_ai_gateway": vercel_ai_gateway_models,
"datarobot": datarobot_models,
"vertex_ai": vertex_chat_models
| vertex_text_models
| vertex_anthropic_models
| vertex_vision_models
| vertex_language_models
| vertex_deepseek_models
| vertex_minimax_models
| vertex_moonshot_models
| vertex_zai_models,
"ai21": ai21_models,
"bedrock": bedrock_models | bedrock_converse_models,
"petals": petals_models,
"ollama": ollama_models,
"ollama_chat": ollama_models,
"deepinfra": deepinfra_models,
"perplexity": perplexity_models,
"maritalk": maritalk_models,
"watsonx": watsonx_models,
"gemini": gemini_models,
"fireworks_ai": fireworks_ai_models | fireworks_ai_embedding_models,
"aleph_alpha": aleph_alpha_models,
"text-completion-codestral": text_completion_codestral_models,
"xai": xai_models,
"zai": zai_models,
"fal_ai": fal_ai_models,
"deepseek": deepseek_models,
"runwayml": runwayml_models,
"mistral": mistral_chat_models,
"azure_ai": azure_ai_models,
"voyage": voyage_models,
"infinity": infinity_models,
"databricks": databricks_models,
"cloudflare": cloudflare_models,
"codestral": codestral_models,
"nlp_cloud": nlp_cloud_models,
"friendliai": friendliai_models,
"palm": palm_models,
"groq": groq_models,
"azure": azure_models | azure_text_models,
"azure_anthropic": azure_anthropic_models,
"azure_text": azure_text_models,
"anyscale": anyscale_models,
"cerebras": cerebras_models,
"galadriel": galadriel_models,
"nvidia_nim": nvidia_nim_models,
"sambanova": sambanova_models | sambanova_embedding_models,
"novita": novita_models,
"nebius": nebius_models | nebius_embedding_models,
"aiml": aiml_models,
"assemblyai": assemblyai_models,
"jina_ai": jina_ai_models,
"snowflake": snowflake_models,
"gradient_ai": gradient_ai_models,
"meta_llama": llama_models,
"nscale": nscale_models,
"featherless_ai": featherless_ai_models,
"deepgram": deepgram_models,
"elevenlabs": elevenlabs_models,
"heroku": heroku_models,
"dashscope": dashscope_models,
"moonshot": moonshot_models,
"publicai": publicai_models,
"v0": v0_models,
"morph": morph_models,
"lambda_ai": lambda_ai_models,
"hyperbolic": hyperbolic_models,
"black_forest_labs": black_forest_labs_models,
"recraft": recraft_models,
"cometapi": cometapi_models,
"oci": oci_models,
"volcengine": volcengine_models,
"wandb": wandb_models,
"ovhcloud": ovhcloud_models | ovhcloud_embedding_models,
"lemonade": lemonade_models,
"clarifai": clarifai_models,
"amazon_nova": amazon_nova_models,
"stability": stability_models,
"github_copilot": github_copilot_models,
"chatgpt": chatgpt_models,
"minimax": minimax_models,
"aws_polly": aws_polly_models,
"gigachat": gigachat_models,
"llamagate": llamagate_models,
"bedrock_mantle": bedrock_mantle_models,
}
# mapping for models which have larger-context equivalents
longer_context_model_fallback_dict: dict = {
# openai chat completion models
"gpt-3.5-turbo": "gpt-3.5-turbo-16k",
"gpt-3.5-turbo-0301": "gpt-3.5-turbo-16k-0301",
"gpt-3.5-turbo-0613": "gpt-3.5-turbo-16k-0613",
"gpt-4": "gpt-4-32k",
"gpt-4-0314": "gpt-4-32k-0314",
"gpt-4-0613": "gpt-4-32k-0613",
# anthropic
"claude-instant-1": "claude-2",
"claude-instant-1.2": "claude-2",
# vertexai
"chat-bison": "chat-bison-32k",
"chat-bison@001": "chat-bison-32k",
"codechat-bison": "codechat-bison-32k",
"codechat-bison@001": "codechat-bison-32k",
# openrouter
"openrouter/openai/gpt-3.5-turbo": "openrouter/openai/gpt-3.5-turbo-16k",
"openrouter/anthropic/claude-instant-v1": "openrouter/anthropic/claude-2",
}
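# Sketch: on a context-window error, a caller can retry with the larger equivalent:
#   longer_context_model_fallback_dict.get("gpt-3.5-turbo")  # -> "gpt-3.5-turbo-16k"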
####### EMBEDDING MODELS ###################
all_embedding_models = (
open_ai_embedding_models
| set(cohere_embedding_models)
| set(bedrock_embedding_models)
| vertex_embedding_models
| fireworks_ai_embedding_models
| nebius_embedding_models
| sambanova_embedding_models
| ovhcloud_embedding_models
)
####### IMAGE GENERATION MODELS ###################
openai_image_generation_models = ["dall-e-2", "dall-e-3"]
####### VIDEO GENERATION MODELS ###################
openai_video_generation_models = ["sora-2"]
# timeout is lazy-loaded via __getattr__
# get_llm_provider is lazy-loaded via __getattr__
# remove_index_from_tool_calls is lazy-loaded via __getattr__
# Import KeyManagementSettings here (before utils import) because _key_management_settings
# is accessed during import time in secret_managers/main.py (via dd_tracing -> datadog -> _service_logger -> utils)
from litellm.types.secret_managers.main import KeyManagementSettings
_key_management_settings: KeyManagementSettings = KeyManagementSettings()
# client must be imported immediately as it's used as a decorator at function definition time
from .utils import client
# Note: Most other utils imports are lazy-loaded via __getattr__ to avoid loading utils.py
# (which imports tiktoken) at import time
from .llms.custom_llm import CustomLLM
from .llms.anthropic.common_utils import AnthropicModelInfo
from .llms.ai21.chat.transformation import AI21ChatConfig, AI21ChatConfig as AI21Config
from .llms.deprecated_providers.palm import (
PalmConfig,
) # here to prevent breaking changes
from .llms.deprecated_providers.aleph_alpha import AlephAlphaConfig
from .llms.gemini.common_utils import GeminiModelInfo
from .llms.vertex_ai.vertex_embeddings.transformation import (
VertexAITextEmbeddingConfig,
)
vertexAITextEmbeddingConfig = VertexAITextEmbeddingConfig()
from .llms.bedrock.embed.amazon_titan_v2_transformation import (
AmazonTitanV2Config,
)
from .llms.topaz.common_utils import TopazModelInfo
# OpenAIOSeriesConfig is lazy loaded - openaiOSeriesConfig will be created on first access
# OpenAIGPTConfig, OpenAIGPT5Config, etc. are lazy loaded - instances will be created on first access
from .llms.xai.common_utils import XAIModelInfo
# PublicAI now uses JSON-based configuration (see litellm/llms/openai_like/providers.json)
# All remaining configs are now lazy loaded - see _lazy_imports_registry.py
# Import LlmProviders here (before main import) because it's imported during import time
# in multiple places including openai.py (via main import)
from litellm.types.utils import LlmProviders
## Lazy-loading this is not straightforward; leaving the eager import here for now.
from .main import * # type: ignore
# Skills API
from .skills.main import (
create_skill,
acreate_skill,
list_skills,
alist_skills,
get_skill,
aget_skill,
delete_skill,
adelete_skill,
)
from .evals.main import (
create_eval,
acreate_eval,
list_evals,
alist_evals,
get_eval,
aget_eval,
delete_eval,
adelete_eval,
cancel_eval,
acancel_eval,
create_run,
acreate_run,
list_runs,
alist_runs,
get_run,
aget_run,
delete_run,
adelete_run,
cancel_run,
acancel_run,
)
from .integrations import *
from .llms.custom_httpx.async_client_cleanup import close_litellm_async_clients
from .exceptions import (
AuthenticationError,
InvalidRequestError,
BadRequestError,
ImageFetchError,
NotFoundError,
PermissionDeniedError,
RateLimitError,
ServiceUnavailableError,
BadGatewayError,
OpenAIError,
ContextWindowExceededError,
ContentPolicyViolationError,
BudgetExceededError,
APIError,
Timeout,
APIConnectionError,
UnsupportedParamsError,
APIResponseValidationError,
UnprocessableEntityError,
InternalServerError,
JSONSchemaValidationError,
LITELLM_EXCEPTION_TYPES,
MockException,
)
from .budget_manager import BudgetManager
from .proxy.proxy_cli import run_server
from .router import Router
from .assistants.main import *
from .batches.main import *
from .images.main import *
from .videos.main import *
from .batch_completion.main import * # type: ignore
from .rerank_api.main import *
from .llms.anthropic.experimental_pass_through.messages.handler import *
from .responses.main import *
# Interactions API is available as litellm.interactions module
# Usage: litellm.interactions.create(), litellm.interactions.get(), etc.
from . import interactions
from .containers.main import *
from .ocr.main import *
from .rag.main import *
from .search.main import *
from .realtime_api.main import (
_arealtime,
acreate_realtime_client_secret,
arealtime_calls,
)
from .responses.main import _aresponses_websocket
from .fine_tuning.main import *
from .files.main import *
from .vector_store_files.main import (
acreate as avector_store_file_create,
adelete as avector_store_file_delete,
alist as avector_store_file_list,
aretrieve as avector_store_file_retrieve,
aretrieve_content as avector_store_file_content,
aupdate as avector_store_file_update,
create as vector_store_file_create,
delete as vector_store_file_delete,
list as vector_store_file_list,
retrieve as vector_store_file_retrieve,
retrieve_content as vector_store_file_content,
update as vector_store_file_update,
)
from .scheduler import *
### ADAPTERS ###
from .types.adapter import AdapterItem
import litellm.anthropic_interface as anthropic
adapters: List[AdapterItem] = []
### Vector Store Registry ###
from .vector_stores.vector_store_registry import (
VectorStoreRegistry,
VectorStoreIndexRegistry,
)
vector_store_registry: Optional[VectorStoreRegistry] = None
vector_store_index_registry: Optional[VectorStoreIndexRegistry] = None
### RAG ###
from . import rag
### CUSTOM LLMs ###
from .types.llms.custom_llm import CustomLLMItem
custom_provider_map: List[CustomLLMItem] = []
_custom_providers: List[
str
] = [] # internal helper util, used to track names of custom providers
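# Sketch of the custom-provider pattern (handler and provider name are
# hypothetical; CustomLLM is imported above):
#   class MyLLM(CustomLLM):
#       ...  # override completion() / acompletion() as needed
#   custom_provider_map = [{"provider": "my-llm", "custom_handler": MyLLM()}]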
disable_hf_tokenizer_download: Optional[
bool
] = None  # disable huggingface tokenizer download. Falls back to OpenAI's cl100k_base tokenizer
global_disable_no_log_param: bool = False
### CLI UTILITIES ###
from litellm.litellm_core_utils.cli_token_utils import get_litellm_gateway_api_key
### PASSTHROUGH ###
from .passthrough import allm_passthrough_route, llm_passthrough_route
from .google_genai import agenerate_content
### GLOBAL CONFIG ###
global_bitbucket_config: Optional[Dict[str, Any]] = None
def set_global_bitbucket_config(config: Dict[str, Any]) -> None:
"""Set global BitBucket configuration for prompt management."""
global global_bitbucket_config
global_bitbucket_config = config
### GLOBAL CONFIG ###
global_gitlab_config: Optional[Dict[str, Any]] = None
def set_global_gitlab_config(config: Dict[str, Any]) -> None:
"""Set global BitBucket configuration for prompt management."""
global global_gitlab_config
global_gitlab_config = config
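# Sketch usage for the two setters above (keys are hypothetical; the accepted
# schema is whatever the prompt-management integration reads from the dict):
#   set_global_gitlab_config({"project": "group/repo", "access_token": "..."})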
# Lazy loading system for heavy modules to reduce initial import time and memory usage
if TYPE_CHECKING:
from litellm.types.utils import ModelInfo as _ModelInfoType
from litellm.types.utils import PriorityReservationSettings
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.caching.caching import Cache
# Type stubs for lazy-loaded configs to help mypy
from .llms.bedrock.chat.converse_transformation import (
AmazonConverseConfig as AmazonConverseConfig,
)
from .llms.openai_like.chat.handler import (
OpenAILikeChatConfig as OpenAILikeChatConfig,
)
from .llms.galadriel.chat.transformation import (
GaladrielChatConfig as GaladrielChatConfig,
)
from .llms.github.chat.transformation import GithubChatConfig as GithubChatConfig
from .llms.azure_ai.anthropic.transformation import (
AzureAnthropicConfig as AzureAnthropicConfig,
)
from .llms.bytez.chat.transformation import BytezChatConfig as BytezChatConfig
from .llms.compactifai.chat.transformation import (
CompactifAIChatConfig as CompactifAIChatConfig,
)
from .llms.empower.chat.transformation import EmpowerChatConfig as EmpowerChatConfig
from .llms.minimax.chat.transformation import MinimaxChatConfig as MinimaxChatConfig
from .llms.aiohttp_openai.chat.transformation import (
AiohttpOpenAIChatConfig as AiohttpOpenAIChatConfig,
)
from .llms.huggingface.chat.transformation import (
HuggingFaceChatConfig as HuggingFaceChatConfig,
)
from .llms.huggingface.embedding.transformation import (
HuggingFaceEmbeddingConfig as HuggingFaceEmbeddingConfig,
)
from .llms.oobabooga.chat.transformation import OobaboogaConfig as OobaboogaConfig
from .llms.maritalk import MaritalkConfig as MaritalkConfig
from .llms.openrouter.chat.transformation import (
OpenrouterConfig as OpenrouterConfig,
)
from .llms.datarobot.chat.transformation import DataRobotConfig as DataRobotConfig
from .llms.anthropic.chat.transformation import AnthropicConfig as AnthropicConfig
from .llms.anthropic.completion.transformation import (
AnthropicTextConfig as AnthropicTextConfig,
)
from .llms.groq.stt.transformation import GroqSTTConfig as GroqSTTConfig
from .llms.triton.completion.transformation import TritonConfig as TritonConfig
from .llms.triton.completion.transformation import (
TritonGenerateConfig as TritonGenerateConfig,
)
from .llms.triton.completion.transformation import (
TritonInferConfig as TritonInferConfig,
)
from .llms.triton.embedding.transformation import (
TritonEmbeddingConfig as TritonEmbeddingConfig,
)
from .llms.huggingface.rerank.transformation import (
HuggingFaceRerankConfig as HuggingFaceRerankConfig,
)
from .llms.databricks.chat.transformation import (
DatabricksConfig as DatabricksConfig,
)
from .llms.databricks.embed.transformation import (
DatabricksEmbeddingConfig as DatabricksEmbeddingConfig,
)
from .llms.predibase.chat.transformation import PredibaseConfig as PredibaseConfig
from .llms.replicate.chat.transformation import ReplicateConfig as ReplicateConfig
from .llms.snowflake.chat.transformation import SnowflakeConfig as SnowflakeConfig
from .llms.cohere.rerank.transformation import (
CohereRerankConfig as CohereRerankConfig,
)
from .llms.cohere.rerank_v2.transformation import (
CohereRerankV2Config as CohereRerankV2Config,
)
from .llms.azure_ai.rerank.transformation import (
AzureAIRerankConfig as AzureAIRerankConfig,
)
from .llms.infinity.rerank.transformation import (
InfinityRerankConfig as InfinityRerankConfig,
)
from .llms.jina_ai.rerank.transformation import (
JinaAIRerankConfig as JinaAIRerankConfig,
)
from .llms.deepinfra.rerank.transformation import (
DeepinfraRerankConfig as DeepinfraRerankConfig,
)
from .llms.hosted_vllm.rerank.transformation import (
HostedVLLMRerankConfig as HostedVLLMRerankConfig,
)
from .llms.nvidia_nim.rerank.transformation import (
NvidiaNimRerankConfig as NvidiaNimRerankConfig,
)
from .llms.nvidia_nim.rerank.ranking_transformation import (
NvidiaNimRankingConfig as NvidiaNimRankingConfig,
)
from .llms.vertex_ai.rerank.transformation import (
VertexAIRerankConfig as VertexAIRerankConfig,
)
from .llms.fireworks_ai.rerank.transformation import (
FireworksAIRerankConfig as FireworksAIRerankConfig,
)
from .llms.voyage.rerank.transformation import (
VoyageRerankConfig as VoyageRerankConfig,
)
from .llms.watsonx.rerank.transformation import (
IBMWatsonXRerankConfig as IBMWatsonXRerankConfig,
)
from .llms.clarifai.chat.transformation import ClarifaiConfig as ClarifaiConfig
from .llms.ai21.chat.transformation import AI21ChatConfig as AI21ChatConfig
from .llms.meta_llama.chat.transformation import LlamaAPIConfig as LlamaAPIConfig
from .llms.together_ai.completion.transformation import (
TogetherAITextCompletionConfig as TogetherAITextCompletionConfig,
)
from .llms.cloudflare.chat.transformation import (
CloudflareChatConfig as CloudflareChatConfig,
)
from .llms.novita.chat.transformation import NovitaConfig as NovitaConfig
from .llms.petals.completion.transformation import PetalsConfig as PetalsConfig
from .llms.ollama.chat.transformation import OllamaChatConfig as OllamaChatConfig
from .llms.ollama.completion.transformation import OllamaConfig as OllamaConfig
from .llms.sagemaker.completion.transformation import (
SagemakerConfig as SagemakerConfig,
)
from .llms.sagemaker.chat.transformation import (
SagemakerChatConfig as SagemakerChatConfig,
)
from .llms.cohere.chat.transformation import CohereChatConfig as CohereChatConfig
from .llms.anthropic.experimental_pass_through.messages.transformation import (
AnthropicMessagesConfig as AnthropicMessagesConfig,
)
from .llms.bedrock.messages.invoke_transformations.anthropic_claude3_transformation import (
AmazonAnthropicClaudeMessagesConfig as AmazonAnthropicClaudeMessagesConfig,
)
from .llms.together_ai.chat import TogetherAIConfig as TogetherAIConfig
from .llms.nlp_cloud.chat.handler import NLPCloudConfig as NLPCloudConfig
from .llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
VertexGeminiConfig as VertexGeminiConfig,
)
from .llms.gemini.chat.transformation import (
GoogleAIStudioGeminiConfig as GoogleAIStudioGeminiConfig,
)
from .llms.vertex_ai.vertex_ai_partner_models.anthropic.transformation import (
VertexAIAnthropicConfig as VertexAIAnthropicConfig,
)
from .llms.vertex_ai.vertex_ai_partner_models.llama3.transformation import (
VertexAILlama3Config as VertexAILlama3Config,
)
from .llms.vertex_ai.vertex_ai_partner_models.ai21.transformation import (
VertexAIAi21Config as VertexAIAi21Config,
)
from .llms.bedrock.chat.invoke_handler import (
AmazonCohereChatConfig as AmazonCohereChatConfig,
)
from .llms.bedrock.common_utils import (
AmazonBedrockGlobalConfig as AmazonBedrockGlobalConfig,
)
from .llms.bedrock.chat.invoke_transformations.amazon_ai21_transformation import (
AmazonAI21Config as AmazonAI21Config,
)
from .llms.bedrock.chat.invoke_transformations.amazon_nova_transformation import (
AmazonInvokeNovaConfig as AmazonInvokeNovaConfig,
)
from .llms.bedrock.chat.invoke_transformations.amazon_qwen2_transformation import (
AmazonQwen2Config as AmazonQwen2Config,
)
from .llms.bedrock.chat.invoke_transformations.amazon_qwen3_transformation import (
AmazonQwen3Config as AmazonQwen3Config,
)
from .llms.bedrock.chat.invoke_transformations.anthropic_claude2_transformation import (
AmazonAnthropicConfig as AmazonAnthropicConfig,
)
from .llms.bedrock.chat.invoke_transformations.anthropic_claude3_transformation import (
AmazonAnthropicClaudeConfig as AmazonAnthropicClaudeConfig,
)
from .llms.bedrock.chat.invoke_transformations.amazon_cohere_transformation import (
AmazonCohereConfig as AmazonCohereConfig,
)
from .llms.bedrock.chat.invoke_transformations.amazon_llama_transformation import (
AmazonLlamaConfig as AmazonLlamaConfig,
)
from .llms.bedrock.chat.invoke_transformations.amazon_deepseek_transformation import (
AmazonDeepSeekR1Config as AmazonDeepSeekR1Config,
)
from .llms.bedrock.chat.invoke_transformations.amazon_mistral_transformation import (
AmazonMistralConfig as AmazonMistralConfig,
)
from .llms.bedrock.chat.invoke_transformations.amazon_moonshot_transformation import (
AmazonMoonshotConfig as AmazonMoonshotConfig,
)
from .llms.bedrock.chat.invoke_transformations.amazon_titan_transformation import (
AmazonTitanConfig as AmazonTitanConfig,
)
from .llms.bedrock.chat.invoke_transformations.amazon_twelvelabs_pegasus_transformation import (
AmazonTwelveLabsPegasusConfig as AmazonTwelveLabsPegasusConfig,
)
from .llms.bedrock.chat.invoke_transformations.base_invoke_transformation import (
AmazonInvokeConfig as AmazonInvokeConfig,
)
from .llms.bedrock.chat.invoke_transformations.amazon_openai_transformation import (
AmazonBedrockOpenAIConfig as AmazonBedrockOpenAIConfig,
)
from .llms.bedrock.image_generation.amazon_stability1_transformation import (
AmazonStabilityConfig as AmazonStabilityConfig,
)
from .llms.bedrock.image_generation.amazon_stability3_transformation import (
AmazonStability3Config as AmazonStability3Config,
)
from .llms.bedrock.image_generation.amazon_nova_canvas_transformation import (
AmazonNovaCanvasConfig as AmazonNovaCanvasConfig,
)
from .llms.bedrock.embed.amazon_titan_g1_transformation import (
AmazonTitanG1Config as AmazonTitanG1Config,
)
from .llms.bedrock.embed.amazon_titan_multimodal_transformation import (
AmazonTitanMultimodalEmbeddingG1Config as AmazonTitanMultimodalEmbeddingG1Config,
)
from .llms.cohere.chat.v2_transformation import (
CohereV2ChatConfig as CohereV2ChatConfig,
)
from .llms.bedrock.embed.cohere_transformation import (
BedrockCohereEmbeddingConfig as BedrockCohereEmbeddingConfig,
)
from .llms.bedrock.embed.twelvelabs_marengo_transformation import (
TwelveLabsMarengoEmbeddingConfig as TwelveLabsMarengoEmbeddingConfig,
)
from .llms.bedrock.embed.amazon_nova_transformation import (
AmazonNovaEmbeddingConfig as AmazonNovaEmbeddingConfig,
)
from .llms.openai.openai import (
OpenAIConfig as OpenAIConfig,
MistralEmbeddingConfig as MistralEmbeddingConfig,
)
from .llms.openai.image_variations.transformation import (
OpenAIImageVariationConfig as OpenAIImageVariationConfig,
)
from .llms.deepgram.audio_transcription.transformation import (
DeepgramAudioTranscriptionConfig as DeepgramAudioTranscriptionConfig,
)
from .llms.topaz.image_variations.transformation import (
TopazImageVariationConfig as TopazImageVariationConfig,
)
from litellm.llms.openai.completion.transformation import (
OpenAITextCompletionConfig as OpenAITextCompletionConfig,
)
from .llms.groq.chat.transformation import GroqChatConfig as GroqChatConfig
from .llms.bedrock_mantle.chat.transformation import (
BedrockMantleChatConfig as BedrockMantleChatConfig,
)
from .llms.a2a.chat.transformation import A2AConfig as A2AConfig
from .llms.voyage.embedding.transformation import (
VoyageEmbeddingConfig as VoyageEmbeddingConfig,
)
from .llms.voyage.embedding.transformation_contextual import (
VoyageContextualEmbeddingConfig as VoyageContextualEmbeddingConfig,
)
from .llms.infinity.embedding.transformation import (
InfinityEmbeddingConfig as InfinityEmbeddingConfig,
)
from .llms.perplexity.embedding.transformation import (
PerplexityEmbeddingConfig as PerplexityEmbeddingConfig,
)
from .llms.azure_ai.chat.transformation import (
AzureAIStudioConfig as AzureAIStudioConfig,
)
from .llms.mistral.chat.transformation import MistralConfig as MistralConfig
from .llms.openai.responses.transformation import (
OpenAIResponsesAPIConfig as OpenAIResponsesAPIConfig,
)
from .llms.azure.responses.transformation import (
AzureOpenAIResponsesAPIConfig as AzureOpenAIResponsesAPIConfig,
)
from .llms.azure.responses.o_series_transformation import (
AzureOpenAIOSeriesResponsesAPIConfig as AzureOpenAIOSeriesResponsesAPIConfig,
)
from .llms.xai.responses.transformation import (
XAIResponsesAPIConfig as XAIResponsesAPIConfig,
)
from .llms.litellm_proxy.responses.transformation import (
LiteLLMProxyResponsesAPIConfig as LiteLLMProxyResponsesAPIConfig,
)
from .llms.volcengine.responses.transformation import (
VolcEngineResponsesAPIConfig as VolcEngineResponsesAPIConfig,
)
from .llms.manus.responses.transformation import (
ManusResponsesAPIConfig as ManusResponsesAPIConfig,
)
from .llms.perplexity.responses.transformation import (
PerplexityResponsesConfig as PerplexityResponsesConfig,
)
from .llms.databricks.responses.transformation import (
DatabricksResponsesAPIConfig as DatabricksResponsesAPIConfig,
)
from .llms.openrouter.responses.transformation import (
OpenRouterResponsesAPIConfig as OpenRouterResponsesAPIConfig,
)
from .llms.gemini.interactions.transformation import (
GoogleAIStudioInteractionsConfig as GoogleAIStudioInteractionsConfig,
)
from .llms.openai.chat.o_series_transformation import (
OpenAIOSeriesConfig as OpenAIOSeriesConfig,
OpenAIOSeriesConfig as OpenAIO1Config,
)
from .llms.anthropic.skills.transformation import (
AnthropicSkillsConfig as AnthropicSkillsConfig,
)
from .llms.base_llm.skills.transformation import (
BaseSkillsAPIConfig as BaseSkillsAPIConfig,
)
from .llms.gradient_ai.chat.transformation import (
GradientAIConfig as GradientAIConfig,
)
from .llms.openai.chat.gpt_transformation import OpenAIGPTConfig as OpenAIGPTConfig
from .llms.openai.chat.gpt_5_transformation import (
OpenAIGPT5Config as OpenAIGPT5Config,
)
from .llms.openai.transcriptions.whisper_transformation import (
OpenAIWhisperAudioTranscriptionConfig as OpenAIWhisperAudioTranscriptionConfig,
)
from .llms.openai.transcriptions.gpt_transformation import (
OpenAIGPTAudioTranscriptionConfig as OpenAIGPTAudioTranscriptionConfig,
)
from .llms.openai.chat.gpt_audio_transformation import (
OpenAIGPTAudioConfig as OpenAIGPTAudioConfig,
)
from .llms.nvidia_nim.chat.transformation import NvidiaNimConfig as NvidiaNimConfig
from .llms.nvidia_nim.embed import (
NvidiaNimEmbeddingConfig as NvidiaNimEmbeddingConfig,
)
# Type stubs for lazy-loaded config instances
openaiOSeriesConfig: OpenAIOSeriesConfig
openAIGPTConfig: OpenAIGPTConfig
openAIGPTAudioConfig: OpenAIGPTAudioConfig
openAIGPT5Config: OpenAIGPT5Config
nvidiaNimConfig: NvidiaNimConfig
nvidiaNimEmbeddingConfig: NvidiaNimEmbeddingConfig
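# A hedged usage sketch (comments only, nothing executes here): the
# annotation-only stubs above mean these instances do not exist until first
# attribute access, when __getattr__ (defined below) instantiates the class
# and caches the result, e.g.:
#
#     import litellm
#     cfg = litellm.openAIGPTConfig          # instantiated on first access
#     assert cfg is litellm.openAIGPTConfig  # later lookups return the cached instance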
# Import config classes that need type stubs (for mypy); imported with a leading underscore (_) prefix to avoid circular references
from .llms.vllm.completion.transformation import VLLMConfig as _VLLMConfig
from .llms.deepseek.chat.transformation import (
DeepSeekChatConfig as _DeepSeekChatConfig,
)
from .llms.sap.chat.transformation import (
GenAIHubOrchestrationConfig as _GenAIHubOrchestrationConfig,
)
from .llms.sap.embed.transformation import (
GenAIHubEmbeddingConfig as _GenAIHubEmbeddingConfig,
)
from .llms.azure.chat.o_series_transformation import (
AzureOpenAIO1Config as _AzureOpenAIO1Config,
)
from .llms.perplexity.chat.transformation import (
PerplexityChatConfig as _PerplexityChatConfig,
)
from .llms.nscale.chat.transformation import NscaleConfig as _NscaleConfig
from .llms.watsonx.chat.transformation import (
IBMWatsonXChatConfig as _IBMWatsonXChatConfig,
)
from .llms.watsonx.completion.transformation import (
IBMWatsonXAIConfig as _IBMWatsonXAIConfig,
)
from .llms.litellm_proxy.chat.transformation import (
LiteLLMProxyChatConfig as _LiteLLMProxyChatConfig,
)
from .llms.deepinfra.chat.transformation import DeepInfraConfig as _DeepInfraConfig
from .llms.llamafile.chat.transformation import (
LlamafileChatConfig as _LlamafileChatConfig,
)
from .llms.lm_studio.chat.transformation import (
LMStudioChatConfig as _LMStudioChatConfig,
)
from .llms.lm_studio.embed.transformation import (
LmStudioEmbeddingConfig as _LmStudioEmbeddingConfig,
)
from .llms.watsonx.embed.transformation import (
IBMWatsonXEmbeddingConfig as _IBMWatsonXEmbeddingConfig,
)
from .llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
VertexGeminiConfig as _VertexGeminiConfig,
)
# Type stubs for lazy-loaded config classes (to help mypy understand types)
VLLMConfig: Type[_VLLMConfig]
DeepSeekChatConfig: Type[_DeepSeekChatConfig]
GenAIHubOrchestrationConfig: Type[_GenAIHubOrchestrationConfig]
GenAIHubEmbeddingConfig: Type[_GenAIHubEmbeddingConfig]
AzureOpenAIO1Config: Type[_AzureOpenAIO1Config]
PerplexityChatConfig: Type[_PerplexityChatConfig]
NscaleConfig: Type[_NscaleConfig]
IBMWatsonXChatConfig: Type[_IBMWatsonXChatConfig]
IBMWatsonXAIConfig: Type[_IBMWatsonXAIConfig]
LiteLLMProxyChatConfig: Type[_LiteLLMProxyChatConfig]
DeepInfraConfig: Type[_DeepInfraConfig]
LlamafileChatConfig: Type[_LlamafileChatConfig]
LMStudioChatConfig: Type[_LMStudioChatConfig]
LmStudioEmbeddingConfig: Type[_LmStudioEmbeddingConfig]
IBMWatsonXEmbeddingConfig: Type[_IBMWatsonXEmbeddingConfig]
VertexAIConfig: Type[_VertexGeminiConfig] # Alias for VertexGeminiConfig
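# A hedged sketch of what the Type[...] stubs above provide: mypy treats the
# public name as the underscore-prefixed class, while runtime resolution still
# goes through __getattr__ below. Hypothetical usage:
#
#     import litellm
#     config = litellm.DeepSeekChatConfig()  # type-checks as _DeepSeekChatConfig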
from .llms.featherless_ai.chat.transformation import (
FeatherlessAIConfig as FeatherlessAIConfig,
)
from .llms.cerebras.chat import CerebrasConfig as CerebrasConfig
from .llms.baseten.chat import BasetenConfig as BasetenConfig
from .llms.sambanova.chat import SambanovaConfig as SambanovaConfig
from .llms.sambanova.embedding.transformation import (
SambaNovaEmbeddingConfig as SambaNovaEmbeddingConfig,
)
from .llms.fireworks_ai.chat.transformation import (
FireworksAIConfig as FireworksAIConfig,
)
from .llms.fireworks_ai.completion.transformation import (
FireworksAITextCompletionConfig as FireworksAITextCompletionConfig,
)
from .llms.fireworks_ai.audio_transcription.transformation import (
FireworksAIAudioTranscriptionConfig as FireworksAIAudioTranscriptionConfig,
)
from .llms.fireworks_ai.embed.fireworks_ai_transformation import (
FireworksAIEmbeddingConfig as FireworksAIEmbeddingConfig,
)
from .llms.friendliai.chat.transformation import (
FriendliaiChatConfig as FriendliaiChatConfig,
)
from .llms.jina_ai.embedding.transformation import (
JinaAIEmbeddingConfig as JinaAIEmbeddingConfig,
)
from .llms.xai.chat.transformation import XAIChatConfig as XAIChatConfig
from .llms.zai.chat.transformation import ZAIChatConfig as ZAIChatConfig
from .llms.aiml.chat.transformation import AIMLChatConfig as AIMLChatConfig
from .llms.volcengine.chat.transformation import (
VolcEngineChatConfig as VolcEngineChatConfig,
VolcEngineChatConfig as VolcEngineConfig,
)
from .llms.codestral.completion.transformation import (
CodestralTextCompletionConfig as CodestralTextCompletionConfig,
)
from .llms.azure.azure import (
AzureOpenAIAssistantsAPIConfig as AzureOpenAIAssistantsAPIConfig,
)
from .llms.heroku.chat.transformation import HerokuChatConfig as HerokuChatConfig
from .llms.cometapi.chat.transformation import CometAPIConfig as CometAPIConfig
from .llms.azure.chat.gpt_transformation import (
AzureOpenAIConfig as AzureOpenAIConfig,
)
from .llms.azure.chat.gpt_5_transformation import (
AzureOpenAIGPT5Config as AzureOpenAIGPT5Config,
)
from .llms.azure.completion.transformation import (
AzureOpenAITextConfig as AzureOpenAITextConfig,
)
from .llms.hosted_vllm.chat.transformation import (
HostedVLLMChatConfig as HostedVLLMChatConfig,
)
from .llms.hosted_vllm.embedding.transformation import (
HostedVLLMEmbeddingConfig as HostedVLLMEmbeddingConfig,
)
from .llms.hosted_vllm.responses.transformation import (
HostedVLLMResponsesAPIConfig as HostedVLLMResponsesAPIConfig,
)
from .llms.github_copilot.chat.transformation import (
GithubCopilotConfig as GithubCopilotConfig,
)
from .llms.github_copilot.responses.transformation import (
GithubCopilotResponsesAPIConfig as GithubCopilotResponsesAPIConfig,
)
from .llms.github_copilot.embedding.transformation import (
GithubCopilotEmbeddingConfig as GithubCopilotEmbeddingConfig,
)
from .llms.chatgpt.chat.transformation import ChatGPTConfig as ChatGPTConfig
from .llms.chatgpt.responses.transformation import (
ChatGPTResponsesAPIConfig as ChatGPTResponsesAPIConfig,
)
from .llms.gigachat.chat.transformation import GigaChatConfig as GigaChatConfig
from .llms.gigachat.embedding.transformation import (
GigaChatEmbeddingConfig as GigaChatEmbeddingConfig,
)
from .llms.nebius.chat.transformation import NebiusConfig as NebiusConfig
from .llms.wandb.chat.transformation import WandbConfig as WandbConfig
from .llms.dashscope.chat.transformation import (
DashScopeChatConfig as DashScopeChatConfig,
)
from .llms.moonshot.chat.transformation import (
MoonshotChatConfig as MoonshotChatConfig,
)
from .llms.docker_model_runner.chat.transformation import (
DockerModelRunnerChatConfig as DockerModelRunnerChatConfig,
)
from .llms.v0.chat.transformation import V0ChatConfig as V0ChatConfig
from .llms.oci.chat.transformation import OCIChatConfig as OCIChatConfig
from .llms.morph.chat.transformation import MorphChatConfig as MorphChatConfig
from .llms.ragflow.chat.transformation import RAGFlowConfig as RAGFlowConfig
from .llms.lambda_ai.chat.transformation import (
LambdaAIChatConfig as LambdaAIChatConfig,
)
from .llms.hyperbolic.chat.transformation import (
HyperbolicChatConfig as HyperbolicChatConfig,
)
from .llms.vercel_ai_gateway.chat.transformation import (
VercelAIGatewayConfig as VercelAIGatewayConfig,
)
from .llms.ovhcloud.chat.transformation import (
OVHCloudChatConfig as OVHCloudChatConfig,
)
from .llms.ovhcloud.embedding.transformation import (
OVHCloudEmbeddingConfig as OVHCloudEmbeddingConfig,
)
from .llms.cometapi.embed.transformation import (
CometAPIEmbeddingConfig as CometAPIEmbeddingConfig,
)
from .llms.lemonade.chat.transformation import (
LemonadeChatConfig as LemonadeChatConfig,
)
from .llms.snowflake.embedding.transformation import (
SnowflakeEmbeddingConfig as SnowflakeEmbeddingConfig,
)
from .llms.amazon_nova.chat.transformation import (
AmazonNovaChatConfig as AmazonNovaChatConfig,
)
from litellm.caching.llm_caching_handler import LLMClientCache
from litellm.types.llms.bedrock import COHERE_EMBEDDING_INPUT_TYPES
from litellm.types.utils import (
BudgetConfig,
CredentialItem,
PriorityReservationDict,
StandardKeyGenerationConfig,
)
from litellm.types.guardrails import GuardrailItem
from litellm.types.proxy.management_endpoints.ui_sso import (
DefaultTeamSSOParams,
LiteLLM_UpperboundKeyGenerateParams,
)
# Cost calculator functions
cost_per_token: Callable[..., Tuple[float, float]]
completion_cost: Callable[..., float]
response_cost_calculator: Any
modify_integration: Any
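# A hedged usage sketch for the cost helpers above; the model name and token
# counts are illustrative only:
#
#     import litellm
#     prompt_cost, completion_cost_usd = litellm.cost_per_token(
#         model="gpt-3.5-turbo", prompt_tokens=10, completion_tokens=20
#     )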
# Utility functions: type stubs for truly lazy-loaded functions only
# (i.e. functions NOT imported via "from .main import *")
get_response_string: Callable[..., str]
supports_function_calling: Callable[..., bool]
supports_web_search: Callable[..., bool]
supports_url_context: Callable[..., bool]
supports_response_schema: Callable[..., bool]
supports_parallel_function_calling: Callable[..., bool]
supports_vision: Callable[..., bool]
supports_audio_input: Callable[..., bool]
supports_audio_output: Callable[..., bool]
supports_system_messages: Callable[..., bool]
supports_reasoning: Callable[..., bool]
acreate: Callable[..., Any]
get_max_tokens: Callable[..., int]
get_model_info: Callable[..., _ModelInfoType] # type: ignore[no-redef]
register_prompt_template: Callable[..., None]
validate_environment: Callable[..., dict]
check_valid_key: Callable[..., bool]
register_model: Callable[..., None]
encode: Callable[..., list]
decode: Callable[..., str]
_calculate_retry_after: Callable[..., float]
_should_retry: Callable[..., bool]
get_supported_openai_params: Callable[..., Optional[list]]
get_api_base: Callable[..., Optional[str]]
get_first_chars_messages: Callable[..., str]
get_provider_fields: Callable[..., List]
get_valid_models: Callable[..., list]
remove_index_from_tool_calls: Callable[..., None]
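# A hedged usage sketch for the lazily loaded utility functions above; the
# model name is illustrative only:
#
#     import litellm
#     max_tokens = litellm.get_max_tokens("gpt-3.5-turbo")  # resolved via __getattr__
#     assert isinstance(max_tokens, int)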
# Response types - truly lazy loaded only (not in main.py or elsewhere)
ModelResponseListIterator: Type[Any]
# HTTP handler singletons (created lazily via __getattr__ at runtime)
module_level_aclient: AsyncHTTPHandler
module_level_client: HTTPHandler
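# A hedged note on the handler singletons above: they are constructed on first
# attribute access and then shared, so repeated lookups are expected to return
# the same object (illustrative only):
#
#     import litellm
#     client = litellm.module_level_client  # HTTPHandler built lazily, then reused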
# Bedrock tool name mappings instance (lazy-loaded)
from litellm.caching.caching import InMemoryCache
bedrock_tool_name_mappings: InMemoryCache
# Azure exception class (imported eagerly here; __getattr__ below also carries a lazy fallback)
from litellm.llms.azure.common_utils import AzureOpenAIError
# Secret manager types (imported eagerly; KeyManagementSettings is needed
# for _key_management_settings initialization)
from litellm.types.secret_managers.main import (
    KeyManagementSystem,
    KeyManagementSettings,
)
# Custom logger class (imported eagerly here)
from litellm.integrations.custom_logger import CustomLogger
# Datadog LLM observability params (imported eagerly here)
from litellm.types.integrations.datadog_llm_obs import DatadogLLMObsInitParams
# Logging callback manager: class imported eagerly, instance created lazily
from litellm.litellm_core_utils.logging_callback_manager import (
LoggingCallbackManager,
)
logging_callback_manager: LoggingCallbackManager
# provider_list is lazy-loaded
from litellm.types.utils import LlmProviders
provider_list: List[Union[LlmProviders, str]]
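# A hedged sketch of the resolved provider_list (built as list(LlmProviders)
# in __getattr__ below); assumes the enum defines an OPENAI member:
#
#     import litellm
#     assert LlmProviders.OPENAI in litellm.provider_list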
# Note: AmazonConverseConfig and OpenAILikeChatConfig are imported above in TYPE_CHECKING block
# Track if async client cleanup has been registered (for lazy loading)
_async_client_cleanup_registered = False
# Eager loading for backwards compatibility with VCR and other HTTP recording tools
# When LITELLM_DISABLE_LAZY_LOADING is set, lazy-loaded attributes are loaded at import time
# For now, this only affects encoding (tiktoken), as it was the only reported issue
# See: https://github.com/BerriAI/litellm/issues/18659
if os.getenv("LITELLM_DISABLE_LAZY_LOADING", "").lower() in ("1", "true", "yes", "on"):
    # Load encoding at import time (pre-#18070 behavior) so it is initialized
    # before VCR starts recording HTTP requests
    from .main import encoding
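# A hedged usage sketch for the flag above (shell usage shown as a comment;
# "my_vcr_test.py" is a hypothetical script name):
#
#     LITELLM_DISABLE_LAZY_LOADING=1 python my_vcr_test.py
#
# or from Python, before the first litellm import:
#
#     import os
#     os.environ["LITELLM_DISABLE_LAZY_LOADING"] = "1"
#     import litellm  # litellm.encoding is initialized eagerly at import time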
def __getattr__(name: str) -> Any:
"""Lazy import handler with cached registry for improved performance."""
global _async_client_cleanup_registered
# Register async client cleanup on first access (only once)
if not _async_client_cleanup_registered:
from litellm.llms.custom_httpx.async_client_cleanup import (
register_async_client_cleanup,
)
register_async_client_cleanup()
_async_client_cleanup_registered = True
# Use cached registry from _lazy_imports instead of importing tuples every time
from ._lazy_imports import _get_lazy_import_registry
registry = _get_lazy_import_registry()
# Check if name is in registry and call the cached handler function
if name in registry:
handler_func = registry[name]
return handler_func(name)
# Lazy load encoding from main.py to avoid heavy tiktoken import
if name == "encoding":
from ._lazy_imports import _get_litellm_globals
_globals = _get_litellm_globals()
# Check if already cached
if "encoding" not in _globals:
from .main import encoding as _encoding
_globals["encoding"] = _encoding
return _globals["encoding"]
# Lazy load bedrock_tool_name_mappings instance
if name == "bedrock_tool_name_mappings":
from ._lazy_imports import _get_litellm_globals
_globals = _get_litellm_globals()
# Check if already cached
if "bedrock_tool_name_mappings" not in _globals:
from .llms.bedrock.chat.invoke_handler import (
bedrock_tool_name_mappings as _bedrock_tool_name_mappings,
)
_globals["bedrock_tool_name_mappings"] = _bedrock_tool_name_mappings
return _globals["bedrock_tool_name_mappings"]
# Lazy load AzureOpenAIError exception class
if name == "AzureOpenAIError":
from ._lazy_imports import _get_litellm_globals
_globals = _get_litellm_globals()
# Check if already cached
if "AzureOpenAIError" not in _globals:
from .llms.azure.common_utils import AzureOpenAIError as _AzureOpenAIError
_globals["AzureOpenAIError"] = _AzureOpenAIError
return _globals["AzureOpenAIError"]
    # Lazy load config instances: import the class, instantiate it on first
    # access, and cache the instance
    _config_instances = {
        "openaiOSeriesConfig": "OpenAIOSeriesConfig",
        "openAIGPTConfig": "OpenAIGPTConfig",
        "openAIGPTAudioConfig": "OpenAIGPTAudioConfig",
        "openAIGPT5Config": "OpenAIGPT5Config",
        "nvidiaNimConfig": "NvidiaNimConfig",
        "nvidiaNimEmbeddingConfig": "NvidiaNimEmbeddingConfig",
    }
if name in _config_instances:
from ._lazy_imports import _get_litellm_globals
_globals = _get_litellm_globals()
if name not in _globals:
# Import the config class and instantiate it
config_class = __getattr__(_config_instances[name])
_globals[name] = config_class()
return _globals[name]
# Handle OpenAIO1Config alias
if name == "OpenAIO1Config":
return __getattr__("OpenAIOSeriesConfig")
# Lazy load provider_list
if name == "provider_list":
from ._lazy_imports import _get_litellm_globals
_globals = _get_litellm_globals()
# Check if already cached
if "provider_list" not in _globals:
# LlmProviders is eagerly imported above, so we can import it directly
from litellm.types.utils import LlmProviders
_globals["provider_list"] = list(LlmProviders)
return _globals["provider_list"]
# Lazy load priority_reservation_settings instance
if name == "priority_reservation_settings":
from ._lazy_imports import _get_litellm_globals
_globals = _get_litellm_globals()
# Check if already cached
if "priority_reservation_settings" not in _globals:
# Import the class and instantiate it
PriorityReservationSettings = __getattr__("PriorityReservationSettings")
_globals["priority_reservation_settings"] = PriorityReservationSettings()
return _globals["priority_reservation_settings"]
# Lazy load logging_callback_manager instance
if name == "logging_callback_manager":
from ._lazy_imports import _get_litellm_globals
_globals = _get_litellm_globals()
# Check if already cached
if "logging_callback_manager" not in _globals:
# Import the class and instantiate it
LoggingCallbackManager = __getattr__("LoggingCallbackManager")
_globals["logging_callback_manager"] = LoggingCallbackManager()
return _globals["logging_callback_manager"]
# Lazy load _service_logger module
if name == "_service_logger":
from ._lazy_imports import _get_litellm_globals
_globals = _get_litellm_globals()
# Check if already cached
if "_service_logger" not in _globals:
# Import the module lazily
import litellm._service_logger
_globals["_service_logger"] = litellm._service_logger
return _globals["_service_logger"]
    # Lazy load evals module functions: import the module once and return the
    # requested function from it
    if name in {
        "acreate_eval", "alist_evals", "aget_eval", "aupdate_eval",
        "adelete_eval", "acancel_eval", "create_eval", "list_evals",
        "get_eval", "update_eval", "delete_eval", "cancel_eval",
        "acreate_run", "alist_runs", "aget_run", "acancel_run",
        "adelete_run", "create_run", "list_runs", "get_run",
        "cancel_run", "delete_run",
    }:
        from litellm.evals import main as _evals_main
        return getattr(_evals_main, name)
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
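# A hedged sketch of the module-level __getattr__ contract (PEP 562): it runs
# only when normal attribute lookup misses, and unknown names still raise
# AttributeError ("not_a_real_attr" below is a deliberately bogus name):
#
#     import litellm
#     litellm.get_model_info      # triggers __getattr__, loads lazily, caches
#     litellm.not_a_real_attr     # raises AttributeError, as usual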
# ALL_LITELLM_RESPONSE_TYPES is lazy-loaded via __getattr__ to avoid loading utils at import time