chore: initial snapshot for gitea/github upload

This commit is contained in:
Your Name
2026-03-26 16:04:46 +08:00
commit a699a1ac98
3497 changed files with 1586237 additions and 0 deletions

View File

@@ -0,0 +1,160 @@
"""Support for Azure OpenAI gpt-5 model family."""
from typing import List
import litellm
from litellm.exceptions import UnsupportedParamsError
from litellm.llms.openai.chat.gpt_5_transformation import (
OpenAIGPT5Config,
_get_effort_level,
)
from litellm.types.llms.openai import AllMessageValues
from .gpt_transformation import AzureOpenAIConfig
class AzureOpenAIGPT5Config(AzureOpenAIConfig, OpenAIGPT5Config):
    """Azure specific handling for gpt-5 models."""

    # Manual routing prefix, e.g. "gpt5_series/my-deployment-name".
    GPT5_SERIES_ROUTE = "gpt5_series/"

    @classmethod
    def _supports_reasoning_effort_level(cls, model: str, level: str) -> bool:
        """Override to handle gpt5_series/ prefix used for Azure routing.

        The parent class calls ``_supports_factory(model, custom_llm_provider=None)``
        which fails to resolve ``gpt5_series/gpt-5.1`` to the correct Azure model
        entry. Strip the prefix and prepend ``azure/`` so the lookup finds
        ``azure/gpt-5.1`` in model_prices_and_context_window.json.

        Args:
            model: Azure model/deployment name, possibly with a routing prefix.
            level: reasoning-effort level to check (e.g. "none").

        Returns:
            True if the resolved ``azure/...`` model entry supports ``level``.
        """
        if model.startswith(cls.GPT5_SERIES_ROUTE):
            model = "azure/" + model[len(cls.GPT5_SERIES_ROUTE) :]
        elif not model.startswith("azure/"):
            model = "azure/" + model
        return super()._supports_reasoning_effort_level(model, level)

    @classmethod
    def is_model_gpt_5_model(cls, model: str) -> bool:
        """Check if the Azure model string refers to a gpt-5 variant.

        Accepts both explicit gpt-5 model names and the ``gpt5_series/`` prefix
        used for manual routing.
        """
        # gpt-5-chat* is a chat model and shouldn't go through GPT-5 reasoning restrictions.
        return (
            "gpt-5" in model and "gpt-5-chat" not in model
        ) or "gpt5_series" in model

    def get_supported_openai_params(self, model: str) -> List[str]:
        """Get supported parameters for Azure OpenAI GPT-5 models.

        Azure OpenAI GPT-5.2/5.4 models support logprobs, unlike OpenAI's GPT-5.
        This overrides the parent class to add logprobs support back for gpt-5.2+.

        Reference:
        - Tested with Azure OpenAI GPT-5.2 (api-version: 2025-01-01-preview)
        - Azure returns logprobs successfully despite Microsoft's general
          documentation stating reasoning models don't support it.
        """
        params = OpenAIGPT5Config.get_supported_openai_params(self, model=model)
        # Azure supports tool_choice for GPT-5 deployments, but the base GPT-5 config
        # can drop it when the deployment name isn't in the OpenAI model registry.
        if "tool_choice" not in params:
            params.append("tool_choice")
        # Only gpt-5.2+ has been verified to support logprobs on Azure.
        # The base OpenAI class includes logprobs for gpt-5.1+, but Azure
        # hasn't verified support for gpt-5.1, so remove them unless gpt-5.2/5.4+.
        if self._supports_reasoning_effort_level(
            model, "none"
        ) and not self.is_model_gpt_5_2_model(model):
            params = [p for p in params if p not in ("logprobs", "top_logprobs")]
        elif self.is_model_gpt_5_2_model(model):
            # BUGFIX: append-if-missing instead of a blind extend() — the base
            # OpenAI config already includes logprobs/top_logprobs for gpt-5.1+,
            # so extend() produced duplicate entries in the returned list.
            for logprob_param in ("logprobs", "top_logprobs"):
                if logprob_param not in params:
                    params.append(logprob_param)
        return params

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
        api_version: str = "",
    ) -> dict:
        """Map OpenAI params for Azure GPT-5, enforcing reasoning_effort rules.

        - reasoning_effort='none' is only forwarded for models that support it
          (gpt-5.1/5.2/5.4); otherwise it is dropped (if drop_params) or a 400
          UnsupportedParamsError is raised.
        - For gpt-5.4+ on Azure Chat Completions, reasoning_effort is dropped
          when tools are also present (Azure does not route to the Responses API).

        Raises:
            UnsupportedParamsError: when reasoning_effort='none' is requested
                for an unsupporting model and drop_params is not enabled.
        """
        reasoning_effort_value = non_default_params.get(
            "reasoning_effort"
        ) or optional_params.get("reasoning_effort")
        effective_effort = _get_effort_level(reasoning_effort_value)
        # gpt-5.1/5.2/5.4 support reasoning_effort='none', but other gpt-5 models don't
        # See: https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/reasoning
        supports_none = self._supports_reasoning_effort_level(model, "none")
        if effective_effort == "none" and not supports_none:
            if litellm.drop_params is True or drop_params is True:
                # Copy before mutating so the caller's dicts are untouched.
                non_default_params = non_default_params.copy()
                optional_params = optional_params.copy()
                if (
                    _get_effort_level(non_default_params.get("reasoning_effort"))
                    == "none"
                ):
                    non_default_params.pop("reasoning_effort")
                if _get_effort_level(optional_params.get("reasoning_effort")) == "none":
                    optional_params.pop("reasoning_effort")
            else:
                raise UnsupportedParamsError(
                    status_code=400,
                    message=(
                        "Azure OpenAI does not support reasoning_effort='none' for this model. "
                        "Supported values are: 'low', 'medium', and 'high'. "
                        "To drop this parameter, set `litellm.drop_params=True` or for proxy:\n\n"
                        "`litellm_settings:\n drop_params: true`\n"
                        "Issue: https://github.com/BerriAI/litellm/issues/16704"
                    ),
                )
        result = OpenAIGPT5Config.map_openai_params(
            self,
            non_default_params=non_default_params,
            optional_params=optional_params,
            model=model,
            drop_params=drop_params,
        )
        # Only drop reasoning_effort='none' for models that don't support it
        result_effort = _get_effort_level(result.get("reasoning_effort"))
        if result_effort == "none" and not supports_none:
            result.pop("reasoning_effort")
        # Azure Chat Completions: gpt-5.4+ does not support tools + reasoning together.
        # Drop reasoning_effort when both are present (OpenAI routes to Responses API; Azure does not).
        if self.is_model_gpt_5_4_plus_model(model):
            has_tools = bool(
                non_default_params.get("tools") or optional_params.get("tools")
            )
            if has_tools and result_effort not in (None, "none"):
                result.pop("reasoning_effort", None)
        return result

    def transform_request(
        self,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        headers: dict,
    ) -> dict:
        """Strip the gpt5_series/ routing prefix before delegating upstream."""
        model = model.replace(self.GPT5_SERIES_ROUTE, "")
        return super().transform_request(
            model=model,
            messages=messages,
            optional_params=optional_params,
            litellm_params=litellm_params,
            headers=headers,
        )

View File

@@ -0,0 +1,334 @@
from typing import TYPE_CHECKING, Any, List, Optional, Union
from httpx._models import Headers, Response
import litellm
from litellm.litellm_core_utils.prompt_templates.factory import (
convert_to_azure_openai_messages,
)
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.types.llms.azure import (
API_VERSION_MONTH_SUPPORTED_RESPONSE_FORMAT,
API_VERSION_YEAR_SUPPORTED_RESPONSE_FORMAT,
)
from litellm.types.utils import ModelResponse
from ....exceptions import UnsupportedParamsError
from ....types.llms.openai import AllMessageValues
from ...base_llm.chat.transformation import BaseConfig
from ..common_utils import AzureOpenAIError
if TYPE_CHECKING:
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
LoggingClass = LiteLLMLoggingObj
else:
LoggingClass = Any
class AzureOpenAIConfig(BaseConfig):
    """
    Reference: https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#chat-completions

    The class `AzureOpenAIConfig` provides configuration for the OpenAI's Chat API interface, for use with Azure. Below are the parameters::

    - `frequency_penalty` (number or null): Defaults to 0. Allows a value between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, thereby minimizing repetition.

    - `function_call` (string or object): This optional parameter controls how the model calls functions.

    - `functions` (array): An optional parameter. It is a list of functions for which the model may generate JSON inputs.

    - `logit_bias` (map): This optional parameter modifies the likelihood of specified tokens appearing in the completion.

    - `max_tokens` (integer or null): This optional parameter helps to set the maximum number of tokens to generate in the chat completion.

    - `n` (integer or null): This optional parameter helps to set how many chat completion choices to generate for each input message.

    - `presence_penalty` (number or null): Defaults to 0. It penalizes new tokens based on if they appear in the text so far, hence increasing the model's likelihood to talk about new topics.

    - `stop` (string / array / null): Specifies up to 4 sequences where the API will stop generating further tokens.

    - `temperature` (number or null): Defines the sampling temperature to use, varying between 0 and 2.

    - `top_p` (number or null): An alternative to sampling with temperature, used for nucleus sampling.
    """

    def __init__(
        self,
        frequency_penalty: Optional[int] = None,
        function_call: Optional[Union[str, dict]] = None,
        functions: Optional[list] = None,
        logit_bias: Optional[dict] = None,
        max_tokens: Optional[int] = None,
        n: Optional[int] = None,
        presence_penalty: Optional[int] = None,
        stop: Optional[Union[str, list]] = None,
        temperature: Optional[int] = None,
        top_p: Optional[int] = None,
    ) -> None:
        locals_ = locals().copy()
        # NOTE: values are stored on the *class*, not the instance, so a
        # configured default is shared by every instance (existing litellm
        # config pattern — do not change without auditing callers).
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return super().get_config()

    def get_supported_openai_params(self, model: str) -> List[str]:
        """Return the OpenAI params Azure chat completions accepts.

        BUGFIX: the previous list contained "tools" and "tool_choice" twice;
        duplicates are removed (membership checks are unaffected).
        """
        return [
            "temperature",
            "n",
            "stream",
            "stream_options",
            "stop",
            "max_tokens",
            "max_completion_tokens",
            "tools",
            "tool_choice",
            "presence_penalty",
            "frequency_penalty",
            "logit_bias",
            "user",
            "function_call",
            "functions",
            "top_p",
            "logprobs",
            "top_logprobs",
            "response_format",
            "seed",
            "extra_headers",
            "parallel_tool_calls",
            "prediction",
            "modalities",
            "audio",
            "web_search_options",
            "prompt_cache_key",
            "store",
        ]

    def _is_response_format_supported_model(self, model: str) -> bool:
        """
        Determines if the model supports response_format.

        - Handles Azure deployment names (e.g., azure/gpt-4.1-suffix)
        - Normalizes model names (e.g., gpt-4-1 -> gpt-4.1)
        - Strips deployment-specific suffixes
        - Passes provider to supports_response_schema
        - Backwards compatible with previous model name patterns
        """
        import re

        # Normalize model name: e.g., gpt-3-5-turbo -> gpt-3.5-turbo
        normalized_model = re.sub(r"(\d)-(\d)", r"\1.\2", model)
        # gpt-3.5 family never supports response_format; everything else does.
        if "gpt-3.5" in normalized_model or "gpt-35" in model:
            return False
        return True

    def _is_response_format_supported_api_version(
        self, api_version_year: str, api_version_month: str
    ) -> bool:
        """
        - check if api_version is supported for response_format
        - returns True if the API version is equal to or newer than the supported version
        """
        api_year = int(api_version_year)
        api_month = int(api_version_month)
        supported_year = int(API_VERSION_YEAR_SUPPORTED_RESPONSE_FORMAT)
        supported_month = int(API_VERSION_MONTH_SUPPORTED_RESPONSE_FORMAT)
        # If the year is greater than supported year, it's definitely supported
        if api_year > supported_year:
            return True
        # If the year is less than supported year, it's not supported
        elif api_year < supported_year:
            return False
        # If same year, check if month is >= supported month
        else:
            return api_month >= supported_month

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
        api_version: str = "",
    ) -> dict:
        """Map user-supplied OpenAI params onto Azure-compatible optional_params.

        Applies api-version gating for ``tool_choice`` and ``response_format``;
        all other supported params are copied through unchanged.

        Raises:
            UnsupportedParamsError: for tool_choice on pre-2023-12-01-preview
                API versions, or tool_choice='required' on <=2024-05 versions,
                unless drop_params is enabled.
        """
        supported_openai_params = self.get_supported_openai_params(model)

        # NOTE: api version components stay as strings; comparisons below are
        # lexicographic, which is correct for zero-padded YYYY/MM/DD parts.
        api_version_times = api_version.split("-")
        if len(api_version_times) >= 3:
            api_version_year = api_version_times[0]
            api_version_month = api_version_times[1]
            api_version_day = api_version_times[2]
        else:
            api_version_year = None
            api_version_month = None
            api_version_day = None
        for param, value in non_default_params.items():
            if param == "tool_choice":
                """
                This parameter requires API version 2023-12-01-preview or later

                tool_choice='required' is not supported as of 2024-05-01-preview
                """
                ## check if api version supports this param ##
                if (
                    api_version_year is None
                    or api_version_month is None
                    or api_version_day is None
                ):
                    # Unparseable/missing api_version: pass through unchanged.
                    optional_params["tool_choice"] = value
                else:
                    if (
                        api_version_year < "2023"
                        or (api_version_year == "2023" and api_version_month < "12")
                        or (
                            api_version_year == "2023"
                            and api_version_month == "12"
                            and api_version_day < "01"
                        )
                    ):
                        if litellm.drop_params is True or (
                            drop_params is not None and drop_params is True
                        ):
                            pass
                        else:
                            raise UnsupportedParamsError(
                                status_code=400,
                                message=f"""Azure does not support 'tool_choice', for api_version={api_version}. Bump your API version to '2023-12-01-preview' or later. This parameter requires 'api_version="2023-12-01-preview"' or later. Azure API Reference: https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#chat-completions""",
                            )
                    elif value == "required" and (
                        api_version_year == "2024" and api_version_month <= "05"
                    ):  ## check if tool_choice value is supported ##
                        if litellm.drop_params is True or (
                            drop_params is not None and drop_params is True
                        ):
                            pass
                        else:
                            raise UnsupportedParamsError(
                                status_code=400,
                                message=f"Azure does not support '{value}' as a {param} param, for api_version={api_version}. To drop 'tool_choice=required' for calls with this Azure API version, set `litellm.drop_params=True` or for proxy:\n\n`litellm_settings:\n drop_params: true`\nAzure API Reference: https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#chat-completions",
                            )
                    else:
                        optional_params["tool_choice"] = value
            elif param == "response_format" and isinstance(value, dict):
                _is_response_format_supported_model = (
                    self._is_response_format_supported_model(model)
                )
                if api_version_year is None or api_version_month is None:
                    # Unknown api_version: assume response_format is supported.
                    is_response_format_supported_api_version = True
                else:
                    is_response_format_supported_api_version = (
                        self._is_response_format_supported_api_version(
                            api_version_year, api_version_month
                        )
                    )
                is_response_format_supported = (
                    is_response_format_supported_api_version
                    and _is_response_format_supported_model
                )
                optional_params = self._add_response_format_to_tools(
                    optional_params=optional_params,
                    value=value,
                    is_response_format_supported=is_response_format_supported,
                )
            elif param == "tools" and isinstance(value, list):
                # Merge with any tools already added (e.g. via response_format).
                optional_params.setdefault("tools", [])
                optional_params["tools"].extend(value)
            elif param in supported_openai_params:
                optional_params[param] = value
        return optional_params

    def transform_request(
        self,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        headers: dict,
    ) -> dict:
        """Build the Azure chat-completions request body."""
        messages = convert_to_azure_openai_messages(messages)
        return {
            "model": model,
            "messages": messages,
            **optional_params,
        }

    def transform_response(
        self,
        model: str,
        raw_response: Response,
        model_response: ModelResponse,
        logging_obj: LoggingClass,
        request_data: dict,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        encoding: Any,
        api_key: Optional[str] = None,
        json_mode: Optional[bool] = None,
    ) -> ModelResponse:
        raise NotImplementedError(
            "Azure OpenAI handler.py has custom logic for transforming response, as it uses the OpenAI SDK."
        )

    def get_mapped_special_auth_params(self) -> dict:
        """Map the generic 'token' auth param to Azure's azure_ad_token."""
        return {"token": "azure_ad_token"}

    def map_special_auth_params(self, non_default_params: dict, optional_params: dict):
        for param, value in non_default_params.items():
            if param == "token":
                optional_params["azure_ad_token"] = value
        return optional_params

    def get_eu_regions(self) -> List[str]:
        """
        Source: https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#gpt-4-and-gpt-4-turbo-model-availability
        """
        return ["europe", "sweden", "switzerland", "france", "uk"]

    def get_us_regions(self) -> List[str]:
        """
        Source: https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#gpt-4-and-gpt-4-turbo-model-availability
        """
        return [
            "us",
            "eastus",
            "eastus2",
            "eastus2euap",
            "eastus3",
            "southcentralus",
            "westus",
            "westus2",
            "westus3",
            "westus4",
        ]

    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, Headers]
    ) -> BaseLLMException:
        return AzureOpenAIError(
            message=error_message, status_code=status_code, headers=headers
        )

    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        raise NotImplementedError(
            "Azure OpenAI has custom logic for validating environment, as it uses the OpenAI SDK."
        )

View File

@@ -0,0 +1,77 @@
"""
Handler file for calls to Azure OpenAI's o1/o3 family of models
Written separately to handle faking streaming for o1 and o3 models.
"""
from typing import TYPE_CHECKING, Any, Callable, Optional, Union
import httpx
from litellm.types.utils import ModelResponse
from ...openai.openai import OpenAIChatCompletion
from ..common_utils import BaseAzureLLM
if TYPE_CHECKING:
from aiohttp import ClientSession
class AzureOpenAIO1ChatCompletion(BaseAzureLLM, OpenAIChatCompletion):
    """Azure handler for the o1/o3 model family.

    Thin wrapper around ``OpenAIChatCompletion.completion`` that swaps in an
    Azure OpenAI client before delegating; all other arguments are forwarded
    unchanged.
    """

    def completion(
        self,
        model_response: ModelResponse,
        timeout: Union[float, httpx.Timeout],
        optional_params: dict,
        litellm_params: dict,
        logging_obj: Any,
        model: Optional[str] = None,
        messages: Optional[list] = None,
        print_verbose: Optional[Callable] = None,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        api_version: Optional[str] = None,
        dynamic_params: Optional[bool] = None,
        azure_ad_token: Optional[str] = None,
        acompletion: bool = False,
        logger_fn=None,
        headers: Optional[dict] = None,
        custom_prompt_dict: dict = {},
        client=None,
        organization: Optional[str] = None,
        custom_llm_provider: Optional[str] = None,
        drop_params: Optional[bool] = None,
        shared_session: Optional["ClientSession"] = None,
    ):
        """Resolve an Azure-specific client, then defer to the OpenAI handler."""
        # Reuse the supplied client when possible; otherwise construct an
        # Azure client (async variant when acompletion=True).
        azure_client = self.get_azure_openai_client(
            litellm_params=litellm_params,
            api_key=api_key,
            api_base=api_base,
            api_version=api_version,
            client=client,
            _is_async=acompletion,
        )
        # Forward everything to the shared OpenAI implementation with the
        # Azure client substituted in.
        return super().completion(
            model_response=model_response,
            timeout=timeout,
            optional_params=optional_params,
            litellm_params=litellm_params,
            logging_obj=logging_obj,
            model=model,
            messages=messages,
            print_verbose=print_verbose,
            api_key=api_key,
            api_base=api_base,
            api_version=api_version,
            dynamic_params=dynamic_params,
            azure_ad_token=azure_ad_token,
            acompletion=acompletion,
            logger_fn=logger_fn,
            headers=headers,
            custom_prompt_dict=custom_prompt_dict,
            client=azure_client,
            organization=organization,
            custom_llm_provider=custom_llm_provider,
            drop_params=drop_params,
            shared_session=shared_session,
        )

View File

@@ -0,0 +1,123 @@
"""
Support for o1 and o3 model families
https://platform.openai.com/docs/guides/reasoning
Translations handled by LiteLLM:
- modalities: image => drop param (if user opts in to dropping param)
- role: system ==> translate to role 'user'
- streaming => faked by LiteLLM
- Tools, response_format => drop param (if user opts in to dropping param)
- Logprobs => drop param (if user opts in to dropping param)
- Temperature => drop param (if user opts in to dropping param)
"""
from typing import List, Optional
import litellm
from litellm import verbose_logger
from litellm.types.llms.openai import AllMessageValues
from litellm.utils import get_model_info, supports_reasoning
from ...openai.chat.o_series_transformation import OpenAIOSeriesConfig
class AzureOpenAIO1Config(OpenAIOSeriesConfig):
    """Azure-specific configuration for the o-series (o1/o3/o4) models."""

    def get_supported_openai_params(self, model: str) -> list:
        """
        Get the supported OpenAI params for the Azure O-Series models.

        Starts from the generic GPT param list, adds the o-series-only params,
        then strips the params Azure o-series deployments reject.
        """
        candidate_params = litellm.OpenAIGPTConfig().get_supported_openai_params(
            model=model
        )
        candidate_params.extend(self._get_o_series_only_params(model))
        unsupported = {
            "logprobs",
            "top_p",
            "presence_penalty",
            "frequency_penalty",
            "top_logprobs",
        }
        return [param for param in candidate_params if param not in unsupported]

    def _get_o_series_only_params(self, model: str) -> list:
        """
        Helper function to get the o-series only params for the model

        - reasoning_effort
        """
        # Unrecognized model names are assumed to support reasoning: users
        # commonly deploy azure o-series models under custom deployment names.
        if model not in litellm.model_list_set:
            return ["reasoning_effort"]
        # Known model: defer to the litellm cost map's reasoning capability.
        return ["reasoning_effort"] if supports_reasoning(model) else []

    def should_fake_stream(
        self,
        model: Optional[str],
        stream: Optional[bool],
        custom_llm_provider: Optional[str] = None,
    ) -> bool:
        """
        Currently no Azure O Series models support native streaming.

        Returns True (fake the stream) unless streaming wasn't requested, the
        model is an o3 variant, or model_info opts in to native streaming.
        """
        if stream is not True:
            return False
        # o3 models support streaming - https://github.com/BerriAI/litellm/issues/8274
        if model and "o3" in model:
            return False
        if model is not None:
            # allow user to override default with model_info={"supports_native_streaming": true}
            try:
                info = get_model_info(
                    model=model, custom_llm_provider=custom_llm_provider
                )
                if info.get("supports_native_streaming") is True:
                    return False
            except Exception as e:
                verbose_logger.debug(
                    f"Error getting model info in AzureOpenAIO1Config: {e}"
                )
        return True

    def is_o_series_model(self, model: str) -> bool:
        """True for o1/o3/o4 model names or the manual ``o_series/`` route."""
        return any(marker in model for marker in ("o1", "o3", "o4", "o_series/"))

    def transform_request(
        self,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        headers: dict,
    ) -> dict:
        """Strip the manual routing prefix before delegating upstream."""
        # handle o_series/my-random-deployment-name
        deployment_name = model.replace("o_series/", "")
        return super().transform_request(
            deployment_name, messages, optional_params, litellm_params, headers
        )