# Source file: lijiaoqiao/llm-gateway-competitors/litellm-wheel-src/litellm/llms/watsonx/chat/transformation.py
# Size: 251 lines, 9.8 KiB (Python)

"""
Translation from OpenAI's `/chat/completions` endpoint to IBM WatsonX's `/text/chat` endpoint.
Docs: https://cloud.ibm.com/apidocs/watsonx-ai#text-chat
"""
from typing import Dict, List, Optional, Tuple, Union
from litellm import verbose_logger
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.watsonx import (
WatsonXAIEndpoint,
WatsonXModelPattern,
)
from ....utils import _remove_additional_properties, _remove_strict_from_schema
from ...openai.chat.gpt_transformation import OpenAIGPTConfig
from ..common_utils import IBMWatsonXMixin
class IBMWatsonXChatConfig(IBMWatsonXMixin, OpenAIGPTConfig):
    """Chat configuration that adapts OpenAI-style chat requests for IBM WatsonX.

    Parameter mapping is delegated to the parent classes after WatsonX-specific
    handling of ``tools`` and ``tool_choice``. The class also builds the request
    URL and selects a prompt template based on ``WatsonXModelPattern``.

    ``_get_base_url`` and ``_add_api_version_to_url`` are not defined here; they
    are expected to be inherited (presumably from ``IBMWatsonXMixin``).
    """

    def get_supported_openai_params(self, model: str) -> List:
        """Return the OpenAI parameter names accepted by this config.

        The returned list does not depend on ``model``; the argument is unused.
        """
        return [
            "temperature",  # equivalent to temperature
            "max_tokens",  # equivalent to max_new_tokens
            "top_p",  # equivalent to top_p
            "frequency_penalty",  # equivalent to repetition_penalty
            "stop",  # equivalent to stop_sequences
            "seed",  # equivalent to random_seed
            "stream",  # equivalent to stream
            "tools",
            "tool_choice",  # equivalent to tool_choice + tool_choice_option
            "logprobs",
            "top_logprobs",
            "n",
            "presence_penalty",
            "response_format",
            "reasoning_effort",
        ]

    def is_tool_choice_option(self, tool_choice: Optional[Union[str, dict]]) -> bool:
        """Return True when ``tool_choice`` is a string mode rather than a tool spec.

        Only the strings ``"auto"``, ``"none"`` and ``"required"`` qualify.
        ``None``, dicts and any other string yield False.
        """
        return isinstance(tool_choice, str) and tool_choice in (
            "auto",
            "none",
            "required",
        )

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """Map OpenAI request parameters onto WatsonX request parameters.

        ``tools`` is cleaned of ``additionalProperties`` and ``strict`` entries.
        ``tool_choice`` is written to ``optional_params`` as either
        ``tool_choice_option`` (string modes) or ``tool_choice`` (anything else
        that is not None). Everything remaining is mapped by the parent class.

        Note:
            ``non_default_params`` is modified in place: ``tool_choice`` is
            removed and ``tools`` is replaced by its cleaned form.

        Returns:
            Whatever the parent ``map_openai_params`` returns.
        """
        ## TOOLS ##
        _tools = non_default_params.pop("tools", None)
        if _tools is not None:
            # remove 'additionalProperties' from tools
            _tools = _remove_additional_properties(_tools)
            # remove 'strict' from tools
            _tools = _remove_strict_from_schema(_tools)
            # Re-check: only put the key back if the helpers returned a value.
            if _tools is not None:
                non_default_params["tools"] = _tools

        ## TOOL CHOICE ##
        _tool_choice = non_default_params.pop("tool_choice", None)
        if self.is_tool_choice_option(_tool_choice):
            optional_params["tool_choice_option"] = _tool_choice
        elif _tool_choice is not None:
            optional_params["tool_choice"] = _tool_choice

        return super().map_openai_params(
            non_default_params, optional_params, model, drop_params
        )

    def _get_openai_compatible_provider_info(
        self, api_base: Optional[str], api_key: Optional[str]
    ) -> Tuple[Optional[str], Optional[str]]:
        """Resolve the API base and key, falling back to stored secrets.

        Explicit arguments win. Otherwise the values come from the
        ``HOSTED_VLLM_API_BASE`` / ``HOSTED_VLLM_API_KEY`` secrets, and the key
        finally defaults to an empty string.
        """
        # NOTE(review): these secret names (and the vllm remark below) look
        # copied from the hosted-vLLM provider - confirm whether WatsonX-specific
        # names were intended before changing them; callers may rely on these.
        api_base = api_base or get_secret_str("HOSTED_VLLM_API_BASE")  # type: ignore
        dynamic_api_key = (
            api_key or get_secret_str("HOSTED_VLLM_API_KEY") or ""
        )  # vllm does not require an api key
        return api_base, dynamic_api_key

    def get_complete_url(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: str,
        optional_params: dict,
        litellm_params: dict,
        stream: Optional[bool] = None,
    ) -> str:
        """Build the full request URL for a chat call.

        Models whose name starts with ``deployment/`` use the deployment chat
        endpoints, with everything after the first ``/`` as the deployment id.
        All other models use the regular chat endpoints. The streaming variant
        is chosen when ``stream`` is truthy.

        Note:
            ``api_version`` is popped from ``optional_params`` (mutating the
            caller's dict) and handed to ``_add_api_version_to_url``.
            ``api_key`` and ``litellm_params`` are not used by this method.
        """
        url = self._get_base_url(api_base=api_base)
        if model.startswith("deployment/"):
            # The prefix check guarantees at least one "/", so index 1 exists.
            deployment_id = model.split("/", 1)[1]
            endpoint = (
                WatsonXAIEndpoint.DEPLOYMENT_CHAT_STREAM.value
                if stream
                else WatsonXAIEndpoint.DEPLOYMENT_CHAT.value
            )
            endpoint = endpoint.format(deployment_id=deployment_id)
        else:
            endpoint = (
                WatsonXAIEndpoint.CHAT_STREAM.value
                if stream
                else WatsonXAIEndpoint.CHAT.value
            )
        url = url.rstrip("/") + endpoint

        ## add api version
        url = self._add_api_version_to_url(
            url=url, api_version=optional_params.pop("api_version", None)
        )
        return url

    @staticmethod
    def _extract_hf_model_name(model: str) -> str:
        """Return the part of ``model`` after a ``watsonx/`` or ``watsonx_text/`` marker.

        ``watsonx/`` is checked first. If neither marker occurs, ``model`` is
        returned unchanged.
        """
        for marker in ("watsonx/", "watsonx_text/"):
            if marker in model:
                return model.split(marker)[-1]
        return model

    @staticmethod
    def _llama3_instruct_prompt(messages: List[Dict[str, str]]) -> Optional[str]:
        """Render ``messages`` with the Llama-3 instruct header/eot markers."""
        from litellm.litellm_core_utils.prompt_templates.factory import (
            custom_prompt,
        )

        return custom_prompt(
            role_dict={
                "system": {
                    "pre_message": "<|start_header_id|>system<|end_header_id|>\n",
                    "post_message": "<|eot_id|>",
                },
                "user": {
                    "pre_message": "<|start_header_id|>user<|end_header_id|>\n",
                    "post_message": "<|eot_id|>",
                },
                "assistant": {
                    "pre_message": "<|start_header_id|>assistant<|end_header_id|>\n",
                    "post_message": "<|eot_id|>",
                },
            },
            messages=messages,
            initial_prompt_value="<|begin_of_text|>",
            final_prompt_value="<|start_header_id|>assistant<|end_header_id|>\n",
        )

    @staticmethod
    def _log_hf_template_failure(hf_model: str, error: Exception) -> None:
        """Debug-log a HuggingFace template failure without ever raising."""
        try:
            verbose_logger.debug(
                f"Failed to apply HuggingFace template for model {hf_model}: {error}"
            )
        except Exception:
            # If logging fails, silently continue - don't break the flow
            pass

    @staticmethod
    def _apply_prompt_template_core(
        model: str, messages: List[Dict[str, str]], hf_template_fn
    ) -> Optional[str]:
        """Core logic for applying prompt templates (sync).

        Patterns are tested in order: Granite chat, IBM Mistral, GPT-OSS,
        Llama-3 instruct. The first pattern found in ``model`` decides the
        template.

        Args:
            model: Model name, matched by substring against the patterns.
            messages: Chat messages to render.
            hf_template_fn: Callable invoked as
                ``hf_template_fn(model=..., messages=...)`` for GPT-OSS models.

        Returns:
            The rendered prompt, or None when no pattern matches or when the
            GPT-OSS template call fails or returns an empty result. The caller
            is expected to fall back to a default in that case.
        """
        from litellm.litellm_core_utils.prompt_templates.factory import (
            ibm_granite_pt,
            mistral_instruct_pt,
        )

        if WatsonXModelPattern.GRANITE_CHAT.value in model:
            return ibm_granite_pt(messages=messages)
        if WatsonXModelPattern.IBM_MISTRAL.value in model:
            return mistral_instruct_pt(messages=messages)
        if WatsonXModelPattern.GPT_OSS.value in model:
            # Extract HuggingFace model name from watsonx/ or watsonx_text/ prefix
            hf_model = IBMWatsonXChatConfig._extract_hf_model_name(model)
            try:
                result = hf_template_fn(model=hf_model, messages=messages)
            except Exception as e:
                # Best effort: record why the template failed, then let the
                # caller fall back, instead of swallowing the error silently.
                IBMWatsonXChatConfig._log_hf_template_failure(hf_model, e)
                return None
            # Empty string / None both mean "no usable template".
            return result or None
        if WatsonXModelPattern.LLAMA3_INSTRUCT.value in model:
            return IBMWatsonXChatConfig._llama3_instruct_prompt(messages)
        return None

    @staticmethod
    async def aapply_prompt_template(
        model: str, messages: List[Dict[str, str]]
    ) -> Optional[str]:
        """Apply prompt template (async version).

        Follows the same pattern order and return contract as
        ``_apply_prompt_template_core``. For GPT-OSS models the synchronous
        HuggingFace template function is used when the model is already in
        ``litellm.known_tokenizer_config``; otherwise the async variant is
        awaited.
        """
        import litellm
        from litellm.litellm_core_utils.prompt_templates.factory import (
            ahf_chat_template,
            hf_chat_template,
            ibm_granite_pt,
            mistral_instruct_pt,
        )

        if WatsonXModelPattern.GRANITE_CHAT.value in model:
            return ibm_granite_pt(messages=messages)
        if WatsonXModelPattern.IBM_MISTRAL.value in model:
            return mistral_instruct_pt(messages=messages)
        if WatsonXModelPattern.GPT_OSS.value in model:
            # Extract HuggingFace model name from watsonx/ or watsonx_text/ prefix
            hf_model = IBMWatsonXChatConfig._extract_hf_model_name(model)
            try:
                # Use sync if cached, async if not
                if hf_model in litellm.known_tokenizer_config:
                    result = hf_chat_template(model=hf_model, messages=messages)
                else:
                    result = await ahf_chat_template(model=hf_model, messages=messages)
            except Exception as e:
                # Log the exception for debugging but don't raise it
                # The caller will fall back to default prompt factory
                IBMWatsonXChatConfig._log_hf_template_failure(hf_model, e)
                return None
            # Empty string / None both mean "no usable template".
            return result or None
        if WatsonXModelPattern.LLAMA3_INSTRUCT.value in model:
            return IBMWatsonXChatConfig._llama3_instruct_prompt(messages)
        return None

    @staticmethod
    def apply_prompt_template(
        model: str, messages: List[Dict[str, str]]
    ) -> Optional[str]:
        """Apply prompt template (sync version).

        Thin wrapper that runs ``_apply_prompt_template_core`` with the
        synchronous HuggingFace chat-template function.
        """
        from litellm.litellm_core_utils.prompt_templates.factory import (
            hf_chat_template,
        )

        return IBMWatsonXChatConfig._apply_prompt_template_core(
            model=model, messages=messages, hf_template_fn=hf_chat_template
        )