chore: initial public snapshot for github upload
@@ -0,0 +1,298 @@
"""
Support for OpenAI's `/v1/chat/completions` endpoint.

Calls are made in OpenAI/openai.py, as OpenRouter is OpenAI-compatible.

Docs: https://openrouter.ai/docs/parameters
"""

from enum import Enum
from typing import Any, AsyncIterator, Iterator, List, Optional, Tuple, Union, cast

import httpx
import litellm

from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.types.llms.openai import AllMessageValues, ChatCompletionToolParam
from litellm.types.llms.openrouter import OpenRouterErrorMessage
from litellm.types.utils import ModelResponse, ModelResponseStream

from ...openai.chat.gpt_transformation import OpenAIGPTConfig
from ..common_utils import OpenRouterException


class CacheControlSupportedModels(str, Enum):
    """Models that support cache_control in content blocks."""

    CLAUDE = "claude"
    GEMINI = "gemini"
    MINIMAX = "minimax"
    GLM = "glm"
    ZAI = "z-ai"


class OpenrouterConfig(OpenAIGPTConfig):
    def get_supported_openai_params(self, model: str) -> list:
        """
        Allow reasoning parameters for models flagged as reasoning-capable.
        """
        supported_params = super().get_supported_openai_params(model=model)
        try:
            if litellm.supports_reasoning(
                model=model, custom_llm_provider="openrouter"
            ) or litellm.supports_reasoning(model=model):
                supported_params.append("reasoning_effort")
                supported_params.append("thinking")
        except Exception:
            pass
        return list(dict.fromkeys(supported_params))

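    # Illustrative sketch (not part of the original module; the model name is a
    # placeholder): for a model litellm flags as reasoning-capable, the returned list
    # is expected to also include the reasoning params, roughly:
    #
    #   OpenrouterConfig().get_supported_openai_params("deepseek/deepseek-r1")
    #   # -> [..., "reasoning_effort", "thinking"]
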
    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        mapped_openai_params = super().map_openai_params(
            non_default_params, optional_params, model, drop_params
        )

        # OpenRouter-only parameters
        extra_body = {}
        transforms = non_default_params.pop("transforms", None)
        models = non_default_params.pop("models", None)
        route = non_default_params.pop("route", None)
        if transforms is not None:
            extra_body["transforms"] = transforms
        if models is not None:
            extra_body["models"] = models
        if route is not None:
            extra_body["route"] = route
        mapped_openai_params[
            "extra_body"
        ] = extra_body  # openai client supports `extra_body` param
        return mapped_openai_params

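    # Illustrative sketch (not part of the original module; model names are
    # placeholders): OpenRouter-only params such as `models` and `route` end up nested
    # under `extra_body`, which the OpenAI client forwards to OpenRouter verbatim:
    #
    #   non_default_params = {
    #       "models": ["anthropic/claude-3.5-sonnet", "openai/gpt-4o"],
    #       "route": "fallback",
    #   }
    #   # after map_openai_params(...):
    #   # mapped["extra_body"] == {"models": [...], "route": "fallback"}
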
    def _supports_cache_control_in_content(self, model: str) -> bool:
        """
        Check if the model supports cache_control in content blocks.

        Returns:
            bool: True if the model matches one of CacheControlSupportedModels
            (e.g. Claude, Gemini, MiniMax, GLM, z-ai)
        """
        model_lower = model.lower()
        return any(
            supported_model.value in model_lower
            for supported_model in CacheControlSupportedModels
        )

    def remove_cache_control_flag_from_messages_and_tools(
        self,
        model: str,
        messages: List[AllMessageValues],
        tools: Optional[List["ChatCompletionToolParam"]] = None,
    ) -> Tuple[List[AllMessageValues], Optional[List["ChatCompletionToolParam"]]]:
        if self._supports_cache_control_in_content(model):
            return messages, tools
        else:
            return super().remove_cache_control_flag_from_messages_and_tools(
                model, messages, tools
            )

    def _move_cache_control_to_content(
        self, messages: List[AllMessageValues]
    ) -> List[AllMessageValues]:
        """
        Move cache_control from message level to content blocks.
        OpenRouter requires cache_control to be inside content blocks, not at message level.

        To avoid exceeding Anthropic's limit of 4 cache breakpoints, cache_control is only
        added to the LAST content block in each message.
        """
        transformed_messages: List[AllMessageValues] = []
        for message in messages:
            message_dict = dict(message)
            cache_control = message_dict.pop("cache_control", None)

            if cache_control is not None:
                content = message_dict.get("content")

                if isinstance(content, list):
                    # Content is already a list, add cache_control only to the last block
                    if len(content) > 0:
                        content_copy = []
                        for i, block in enumerate(content):
                            block_dict = dict(block)
                            # Only add cache_control to the last content block
                            if i == len(content) - 1:
                                block_dict["cache_control"] = cache_control
                            content_copy.append(block_dict)
                        message_dict["content"] = content_copy
                else:
                    # Content is a string, convert to structured format
                    message_dict["content"] = [
                        {
                            "type": "text",
                            "text": content,
                            "cache_control": cache_control,
                        }
                    ]

            # Cast back to AllMessageValues after modification
            transformed_messages.append(cast(AllMessageValues, message_dict))

        return transformed_messages

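    # Illustrative sketch (not from the original source; values are placeholders): a
    # message-level cache_control flag on a string message is rewritten into a
    # structured content block, roughly:
    #
    #   before: {"role": "user", "content": "long prompt ...",
    #            "cache_control": {"type": "ephemeral"}}
    #   after:  {"role": "user", "content": [
    #               {"type": "text", "text": "long prompt ...",
    #                "cache_control": {"type": "ephemeral"}}]}
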
    def transform_request(
        self,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        headers: dict,
    ) -> dict:
        """
        Transform the overall request to be sent to the API.

        Returns:
            dict: The transformed request. Sent as the body of the API call.
        """
        if self._supports_cache_control_in_content(model):
            messages = self._move_cache_control_to_content(messages)

        extra_body = optional_params.pop("extra_body", {})
        response = super().transform_request(
            model, messages, optional_params, litellm_params, headers
        )
        response.update(extra_body)

        # ALWAYS add usage parameter to get cost data from OpenRouter
        # This ensures cost tracking works for all OpenRouter models
        if "usage" not in response:
            response["usage"] = {"include": True}

        return response

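    # Illustrative sketch (assumed request shape, not from the original source): the
    # transformed body sent to OpenRouter ends up roughly like
    #
    #   {
    #       "model": "anthropic/claude-3.5-sonnet",   # placeholder
    #       "messages": [...],
    #       "route": "fallback",                      # only if supplied via extra_body
    #       "usage": {"include": True},               # always added for cost tracking
    #   }
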
    def transform_response(
        self,
        model: str,
        raw_response: httpx.Response,
        model_response: ModelResponse,
        logging_obj: Any,
        request_data: dict,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        encoding: Any,
        api_key: Optional[str] = None,
        json_mode: Optional[bool] = None,
    ) -> ModelResponse:
        """
        Transform the response from the OpenRouter API.

        Extracts cost information from the response body if available.

        Returns:
            ModelResponse: The transformed response with cost information.
        """
        # Call parent transform_response to get the standard ModelResponse
        model_response = super().transform_response(
            model=model,
            raw_response=raw_response,
            model_response=model_response,
            logging_obj=logging_obj,
            request_data=request_data,
            messages=messages,
            optional_params=optional_params,
            litellm_params=litellm_params,
            encoding=encoding,
            api_key=api_key,
            json_mode=json_mode,
        )

        # Extract cost from OpenRouter response body
        # OpenRouter returns cost information in the usage object when usage.include=true
        try:
            response_json = raw_response.json()
            if "usage" in response_json and response_json["usage"]:
                response_cost = response_json["usage"].get("cost")
                if response_cost is not None:
                    # Store cost in hidden params for the cost calculator to use
                    if not hasattr(model_response, "_hidden_params"):
                        model_response._hidden_params = {}
                    if "additional_headers" not in model_response._hidden_params:
                        model_response._hidden_params["additional_headers"] = {}
                    model_response._hidden_params["additional_headers"][
                        "llm_provider-x-litellm-response-cost"
                    ] = float(response_cost)
        except Exception:
            # If we can't extract cost, continue without it - don't fail the response
            pass

        return model_response

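    # Illustrative sketch (assumed response shape; numbers are made up): when the
    # request includes "usage": {"include": True}, OpenRouter's usage object carries a
    # cost field, e.g.
    #
    #   "usage": {"prompt_tokens": 12, "completion_tokens": 40,
    #             "total_tokens": 52, "cost": 0.00021}
    #
    # which the block above exposes as the "llm_provider-x-litellm-response-cost"
    # hidden header for litellm's cost calculator.
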
    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
    ) -> BaseLLMException:
        return OpenRouterException(
            message=error_message,
            status_code=status_code,
            headers=headers,
        )

    def get_model_response_iterator(
        self,
        streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse],
        sync_stream: bool,
        json_mode: Optional[bool] = False,
    ) -> Any:
        return OpenRouterChatCompletionStreamingHandler(
            streaming_response=streaming_response,
            sync_stream=sync_stream,
            json_mode=json_mode,
        )


class OpenRouterChatCompletionStreamingHandler(BaseModelResponseIterator):
    def chunk_parser(self, chunk: dict) -> ModelResponseStream:
        try:
            ## HANDLE ERROR IN CHUNK ##
            if "error" in chunk:
                error_chunk = chunk["error"]
                error_message = OpenRouterErrorMessage(
                    message="Message: {}, Metadata: {}, User ID: {}".format(
                        error_chunk["message"],
                        error_chunk.get("metadata", {}),
                        error_chunk.get("user_id", ""),
                    ),
                    code=error_chunk["code"],
                    metadata=error_chunk.get("metadata", {}),
                )
                raise OpenRouterException(
                    message=error_message["message"],
                    status_code=error_message["code"],
                    headers=error_message["metadata"].get("headers", {}),
                )

            new_choices = []
            for choice in chunk["choices"]:
                # OpenRouter streams reasoning under `reasoning`; expose it as
                # `reasoning_content` to match litellm's delta format.
                choice["delta"]["reasoning_content"] = choice["delta"].get("reasoning")
                new_choices.append(choice)
            return ModelResponseStream(
                id=chunk["id"],
                object="chat.completion.chunk",
                created=chunk["created"],
                usage=chunk.get("usage"),
                model=chunk["model"],
                choices=new_choices,
            )
        except KeyError as e:
            raise OpenRouterException(
                message=f"KeyError: {e}, Got unexpected response from OpenRouter: {chunk}",
                status_code=400,
                headers={"Content-Type": "application/json"},
            )
        except Exception as e:
            raise e
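
# Illustrative usage sketch (not part of this module; model name and key are
# placeholders): the OpenRouter-specific params handled above can be passed straight
# to litellm.completion, e.g.
#
#   import litellm
#   response = litellm.completion(
#       model="openrouter/anthropic/claude-3.5-sonnet",
#       api_key="sk-or-...",
#       messages=[{"role": "user", "content": "hello"}],
#       models=["anthropic/claude-3.5-sonnet", "openai/gpt-4o"],  # fallback list
#       route="fallback",
#   )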