""" Support for OpenAI's `/v1/chat/completions` endpoint. Calls done in OpenAI/openai.py as OpenRouter is openai-compatible. Docs: https://openrouter.ai/docs/parameters """ from enum import Enum from typing import Any, AsyncIterator, Iterator, List, Optional, Tuple, Union, cast import httpx import litellm from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator from litellm.llms.base_llm.chat.transformation import BaseLLMException from litellm.types.llms.openai import AllMessageValues, ChatCompletionToolParam from litellm.types.llms.openrouter import OpenRouterErrorMessage from litellm.types.utils import ModelResponse, ModelResponseStream from ...openai.chat.gpt_transformation import OpenAIGPTConfig from ..common_utils import OpenRouterException class CacheControlSupportedModels(str, Enum): """Models that support cache_control in content blocks.""" CLAUDE = "claude" GEMINI = "gemini" MINIMAX = "minimax" GLM = "glm" ZAI = "z-ai" class OpenrouterConfig(OpenAIGPTConfig): def get_supported_openai_params(self, model: str) -> list: """ Allow reasoning parameters for models flagged as reasoning-capable. """ supported_params = super().get_supported_openai_params(model=model) try: if litellm.supports_reasoning( model=model, custom_llm_provider="openrouter" ) or litellm.supports_reasoning(model=model): supported_params.append("reasoning_effort") supported_params.append("thinking") except Exception: pass return list(dict.fromkeys(supported_params)) def map_openai_params( self, non_default_params: dict, optional_params: dict, model: str, drop_params: bool, ) -> dict: mapped_openai_params = super().map_openai_params( non_default_params, optional_params, model, drop_params ) # OpenRouter-only parameters extra_body = {} transforms = non_default_params.pop("transforms", None) models = non_default_params.pop("models", None) route = non_default_params.pop("route", None) if transforms is not None: extra_body["transforms"] = transforms if models is not None: extra_body["models"] = models if route is not None: extra_body["route"] = route mapped_openai_params[ "extra_body" ] = extra_body # openai client supports `extra_body` param return mapped_openai_params def _supports_cache_control_in_content(self, model: str) -> bool: """ Check if the model supports cache_control in content blocks. Returns: bool: True if model supports cache_control (Claude or Gemini models) """ model_lower = model.lower() return any( supported_model.value in model_lower for supported_model in CacheControlSupportedModels ) def remove_cache_control_flag_from_messages_and_tools( self, model: str, messages: List[AllMessageValues], tools: Optional[List["ChatCompletionToolParam"]] = None, ) -> Tuple[List[AllMessageValues], Optional[List["ChatCompletionToolParam"]]]: if self._supports_cache_control_in_content(model): return messages, tools else: return super().remove_cache_control_flag_from_messages_and_tools( model, messages, tools ) def _move_cache_control_to_content( self, messages: List[AllMessageValues] ) -> List[AllMessageValues]: """ Move cache_control from message level to content blocks. OpenRouter requires cache_control to be inside content blocks, not at message level. To avoid exceeding Anthropic's limit of 4 cache breakpoints, cache_control is only added to the LAST content block in each message. 
""" transformed_messages: List[AllMessageValues] = [] for message in messages: message_dict = dict(message) cache_control = message_dict.pop("cache_control", None) if cache_control is not None: content = message_dict.get("content") if isinstance(content, list): # Content is already a list, add cache_control only to the last block if len(content) > 0: content_copy = [] for i, block in enumerate(content): block_dict = dict(block) # Only add cache_control to the last content block if i == len(content) - 1: block_dict["cache_control"] = cache_control content_copy.append(block_dict) message_dict["content"] = content_copy else: # Content is a string, convert to structured format message_dict["content"] = [ { "type": "text", "text": content, "cache_control": cache_control, } ] # Cast back to AllMessageValues after modification transformed_messages.append(cast(AllMessageValues, message_dict)) return transformed_messages def transform_request( self, model: str, messages: List[AllMessageValues], optional_params: dict, litellm_params: dict, headers: dict, ) -> dict: """ Transform the overall request to be sent to the API. Returns: dict: The transformed request. Sent as the body of the API call. """ if self._supports_cache_control_in_content(model): messages = self._move_cache_control_to_content(messages) extra_body = optional_params.pop("extra_body", {}) response = super().transform_request( model, messages, optional_params, litellm_params, headers ) response.update(extra_body) # ALWAYS add usage parameter to get cost data from OpenRouter # This ensures cost tracking works for all OpenRouter models if "usage" not in response: response["usage"] = {"include": True} return response def transform_response( self, model: str, raw_response: httpx.Response, model_response: ModelResponse, logging_obj: Any, request_data: dict, messages: List[AllMessageValues], optional_params: dict, litellm_params: dict, encoding: Any, api_key: Optional[str] = None, json_mode: Optional[bool] = None, ) -> ModelResponse: """ Transform the response from OpenRouter API. Extracts cost information from response headers if available. Returns: ModelResponse: The transformed response with cost information. 
""" # Call parent transform_response to get the standard ModelResponse model_response = super().transform_response( model=model, raw_response=raw_response, model_response=model_response, logging_obj=logging_obj, request_data=request_data, messages=messages, optional_params=optional_params, litellm_params=litellm_params, encoding=encoding, api_key=api_key, json_mode=json_mode, ) # Extract cost from OpenRouter response body # OpenRouter returns cost information in the usage object when usage.include=true try: response_json = raw_response.json() if "usage" in response_json and response_json["usage"]: response_cost = response_json["usage"].get("cost") if response_cost is not None: # Store cost in hidden params for the cost calculator to use if not hasattr(model_response, "_hidden_params"): model_response._hidden_params = {} if "additional_headers" not in model_response._hidden_params: model_response._hidden_params["additional_headers"] = {} model_response._hidden_params["additional_headers"][ "llm_provider-x-litellm-response-cost" ] = float(response_cost) except Exception: # If we can't extract cost, continue without it - don't fail the response pass return model_response def get_error_class( self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers] ) -> BaseLLMException: return OpenRouterException( message=error_message, status_code=status_code, headers=headers, ) def get_model_response_iterator( self, streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse], sync_stream: bool, json_mode: Optional[bool] = False, ) -> Any: return OpenRouterChatCompletionStreamingHandler( streaming_response=streaming_response, sync_stream=sync_stream, json_mode=json_mode, ) class OpenRouterChatCompletionStreamingHandler(BaseModelResponseIterator): def chunk_parser(self, chunk: dict) -> ModelResponseStream: try: ## HANDLE ERROR IN CHUNK ## if "error" in chunk: error_chunk = chunk["error"] error_message = OpenRouterErrorMessage( message="Message: {}, Metadata: {}, User ID: {}".format( error_chunk["message"], error_chunk.get("metadata", {}), error_chunk.get("user_id", ""), ), code=error_chunk["code"], metadata=error_chunk.get("metadata", {}), ) raise OpenRouterException( message=error_message["message"], status_code=error_message["code"], headers=error_message["metadata"].get("headers", {}), ) new_choices = [] for choice in chunk["choices"]: choice["delta"]["reasoning_content"] = choice["delta"].get("reasoning") new_choices.append(choice) return ModelResponseStream( id=chunk["id"], object="chat.completion.chunk", created=chunk["created"], usage=chunk.get("usage"), model=chunk["model"], choices=new_choices, ) except KeyError as e: raise OpenRouterException( message=f"KeyError: {e}, Got unexpected response from OpenRouter: {chunk}", status_code=400, headers={"Content-Type": "application/json"}, ) except Exception as e: raise e