Files
lijiaoqiao/llm-gateway-competitors/litellm-wheel-src/litellm/google_genai/adapters/transformation.py
2026-03-26 20:06:14 +08:00

784 lines
31 KiB
Python

import json
from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Union, cast
from litellm import verbose_logger
from litellm.litellm_core_utils.json_validation_rule import normalize_tool_schema
from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionAssistantMessage,
ChatCompletionAssistantToolCall,
ChatCompletionImageObject,
ChatCompletionRequest,
ChatCompletionSystemMessage,
ChatCompletionTextObject,
ChatCompletionToolCallFunctionChunk,
ChatCompletionToolChoiceValues,
ChatCompletionToolMessage,
ChatCompletionToolParam,
ChatCompletionUserMessage,
)
from litellm.types.router import GenericLiteLLMParams
from litellm.types.utils import (
AdapterCompletionStreamWrapper,
Choices,
ModelResponse,
ModelResponseStream,
StreamingChoices,
)
class GoogleGenAIStreamWrapper(AdapterCompletionStreamWrapper):
    """
    Wrapper for streaming Google GenAI generate_content responses.

    Transforms OpenAI streaming chunks to Google GenAI format. Supports both
    sync (``__next__``) and async (``__anext__``) iteration, and exposes SSE
    helpers that frame each transformed chunk as a ``data: ...`` event.
    """

    # True once the first chunk has been emitted.
    sent_first_chunk: bool = False
    # State for accumulating partial tool calls across chunks, keyed by the
    # streaming tool call ``index`` from the OpenAI delta (an int).
    accumulated_tool_calls: Dict[int, Dict[str, Any]]

    def __init__(self, completion_stream: Any):
        self.sent_first_chunk = False
        self.accumulated_tool_calls = {}
        # Guards the non-iterable fallback so a plain (non-streaming)
        # response is translated and returned exactly once.
        self._returned_response = False
        super().__init__(completion_stream)

    def _flush_accumulated_tool_calls(self) -> Optional[Dict[str, Any]]:
        """
        Build a final Google GenAI chunk from any tool calls still buffered
        after the underlying stream is exhausted.

        Returns:
            A Google GenAI chunk dict with the buffered function calls, or
            None when nothing is buffered or nothing could be parsed.
            The accumulator is always cleared to prevent memory leaks.
        """
        if not self.accumulated_tool_calls:
            return None
        try:
            parts = []
            for (
                tool_call_index,
                tool_call_data,
            ) in self.accumulated_tool_calls.items():
                try:
                    # For tool calls with no arguments, the accumulated args
                    # will be "", which is not valid JSON. Default to an
                    # empty JSON object in that case.
                    parsed_args = json.loads(tool_call_data["arguments"] or "{}")
                    parts.append(
                        {
                            "functionCall": {
                                "name": tool_call_data["name"]
                                or "undefined_tool_name",
                                "args": parsed_args,
                            }
                        }
                    )
                except json.JSONDecodeError:
                    # This can happen if the stream is abruptly cut off
                    # mid-argument string.
                    verbose_logger.warning(
                        f"Could not parse tool call arguments at end of stream for index {tool_call_index}. "
                        f"Name: {tool_call_data['name']}. "
                        f"Partial args: {tool_call_data['arguments']}"
                    )
            if not parts:
                return None
            return {
                "candidates": [
                    {
                        "content": {"parts": parts, "role": "model"},
                        "finishReason": "STOP",
                        "index": 0,
                        "safetyRatings": [],
                    }
                ]
            }
        finally:
            # Ensure the accumulator is always cleared to prevent memory leaks
            self.accumulated_tool_calls.clear()

    def __next__(self):
        try:
            if not hasattr(self.completion_stream, "__iter__"):
                # Non-streaming fallback: translate the whole response once.
                if self._returned_response:
                    raise StopIteration
                self._returned_response = True
                return GoogleGenAIAdapter().translate_completion_to_generate_content(
                    self.completion_stream
                )
            for chunk in self.completion_stream:
                if chunk == "None" or chunk is None:
                    continue
                transformed_chunk = GoogleGenAIAdapter().translate_streaming_completion_to_generate_content(
                    chunk, self
                )
                if transformed_chunk:
                    return transformed_chunk
            # Stream exhausted: emit any tool calls still buffered. This keeps
            # the sync path consistent with __anext__, which already flushed.
            final_chunk = self._flush_accumulated_tool_calls()
            if final_chunk is not None:
                return final_chunk
            raise StopIteration
        except StopIteration:
            raise
        except Exception as e:
            # Best-effort: end the stream, but leave a trace instead of
            # swallowing the error silently.
            verbose_logger.debug(f"GoogleGenAIStreamWrapper.__next__ error: {e}")
            raise StopIteration

    async def __anext__(self):
        try:
            if not hasattr(self.completion_stream, "__aiter__"):
                # Non-streaming fallback: translate the whole response once.
                if self._returned_response:
                    raise StopAsyncIteration
                self._returned_response = True
                return GoogleGenAIAdapter().translate_completion_to_generate_content(
                    self.completion_stream
                )
            async for chunk in self.completion_stream:
                if chunk == "None" or chunk is None:
                    continue
                transformed_chunk = GoogleGenAIAdapter().translate_streaming_completion_to_generate_content(
                    chunk, self
                )
                if transformed_chunk:
                    return transformed_chunk
            # After the stream is exhausted, check for any remaining
            # accumulated tool calls.
            final_chunk = self._flush_accumulated_tool_calls()
            if final_chunk is not None:
                return final_chunk
            raise StopAsyncIteration
        except StopAsyncIteration:
            raise
        except Exception as e:
            verbose_logger.debug(f"GoogleGenAIStreamWrapper.__anext__ error: {e}")
            raise StopAsyncIteration

    def google_genai_sse_wrapper(self) -> Iterator[bytes]:
        """
        Convert Google GenAI streaming chunks to Server-Sent Events format.
        """
        for chunk in self.completion_stream:
            if isinstance(chunk, dict):
                payload = f"data: {json.dumps(chunk)}\n\n"
                yield payload.encode()
            elif isinstance(chunk, ModelResponseStream):
                # Transform OpenAI streaming chunk to Google GenAI format.
                # Mirrors the async SSE wrapper; previously raw
                # ModelResponseStream objects leaked through untransformed.
                transformed_chunk = GoogleGenAIAdapter().translate_streaming_completion_to_generate_content(
                    chunk, self
                )
                if isinstance(transformed_chunk, dict):  # Only yield non-empty chunks
                    payload = f"data: {json.dumps(transformed_chunk)}\n\n"
                    yield payload.encode()
            else:
                yield chunk

    async def async_google_genai_sse_wrapper(self) -> AsyncIterator[bytes]:
        """
        Async version of google_genai_sse_wrapper.
        """
        async for chunk in self.completion_stream:
            if isinstance(chunk, dict):
                payload = f"data: {json.dumps(chunk)}\n\n"
                yield payload.encode()
            elif isinstance(chunk, ModelResponseStream):
                # Transform OpenAI streaming chunk to Google GenAI format
                transformed_chunk = GoogleGenAIAdapter().translate_streaming_completion_to_generate_content(
                    chunk, self
                )
                if isinstance(transformed_chunk, dict):  # Only return non-empty chunks
                    payload = f"data: {json.dumps(transformed_chunk)}\n\n"
                    yield payload.encode()
            else:
                # For other chunk types, yield them directly as bytes
                if hasattr(chunk, "encode"):
                    yield chunk.encode()
                else:
                    yield str(chunk).encode()
class GoogleGenAIAdapter:
    """Adapter for transforming Google GenAI generate_content requests to/from litellm.completion format"""

    def __init__(self) -> None:
        pass

    def translate_generate_content_to_completion(
        self,
        model: str,
        contents: Union[List[Dict[str, Any]], Dict[str, Any]],
        config: Optional[Dict[str, Any]] = None,
        litellm_params: Optional[GenericLiteLLMParams] = None,
        **kwargs,
    ) -> Dict[str, Any]:
        """
        Transform generate_content request to litellm completion format

        Args:
            model: The model name
            contents: Generate content contents (can be list or single dict)
            config: Optional config parameters (temperature, maxOutputTokens, ...)
            litellm_params: Optional litellm-specific params (api_base, api_key, ...)
            **kwargs: Additional parameters from the original request; reads
                systemInstruction/system_instruction, tools, toolConfig/tool_config

        Returns:
            Dict in OpenAI chat-completion request format
        """
        # Extract top-level fields from kwargs (accept both camelCase and
        # snake_case spellings).
        system_instruction = kwargs.get("systemInstruction") or kwargs.get(
            "system_instruction"
        )
        tools = kwargs.get("tools")
        tool_config = kwargs.get("toolConfig") or kwargs.get("tool_config")

        # Normalize contents to list format
        if isinstance(contents, dict):
            contents_list = [contents]
        else:
            contents_list = contents

        # Transform contents to OpenAI messages format
        messages = self._transform_contents_to_messages(
            contents_list, system_instruction=system_instruction
        )

        # Create base request as dict (which is compatible with ChatCompletionRequest)
        completion_request: ChatCompletionRequest = {
            "model": model,
            "messages": messages,
        }

        #########################################################
        # Supported OpenAI chat completion params
        # - temperature
        # - max_tokens
        # - top_p
        # - frequency_penalty
        # - presence_penalty
        # - stop
        # - tools
        # - tool_choice
        #########################################################
        # Add config parameters if provided
        if config:
            # Map common Google GenAI config parameters to OpenAI equivalents
            if "temperature" in config:
                completion_request["temperature"] = config["temperature"]
            if "maxOutputTokens" in config:
                completion_request["max_tokens"] = config["maxOutputTokens"]
            if "topP" in config:
                completion_request["top_p"] = config["topP"]
            if "topK" in config:
                # OpenAI doesn't have a direct topK equivalent; intentionally dropped
                pass
            if "stopSequences" in config:
                completion_request["stop"] = config["stopSequences"]

        # Handle tools transformation
        if tools:
            # Tools arrive in Google GenAI format; transform to OpenAI format
            if isinstance(tools, list) and len(tools) > 0:
                openai_tools = self._transform_google_genai_tools_to_openai(tools)
                if openai_tools:
                    completion_request["tools"] = openai_tools

        # Handle tool_config (tool choice)
        if tool_config:
            tool_choice = self._transform_google_genai_tool_config_to_openai(
                tool_config
            )
            if tool_choice:
                completion_request["tool_choice"] = tool_choice

        #########################################################
        # forward any litellm specific params
        #########################################################
        completion_request_dict = dict(completion_request)
        if litellm_params:
            completion_request_dict = self._add_generic_litellm_params_to_request(
                completion_request_dict=completion_request_dict,
                litellm_params=litellm_params,
            )
        return completion_request_dict

    def _add_generic_litellm_params_to_request(
        self,
        completion_request_dict: Dict[str, Any],
        litellm_params: Optional[GenericLiteLLMParams] = None,
    ) -> dict:
        """Add generic litellm params to request. e.g add api_base, api_key, api_version, etc.

        Args:
            completion_request_dict: request dict to update in place
            litellm_params: GenericLiteLLMParams whose non-None fields are copied over

        Returns:
            The updated request dict
        """
        # Only forward fields declared on GenericLiteLLMParams; everything
        # else in the dump is ignored.
        allowed_fields = GenericLiteLLMParams.model_fields.keys()
        if litellm_params:
            litellm_dict = litellm_params.model_dump(exclude_none=True)
            for key, value in litellm_dict.items():
                if key in allowed_fields:
                    completion_request_dict[key] = value
        return completion_request_dict

    def translate_completion_output_params_streaming(
        self,
        completion_stream: Any,
    ) -> Union[AsyncIterator[bytes], None]:
        """Transform streaming completion output to Google GenAI SSE format."""
        google_genai_wrapper = GoogleGenAIStreamWrapper(
            completion_stream=completion_stream
        )
        # Return the SSE-wrapped version for proper event formatting
        return google_genai_wrapper.async_google_genai_sse_wrapper()

    def _transform_google_genai_tools_to_openai(
        self,
        tools: List[Dict[str, Any]],
    ) -> List[ChatCompletionToolParam]:
        """Transform Google GenAI tools (functionDeclarations) to OpenAI tools format."""
        openai_tools: List[Dict[str, Any]] = []
        for tool in tools:
            if "functionDeclarations" in tool:
                for func_decl in tool["functionDeclarations"]:
                    function_chunk: Dict[str, Any] = {
                        "name": func_decl.get("name", ""),
                    }
                    if "description" in func_decl:
                        function_chunk["description"] = func_decl["description"]
                    if "parametersJsonSchema" in func_decl:
                        function_chunk["parameters"] = func_decl[
                            "parametersJsonSchema"
                        ]
                    elif "parameters" in func_decl:
                        # Google GenAI function declarations may carry an
                        # OpenAPI-style `parameters` schema instead of
                        # `parametersJsonSchema`; fall back to it so the tool
                        # schema is not silently dropped.
                        function_chunk["parameters"] = func_decl["parameters"]
                    openai_tools.append(
                        {"type": "function", "function": function_chunk}
                    )
        # normalize the tool schemas
        normalized_tools = [normalize_tool_schema(tool) for tool in openai_tools]
        return cast(List[ChatCompletionToolParam], normalized_tools)

    def _transform_google_genai_tool_config_to_openai(
        self,
        tool_config: Dict[str, Any],
    ) -> Optional[ChatCompletionToolChoiceValues]:
        """Transform Google GenAI tool_config (functionCallingConfig.mode) to OpenAI tool_choice."""
        function_calling_config = tool_config.get("functionCallingConfig", {})
        mode = function_calling_config.get("mode", "AUTO")
        # AUTO -> auto, ANY -> required, NONE -> none; unknown modes -> auto
        mode_mapping = {"AUTO": "auto", "ANY": "required", "NONE": "none"}
        tool_choice = mode_mapping.get(mode, "auto")
        return cast(ChatCompletionToolChoiceValues, tool_choice)

    def _transform_contents_to_messages(
        self,
        contents: List[Dict[str, Any]],
        system_instruction: Optional[Dict[str, Any]] = None,
    ) -> List[AllMessageValues]:
        """Transform Google GenAI contents to OpenAI messages format.

        Args:
            contents: list of Google GenAI content dicts ({"role", "parts"})
            system_instruction: optional system instruction content dict

        Returns:
            List of OpenAI-format messages (system/user/tool/assistant)
        """
        messages: List[AllMessageValues] = []

        # Handle system instruction (only the first text part is used)
        if system_instruction:
            system_parts = system_instruction.get("parts", [])
            if system_parts and "text" in system_parts[0]:
                messages.append(
                    ChatCompletionSystemMessage(
                        role="system", content=system_parts[0]["text"]
                    )
                )

        for content in contents:
            role = content.get("role", "user")
            parts = content.get("parts", [])

            if role == "user":
                # Handle user messages with potential function responses
                content_parts: List[
                    Union[ChatCompletionTextObject, ChatCompletionImageObject]
                ] = []
                tool_messages: List[ChatCompletionToolMessage] = []

                for part in parts:
                    if isinstance(part, dict):
                        if "text" in part:
                            content_parts.append(
                                cast(
                                    ChatCompletionTextObject,
                                    {"type": "text", "text": part["text"]},
                                )
                            )
                        elif "inline_data" in part or "inlineData" in part:
                            # Handle Base64 image data. Accept both snake_case
                            # (Python SDK) and camelCase (REST wire format).
                            inline_data = (
                                part.get("inline_data")
                                or part.get("inlineData")
                                or {}
                            )
                            mime_type = (
                                inline_data.get("mime_type")
                                or inline_data.get("mimeType")
                                or "image/jpeg"
                            )
                            data = inline_data.get("data", "")
                            content_parts.append(
                                cast(
                                    ChatCompletionImageObject,
                                    {
                                        "type": "image_url",
                                        "image_url": {
                                            "url": f"data:{mime_type};base64,{data}"
                                        },
                                    },
                                )
                            )
                        elif "functionResponse" in part:
                            # Transform function response to tool message
                            func_response = part["functionResponse"]
                            tool_messages.append(
                                ChatCompletionToolMessage(
                                    role="tool",
                                    tool_call_id=f"call_{func_response.get('name', 'unknown')}",
                                    content=json.dumps(
                                        func_response.get("response", {})
                                    ),
                                )
                            )
                    elif isinstance(part, str):
                        content_parts.append(
                            cast(
                                ChatCompletionTextObject,
                                {"type": "text", "text": part},
                            )
                        )

                # Add user message if there's content
                if content_parts:
                    # If only one text part, use simple string format for
                    # backward compatibility
                    if (
                        len(content_parts) == 1
                        and isinstance(content_parts[0], dict)
                        and content_parts[0].get("type") == "text"
                    ):
                        text_part = cast(ChatCompletionTextObject, content_parts[0])
                        messages.append(
                            ChatCompletionUserMessage(
                                role="user", content=text_part["text"]
                            )
                        )
                    else:
                        # Use multimodal format (array of content parts)
                        messages.append(
                            ChatCompletionUserMessage(
                                role="user", content=content_parts
                            )
                        )

                # Add tool messages
                messages.extend(tool_messages)

            elif role == "model":
                # Handle assistant messages with potential function calls
                combined_text = ""
                tool_calls: List[ChatCompletionAssistantToolCall] = []

                for part in parts:
                    if isinstance(part, dict):
                        if "text" in part:
                            combined_text += part["text"]
                        elif "functionCall" in part:
                            # Transform function call to tool call
                            func_call = part["functionCall"]
                            tool_calls.append(
                                ChatCompletionAssistantToolCall(
                                    id=f"call_{func_call.get('name', 'unknown')}",
                                    type="function",
                                    function=ChatCompletionToolCallFunctionChunk(
                                        name=func_call.get("name", ""),
                                        arguments=json.dumps(
                                            func_call.get("args", {})
                                        ),
                                    ),
                                )
                            )
                    elif isinstance(part, str):
                        combined_text += part

                # Create assistant message
                if tool_calls:
                    assistant_message = ChatCompletionAssistantMessage(
                        role="assistant",
                        content=combined_text if combined_text else None,
                        tool_calls=tool_calls,
                    )
                else:
                    assistant_message = ChatCompletionAssistantMessage(
                        role="assistant",
                        content=combined_text if combined_text else None,
                    )
                messages.append(assistant_message)

        return messages

    def translate_completion_to_generate_content(
        self,
        response: ModelResponse,
    ) -> Dict[str, Any]:
        """
        Transform litellm completion response to Google GenAI generate_content format

        Args:
            response: ModelResponse from litellm.completion

        Returns:
            Dict in Google GenAI generate_content response format

        Raises:
            ValueError: when the response has no choices or no message
        """
        # Extract the main response content
        choice = response.choices[0] if response.choices else None
        if not choice:
            raise ValueError("Invalid completion response: no choices found")

        # Handle different choice types (Choices vs StreamingChoices)
        if isinstance(choice, Choices):
            if not choice.message:
                raise ValueError(
                    "Invalid completion response: no message found in choice"
                )
            parts = self._transform_openai_message_to_google_genai_parts(choice.message)
        else:
            # Fallback for generic choice objects.
            # NOTE(review): assumes dict-like `message`/`delta` attributes in
            # this fallback path — confirm against callers.
            message_content = getattr(choice, "message", {}).get(
                "content", ""
            ) or getattr(choice, "delta", {}).get("content", "")
            parts = [{"text": message_content}] if message_content else []

        # Create Google GenAI format response
        generate_content_response: Dict[str, Any] = {
            "candidates": [
                {
                    "content": {"parts": parts, "role": "model"},
                    "finishReason": self._map_finish_reason(
                        getattr(choice, "finish_reason", None)
                    ),
                    "index": 0,
                    "safetyRatings": [],
                }
            ],
            "usageMetadata": (
                self._map_usage(getattr(response, "usage", None))
                if hasattr(response, "usage") and getattr(response, "usage", None)
                else {
                    "promptTokenCount": 0,
                    "candidatesTokenCount": 0,
                    "totalTokenCount": 0,
                }
            ),
        }

        # Add text field for convenience (common in Google GenAI responses)
        text_content = ""
        for part in parts:
            if isinstance(part, dict) and "text" in part:
                text_content += part["text"]
        if text_content:
            generate_content_response["text"] = text_content

        return generate_content_response

    def translate_streaming_completion_to_generate_content(
        self,
        response: Union[ModelResponse, ModelResponseStream],
        wrapper: GoogleGenAIStreamWrapper,
    ) -> Optional[Dict[str, Any]]:
        """
        Transform streaming litellm completion chunk to Google GenAI generate_content format

        Args:
            response: Streaming ModelResponse chunk from litellm.completion
            wrapper: GoogleGenAIStreamWrapper instance (holds tool-call accumulation state)

        Returns:
            Dict in Google GenAI streaming generate_content response format,
            or None for chunks that carry neither parts nor a finish reason
        """
        # Extract the main response content from streaming chunk
        choice = response.choices[0] if response.choices else None
        if not choice:
            # Return empty chunk if no choices
            return None

        # Handle streaming choice
        if isinstance(choice, StreamingChoices):
            if choice.delta:
                parts = self._transform_openai_delta_to_google_genai_parts_with_accumulation(
                    choice.delta, wrapper
                )
            else:
                parts = []
            finish_reason = getattr(choice, "finish_reason", None)
        else:
            # Fallback for generic choice objects
            message_content = getattr(choice, "delta", {}).get("content", "")
            parts = [{"text": message_content}] if message_content else []
            finish_reason = getattr(choice, "finish_reason", None)

        # Only create response chunk if we have parts or it's the final chunk
        if not parts and not finish_reason:
            return None

        # Create Google GenAI streaming format response
        streaming_chunk: Dict[str, Any] = {
            "candidates": [
                {
                    "content": {"parts": parts, "role": "model"},
                    "finishReason": (
                        self._map_finish_reason(finish_reason)
                        if finish_reason
                        else None
                    ),
                    "index": 0,
                    "safetyRatings": [],
                }
            ]
        }

        # Add usage metadata only in the final chunk (when finish_reason is present)
        if finish_reason:
            usage_metadata = (
                self._map_usage(getattr(response, "usage", None))
                if hasattr(response, "usage") and getattr(response, "usage", None)
                else {
                    "promptTokenCount": 0,
                    "candidatesTokenCount": 0,
                    "totalTokenCount": 0,
                }
            )
            streaming_chunk["usageMetadata"] = usage_metadata

        # Add text field for convenience (common in Google GenAI responses)
        text_content = ""
        for part in parts:
            if isinstance(part, dict) and "text" in part:
                text_content += part["text"]
        if text_content:
            streaming_chunk["text"] = text_content

        return streaming_chunk

    def _transform_openai_message_to_google_genai_parts(
        self,
        message: Any,
    ) -> List[Dict[str, Any]]:
        """Transform an OpenAI assistant message to Google GenAI parts format."""
        parts: List[Dict[str, Any]] = []

        # Add text content if present
        if hasattr(message, "content") and message.content:
            parts.append({"text": message.content})

        # Add tool calls if present
        if hasattr(message, "tool_calls") and message.tool_calls:
            for tool_call in message.tool_calls:
                if hasattr(tool_call, "function") and tool_call.function:
                    try:
                        args = (
                            json.loads(tool_call.function.arguments)
                            if tool_call.function.arguments
                            else {}
                        )
                    except json.JSONDecodeError:
                        # Unparseable arguments degrade to an empty args dict
                        args = {}
                    parts.append(
                        {
                            "functionCall": {
                                "name": tool_call.function.name
                                or "undefined_tool_name",
                                "args": args,
                            }
                        }
                    )

        # Google GenAI candidates always carry at least one part
        return parts if parts else [{"text": ""}]

    def _transform_openai_delta_to_google_genai_parts_with_accumulation(
        self, delta: Any, wrapper: GoogleGenAIStreamWrapper
    ) -> List[Dict[str, Any]]:
        """Transforms OpenAI delta to Google GenAI parts, accumulating streaming tool calls.

        Tool-call name/argument fragments are buffered on the wrapper (keyed by
        the delta's tool call ``index``) until the arguments form valid JSON
        and a name has arrived, at which point a functionCall part is emitted.
        """
        # 1. Initialize wrapper state if it doesn't exist
        if not hasattr(wrapper, "accumulated_tool_calls"):
            wrapper.accumulated_tool_calls = {}

        parts: List[Dict[str, Any]] = []
        if hasattr(delta, "content") and delta.content:
            parts.append({"text": delta.content})

        # 2. Ensure tool_calls is iterable even when the delta lacks the
        #    attribute entirely (previously this raised AttributeError).
        tool_calls = getattr(delta, "tool_calls", None) or []
        for tool_call in tool_calls:
            if not hasattr(tool_call, "function"):
                continue

            # 3. Use `index` as the primary key for accumulation
            tool_call_index = getattr(tool_call, "index", None)
            if tool_call_index is None:
                continue  # Index is essential for tracking streaming tool calls

            # Initialize accumulator for this index if it's new
            if tool_call_index not in wrapper.accumulated_tool_calls:
                wrapper.accumulated_tool_calls[tool_call_index] = {
                    "name": "",
                    "arguments": "",
                }

            # Accumulate name and arguments
            function_name = getattr(tool_call.function, "name", None)
            args_chunk = getattr(tool_call.function, "arguments", None)

            # Optimization: Skip chunks that have no new data
            if not function_name and not args_chunk:
                verbose_logger.debug(
                    f"Skipping empty tool call chunk for index: {tool_call_index}"
                )
                continue

            if function_name:
                wrapper.accumulated_tool_calls[tool_call_index]["name"] = function_name
            if args_chunk:
                wrapper.accumulated_tool_calls[tool_call_index][
                    "arguments"
                ] += args_chunk

            # Attempt to parse and emit a complete tool call
            accumulated_data = wrapper.accumulated_tool_calls[tool_call_index]
            accumulated_name = accumulated_data["name"]
            accumulated_args = accumulated_data["arguments"]

            # 5. Attempt to parse arguments even if name hasn't arrived.
            try:
                parsed_args = json.loads(accumulated_args)
                # If parsing succeeds but we don't have a name yet, wait: the
                # part will be created by a later chunk that brings the name.
                if accumulated_name:
                    parts.append(
                        {
                            "functionCall": {
                                "name": accumulated_name,
                                "args": parsed_args,
                            }
                        }
                    )
                    # Remove the completed tool call from the accumulator
                    del wrapper.accumulated_tool_calls[tool_call_index]
            except json.JSONDecodeError:
                # The JSON for arguments is still incomplete; keep
                # accumulating and wait for more chunks.
                pass

        return parts

    def _map_finish_reason(self, finish_reason: Optional[str]) -> str:
        """Map OpenAI finish reasons to Google GenAI finish reasons (default STOP)."""
        if not finish_reason:
            return "STOP"
        mapping = {
            "stop": "STOP",
            "length": "MAX_TOKENS",
            "content_filter": "SAFETY",
            "tool_calls": "STOP",
            "function_call": "STOP",
        }
        return mapping.get(finish_reason, "STOP")

    def _map_usage(self, usage: Any) -> Dict[str, int]:
        """Map OpenAI usage to Google GenAI usageMetadata format (missing fields -> 0)."""
        return {
            "promptTokenCount": getattr(usage, "prompt_tokens", 0) or 0,
            "candidatesTokenCount": getattr(usage, "completion_tokens", 0) or 0,
            "totalTokenCount": getattr(usage, "total_tokens", 0) or 0,
        }