chore: initial public snapshot for github upload

2026-03-26 20:06:14 +08:00
commit 0e5ecd930e
3497 changed files with 1586236 additions and 0 deletions
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/llms/bedrock/embed/amazon_nova_transformation.py
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/llms/bedrock/embed/amazon_nova_transformation.py
@@ -0,0 +1,361 @@
+"""
+Transformation logic from OpenAI /v1/embeddings format to Bedrock Amazon Nova /invoke and /async-invoke format.
+
+Why separate file? Make it easy to see how transformation works
+
+Supports:
+- Synchronous embeddings (SINGLE_EMBEDDING)
+- Asynchronous embeddings with segmentation (SEGMENTED_EMBEDDING)
+- Multimodal inputs: text, image, video, audio
+- Multiple embedding purposes and dimensions
+
+Docs - https://docs.aws.amazon.com/bedrock/latest/userguide/nova-embed.html
+"""
+
+from typing import List, Optional
+
+from litellm.types.utils import (
+    Embedding,
+    EmbeddingResponse,
+    PromptTokensDetailsWrapper,
+    Usage,
+)
+
+
+class AmazonNovaEmbeddingConfig:
+    """
+    Reference: https://docs.aws.amazon.com/bedrock/latest/userguide/nova-embed.html
+
+    Amazon Nova Multimodal Embeddings supports:
+    - Text, image, video, and audio inputs
+    - Synchronous (InvokeModel) and asynchronous (StartAsyncInvoke) APIs
+    - Multiple embedding purposes and dimensions
+    """
+
+    def __init__(self) -> None:
+        pass
+
+    def get_supported_openai_params(self) -> List[str]:
+        return [
+            "dimensions",
+        ]
+
+    def map_openai_params(
+        self, non_default_params: dict, optional_params: dict
+    ) -> dict:
+        """Map OpenAI-style parameters to Nova parameters."""
+        for k, v in non_default_params.items():
+            if k == "dimensions":
+                # Map OpenAI dimensions to Nova embedding_dimension
+                optional_params["embedding_dimension"] = v
+            elif k in self.get_supported_openai_params():
+                optional_params[k] = v
+        return optional_params
+
+    def _parse_data_url(self, data_url: str) -> tuple:
+        """
+        Parse a data URL to extract the media type and base64 data.
+
+        Args:
+            data_url: Data URL in format: data:image/jpeg;base64,/9j/4AAQ...
+
+        Returns:
+            tuple: (media_type, base64_data)
+                media_type: e.g., "image/jpeg", "video/mp4", "audio/mpeg"
+                base64_data: The base64-encoded data without the prefix
+        """
+        if not data_url.startswith("data:"):
+            raise ValueError(f"Invalid data URL format: {data_url[:50]}...")
+
+        # Split by comma to separate metadata from data
+        # Format: data:image/jpeg;base64,<base64_data>
+        if "," not in data_url:
+            raise ValueError(
+                f"Invalid data URL format (missing comma): {data_url[:50]}..."
+            )
+
+        metadata, base64_data = data_url.split(",", 1)
+
+        # Extract media type from metadata
+        # Remove 'data:' prefix and ';base64' suffix
+        metadata = metadata[5:]  # Remove 'data:'
+
+        if ";" in metadata:
+            media_type = metadata.split(";")[0]
+        else:
+            media_type = metadata
+
+        return media_type, base64_data
+
+    def _transform_request(
+        self,
+        input: str,
+        inference_params: dict,
+        async_invoke_route: bool = False,
+        model_id: Optional[str] = None,
+        output_s3_uri: Optional[str] = None,
+    ) -> dict:
+        """
+        Transform OpenAI-style input to Nova format.
+
+        Only handles OpenAI params (dimensions). All other Nova-specific params
+        should be passed via inference_params and will be passed through as-is.
+
+        Args:
+            input: The input text or media reference
+            inference_params: Additional parameters (will be passed through)
+            async_invoke_route: Whether this is for async invoke
+            model_id: Model ID (for async invoke)
+            output_s3_uri: S3 URI for output (for async invoke)
+
+        Returns:
+            dict: Nova embedding request
+        """
+        # Determine task type
+        task_type = "SEGMENTED_EMBEDDING" if async_invoke_route else "SINGLE_EMBEDDING"
+
+        # Build the base request structure
+        request: dict = {
+            "schemaVersion": "nova-multimodal-embed-v1",
+            "taskType": task_type,
+        }
+
+        # Start with inference_params (user-provided params)
+        embedding_params = inference_params.copy()
+
+        embedding_params.pop("output_s3_uri", None)
+
+        # Map OpenAI dimensions to embeddingDimension if provided
+        if "dimensions" in embedding_params:
+            embedding_params["embeddingDimension"] = embedding_params.pop("dimensions")
+        elif "embedding_dimension" in embedding_params:
+            embedding_params["embeddingDimension"] = embedding_params.pop(
+                "embedding_dimension"
+            )
+
+        # Add required embeddingPurpose if not provided (required by Nova API)
+        if "embeddingPurpose" not in embedding_params:
+            embedding_params["embeddingPurpose"] = "GENERIC_INDEX"
+
+        # Add required embeddingDimension if not provided (required by Nova API)
+        if "embeddingDimension" not in embedding_params:
+            embedding_params["embeddingDimension"] = 3072
+
+        # For text/media input, add basic structure if user hasn't provided text/image/video/audio
+        if (
+            "text" not in embedding_params
+            and "image" not in embedding_params
+            and "video" not in embedding_params
+            and "audio" not in embedding_params
+        ):
+            # Check if input is a data URL (e.g., data:image/jpeg;base64,...)
+            if input.startswith("data:"):
+                # Parse the data URL to extract media type and base64 data
+                media_type, base64_data = self._parse_data_url(input)
+
+                if media_type.startswith("image/"):
+                    # Extract image format from MIME type (e.g., image/jpeg -> jpeg)
+                    image_format = media_type.split("/")[1].lower()
+                    # Nova API expects specific formats
+                    if image_format == "jpg":
+                        image_format = "jpeg"
+
+                    embedding_params["image"] = {
+                        "format": image_format,
+                        "source": {"bytes": base64_data},
+                    }
+                elif media_type.startswith("video/"):
+                    # Handle video data URLs
+                    video_format = media_type.split("/")[1].lower()
+                    embedding_params["video"] = {
+                        "format": video_format,
+                        "source": {"bytes": base64_data},
+                    }
+                elif media_type.startswith("audio/"):
+                    # Handle audio data URLs
+                    audio_format = media_type.split("/")[1].lower()
+                    embedding_params["audio"] = {
+                        "format": audio_format,
+                        "source": {"bytes": base64_data},
+                    }
+                else:
+                    # Fallback to text for unknown types
+                    embedding_params["text"] = {"value": input, "truncationMode": "END"}
+            elif input.startswith("s3://"):
+                # S3 URL - default to text for now, user should specify modality
+                embedding_params["text"] = {
+                    "source": {"s3Location": {"uri": input}},
+                    "truncationMode": "END",  # Required by Nova API
+                }
+            else:
+                # Plain text input
+                embedding_params["text"] = {
+                    "value": input,
+                    "truncationMode": "END",  # Required by Nova API
+                }
+
+        # Set the embedding params in the request
+        if task_type == "SINGLE_EMBEDDING":
+            request["singleEmbeddingParams"] = embedding_params
+        else:
+            request["segmentedEmbeddingParams"] = embedding_params
+
+        # For async invoke, wrap in the async invoke format
+        if async_invoke_route and model_id:
+            return self._wrap_async_invoke_request(
+                model_input=request,
+                model_id=model_id,
+                output_s3_uri=output_s3_uri,
+            )
+
+        return request
+
+    def _wrap_async_invoke_request(
+        self,
+        model_input: dict,
+        model_id: str,
+        output_s3_uri: Optional[str] = None,
+    ) -> dict:
+        """
+        Wrap the transformed request in the AWS Bedrock async invoke format.
+
+        Args:
+            model_input: The transformed Nova embedding request
+            model_id: The model identifier (without async_invoke prefix)
+            output_s3_uri: S3 URI for output data config
+
+        Returns:
+            dict: The wrapped async invoke request
+        """
+        import urllib.parse
+
+        # Clean the model ID
+        unquoted_model_id = urllib.parse.unquote(model_id)
+        if unquoted_model_id.startswith("async_invoke/"):
+            unquoted_model_id = unquoted_model_id.replace("async_invoke/", "")
+
+        # Validate that the S3 URI is not empty
+        if not output_s3_uri or output_s3_uri.strip() == "":
+            raise ValueError("output_s3_uri is required for async invoke requests")
+
+        return {
+            "modelId": unquoted_model_id,
+            "modelInput": model_input,
+            "outputDataConfig": {"s3OutputDataConfig": {"s3Uri": output_s3_uri}},
+        }
+
+    def _transform_response(
+        self,
+        response_list: List[dict],
+        model: str,
+        batch_data: Optional[List[dict]] = None,
+    ) -> EmbeddingResponse:
+        """
+        Transform Nova response to OpenAI format.
+
+        Nova response format:
+        {
+            "embeddings": [
+                {
+                    "embeddingType": "TEXT" | "IMAGE" | "VIDEO" | "AUDIO" | "AUDIO_VIDEO_COMBINED",
+                    "embedding": [0.1, 0.2, ...],
+                    "truncatedCharLength": 100  # Optional, only for text
+                }
+            ]
+        }
+        """
+        embeddings: List[Embedding] = []
+        total_tokens = 0
+
+        for response in response_list:
+            # Nova response has an "embeddings" array
+            if "embeddings" in response and isinstance(response["embeddings"], list):
+                for item in response["embeddings"]:
+                    if "embedding" in item:
+                        embedding = Embedding(
+                            embedding=item["embedding"],
+                            index=len(embeddings),
+                            object="embedding",
+                        )
+                        embeddings.append(embedding)
+
+                        # Estimate token count
+                        # For text, use truncatedCharLength if available
+                        if "truncatedCharLength" in item:
+                            total_tokens += item["truncatedCharLength"] // 4
+                        else:
+                            # Rough estimate based on embedding dimension
+                            total_tokens += len(item["embedding"]) // 4
+            elif "embedding" in response:
+                # Direct embedding response (fallback)
+                embedding = Embedding(
+                    embedding=response["embedding"],
+                    index=len(embeddings),
+                    object="embedding",
+                )
+                embeddings.append(embedding)
+                total_tokens += len(response["embedding"]) // 4
+
+        # Count images from original requests for cost calculation
+        image_count = 0
+        if batch_data:
+            for request_data in batch_data:
+                # Nova wraps params in singleEmbeddingParams or segmentedEmbeddingParams
+                params = request_data.get(
+                    "singleEmbeddingParams",
+                    request_data.get("segmentedEmbeddingParams", {}),
+                )
+                if "image" in params:
+                    image_count += 1
+
+        prompt_tokens_details: Optional[PromptTokensDetailsWrapper] = None
+        if image_count > 0:
+            prompt_tokens_details = PromptTokensDetailsWrapper(
+                image_count=image_count,
+            )
+
+        usage = Usage(
+            prompt_tokens=total_tokens,
+            total_tokens=total_tokens,
+            prompt_tokens_details=prompt_tokens_details,
+        )
+
+        return EmbeddingResponse(data=embeddings, model=model, usage=usage)
+
+    def _transform_async_invoke_response(
+        self, response: dict, model: str
+    ) -> EmbeddingResponse:
+        """
+        Transform async invoke response (invocation ARN) to OpenAI format.
+
+        AWS async invoke returns:
+        {
+            "invocationArn": "arn:aws:bedrock:us-east-1:123456789012:async-invoke/abc123"
+        }
+
+        We transform this to a job-like embedding response with the ARN in hidden params.
+        """
+        invocation_arn = response.get("invocationArn", "")
+
+        # Create a placeholder embedding object for the job
+        embedding = Embedding(
+            embedding=[],  # Empty embedding for async jobs
+            index=0,
+            object="embedding",
+        )
+
+        # Create usage object (empty for async jobs)
+        usage = Usage(prompt_tokens=0, total_tokens=0)
+
+        # Create hidden params with job ID
+        from litellm.types.llms.base import HiddenParams
+
+        hidden_params = HiddenParams()
+        setattr(hidden_params, "_invocation_arn", invocation_arn)
+
+        return EmbeddingResponse(
+            data=[embedding],
+            model=model,
+            usage=usage,
+            hidden_params=hidden_params,
+        )
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/llms/bedrock/embed/amazon_titan_g1_transformation.py
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/llms/bedrock/embed/amazon_titan_g1_transformation.py
@@ -0,0 +1,88 @@
+"""
+Transformation logic from OpenAI /v1/embeddings format to Bedrock Amazon Titan G1 /invoke format. 
+
+Why separate file? Make it easy to see how transformation works
+
+Covers
+- G1 request format
+
+Docs - https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-titan-embed-text.html
+"""
+
+import types
+from typing import List
+
+from litellm.types.llms.bedrock import (
+    AmazonTitanG1EmbeddingRequest,
+    AmazonTitanG1EmbeddingResponse,
+)
+from litellm.types.utils import Embedding, EmbeddingResponse, Usage
+
+
+class AmazonTitanG1Config:
+    """
+    Reference: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-titan-embed-text.html
+    """
+
+    def __init__(
+        self,
+    ) -> None:
+        locals_ = locals().copy()
+        for key, value in locals_.items():
+            if key != "self" and value is not None:
+                setattr(self.__class__, key, value)
+
+    @classmethod
+    def get_config(cls):
+        return {
+            k: v
+            for k, v in cls.__dict__.items()
+            if not k.startswith("__")
+            and not isinstance(
+                v,
+                (
+                    types.FunctionType,
+                    types.BuiltinFunctionType,
+                    classmethod,
+                    staticmethod,
+                ),
+            )
+            and v is not None
+        }
+
+    def get_supported_openai_params(self) -> List[str]:
+        return []
+
+    def map_openai_params(
+        self, non_default_params: dict, optional_params: dict
+    ) -> dict:
+        return optional_params
+
+    def _transform_request(
+        self, input: str, inference_params: dict
+    ) -> AmazonTitanG1EmbeddingRequest:
+        return AmazonTitanG1EmbeddingRequest(inputText=input)
+
+    def _transform_response(
+        self, response_list: List[dict], model: str
+    ) -> EmbeddingResponse:
+        total_prompt_tokens = 0
+
+        transformed_responses: List[Embedding] = []
+        for index, response in enumerate(response_list):
+            _parsed_response = AmazonTitanG1EmbeddingResponse(**response)  # type: ignore
+            transformed_responses.append(
+                Embedding(
+                    embedding=_parsed_response["embedding"],
+                    index=index,
+                    object="embedding",
+                )
+            )
+            total_prompt_tokens += _parsed_response["inputTextTokenCount"]
+
+        usage = Usage(
+            prompt_tokens=total_prompt_tokens,
+            completion_tokens=0,
+            total_tokens=total_prompt_tokens,
+        )
+        return EmbeddingResponse(model=model, usage=usage, data=transformed_responses)
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/llms/bedrock/embed/amazon_titan_multimodal_transformation.py
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/llms/bedrock/embed/amazon_titan_multimodal_transformation.py
@@ -0,0 +1,101 @@
+"""
+Transformation logic from OpenAI /v1/embeddings format to Bedrock Amazon Titan multimodal /invoke format.
+
+Why separate file? Make it easy to see how transformation works
+
+Docs - https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-titan-embed-mm.html
+"""
+
+from typing import List, Optional
+
+from litellm.types.llms.bedrock import (
+    AmazonTitanMultimodalEmbeddingConfig,
+    AmazonTitanMultimodalEmbeddingRequest,
+    AmazonTitanMultimodalEmbeddingResponse,
+)
+from litellm.types.utils import (
+    Embedding,
+    EmbeddingResponse,
+    PromptTokensDetailsWrapper,
+    Usage,
+)
+from litellm.utils import get_base64_str, is_base64_encoded
+
+
+class AmazonTitanMultimodalEmbeddingG1Config:
+    """
+    Reference - https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-titan-embed-mm.html
+    """
+
+    def __init__(self) -> None:
+        pass
+
+    def get_supported_openai_params(self) -> List[str]:
+        return ["dimensions"]
+
+    def map_openai_params(
+        self, non_default_params: dict, optional_params: dict
+    ) -> dict:
+        for k, v in non_default_params.items():
+            if k == "dimensions":
+                optional_params[
+                    "embeddingConfig"
+                ] = AmazonTitanMultimodalEmbeddingConfig(outputEmbeddingLength=v)
+        return optional_params
+
+    def _transform_request(
+        self, input: str, inference_params: dict
+    ) -> AmazonTitanMultimodalEmbeddingRequest:
+        ## check if b64 encoded str or not ##
+        is_encoded = is_base64_encoded(input)
+        if is_encoded:  # check if string is b64 encoded image or not
+            b64_str = get_base64_str(input)
+            transformed_request = AmazonTitanMultimodalEmbeddingRequest(
+                inputImage=b64_str
+            )
+        else:
+            transformed_request = AmazonTitanMultimodalEmbeddingRequest(inputText=input)
+
+        for k, v in inference_params.items():
+            transformed_request[k] = v  # type: ignore
+        return transformed_request
+
+    def _transform_response(
+        self,
+        response_list: List[dict],
+        model: str,
+        batch_data: Optional[List[dict]] = None,
+    ) -> EmbeddingResponse:
+        total_prompt_tokens = 0
+        transformed_responses: List[Embedding] = []
+        for index, response in enumerate(response_list):
+            _parsed_response = AmazonTitanMultimodalEmbeddingResponse(**response)  # type: ignore
+            transformed_responses.append(
+                Embedding(
+                    embedding=_parsed_response["embedding"],
+                    index=index,
+                    object="embedding",
+                )
+            )
+            total_prompt_tokens += _parsed_response["inputTextTokenCount"]
+
+        # Count images from original requests for cost calculation
+        image_count = 0
+        if batch_data:
+            for request_data in batch_data:
+                if "inputImage" in request_data:
+                    image_count += 1
+
+        prompt_tokens_details: Optional[PromptTokensDetailsWrapper] = None
+        if image_count > 0:
+            prompt_tokens_details = PromptTokensDetailsWrapper(
+                image_count=image_count,
+            )
+
+        usage = Usage(
+            prompt_tokens=total_prompt_tokens,
+            completion_tokens=0,
+            total_tokens=total_prompt_tokens,
+            prompt_tokens_details=prompt_tokens_details,
+        )
+        return EmbeddingResponse(model=model, usage=usage, data=transformed_responses)
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/llms/bedrock/embed/amazon_titan_v2_transformation.py
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/llms/bedrock/embed/amazon_titan_v2_transformation.py
@@ -0,0 +1,131 @@
+"""
+Transformation logic from OpenAI /v1/embeddings format to Bedrock Amazon Titan V2 /invoke format.
+
+Why separate file? Make it easy to see how transformation works
+
+Covers
+- v2 request format
+
+Docs - https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-titan-embed-text.html
+"""
+
+import types
+from typing import List, Optional, Union
+
+from litellm.types.llms.bedrock import (
+    AmazonTitanV2EmbeddingRequest,
+    AmazonTitanV2EmbeddingResponse,
+)
+from litellm.types.utils import Embedding, EmbeddingResponse, Usage
+
+
+class AmazonTitanV2Config:
+    """
+    Reference: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-titan-embed-text.html
+
+    normalize: boolean - flag indicating whether or not to normalize the output embeddings. Defaults to true
+    dimensions: int - The number of dimensions the output embeddings should have. The following values are accepted: 1024 (default), 512, 256.
+    """
+
+    normalize: Optional[bool] = None
+    dimensions: Optional[int] = None
+
+    def __init__(
+        self, normalize: Optional[bool] = None, dimensions: Optional[int] = None
+    ) -> None:
+        locals_ = locals().copy()
+        for key, value in locals_.items():
+            if key != "self" and value is not None:
+                setattr(self.__class__, key, value)
+
+    @classmethod
+    def get_config(cls):
+        return {
+            k: v
+            for k, v in cls.__dict__.items()
+            if not k.startswith("__")
+            and not isinstance(
+                v,
+                (
+                    types.FunctionType,
+                    types.BuiltinFunctionType,
+                    classmethod,
+                    staticmethod,
+                ),
+            )
+            and v is not None
+        }
+
+    def get_supported_openai_params(self) -> List[str]:
+        return ["dimensions", "encoding_format"]
+
+    def map_openai_params(
+        self, non_default_params: dict, optional_params: dict
+    ) -> dict:
+        for k, v in non_default_params.items():
+            if k == "dimensions":
+                optional_params["dimensions"] = v
+            elif k == "encoding_format":
+                # Map OpenAI encoding_format to AWS embeddingTypes
+                if v == "float":
+                    optional_params["embeddingTypes"] = ["float"]
+                elif v == "base64":
+                    # base64 maps to binary format in AWS
+                    optional_params["embeddingTypes"] = ["binary"]
+                else:
+                    # For any other encoding format, default to float
+                    optional_params["embeddingTypes"] = ["float"]
+        return optional_params
+
+    def _transform_request(
+        self, input: str, inference_params: dict
+    ) -> AmazonTitanV2EmbeddingRequest:
+        return AmazonTitanV2EmbeddingRequest(inputText=input, **inference_params)  # type: ignore
+
+    def _transform_response(
+        self, response_list: List[dict], model: str
+    ) -> EmbeddingResponse:
+        total_prompt_tokens = 0
+
+        transformed_responses: List[Embedding] = []
+        for index, response in enumerate(response_list):
+            _parsed_response = AmazonTitanV2EmbeddingResponse(**response)  # type: ignore
+
+            # According to AWS docs, embeddingsByType is always present
+            # If binary was requested (encoding_format="base64"), use binary data
+            # Otherwise, use float data from embeddingsByType or fallback to embedding field
+            embedding_data: Union[List[float], List[int]]
+
+            if (
+                "embeddingsByType" in _parsed_response
+                and "binary" in _parsed_response["embeddingsByType"]
+            ):
+                # Use binary data if available (for encoding_format="base64")
+                embedding_data = _parsed_response["embeddingsByType"]["binary"]
+            elif (
+                "embeddingsByType" in _parsed_response
+                and "float" in _parsed_response["embeddingsByType"]
+            ):
+                # Use float data from embeddingsByType
+                embedding_data = _parsed_response["embeddingsByType"]["float"]
+            elif "embedding" in _parsed_response:
+                # Fallback to legacy embedding field
+                embedding_data = _parsed_response["embedding"]
+            else:
+                raise ValueError(f"No embedding data found in response: {response}")
+
+            transformed_responses.append(
+                Embedding(
+                    embedding=embedding_data,
+                    index=index,
+                    object="embedding",
+                )
+            )
+            total_prompt_tokens += _parsed_response["inputTextTokenCount"]
+
+        usage = Usage(
+            prompt_tokens=total_prompt_tokens,
+            completion_tokens=0,
+            total_tokens=total_prompt_tokens,
+        )
+        return EmbeddingResponse(model=model, usage=usage, data=transformed_responses)
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/llms/bedrock/embed/cohere_transformation.py
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/llms/bedrock/embed/cohere_transformation.py
@@ -0,0 +1,47 @@
+"""
+Transformation logic from OpenAI /v1/embeddings format to Bedrock Cohere /invoke format. 
+
+Why separate file? Make it easy to see how transformation works
+"""
+
+from typing import List
+
+from litellm.llms.cohere.embed.transformation import CohereEmbeddingConfig
+from litellm.types.llms.bedrock import CohereEmbeddingRequest
+
+
+class BedrockCohereEmbeddingConfig:
+    def __init__(self) -> None:
+        pass
+
+    def get_supported_openai_params(self) -> List[str]:
+        return ["encoding_format", "dimensions"]
+
+    def map_openai_params(
+        self, non_default_params: dict, optional_params: dict
+    ) -> dict:
+        for k, v in non_default_params.items():
+            if k == "encoding_format":
+                optional_params["embedding_types"] = v
+            elif k == "dimensions":
+                optional_params["output_dimension"] = v
+        return optional_params
+
+    def _is_v3_model(self, model: str) -> bool:
+        return "3" in model
+
+    def _transform_request(
+        self, model: str, input: List[str], inference_params: dict
+    ) -> CohereEmbeddingRequest:
+        transformed_request = CohereEmbeddingConfig()._transform_request(
+            model, input, inference_params
+        )
+
+        new_transformed_request = CohereEmbeddingRequest(
+            input_type=transformed_request["input_type"],
+        )
+        for k in CohereEmbeddingRequest.__annotations__.keys():
+            if k in transformed_request:
+                new_transformed_request[k] = transformed_request[k]  # type: ignore
+
+        return new_transformed_request
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/llms/bedrock/embed/embedding.py
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/llms/bedrock/embed/embedding.py
@@ -0,0 +1,699 @@
+"""
+Handles embedding calls to Bedrock's `/invoke` endpoint
+"""
+
+import copy
+import json
+import urllib.parse
+from typing import Any, Callable, List, Optional, Tuple, Union, get_args
+
+import httpx
+
+import litellm
+from litellm.constants import BEDROCK_EMBEDDING_PROVIDERS_LITERAL
+from litellm.llms.cohere.embed.handler import embedding as cohere_embedding
+from litellm.llms.custom_httpx.http_handler import (
+    AsyncHTTPHandler,
+    HTTPHandler,
+    _get_httpx_client,
+    get_async_httpx_client,
+)
+from litellm.secret_managers.main import get_secret
+from litellm.types.llms.bedrock import (
+    AmazonEmbeddingRequest,
+    CohereEmbeddingRequest,
+)
+from litellm.types.utils import EmbeddingResponse, LlmProviders
+
+from ..base_aws_llm import BaseAWSLLM
+from ..common_utils import BedrockError
+from .amazon_nova_transformation import AmazonNovaEmbeddingConfig
+from .amazon_titan_g1_transformation import AmazonTitanG1Config
+from .amazon_titan_multimodal_transformation import (
+    AmazonTitanMultimodalEmbeddingG1Config,
+)
+from .amazon_titan_v2_transformation import AmazonTitanV2Config
+from .cohere_transformation import BedrockCohereEmbeddingConfig
+from .twelvelabs_marengo_transformation import TwelveLabsMarengoEmbeddingConfig
+
+
+class BedrockEmbedding(BaseAWSLLM):
+    def _load_credentials(
+        self,
+        optional_params: dict,
+    ) -> Tuple[Any, str]:
+        try:
+            from botocore.credentials import Credentials
+        except ImportError:
+            raise ImportError("Missing boto3 to call bedrock. Run 'pip install boto3'.")
+        ## CREDENTIALS ##
+        # pop aws_secret_access_key, aws_access_key_id, aws_session_token, aws_region_name from kwargs, since completion calls fail with them
+        aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
+        aws_access_key_id = optional_params.pop("aws_access_key_id", None)
+        aws_session_token = optional_params.pop("aws_session_token", None)
+        aws_region_name = optional_params.pop("aws_region_name", None)
+        aws_role_name = optional_params.pop("aws_role_name", None)
+        aws_session_name = optional_params.pop("aws_session_name", None)
+        aws_profile_name = optional_params.pop("aws_profile_name", None)
+        aws_web_identity_token = optional_params.pop("aws_web_identity_token", None)
+        aws_sts_endpoint = optional_params.pop("aws_sts_endpoint", None)
+
+        ### SET REGION NAME ###
+        if aws_region_name is None:
+            # check env #
+            litellm_aws_region_name = get_secret("AWS_REGION_NAME", None)
+
+            if litellm_aws_region_name is not None and isinstance(
+                litellm_aws_region_name, str
+            ):
+                aws_region_name = litellm_aws_region_name
+
+            standard_aws_region_name = get_secret("AWS_REGION", None)
+            if standard_aws_region_name is not None and isinstance(
+                standard_aws_region_name, str
+            ):
+                aws_region_name = standard_aws_region_name
+
+            if aws_region_name is None:
+                aws_region_name = "us-west-2"
+
+        credentials: Credentials = self.get_credentials(  # type: ignore
+            aws_access_key_id=aws_access_key_id,
+            aws_secret_access_key=aws_secret_access_key,
+            aws_session_token=aws_session_token,
+            aws_region_name=aws_region_name,
+            aws_session_name=aws_session_name,
+            aws_profile_name=aws_profile_name,
+            aws_role_name=aws_role_name,
+            aws_web_identity_token=aws_web_identity_token,
+            aws_sts_endpoint=aws_sts_endpoint,
+        )
+        return credentials, aws_region_name
+
+    async def async_embeddings(self):
+        pass
+
+    def _make_sync_call(
+        self,
+        client: Optional[HTTPHandler],
+        timeout: Optional[Union[float, httpx.Timeout]],
+        api_base: str,
+        headers: dict,
+        data: dict,
+    ) -> dict:
+        if client is None or not isinstance(client, HTTPHandler):
+            _params = {}
+            if timeout is not None:
+                if isinstance(timeout, float) or isinstance(timeout, int):
+                    timeout = httpx.Timeout(timeout)
+                _params["timeout"] = timeout
+            client = _get_httpx_client(_params)  # type: ignore
+        else:
+            client = client
+        try:
+            response = client.post(url=api_base, headers=headers, data=json.dumps(data))  # type: ignore
+            response.raise_for_status()
+        except httpx.HTTPStatusError as err:
+            error_code = err.response.status_code
+            raise BedrockError(status_code=error_code, message=err.response.text)
+        except httpx.TimeoutException:
+            raise BedrockError(status_code=408, message="Timeout error occurred.")
+
+        return response.json()
+
+    async def _make_async_call(
+        self,
+        client: Optional[AsyncHTTPHandler],
+        timeout: Optional[Union[float, httpx.Timeout]],
+        api_base: str,
+        headers: dict,
+        data: dict,
+    ) -> dict:
+        if client is None or not isinstance(client, AsyncHTTPHandler):
+            _params = {}
+            if timeout is not None:
+                if isinstance(timeout, float) or isinstance(timeout, int):
+                    timeout = httpx.Timeout(timeout)
+                _params["timeout"] = timeout
+            client = get_async_httpx_client(
+                params=_params, llm_provider=litellm.LlmProviders.BEDROCK
+            )
+        else:
+            client = client
+
+        try:
+            response = await client.post(url=api_base, headers=headers, data=json.dumps(data))  # type: ignore
+            response.raise_for_status()
+        except httpx.HTTPStatusError as err:
+            error_code = err.response.status_code
+            raise BedrockError(status_code=error_code, message=err.response.text)
+        except httpx.TimeoutException:
+            raise BedrockError(status_code=408, message="Timeout error occurred.")
+
+        return response.json()
+
+    def _transform_response(
+        self,
+        response_list: List[dict],
+        model: str,
+        provider: BEDROCK_EMBEDDING_PROVIDERS_LITERAL,
+        is_async_invoke: Optional[bool] = False,
+        batch_data: Optional[List[dict]] = None,
+    ) -> Optional[EmbeddingResponse]:
+        """
+        Transforms the response from the Bedrock embedding provider to the OpenAI format.
+        """
+        returned_response: Optional[EmbeddingResponse] = None
+
+        # Handle async invoke responses (single response with invocationArn)
+        if (
+            is_async_invoke
+            and len(response_list) == 1
+            and "invocationArn" in response_list[0]
+        ):
+            if provider == "twelvelabs":
+                returned_response = (
+                    TwelveLabsMarengoEmbeddingConfig()._transform_async_invoke_response(
+                        response=response_list[0], model=model
+                    )
+                )
+            elif provider == "nova":
+                returned_response = (
+                    AmazonNovaEmbeddingConfig()._transform_async_invoke_response(
+                        response=response_list[0], model=model
+                    )
+                )
+            else:
+                # For other providers, create a generic async response
+                invocation_arn = response_list[0].get("invocationArn", "")
+
+                from litellm.types.utils import Embedding, Usage
+
+                embedding = Embedding(
+                    embedding=[],
+                    index=0,
+                    object="embedding",  # Must be literal "embedding"
+                )
+                usage = Usage(prompt_tokens=0, total_tokens=0)
+
+                # Create hidden params with job ID
+                from litellm.types.llms.base import HiddenParams
+
+                hidden_params = HiddenParams()
+                setattr(hidden_params, "_invocation_arn", invocation_arn)
+
+                returned_response = EmbeddingResponse(
+                    data=[embedding],
+                    model=model,
+                    usage=usage,
+                    hidden_params=hidden_params,
+                )
+        else:
+            # Handle regular invoke responses
+            if model == "amazon.titan-embed-image-v1":
+                returned_response = (
+                    AmazonTitanMultimodalEmbeddingG1Config()._transform_response(
+                        response_list=response_list, model=model, batch_data=batch_data
+                    )
+                )
+            elif model == "amazon.titan-embed-text-v1":
+                returned_response = AmazonTitanG1Config()._transform_response(
+                    response_list=response_list, model=model
+                )
+            elif model == "amazon.titan-embed-text-v2:0":
+                returned_response = AmazonTitanV2Config()._transform_response(
+                    response_list=response_list, model=model
+                )
+            elif provider == "twelvelabs":
+                returned_response = (
+                    TwelveLabsMarengoEmbeddingConfig()._transform_response(
+                        response_list=response_list, model=model
+                    )
+                )
+            elif provider == "nova":
+                returned_response = AmazonNovaEmbeddingConfig()._transform_response(
+                    response_list=response_list, model=model, batch_data=batch_data
+                )
+
+        ##########################################################
+        # Validate returned response
+        ##########################################################
+        if returned_response is None:
+            raise Exception(
+                "Unable to map model response to known provider format. model={}".format(
+                    model
+                )
+            )
+        return returned_response
+
+    def _single_func_embeddings(
+        self,
+        client: Optional[HTTPHandler],
+        timeout: Optional[Union[float, httpx.Timeout]],
+        batch_data: List[dict],
+        credentials: Any,
+        extra_headers: Optional[dict],
+        endpoint_url: str,
+        aws_region_name: str,
+        model: str,
+        logging_obj: Any,
+        provider: BEDROCK_EMBEDDING_PROVIDERS_LITERAL,
+        api_key: Optional[str] = None,
+        is_async_invoke: Optional[bool] = False,
+    ):
+        responses: List[dict] = []
+        for data in batch_data:
+            headers = {"Content-Type": "application/json"}
+            if extra_headers is not None:
+                headers = {"Content-Type": "application/json", **extra_headers}
+
+            prepped = self.get_request_headers(  # type: ignore  # type: ignore
+                credentials=credentials,
+                aws_region_name=aws_region_name,
+                extra_headers=extra_headers,
+                endpoint_url=endpoint_url,
+                data=json.dumps(data),
+                headers=headers,
+                api_key=api_key,
+            )
+
+            ## LOGGING
+            logging_obj.pre_call(
+                input=data,
+                api_key="",
+                additional_args={
+                    "complete_input_dict": data,
+                    "api_base": prepped.url,
+                    "headers": prepped.headers,
+                },
+            )
+            headers_for_request = (
+                dict(prepped.headers) if hasattr(prepped, "headers") else {}
+            )
+            response = self._make_sync_call(
+                client=client,
+                timeout=timeout,
+                api_base=prepped.url,
+                headers=headers_for_request,
+                data=data,
+            )
+
+            ## LOGGING
+            logging_obj.post_call(
+                input=data,
+                api_key="",
+                original_response=response,
+                additional_args={"complete_input_dict": data},
+            )
+
+            responses.append(response)
+
+        return self._transform_response(
+            response_list=responses,
+            model=model,
+            provider=provider,
+            is_async_invoke=is_async_invoke,
+            batch_data=batch_data,
+        )
+
+    async def _async_single_func_embeddings(
+        self,
+        client: Optional[AsyncHTTPHandler],
+        timeout: Optional[Union[float, httpx.Timeout]],
+        batch_data: List[dict],
+        credentials: Any,
+        extra_headers: Optional[dict],
+        endpoint_url: str,
+        aws_region_name: str,
+        model: str,
+        logging_obj: Any,
+        provider: BEDROCK_EMBEDDING_PROVIDERS_LITERAL,
+        api_key: Optional[str] = None,
+        is_async_invoke: Optional[bool] = False,
+    ):
+        responses: List[dict] = []
+        for data in batch_data:
+            headers = {"Content-Type": "application/json"}
+            if extra_headers is not None:
+                headers = {"Content-Type": "application/json", **extra_headers}
+
+            prepped = self.get_request_headers(  # type: ignore  # type: ignore
+                credentials=credentials,
+                aws_region_name=aws_region_name,
+                extra_headers=extra_headers,
+                endpoint_url=endpoint_url,
+                data=json.dumps(data),
+                headers=headers,
+                api_key=api_key,
+            )
+
+            ## LOGGING
+            logging_obj.pre_call(
+                input=data,
+                api_key="",
+                additional_args={
+                    "complete_input_dict": data,
+                    "api_base": prepped.url,
+                    "headers": prepped.headers,
+                },
+            )
+            # Convert CaseInsensitiveDict to regular dict for httpx compatibility
+            # This ensures custom headers are properly forwarded, especially with IAM roles and custom api_base
+            headers_for_request = (
+                dict(prepped.headers) if hasattr(prepped, "headers") else {}
+            )
+            response = await self._make_async_call(
+                client=client,
+                timeout=timeout,
+                api_base=prepped.url,
+                headers=headers_for_request,
+                data=data,
+            )
+
+            ## LOGGING
+            logging_obj.post_call(
+                input=data,
+                api_key="",
+                original_response=response,
+                additional_args={"complete_input_dict": data},
+            )
+
+            responses.append(response)
+        ## TRANSFORM RESPONSE ##
+        return self._transform_response(
+            response_list=responses,
+            model=model,
+            provider=provider,
+            is_async_invoke=is_async_invoke,
+            batch_data=batch_data,
+        )
+
+    def embeddings(  # noqa: PLR0915
+        self,
+        model: str,
+        input: List[str],
+        api_base: Optional[str],
+        model_response: EmbeddingResponse,
+        print_verbose: Callable,
+        encoding,
+        logging_obj,
+        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]],
+        timeout: Optional[Union[float, httpx.Timeout]],
+        aembedding: Optional[bool],
+        extra_headers: Optional[dict],
+        optional_params: dict,
+        litellm_params: dict,
+        api_key: Optional[str] = None,
+    ) -> EmbeddingResponse:
+        """
+        Build and send a Bedrock embedding request for `model`.
+
+        Flow, as implemented below:
+        1. Load AWS credentials and region; this pops the `aws_*` keys from
+           `optional_params`.
+        2. Resolve the model id (`optional_params["model_id"]` if given, otherwise
+           `model`) and URL-quote it.
+        3. Detect the `async_invoke/` marker in `model`, strip it, and resolve the
+           embedding provider.
+        4. Build the provider request: a single `data` payload for Cohere, or a
+           `batch_data` list with one payload per input for Amazon Titan,
+           TwelveLabs and Nova.
+        5. Send it: batched providers go through `_single_func_embeddings` /
+           `_async_single_func_embeddings`; the Cohere request is signed here and
+           delegated to `cohere_embedding`.
+
+        Returns:
+            The embedding response. When `aembedding` is truthy and the provider is
+            batched, the un-awaited coroutine from `_async_single_func_embeddings`
+            is returned instead.
+
+        Raises:
+            Exception: if the provider cannot be determined, or if no request could
+                be built or mapped for the provider.
+        """
+        credentials, aws_region_name = self._load_credentials(optional_params)
+
+        ### TRANSFORMATION ###
+        unencoded_model_id = (
+            optional_params.pop("model_id", None) or model
+        )  # default to model if not passed
+        modelId = urllib.parse.quote(unencoded_model_id, safe="")
+        aws_region_name = self._get_aws_region_name(
+            optional_params={"aws_region_name": aws_region_name},
+            model=model,
+            model_id=unencoded_model_id,
+        )
+        # NOTE(review): modelId above is derived before the "async_invoke/" marker is
+        # stripped from `model`, so it still carries the marker when no explicit
+        # model_id is given - presumably the provider configs normalize it; confirm.
+        # Check async invoke needs to be used
+        has_async_invoke = "async_invoke/" in model
+        if has_async_invoke:
+            model = model.replace("async_invoke/", "", 1)
+        provider = self.get_bedrock_embedding_provider(model)
+        if provider is None:
+            raise Exception(
+                f"Unable to determine bedrock embedding provider for model: {model}. "
+                f"Supported providers: {list(get_args(BEDROCK_EMBEDDING_PROVIDERS_LITERAL))}"
+            )
+        # Filter a deep copy so optional_params itself keeps its remaining keys.
+        inference_params = copy.deepcopy(optional_params)
+        inference_params = {
+            k: v
+            for k, v in inference_params.items()
+            if k.lower() not in self.aws_authentication_params
+        }
+        inference_params.pop(
+            "user", None
+        )  # make sure user is not passed in for bedrock call
+
+        # Cohere takes all inputs in one request (`data`); the other providers get
+        # one request per input item (`batch_data`).
+        data: Optional[CohereEmbeddingRequest] = None
+        batch_data: Optional[List] = None
+        if provider == "cohere":
+            data = BedrockCohereEmbeddingConfig()._transform_request(
+                model=model, input=input, inference_params=inference_params
+            )
+        elif provider == "amazon" and model in [
+            "amazon.titan-embed-image-v1",
+            "amazon.titan-embed-text-v1",
+            "amazon.titan-embed-text-v2:0",
+        ]:
+            batch_data = []
+            for i in input:
+                if model == "amazon.titan-embed-image-v1":
+                    transformed_request: (
+                        AmazonEmbeddingRequest
+                    ) = AmazonTitanMultimodalEmbeddingG1Config()._transform_request(
+                        input=i, inference_params=inference_params
+                    )
+                elif model == "amazon.titan-embed-text-v1":
+                    transformed_request = AmazonTitanG1Config()._transform_request(
+                        input=i, inference_params=inference_params
+                    )
+                elif model == "amazon.titan-embed-text-v2:0":
+                    transformed_request = AmazonTitanV2Config()._transform_request(
+                        input=i, inference_params=inference_params
+                    )
+                else:
+                    # NOTE(review): appears unreachable - the enclosing elif already
+                    # restricts `model` to these three ids.
+                    raise Exception(
+                        "Unmapped model. Received={}. Expected={}".format(
+                            model,
+                            [
+                                "amazon.titan-embed-image-v1",
+                                "amazon.titan-embed-text-v1",
+                                "amazon.titan-embed-text-v2:0",
+                            ],
+                        )
+                    )
+                batch_data.append(transformed_request)
+        elif provider == "twelvelabs":
+            batch_data = []
+            for i in input:
+                twelvelabs_request = (
+                    TwelveLabsMarengoEmbeddingConfig()._transform_request(
+                        input=i,
+                        inference_params=inference_params,
+                        async_invoke_route=has_async_invoke,
+                        model_id=modelId,
+                        output_s3_uri=inference_params.get("output_s3_uri"),
+                    )
+                )
+                batch_data.append(twelvelabs_request)
+        elif provider == "nova":
+            batch_data = []
+            for i in input:
+                nova_request = AmazonNovaEmbeddingConfig()._transform_request(
+                    input=i,
+                    inference_params=inference_params,
+                    async_invoke_route=has_async_invoke,
+                    model_id=modelId,
+                    output_s3_uri=inference_params.get("output_s3_uri"),
+                )
+                batch_data.append(nova_request)
+
+        ### SET RUNTIME ENDPOINT ###
+        endpoint_url, proxy_endpoint_url = self.get_runtime_endpoint(
+            api_base=api_base,
+            aws_bedrock_runtime_endpoint=optional_params.pop(
+                "aws_bedrock_runtime_endpoint", None
+            ),
+            aws_region_name=aws_region_name,
+        )
+        # Async-invoke jobs are posted to a shared path; regular calls address the
+        # model in the URL.
+        if has_async_invoke:
+            endpoint_url = f"{endpoint_url}/async-invoke"
+        else:
+            endpoint_url = f"{endpoint_url}/model/{modelId}/invoke"
+
+        if batch_data is not None:
+            if aembedding:
+                # Returned un-awaited: this method is not a coroutine itself.
+                return self._async_single_func_embeddings(  # type: ignore
+                    client=(
+                        client
+                        if client is not None and isinstance(client, AsyncHTTPHandler)
+                        else None
+                    ),
+                    timeout=timeout,
+                    batch_data=batch_data,
+                    credentials=credentials,
+                    extra_headers=extra_headers,
+                    endpoint_url=endpoint_url,
+                    aws_region_name=aws_region_name,
+                    model=model,
+                    logging_obj=logging_obj,
+                    api_key=api_key,
+                    provider=provider,
+                    is_async_invoke=has_async_invoke,
+                )
+            returned_response = self._single_func_embeddings(
+                client=(
+                    client
+                    if client is not None and isinstance(client, HTTPHandler)
+                    else None
+                ),
+                timeout=timeout,
+                batch_data=batch_data,
+                credentials=credentials,
+                extra_headers=extra_headers,
+                endpoint_url=endpoint_url,
+                aws_region_name=aws_region_name,
+                model=model,
+                logging_obj=logging_obj,
+                api_key=api_key,
+                provider=provider,
+                is_async_invoke=has_async_invoke,
+            )
+            if returned_response is None:
+                raise Exception("Unable to map Bedrock request to provider")
+            return returned_response
+        elif data is None:
+            # Neither a batched request nor a Cohere request was built above.
+            raise Exception("Unable to map Bedrock request to provider")
+
+        # Cohere path: sign the single request here, then hand off to the shared
+        # Cohere embedding handler.
+        headers = {"Content-Type": "application/json"}
+        if extra_headers is not None:
+            headers = {"Content-Type": "application/json", **extra_headers}
+
+        prepped = self.get_request_headers(  # type: ignore
+            credentials=credentials,
+            aws_region_name=aws_region_name,
+            extra_headers=extra_headers,
+            endpoint_url=endpoint_url,
+            data=json.dumps(data),
+            headers=headers,
+            api_key=api_key,
+        )
+
+        ## ROUTING ##
+        # Convert CaseInsensitiveDict to regular dict for httpx compatibility
+        headers_for_request = (
+            dict(prepped.headers) if hasattr(prepped, "headers") else {}
+        )
+        return cohere_embedding(
+            model=model,
+            input=input,
+            model_response=model_response,
+            logging_obj=logging_obj,
+            optional_params=optional_params,
+            encoding=encoding,
+            data=data,  # type: ignore
+            complete_api_base=prepped.url,
+            api_key=None,
+            aembedding=aembedding,
+            timeout=timeout,
+            client=client,
+            headers=headers_for_request,
+        )
+
+    async def _get_async_invoke_status(
+        self, invocation_arn: str, aws_region_name: str, logging_obj=None, **kwargs
+    ) -> dict:
+        """
+        Get the status of an async invoke job using the GetAsyncInvoke operation.
+
+        Args:
+            invocation_arn: The invocation ARN from the async invoke response
+            aws_region_name: AWS region name
+            **kwargs: Additional parameters (credentials, etc.)
+
+        Returns:
+            dict: Status response from AWS Bedrock
+        """
+
+        # Get AWS credentials using the same method as other Bedrock methods
+        credentials, _ = self._load_credentials(kwargs)
+
+        # Get the runtime endpoint
+        endpoint_url, _ = self.get_runtime_endpoint(
+            api_base=None,
+            aws_bedrock_runtime_endpoint=kwargs.get("aws_bedrock_runtime_endpoint"),
+            aws_region_name=aws_region_name,
+        )
+
+        from urllib.parse import quote
+
+        # Encode the ARN for use in URL path
+        encoded_arn = quote(invocation_arn, safe="")
+        status_url = f"{endpoint_url.rstrip('/')}/async-invoke/{encoded_arn}"
+
+        # Prepare headers for GET request
+        headers = {"Content-Type": "application/json"}
+
+        # Use AWSRequest directly for GET requests (get_request_headers hardcodes POST)
+        try:
+            from botocore.auth import SigV4Auth
+            from botocore.awsrequest import AWSRequest
+        except ImportError:
+            raise ImportError("Missing boto3 to call bedrock. Run 'pip install boto3'.")
+
+        # Create AWSRequest with GET method and encoded URL
+        request = AWSRequest(
+            method="GET",
+            url=status_url,
+            data=None,  # GET request, no body
+            headers=headers,
+        )
+
+        # Sign the request - SigV4Auth will create canonical string from request URL
+        sigv4 = SigV4Auth(credentials, "bedrock", aws_region_name)
+        sigv4.add_auth(request)
+
+        # Prepare the request
+        prepped = request.prepare()
+
+        # LOGGING
+        if logging_obj is not None:
+            # Create custom curl command for GET request
+            masked_headers = logging_obj._get_masked_headers(prepped.headers)
+            formatted_headers = " ".join(
+                [f"-H '{k}: {v}'" for k, v in masked_headers.items()]
+            )
+            custom_curl = "\n\nGET Request Sent from LiteLLM:\n"
+            custom_curl += "curl -X GET \\\n"
+            custom_curl += f"{prepped.url} \\\n"
+            custom_curl += f"{formatted_headers}\n"
+
+            logging_obj.pre_call(
+                input=invocation_arn,
+                api_key="",
+                additional_args={
+                    "complete_input_dict": {"invocation_arn": invocation_arn},
+                    "api_base": prepped.url,
+                    "headers": prepped.headers,
+                    "request_str": custom_curl,  # Override with custom GET curl command
+                },
+            )
+
+        # Make the GET request
+        client = get_async_httpx_client(llm_provider=LlmProviders.BEDROCK)
+        response = await client.get(
+            url=prepped.url,
+            headers=prepped.headers,
+        )
+
+        # LOGGING
+        if logging_obj is not None:
+            logging_obj.post_call(
+                input=invocation_arn,
+                api_key="",
+                original_response=response,
+                additional_args={
+                    "complete_input_dict": {"invocation_arn": invocation_arn}
+                },
+            )
+
+        # Parse response
+        if response.status_code == 200:
+            return response.json()
+        else:
+            raise Exception(
+                f"Failed to get async invoke status: {response.status_code} - {response.text}"
+            )
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/llms/bedrock/embed/twelvelabs_marengo_transformation.py
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/llms/bedrock/embed/twelvelabs_marengo_transformation.py
@@ -0,0 +1,304 @@
+"""
+Transformation logic from OpenAI /v1/embeddings format to Bedrock TwelveLabs Marengo /invoke and /async-invoke format.
+
+Why a separate file? To make it easy to see how the transformation works.
+
+Docs - https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-marengo.html
+"""
+
+from typing import List, Optional, Union, cast
+
+from litellm.types.llms.bedrock import (
+    TWELVELABS_EMBEDDING_INPUT_TYPES,
+    TwelveLabsAsyncInvokeRequest,
+    TwelveLabsMarengoEmbeddingRequest,
+    TwelveLabsOutputDataConfig,
+    TwelveLabsS3Location,
+    TwelveLabsS3OutputDataConfig,
+)
+from litellm.types.utils import Embedding, EmbeddingResponse, Usage
+
+
+class TwelveLabsMarengoEmbeddingConfig:
+    """
+    Reference - https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-marengo.html
+
+    Supports text, image, video, and audio inputs.
+    - InvokeModel: text and image inputs
+    - StartAsyncInvoke: video, audio, image, and text inputs
+    """
+
+    def __init__(self) -> None:
+        pass
+
+    def get_supported_openai_params(self) -> List[str]:
+        return [
+            "encoding_format",
+            "textTruncate",
+            "embeddingOption",
+            "startSec",
+            "lengthSec",
+            "useFixedLengthSec",
+            "minClipSec",
+            "input_type",
+        ]
+
+    def map_openai_params(
+        self, non_default_params: dict, optional_params: dict
+    ) -> dict:
+        for k, v in non_default_params.items():
+            if k == "encoding_format":
+                # TwelveLabs doesn't have encoding_format, but we can map it to embeddingOption
+                if v == "float":
+                    optional_params["embeddingOption"] = ["visual-text", "visual-image"]
+            elif k == "textTruncate":
+                optional_params["textTruncate"] = v
+            elif k == "embeddingOption":
+                optional_params["embeddingOption"] = v
+            elif k == "input_type":
+                # Map input_type to inputType for Bedrock
+                optional_params["inputType"] = v
+            elif k in ["startSec", "lengthSec", "useFixedLengthSec", "minClipSec"]:
+                optional_params[k] = v
+        return optional_params
+
+    def _extract_bucket_owner_from_params(self, inference_params: dict) -> str:
+        """
+        Extract bucket owner from inference parameters.
+        """
+        return inference_params.get("bucketOwner", "")
+
+    def _is_s3_url(self, input: str) -> bool:
+        """Check if input is an S3 URL."""
+        return input.startswith("s3://")
+
+    def _transform_request(
+        self,
+        input: str,
+        inference_params: dict,
+        async_invoke_route: bool = False,
+        model_id: Optional[str] = None,
+        output_s3_uri: Optional[str] = None,
+    ) -> Union[TwelveLabsMarengoEmbeddingRequest, TwelveLabsAsyncInvokeRequest]:
+        """
+        Transform OpenAI-style input to TwelveLabs Marengo format/async-invoke format.
+
+        Supports:
+        - Text inputs (for both invoke and async-invoke)
+        - Image inputs (for both invoke and async-invoke)
+        - Video inputs (async-invoke only)
+        - Audio inputs (async-invoke only)
+        - S3 URLs for all media types (async-invoke only)
+        """
+        # Get input_type or default to "text"
+        input_type = cast(
+            TWELVELABS_EMBEDDING_INPUT_TYPES,
+            inference_params.get("inputType")
+            or inference_params.get("input_type")
+            or "text",
+        )
+
+        # Validate that async-invoke is used for video/audio
+        if input_type in ["video", "audio"] and not async_invoke_route:
+            raise ValueError(
+                f"Input type '{input_type}' requires async_invoke route. "
+                f"Use model format: 'bedrock/async_invoke/model_id'"
+            )
+
+        transformed_request: TwelveLabsMarengoEmbeddingRequest = {
+            "inputType": input_type
+        }
+
+        if input_type == "text":
+            transformed_request["inputText"] = input
+            # Set default textTruncate if not specified
+            if "textTruncate" not in inference_params:
+                transformed_request["textTruncate"] = "end"
+
+        elif input_type in ["image", "video", "audio"]:
+            if self._is_s3_url(input):
+                # S3 URL input
+                s3_location: TwelveLabsS3Location = {"uri": input}
+                bucket_owner = self._extract_bucket_owner_from_params(inference_params)
+                if bucket_owner:
+                    s3_location["bucketOwner"] = bucket_owner
+
+                transformed_request["mediaSource"] = {"s3Location": s3_location}
+            else:
+                # Base64 encoded input
+                if input.startswith("data:"):
+                    # Extract base64 data from data URL
+                    b64_str = input.split(",", 1)[1] if "," in input else input
+                else:
+                    # Direct base64 string
+                    from litellm.utils import get_base64_str
+
+                    b64_str = get_base64_str(input)
+
+                transformed_request["mediaSource"] = {"base64String": b64_str}
+
+        # Apply any additional inference parameters
+        for k, v in inference_params.items():
+            if k not in [
+                "inputType",
+                "input_type",  # Exclude both camelCase and snake_case
+                "inputText",
+                "mediaSource",
+                "bucketOwner",  # Don't include bucketOwner in the request
+            ]:  # Don't override core fields
+                transformed_request[k] = v  # type: ignore
+
+        # If async invoke route, wrap in the async invoke format
+        if async_invoke_route and model_id:
+            return self._wrap_async_invoke_request(
+                model_input=transformed_request,
+                model_id=model_id,
+                output_s3_uri=output_s3_uri,
+            )
+
+        return transformed_request
+
+    def _wrap_async_invoke_request(
+        self,
+        model_input: TwelveLabsMarengoEmbeddingRequest,
+        model_id: str,
+        output_s3_uri: Optional[str] = None,
+    ) -> TwelveLabsAsyncInvokeRequest:
+        """
+        Wrap the transformed request in the correct AWS Bedrock async invoke format.
+
+        Args:
+            model_input: The transformed TwelveLabs Marengo embedding request
+            model_id: The model identifier (without async_invoke prefix)
+            output_s3_uri: Optional S3 URI for output data config
+
+        Returns:
+            TwelveLabsAsyncInvokeRequest: The wrapped async invoke request
+        """
+        import urllib.parse
+
+        # Clean the model ID
+        unquoted_model_id = urllib.parse.unquote(model_id)
+        if unquoted_model_id.startswith("async_invoke/"):
+            unquoted_model_id = unquoted_model_id.replace("async_invoke/", "")
+
+        # Validate that the S3 URI is not empty
+        if not output_s3_uri or output_s3_uri.strip() == "":
+            raise ValueError("output_s3_uri cannot be empty for async invoke requests")
+
+        return TwelveLabsAsyncInvokeRequest(
+            modelId=unquoted_model_id,
+            modelInput=model_input,
+            outputDataConfig=TwelveLabsOutputDataConfig(
+                s3OutputDataConfig=TwelveLabsS3OutputDataConfig(s3Uri=output_s3_uri)
+            ),
+        )
+
+    def _transform_response(
+        self, response_list: List[dict], model: str
+    ) -> EmbeddingResponse:
+        """
+        Transform TwelveLabs response to OpenAI format.
+        Handles the actual TwelveLabs response format: {"data": [{"embedding": [...]}]}
+        """
+        embeddings: List[Embedding] = []
+        total_tokens = 0
+
+        for response in response_list:
+            # TwelveLabs response format has a "data" field containing the embeddings
+            if "data" in response and isinstance(response["data"], list):
+                for item in response["data"]:
+                    if "embedding" in item:
+                        # Single embedding response
+                        embedding = Embedding(
+                            embedding=item["embedding"],
+                            index=len(embeddings),
+                            object="embedding",
+                        )
+                        embeddings.append(embedding)
+
+                        # Estimate token count (rough approximation)
+                        if "inputTextTokenCount" in item:
+                            total_tokens += item["inputTextTokenCount"]
+                        else:
+                            # Rough estimate: 1 token per 4 characters for text, or use embedding size
+                            total_tokens += len(item["embedding"]) // 4
+            elif "embedding" in response:
+                # Direct embedding response (fallback for other formats)
+                embedding = Embedding(
+                    embedding=response["embedding"],
+                    index=len(embeddings),
+                    object="embedding",
+                )
+                embeddings.append(embedding)
+
+                # Estimate token count (rough approximation)
+                if "inputTextTokenCount" in response:
+                    total_tokens += response["inputTextTokenCount"]
+                else:
+                    # Rough estimate: 1 token per 4 characters for text
+                    total_tokens += len(response.get("inputText", "")) // 4
+            elif "embeddings" in response:
+                # Multiple embeddings response (from video/audio)
+                for i, emb in enumerate(response["embeddings"]):
+                    embedding = Embedding(
+                        embedding=emb["embedding"],
+                        index=len(embeddings),
+                        object="embedding",
+                    )
+                    embeddings.append(embedding)
+                    total_tokens += len(emb["embedding"]) // 4  # Rough estimate
+
+        usage = Usage(prompt_tokens=total_tokens, total_tokens=total_tokens)
+
+        return EmbeddingResponse(data=embeddings, model=model, usage=usage)
+
+    def _transform_async_invoke_response(
+        self, response: dict, model: str
+    ) -> EmbeddingResponse:
+        """
+        Transform async invoke response (invocation ARN) to OpenAI format.
+
+        AWS async invoke returns:
+        {
+            "invocationArn": "arn:aws:bedrock:us-east-1:123456789012:async-invoke/abc123"
+        }
+
+        We transform this to a job-like embedding response:
+        {
+            "object": "list",
+            "data": [
+                {
+                    "object": "embedding_job_id:1234567890",
+                    "embedding": [],
+                    "index": 0
+                }
+            ],
+            "model": "model",
+            "usage": {}
+        }
+        """
+        invocation_arn = response.get("invocationArn", "")
+
+        # Create a placeholder embedding object for the job
+        embedding = Embedding(
+            embedding=[],  # Empty embedding for async jobs
+            index=0,
+            object="embedding",
+        )
+
+        # Create usage object (empty for async jobs)
+        usage = Usage(prompt_tokens=0, total_tokens=0)
+
+        # Create hidden params with job ID
+        from litellm.types.llms.base import HiddenParams
+
+        hidden_params = HiddenParams()
+        setattr(hidden_params, "_invocation_arn", invocation_arn)
+
+        return EmbeddingResponse(
+            data=[embedding],
+            model=model,
+            usage=usage,
+            hidden_params=hidden_params,
+        )