chore: initial public snapshot for github upload

commit 0e5ecd930e
Author: Your Name
Date: 2026-03-26 20:06:14 +08:00
3497 changed files with 1586236 additions and 0 deletions

File: amazon_nova_transformation.py

@@ -0,0 +1,361 @@
"""
Transformation logic from the OpenAI /v1/embeddings format to the Bedrock Amazon Nova /invoke and /async-invoke formats.
Kept in a separate file to make the transformation logic easy to follow.
Supports:
- Synchronous embeddings (SINGLE_EMBEDDING)
- Asynchronous embeddings with segmentation (SEGMENTED_EMBEDDING)
- Multimodal inputs: text, image, video, audio
- Multiple embedding purposes and dimensions
Docs - https://docs.aws.amazon.com/bedrock/latest/userguide/nova-embed.html
"""
from typing import List, Optional
from litellm.types.utils import (
Embedding,
EmbeddingResponse,
PromptTokensDetailsWrapper,
Usage,
)
class AmazonNovaEmbeddingConfig:
"""
Reference: https://docs.aws.amazon.com/bedrock/latest/userguide/nova-embed.html
Amazon Nova Multimodal Embeddings supports:
- Text, image, video, and audio inputs
- Synchronous (InvokeModel) and asynchronous (StartAsyncInvoke) APIs
- Multiple embedding purposes and dimensions
"""
def __init__(self) -> None:
pass
def get_supported_openai_params(self) -> List[str]:
return [
"dimensions",
]
def map_openai_params(
self, non_default_params: dict, optional_params: dict
) -> dict:
"""Map OpenAI-style parameters to Nova parameters."""
for k, v in non_default_params.items():
if k == "dimensions":
# Map OpenAI dimensions to Nova embedding_dimension
optional_params["embedding_dimension"] = v
elif k in self.get_supported_openai_params():
optional_params[k] = v
return optional_params
def _parse_data_url(self, data_url: str) -> tuple:
"""
Parse a data URL to extract the media type and base64 data.
Args:
data_url: Data URL in format: data:image/jpeg;base64,/9j/4AAQ...
Returns:
tuple: (media_type, base64_data)
media_type: e.g., "image/jpeg", "video/mp4", "audio/mpeg"
base64_data: The base64-encoded data without the prefix
"""
if not data_url.startswith("data:"):
raise ValueError(f"Invalid data URL format: {data_url[:50]}...")
# Split by comma to separate metadata from data
# Format: data:image/jpeg;base64,<base64_data>
if "," not in data_url:
raise ValueError(
f"Invalid data URL format (missing comma): {data_url[:50]}..."
)
metadata, base64_data = data_url.split(",", 1)
# Extract media type from metadata
# Remove 'data:' prefix and ';base64' suffix
metadata = metadata[5:] # Remove 'data:'
if ";" in metadata:
media_type = metadata.split(";")[0]
else:
media_type = metadata
return media_type, base64_data
def _transform_request(
self,
input: str,
inference_params: dict,
async_invoke_route: bool = False,
model_id: Optional[str] = None,
output_s3_uri: Optional[str] = None,
) -> dict:
"""
Transform OpenAI-style input to Nova format.
Only handles OpenAI params (dimensions). All other Nova-specific params
should be passed via inference_params and will be passed through as-is.
Args:
input: The input text or media reference
inference_params: Additional parameters (will be passed through)
async_invoke_route: Whether this is for async invoke
model_id: Model ID (for async invoke)
output_s3_uri: S3 URI for output (for async invoke)
Returns:
dict: Nova embedding request
"""
# Determine task type
task_type = "SEGMENTED_EMBEDDING" if async_invoke_route else "SINGLE_EMBEDDING"
# Build the base request structure
request: dict = {
"schemaVersion": "nova-multimodal-embed-v1",
"taskType": task_type,
}
# Start with inference_params (user-provided params)
embedding_params = inference_params.copy()
embedding_params.pop("output_s3_uri", None)
# Map OpenAI dimensions to embeddingDimension if provided
if "dimensions" in embedding_params:
embedding_params["embeddingDimension"] = embedding_params.pop("dimensions")
elif "embedding_dimension" in embedding_params:
embedding_params["embeddingDimension"] = embedding_params.pop(
"embedding_dimension"
)
# Add required embeddingPurpose if not provided (required by Nova API)
if "embeddingPurpose" not in embedding_params:
embedding_params["embeddingPurpose"] = "GENERIC_INDEX"
# Add required embeddingDimension if not provided (required by Nova API)
if "embeddingDimension" not in embedding_params:
embedding_params["embeddingDimension"] = 3072
# For text/media input, add basic structure if user hasn't provided text/image/video/audio
if (
"text" not in embedding_params
and "image" not in embedding_params
and "video" not in embedding_params
and "audio" not in embedding_params
):
# Check if input is a data URL (e.g., data:image/jpeg;base64,...)
if input.startswith("data:"):
# Parse the data URL to extract media type and base64 data
media_type, base64_data = self._parse_data_url(input)
if media_type.startswith("image/"):
# Extract image format from MIME type (e.g., image/jpeg -> jpeg)
image_format = media_type.split("/")[1].lower()
# Nova API expects specific formats
if image_format == "jpg":
image_format = "jpeg"
embedding_params["image"] = {
"format": image_format,
"source": {"bytes": base64_data},
}
elif media_type.startswith("video/"):
# Handle video data URLs
video_format = media_type.split("/")[1].lower()
embedding_params["video"] = {
"format": video_format,
"source": {"bytes": base64_data},
}
elif media_type.startswith("audio/"):
# Handle audio data URLs
audio_format = media_type.split("/")[1].lower()
embedding_params["audio"] = {
"format": audio_format,
"source": {"bytes": base64_data},
}
else:
# Fallback to text for unknown types
embedding_params["text"] = {"value": input, "truncationMode": "END"}
elif input.startswith("s3://"):
# S3 URL - default to text for now, user should specify modality
embedding_params["text"] = {
"source": {"s3Location": {"uri": input}},
"truncationMode": "END", # Required by Nova API
}
else:
# Plain text input
embedding_params["text"] = {
"value": input,
"truncationMode": "END", # Required by Nova API
}
# Set the embedding params in the request
if task_type == "SINGLE_EMBEDDING":
request["singleEmbeddingParams"] = embedding_params
else:
request["segmentedEmbeddingParams"] = embedding_params
# For async invoke, wrap in the async invoke format
if async_invoke_route and model_id:
return self._wrap_async_invoke_request(
model_input=request,
model_id=model_id,
output_s3_uri=output_s3_uri,
)
return request
def _wrap_async_invoke_request(
self,
model_input: dict,
model_id: str,
output_s3_uri: Optional[str] = None,
) -> dict:
"""
Wrap the transformed request in the AWS Bedrock async invoke format.
Args:
model_input: The transformed Nova embedding request
model_id: The model identifier (without async_invoke prefix)
output_s3_uri: S3 URI for output data config
Returns:
dict: The wrapped async invoke request
"""
import urllib.parse
# Clean the model ID
unquoted_model_id = urllib.parse.unquote(model_id)
if unquoted_model_id.startswith("async_invoke/"):
unquoted_model_id = unquoted_model_id.replace("async_invoke/", "")
# Validate that the S3 URI is not empty
if not output_s3_uri or output_s3_uri.strip() == "":
raise ValueError("output_s3_uri is required for async invoke requests")
return {
"modelId": unquoted_model_id,
"modelInput": model_input,
"outputDataConfig": {"s3OutputDataConfig": {"s3Uri": output_s3_uri}},
}
def _transform_response(
self,
response_list: List[dict],
model: str,
batch_data: Optional[List[dict]] = None,
) -> EmbeddingResponse:
"""
Transform Nova response to OpenAI format.
Nova response format:
{
"embeddings": [
{
"embeddingType": "TEXT" | "IMAGE" | "VIDEO" | "AUDIO" | "AUDIO_VIDEO_COMBINED",
"embedding": [0.1, 0.2, ...],
"truncatedCharLength": 100 # Optional, only for text
}
]
}
"""
embeddings: List[Embedding] = []
total_tokens = 0
for response in response_list:
# Nova response has an "embeddings" array
if "embeddings" in response and isinstance(response["embeddings"], list):
for item in response["embeddings"]:
if "embedding" in item:
embedding = Embedding(
embedding=item["embedding"],
index=len(embeddings),
object="embedding",
)
embeddings.append(embedding)
# Estimate token count
# For text, use truncatedCharLength if available
if "truncatedCharLength" in item:
total_tokens += item["truncatedCharLength"] // 4
else:
# Rough estimate based on embedding dimension
total_tokens += len(item["embedding"]) // 4
elif "embedding" in response:
# Direct embedding response (fallback)
embedding = Embedding(
embedding=response["embedding"],
index=len(embeddings),
object="embedding",
)
embeddings.append(embedding)
total_tokens += len(response["embedding"]) // 4
# Count images from original requests for cost calculation
image_count = 0
if batch_data:
for request_data in batch_data:
# Nova wraps params in singleEmbeddingParams or segmentedEmbeddingParams
params = request_data.get(
"singleEmbeddingParams",
request_data.get("segmentedEmbeddingParams", {}),
)
if "image" in params:
image_count += 1
prompt_tokens_details: Optional[PromptTokensDetailsWrapper] = None
if image_count > 0:
prompt_tokens_details = PromptTokensDetailsWrapper(
image_count=image_count,
)
usage = Usage(
prompt_tokens=total_tokens,
total_tokens=total_tokens,
prompt_tokens_details=prompt_tokens_details,
)
return EmbeddingResponse(data=embeddings, model=model, usage=usage)
def _transform_async_invoke_response(
self, response: dict, model: str
) -> EmbeddingResponse:
"""
Transform async invoke response (invocation ARN) to OpenAI format.
AWS async invoke returns:
{
"invocationArn": "arn:aws:bedrock:us-east-1:123456789012:async-invoke/abc123"
}
We transform this to a job-like embedding response with the ARN in hidden params.
"""
invocation_arn = response.get("invocationArn", "")
# Create a placeholder embedding object for the job
embedding = Embedding(
embedding=[], # Empty embedding for async jobs
index=0,
object="embedding",
)
# Create usage object (empty for async jobs)
usage = Usage(prompt_tokens=0, total_tokens=0)
# Create hidden params with job ID
from litellm.types.llms.base import HiddenParams
hidden_params = HiddenParams()
setattr(hidden_params, "_invocation_arn", invocation_arn)
return EmbeddingResponse(
data=[embedding],
model=model,
usage=usage,
hidden_params=hidden_params,
)
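
# Minimal usage sketch: exercises the config above with example inputs; the
# expected shapes in the comments follow from the defaults applied in
# `_transform_request` (GENERIC_INDEX purpose, text with END truncation).
if __name__ == "__main__":
    config = AmazonNovaEmbeddingConfig()
    params = config.map_openai_params(
        non_default_params={"dimensions": 1024}, optional_params={}
    )
    request = config._transform_request(input="hello world", inference_params=params)
    # request == {
    #     "schemaVersion": "nova-multimodal-embed-v1",
    #     "taskType": "SINGLE_EMBEDDING",
    #     "singleEmbeddingParams": {
    #         "embeddingDimension": 1024,
    #         "embeddingPurpose": "GENERIC_INDEX",
    #         "text": {"value": "hello world", "truncationMode": "END"},
    #     },
    # }
    media_type, b64_data = config._parse_data_url("data:image/jpeg;base64,/9j/4AAQ")
    # media_type == "image/jpeg"; b64_data == "/9j/4AAQ"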

File: amazon_titan_g1_transformation.py

@@ -0,0 +1,88 @@
"""
Transformation logic from OpenAI /v1/embeddings format to Bedrock Amazon Titan G1 /invoke format.
Kept in a separate file to make the transformation logic easy to follow.
Covers:
- G1 request format
Docs - https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-titan-embed-text.html
"""
import types
from typing import List
from litellm.types.llms.bedrock import (
AmazonTitanG1EmbeddingRequest,
AmazonTitanG1EmbeddingResponse,
)
from litellm.types.utils import Embedding, EmbeddingResponse, Usage
class AmazonTitanG1Config:
"""
Reference: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-titan-embed-text.html
"""
def __init__(
self,
) -> None:
locals_ = locals().copy()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
def get_supported_openai_params(self) -> List[str]:
return []
def map_openai_params(
self, non_default_params: dict, optional_params: dict
) -> dict:
return optional_params
def _transform_request(
self, input: str, inference_params: dict
) -> AmazonTitanG1EmbeddingRequest:
return AmazonTitanG1EmbeddingRequest(inputText=input)
def _transform_response(
self, response_list: List[dict], model: str
) -> EmbeddingResponse:
total_prompt_tokens = 0
transformed_responses: List[Embedding] = []
for index, response in enumerate(response_list):
_parsed_response = AmazonTitanG1EmbeddingResponse(**response) # type: ignore
transformed_responses.append(
Embedding(
embedding=_parsed_response["embedding"],
index=index,
object="embedding",
)
)
total_prompt_tokens += _parsed_response["inputTextTokenCount"]
usage = Usage(
prompt_tokens=total_prompt_tokens,
completion_tokens=0,
total_tokens=total_prompt_tokens,
)
return EmbeddingResponse(model=model, usage=usage, data=transformed_responses)
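
# Minimal usage sketch: builds a G1 request from example text and transforms a
# hand-written response of the documented shape back to the OpenAI format.
if __name__ == "__main__":
    config = AmazonTitanG1Config()
    request = config._transform_request(input="hello world", inference_params={})
    # request == {"inputText": "hello world"}
    example_response = [{"embedding": [0.1, 0.2, 0.3], "inputTextTokenCount": 3}]
    result = config._transform_response(
        response_list=example_response, model="amazon.titan-embed-text-v1"
    )
    # result.usage.prompt_tokens == 3; result.data holds one Embedding object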

File: amazon_titan_multimodal_transformation.py

@@ -0,0 +1,101 @@
"""
Transformation logic from OpenAI /v1/embeddings format to Bedrock Amazon Titan multimodal /invoke format.
Kept in a separate file to make the transformation logic easy to follow.
Docs - https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-titan-embed-mm.html
"""
from typing import List, Optional
from litellm.types.llms.bedrock import (
AmazonTitanMultimodalEmbeddingConfig,
AmazonTitanMultimodalEmbeddingRequest,
AmazonTitanMultimodalEmbeddingResponse,
)
from litellm.types.utils import (
Embedding,
EmbeddingResponse,
PromptTokensDetailsWrapper,
Usage,
)
from litellm.utils import get_base64_str, is_base64_encoded
class AmazonTitanMultimodalEmbeddingG1Config:
"""
Reference - https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-titan-embed-mm.html
"""
def __init__(self) -> None:
pass
def get_supported_openai_params(self) -> List[str]:
return ["dimensions"]
def map_openai_params(
self, non_default_params: dict, optional_params: dict
) -> dict:
for k, v in non_default_params.items():
if k == "dimensions":
optional_params[
"embeddingConfig"
] = AmazonTitanMultimodalEmbeddingConfig(outputEmbeddingLength=v)
return optional_params
def _transform_request(
self, input: str, inference_params: dict
) -> AmazonTitanMultimodalEmbeddingRequest:
# Route base64-encoded inputs to inputImage, everything else to inputText
is_encoded = is_base64_encoded(input)
if is_encoded:
b64_str = get_base64_str(input)
transformed_request = AmazonTitanMultimodalEmbeddingRequest(
inputImage=b64_str
)
else:
transformed_request = AmazonTitanMultimodalEmbeddingRequest(inputText=input)
for k, v in inference_params.items():
transformed_request[k] = v # type: ignore
return transformed_request
def _transform_response(
self,
response_list: List[dict],
model: str,
batch_data: Optional[List[dict]] = None,
) -> EmbeddingResponse:
total_prompt_tokens = 0
transformed_responses: List[Embedding] = []
for index, response in enumerate(response_list):
_parsed_response = AmazonTitanMultimodalEmbeddingResponse(**response) # type: ignore
transformed_responses.append(
Embedding(
embedding=_parsed_response["embedding"],
index=index,
object="embedding",
)
)
total_prompt_tokens += _parsed_response["inputTextTokenCount"]
# Count images from original requests for cost calculation
image_count = 0
if batch_data:
for request_data in batch_data:
if "inputImage" in request_data:
image_count += 1
prompt_tokens_details: Optional[PromptTokensDetailsWrapper] = None
if image_count > 0:
prompt_tokens_details = PromptTokensDetailsWrapper(
image_count=image_count,
)
usage = Usage(
prompt_tokens=total_prompt_tokens,
completion_tokens=0,
total_tokens=total_prompt_tokens,
prompt_tokens_details=prompt_tokens_details,
)
return EmbeddingResponse(model=model, usage=usage, data=transformed_responses)
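
# Minimal usage sketch: plain text routes to `inputText` (assuming
# `is_base64_encoded` rejects it), while OpenAI-style `dimensions` maps to
# Titan's `embeddingConfig.outputEmbeddingLength`.
if __name__ == "__main__":
    config = AmazonTitanMultimodalEmbeddingG1Config()
    params = config.map_openai_params(
        non_default_params={"dimensions": 256}, optional_params={}
    )
    # params == {"embeddingConfig": {"outputEmbeddingLength": 256}}
    request = config._transform_request(
        input="a red bicycle", inference_params=params
    )
    # request == {"inputText": "a red bicycle",
    #             "embeddingConfig": {"outputEmbeddingLength": 256}}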

File: amazon_titan_v2_transformation.py

@@ -0,0 +1,131 @@
"""
Transformation logic from OpenAI /v1/embeddings format to Bedrock Amazon Titan V2 /invoke format.
Kept in a separate file to make the transformation logic easy to follow.
Covers:
- v2 request format
Docs - https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-titan-embed-text.html
"""
import types
from typing import List, Optional, Union
from litellm.types.llms.bedrock import (
AmazonTitanV2EmbeddingRequest,
AmazonTitanV2EmbeddingResponse,
)
from litellm.types.utils import Embedding, EmbeddingResponse, Usage
class AmazonTitanV2Config:
"""
Reference: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-titan-embed-text.html
normalize: boolean - flag indicating whether or not to normalize the output embeddings. Defaults to true
dimensions: int - The number of dimensions the output embeddings should have. The following values are accepted: 1024 (default), 512, 256.
"""
normalize: Optional[bool] = None
dimensions: Optional[int] = None
def __init__(
self, normalize: Optional[bool] = None, dimensions: Optional[int] = None
) -> None:
locals_ = locals().copy()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
def get_supported_openai_params(self) -> List[str]:
return ["dimensions", "encoding_format"]
def map_openai_params(
self, non_default_params: dict, optional_params: dict
) -> dict:
for k, v in non_default_params.items():
if k == "dimensions":
optional_params["dimensions"] = v
elif k == "encoding_format":
# Map OpenAI encoding_format to AWS embeddingTypes
if v == "float":
optional_params["embeddingTypes"] = ["float"]
elif v == "base64":
# base64 maps to binary format in AWS
optional_params["embeddingTypes"] = ["binary"]
else:
# For any other encoding format, default to float
optional_params["embeddingTypes"] = ["float"]
return optional_params
def _transform_request(
self, input: str, inference_params: dict
) -> AmazonTitanV2EmbeddingRequest:
return AmazonTitanV2EmbeddingRequest(inputText=input, **inference_params) # type: ignore
def _transform_response(
self, response_list: List[dict], model: str
) -> EmbeddingResponse:
total_prompt_tokens = 0
transformed_responses: List[Embedding] = []
for index, response in enumerate(response_list):
_parsed_response = AmazonTitanV2EmbeddingResponse(**response) # type: ignore
# According to AWS docs, embeddingsByType is always present
# If binary was requested (encoding_format="base64"), use binary data
# Otherwise, use float data from embeddingsByType or fallback to embedding field
embedding_data: Union[List[float], List[int]]
if (
"embeddingsByType" in _parsed_response
and "binary" in _parsed_response["embeddingsByType"]
):
# Use binary data if available (for encoding_format="base64")
embedding_data = _parsed_response["embeddingsByType"]["binary"]
elif (
"embeddingsByType" in _parsed_response
and "float" in _parsed_response["embeddingsByType"]
):
# Use float data from embeddingsByType
embedding_data = _parsed_response["embeddingsByType"]["float"]
elif "embedding" in _parsed_response:
# Fallback to legacy embedding field
embedding_data = _parsed_response["embedding"]
else:
raise ValueError(f"No embedding data found in response: {response}")
transformed_responses.append(
Embedding(
embedding=embedding_data,
index=index,
object="embedding",
)
)
total_prompt_tokens += _parsed_response["inputTextTokenCount"]
usage = Usage(
prompt_tokens=total_prompt_tokens,
completion_tokens=0,
total_tokens=total_prompt_tokens,
)
return EmbeddingResponse(model=model, usage=usage, data=transformed_responses)
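
# Minimal usage sketch: `encoding_format` maps onto AWS `embeddingTypes`
# ("float" -> ["float"], "base64" -> ["binary"]) and `dimensions` passes
# through; both are merged into the request alongside `inputText`.
if __name__ == "__main__":
    config = AmazonTitanV2Config()
    params = config.map_openai_params(
        non_default_params={"dimensions": 512, "encoding_format": "float"},
        optional_params={},
    )
    # params == {"dimensions": 512, "embeddingTypes": ["float"]}
    request = config._transform_request(input="hello world", inference_params=params)
    # request == {"inputText": "hello world", "dimensions": 512,
    #             "embeddingTypes": ["float"]}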

File: cohere_transformation.py

@@ -0,0 +1,47 @@
"""
Transformation logic from OpenAI /v1/embeddings format to Bedrock Cohere /invoke format.
Kept in a separate file to make the transformation logic easy to follow.
"""
from typing import List
from litellm.llms.cohere.embed.transformation import CohereEmbeddingConfig
from litellm.types.llms.bedrock import CohereEmbeddingRequest
class BedrockCohereEmbeddingConfig:
def __init__(self) -> None:
pass
def get_supported_openai_params(self) -> List[str]:
return ["encoding_format", "dimensions"]
def map_openai_params(
self, non_default_params: dict, optional_params: dict
) -> dict:
for k, v in non_default_params.items():
if k == "encoding_format":
optional_params["embedding_types"] = v
elif k == "dimensions":
optional_params["output_dimension"] = v
return optional_params
def _is_v3_model(self, model: str) -> bool:
return "3" in model
def _transform_request(
self, model: str, input: List[str], inference_params: dict
) -> CohereEmbeddingRequest:
transformed_request = CohereEmbeddingConfig()._transform_request(
model, input, inference_params
)
new_transformed_request = CohereEmbeddingRequest(
input_type=transformed_request["input_type"],
)
for k in CohereEmbeddingRequest.__annotations__.keys():
if k in transformed_request:
new_transformed_request[k] = transformed_request[k] # type: ignore
return new_transformed_request
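
# Minimal usage sketch: OpenAI's `dimensions` maps to Cohere's
# `output_dimension`; the request body itself is built by delegating to
# CohereEmbeddingConfig and then filtering to the CohereEmbeddingRequest keys.
if __name__ == "__main__":
    config = BedrockCohereEmbeddingConfig()
    params = config.map_openai_params(
        non_default_params={"dimensions": 256}, optional_params={}
    )
    # params == {"output_dimension": 256}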

File: Bedrock /invoke embedding handler (same package as the transformation modules above)

@@ -0,0 +1,699 @@
"""
Handles embedding calls to Bedrock's `/invoke` endpoint
"""
import copy
import json
import urllib.parse
from typing import Any, Callable, List, Optional, Tuple, Union, get_args
import httpx
import litellm
from litellm.constants import BEDROCK_EMBEDDING_PROVIDERS_LITERAL
from litellm.llms.cohere.embed.handler import embedding as cohere_embedding
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
_get_httpx_client,
get_async_httpx_client,
)
from litellm.secret_managers.main import get_secret
from litellm.types.llms.bedrock import (
AmazonEmbeddingRequest,
CohereEmbeddingRequest,
)
from litellm.types.utils import EmbeddingResponse, LlmProviders
from ..base_aws_llm import BaseAWSLLM
from ..common_utils import BedrockError
from .amazon_nova_transformation import AmazonNovaEmbeddingConfig
from .amazon_titan_g1_transformation import AmazonTitanG1Config
from .amazon_titan_multimodal_transformation import (
AmazonTitanMultimodalEmbeddingG1Config,
)
from .amazon_titan_v2_transformation import AmazonTitanV2Config
from .cohere_transformation import BedrockCohereEmbeddingConfig
from .twelvelabs_marengo_transformation import TwelveLabsMarengoEmbeddingConfig
class BedrockEmbedding(BaseAWSLLM):
def _load_credentials(
self,
optional_params: dict,
) -> Tuple[Any, str]:
try:
from botocore.credentials import Credentials
except ImportError:
raise ImportError("Missing boto3 to call bedrock. Run 'pip install boto3'.")
## CREDENTIALS ##
# pop aws_secret_access_key, aws_access_key_id, aws_session_token, aws_region_name from kwargs, since completion calls fail with them
aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
aws_access_key_id = optional_params.pop("aws_access_key_id", None)
aws_session_token = optional_params.pop("aws_session_token", None)
aws_region_name = optional_params.pop("aws_region_name", None)
aws_role_name = optional_params.pop("aws_role_name", None)
aws_session_name = optional_params.pop("aws_session_name", None)
aws_profile_name = optional_params.pop("aws_profile_name", None)
aws_web_identity_token = optional_params.pop("aws_web_identity_token", None)
aws_sts_endpoint = optional_params.pop("aws_sts_endpoint", None)
### SET REGION NAME ###
if aws_region_name is None:
# check env #
litellm_aws_region_name = get_secret("AWS_REGION_NAME", None)
if litellm_aws_region_name is not None and isinstance(
litellm_aws_region_name, str
):
aws_region_name = litellm_aws_region_name
standard_aws_region_name = get_secret("AWS_REGION", None)
if standard_aws_region_name is not None and isinstance(
standard_aws_region_name, str
):
aws_region_name = standard_aws_region_name
if aws_region_name is None:
aws_region_name = "us-west-2"
credentials: Credentials = self.get_credentials( # type: ignore
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
aws_session_token=aws_session_token,
aws_region_name=aws_region_name,
aws_session_name=aws_session_name,
aws_profile_name=aws_profile_name,
aws_role_name=aws_role_name,
aws_web_identity_token=aws_web_identity_token,
aws_sts_endpoint=aws_sts_endpoint,
)
return credentials, aws_region_name
async def async_embeddings(self):
pass
def _make_sync_call(
self,
client: Optional[HTTPHandler],
timeout: Optional[Union[float, httpx.Timeout]],
api_base: str,
headers: dict,
data: dict,
) -> dict:
if client is None or not isinstance(client, HTTPHandler):
_params = {}
if timeout is not None:
if isinstance(timeout, float) or isinstance(timeout, int):
timeout = httpx.Timeout(timeout)
_params["timeout"] = timeout
client = _get_httpx_client(_params) # type: ignore
try:
response = client.post(url=api_base, headers=headers, data=json.dumps(data)) # type: ignore
response.raise_for_status()
except httpx.HTTPStatusError as err:
error_code = err.response.status_code
raise BedrockError(status_code=error_code, message=err.response.text)
except httpx.TimeoutException:
raise BedrockError(status_code=408, message="Timeout error occurred.")
return response.json()
async def _make_async_call(
self,
client: Optional[AsyncHTTPHandler],
timeout: Optional[Union[float, httpx.Timeout]],
api_base: str,
headers: dict,
data: dict,
) -> dict:
if client is None or not isinstance(client, AsyncHTTPHandler):
_params = {}
if timeout is not None:
if isinstance(timeout, float) or isinstance(timeout, int):
timeout = httpx.Timeout(timeout)
_params["timeout"] = timeout
client = get_async_httpx_client(
params=_params, llm_provider=litellm.LlmProviders.BEDROCK
)
try:
response = await client.post(url=api_base, headers=headers, data=json.dumps(data)) # type: ignore
response.raise_for_status()
except httpx.HTTPStatusError as err:
error_code = err.response.status_code
raise BedrockError(status_code=error_code, message=err.response.text)
except httpx.TimeoutException:
raise BedrockError(status_code=408, message="Timeout error occurred.")
return response.json()
def _transform_response(
self,
response_list: List[dict],
model: str,
provider: BEDROCK_EMBEDDING_PROVIDERS_LITERAL,
is_async_invoke: Optional[bool] = False,
batch_data: Optional[List[dict]] = None,
) -> Optional[EmbeddingResponse]:
"""
Transforms the response from the Bedrock embedding provider to the OpenAI format.
"""
returned_response: Optional[EmbeddingResponse] = None
# Handle async invoke responses (single response with invocationArn)
if (
is_async_invoke
and len(response_list) == 1
and "invocationArn" in response_list[0]
):
if provider == "twelvelabs":
returned_response = (
TwelveLabsMarengoEmbeddingConfig()._transform_async_invoke_response(
response=response_list[0], model=model
)
)
elif provider == "nova":
returned_response = (
AmazonNovaEmbeddingConfig()._transform_async_invoke_response(
response=response_list[0], model=model
)
)
else:
# For other providers, create a generic async response
invocation_arn = response_list[0].get("invocationArn", "")
from litellm.types.utils import Embedding, Usage
embedding = Embedding(
embedding=[],
index=0,
object="embedding", # Must be literal "embedding"
)
usage = Usage(prompt_tokens=0, total_tokens=0)
# Create hidden params with job ID
from litellm.types.llms.base import HiddenParams
hidden_params = HiddenParams()
setattr(hidden_params, "_invocation_arn", invocation_arn)
returned_response = EmbeddingResponse(
data=[embedding],
model=model,
usage=usage,
hidden_params=hidden_params,
)
else:
# Handle regular invoke responses
if model == "amazon.titan-embed-image-v1":
returned_response = (
AmazonTitanMultimodalEmbeddingG1Config()._transform_response(
response_list=response_list, model=model, batch_data=batch_data
)
)
elif model == "amazon.titan-embed-text-v1":
returned_response = AmazonTitanG1Config()._transform_response(
response_list=response_list, model=model
)
elif model == "amazon.titan-embed-text-v2:0":
returned_response = AmazonTitanV2Config()._transform_response(
response_list=response_list, model=model
)
elif provider == "twelvelabs":
returned_response = (
TwelveLabsMarengoEmbeddingConfig()._transform_response(
response_list=response_list, model=model
)
)
elif provider == "nova":
returned_response = AmazonNovaEmbeddingConfig()._transform_response(
response_list=response_list, model=model, batch_data=batch_data
)
##########################################################
# Validate returned response
##########################################################
if returned_response is None:
raise Exception(
"Unable to map model response to known provider format. model={}".format(
model
)
)
return returned_response
def _single_func_embeddings(
self,
client: Optional[HTTPHandler],
timeout: Optional[Union[float, httpx.Timeout]],
batch_data: List[dict],
credentials: Any,
extra_headers: Optional[dict],
endpoint_url: str,
aws_region_name: str,
model: str,
logging_obj: Any,
provider: BEDROCK_EMBEDDING_PROVIDERS_LITERAL,
api_key: Optional[str] = None,
is_async_invoke: Optional[bool] = False,
):
responses: List[dict] = []
for data in batch_data:
headers = {"Content-Type": "application/json"}
if extra_headers is not None:
headers = {"Content-Type": "application/json", **extra_headers}
prepped = self.get_request_headers(  # type: ignore
credentials=credentials,
aws_region_name=aws_region_name,
extra_headers=extra_headers,
endpoint_url=endpoint_url,
data=json.dumps(data),
headers=headers,
api_key=api_key,
)
## LOGGING
logging_obj.pre_call(
input=data,
api_key="",
additional_args={
"complete_input_dict": data,
"api_base": prepped.url,
"headers": prepped.headers,
},
)
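# Convert CaseInsensitiveDict to a regular dict for httpx compatibility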
headers_for_request = (
dict(prepped.headers) if hasattr(prepped, "headers") else {}
)
response = self._make_sync_call(
client=client,
timeout=timeout,
api_base=prepped.url,
headers=headers_for_request,
data=data,
)
## LOGGING
logging_obj.post_call(
input=data,
api_key="",
original_response=response,
additional_args={"complete_input_dict": data},
)
responses.append(response)
return self._transform_response(
response_list=responses,
model=model,
provider=provider,
is_async_invoke=is_async_invoke,
batch_data=batch_data,
)
async def _async_single_func_embeddings(
self,
client: Optional[AsyncHTTPHandler],
timeout: Optional[Union[float, httpx.Timeout]],
batch_data: List[dict],
credentials: Any,
extra_headers: Optional[dict],
endpoint_url: str,
aws_region_name: str,
model: str,
logging_obj: Any,
provider: BEDROCK_EMBEDDING_PROVIDERS_LITERAL,
api_key: Optional[str] = None,
is_async_invoke: Optional[bool] = False,
):
responses: List[dict] = []
for data in batch_data:
headers = {"Content-Type": "application/json"}
if extra_headers is not None:
headers = {"Content-Type": "application/json", **extra_headers}
prepped = self.get_request_headers(  # type: ignore
credentials=credentials,
aws_region_name=aws_region_name,
extra_headers=extra_headers,
endpoint_url=endpoint_url,
data=json.dumps(data),
headers=headers,
api_key=api_key,
)
## LOGGING
logging_obj.pre_call(
input=data,
api_key="",
additional_args={
"complete_input_dict": data,
"api_base": prepped.url,
"headers": prepped.headers,
},
)
# Convert CaseInsensitiveDict to regular dict for httpx compatibility
# This ensures custom headers are properly forwarded, especially with IAM roles and custom api_base
headers_for_request = (
dict(prepped.headers) if hasattr(prepped, "headers") else {}
)
response = await self._make_async_call(
client=client,
timeout=timeout,
api_base=prepped.url,
headers=headers_for_request,
data=data,
)
## LOGGING
logging_obj.post_call(
input=data,
api_key="",
original_response=response,
additional_args={"complete_input_dict": data},
)
responses.append(response)
## TRANSFORM RESPONSE ##
return self._transform_response(
response_list=responses,
model=model,
provider=provider,
is_async_invoke=is_async_invoke,
batch_data=batch_data,
)
def embeddings( # noqa: PLR0915
self,
model: str,
input: List[str],
api_base: Optional[str],
model_response: EmbeddingResponse,
print_verbose: Callable,
encoding,
logging_obj,
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]],
timeout: Optional[Union[float, httpx.Timeout]],
aembedding: Optional[bool],
extra_headers: Optional[dict],
optional_params: dict,
litellm_params: dict,
api_key: Optional[str] = None,
) -> EmbeddingResponse:
credentials, aws_region_name = self._load_credentials(optional_params)
### TRANSFORMATION ###
unencoded_model_id = (
optional_params.pop("model_id", None) or model
) # default to model if not passed
modelId = urllib.parse.quote(unencoded_model_id, safe="")
aws_region_name = self._get_aws_region_name(
optional_params={"aws_region_name": aws_region_name},
model=model,
model_id=unencoded_model_id,
)
# Check whether the async invoke route should be used
has_async_invoke = "async_invoke/" in model
if has_async_invoke:
model = model.replace("async_invoke/", "", 1)
provider = self.get_bedrock_embedding_provider(model)
if provider is None:
raise Exception(
f"Unable to determine bedrock embedding provider for model: {model}. "
f"Supported providers: {list(get_args(BEDROCK_EMBEDDING_PROVIDERS_LITERAL))}"
)
inference_params = copy.deepcopy(optional_params)
inference_params = {
k: v
for k, v in inference_params.items()
if k.lower() not in self.aws_authentication_params
}
inference_params.pop(
"user", None
) # make sure user is not passed in for bedrock call
data: Optional[CohereEmbeddingRequest] = None
batch_data: Optional[List] = None
if provider == "cohere":
data = BedrockCohereEmbeddingConfig()._transform_request(
model=model, input=input, inference_params=inference_params
)
elif provider == "amazon" and model in [
"amazon.titan-embed-image-v1",
"amazon.titan-embed-text-v1",
"amazon.titan-embed-text-v2:0",
]:
batch_data = []
for i in input:
if model == "amazon.titan-embed-image-v1":
transformed_request: (
AmazonEmbeddingRequest
) = AmazonTitanMultimodalEmbeddingG1Config()._transform_request(
input=i, inference_params=inference_params
)
elif model == "amazon.titan-embed-text-v1":
transformed_request = AmazonTitanG1Config()._transform_request(
input=i, inference_params=inference_params
)
elif model == "amazon.titan-embed-text-v2:0":
transformed_request = AmazonTitanV2Config()._transform_request(
input=i, inference_params=inference_params
)
else:
raise Exception(
"Unmapped model. Received={}. Expected={}".format(
model,
[
"amazon.titan-embed-image-v1",
"amazon.titan-embed-text-v1",
"amazon.titan-embed-text-v2:0",
],
)
)
batch_data.append(transformed_request)
elif provider == "twelvelabs":
batch_data = []
for i in input:
twelvelabs_request = (
TwelveLabsMarengoEmbeddingConfig()._transform_request(
input=i,
inference_params=inference_params,
async_invoke_route=has_async_invoke,
model_id=modelId,
output_s3_uri=inference_params.get("output_s3_uri"),
)
)
batch_data.append(twelvelabs_request)
elif provider == "nova":
batch_data = []
for i in input:
nova_request = AmazonNovaEmbeddingConfig()._transform_request(
input=i,
inference_params=inference_params,
async_invoke_route=has_async_invoke,
model_id=modelId,
output_s3_uri=inference_params.get("output_s3_uri"),
)
batch_data.append(nova_request)
### SET RUNTIME ENDPOINT ###
endpoint_url, proxy_endpoint_url = self.get_runtime_endpoint(
api_base=api_base,
aws_bedrock_runtime_endpoint=optional_params.pop(
"aws_bedrock_runtime_endpoint", None
),
aws_region_name=aws_region_name,
)
if has_async_invoke:
endpoint_url = f"{endpoint_url}/async-invoke"
else:
endpoint_url = f"{endpoint_url}/model/{modelId}/invoke"
if batch_data is not None:
if aembedding:
return self._async_single_func_embeddings( # type: ignore
client=(
client
if client is not None and isinstance(client, AsyncHTTPHandler)
else None
),
timeout=timeout,
batch_data=batch_data,
credentials=credentials,
extra_headers=extra_headers,
endpoint_url=endpoint_url,
aws_region_name=aws_region_name,
model=model,
logging_obj=logging_obj,
api_key=api_key,
provider=provider,
is_async_invoke=has_async_invoke,
)
returned_response = self._single_func_embeddings(
client=(
client
if client is not None and isinstance(client, HTTPHandler)
else None
),
timeout=timeout,
batch_data=batch_data,
credentials=credentials,
extra_headers=extra_headers,
endpoint_url=endpoint_url,
aws_region_name=aws_region_name,
model=model,
logging_obj=logging_obj,
api_key=api_key,
provider=provider,
is_async_invoke=has_async_invoke,
)
if returned_response is None:
raise Exception("Unable to map Bedrock request to provider")
return returned_response
elif data is None:
raise Exception("Unable to map Bedrock request to provider")
headers = {"Content-Type": "application/json"}
if extra_headers is not None:
headers = {"Content-Type": "application/json", **extra_headers}
prepped = self.get_request_headers( # type: ignore
credentials=credentials,
aws_region_name=aws_region_name,
extra_headers=extra_headers,
endpoint_url=endpoint_url,
data=json.dumps(data),
headers=headers,
api_key=api_key,
)
## ROUTING ##
# Convert CaseInsensitiveDict to regular dict for httpx compatibility
headers_for_request = (
dict(prepped.headers) if hasattr(prepped, "headers") else {}
)
return cohere_embedding(
model=model,
input=input,
model_response=model_response,
logging_obj=logging_obj,
optional_params=optional_params,
encoding=encoding,
data=data, # type: ignore
complete_api_base=prepped.url,
api_key=None,
aembedding=aembedding,
timeout=timeout,
client=client,
headers=headers_for_request,
)
async def _get_async_invoke_status(
self, invocation_arn: str, aws_region_name: str, logging_obj=None, **kwargs
) -> dict:
"""
Get the status of an async invoke job using the GetAsyncInvoke operation.
Args:
invocation_arn: The invocation ARN from the async invoke response
aws_region_name: AWS region name
**kwargs: Additional parameters (credentials, etc.)
Returns:
dict: Status response from AWS Bedrock
"""
# Get AWS credentials using the same method as other Bedrock methods
credentials, _ = self._load_credentials(kwargs)
# Get the runtime endpoint
endpoint_url, _ = self.get_runtime_endpoint(
api_base=None,
aws_bedrock_runtime_endpoint=kwargs.get("aws_bedrock_runtime_endpoint"),
aws_region_name=aws_region_name,
)
from urllib.parse import quote
# Encode the ARN for use in URL path
encoded_arn = quote(invocation_arn, safe="")
status_url = f"{endpoint_url.rstrip('/')}/async-invoke/{encoded_arn}"
# Prepare headers for GET request
headers = {"Content-Type": "application/json"}
# Use AWSRequest directly for GET requests (get_request_headers hardcodes POST)
try:
from botocore.auth import SigV4Auth
from botocore.awsrequest import AWSRequest
except ImportError:
raise ImportError("Missing boto3 to call bedrock. Run 'pip install boto3'.")
# Create AWSRequest with GET method and encoded URL
request = AWSRequest(
method="GET",
url=status_url,
data=None, # GET request, no body
headers=headers,
)
# Sign the request - SigV4Auth will create canonical string from request URL
sigv4 = SigV4Auth(credentials, "bedrock", aws_region_name)
sigv4.add_auth(request)
# Prepare the request
prepped = request.prepare()
# LOGGING
if logging_obj is not None:
# Create custom curl command for GET request
masked_headers = logging_obj._get_masked_headers(prepped.headers)
formatted_headers = " ".join(
[f"-H '{k}: {v}'" for k, v in masked_headers.items()]
)
custom_curl = "\n\nGET Request Sent from LiteLLM:\n"
custom_curl += "curl -X GET \\\n"
custom_curl += f"{prepped.url} \\\n"
custom_curl += f"{formatted_headers}\n"
logging_obj.pre_call(
input=invocation_arn,
api_key="",
additional_args={
"complete_input_dict": {"invocation_arn": invocation_arn},
"api_base": prepped.url,
"headers": prepped.headers,
"request_str": custom_curl, # Override with custom GET curl command
},
)
# Make the GET request
client = get_async_httpx_client(llm_provider=LlmProviders.BEDROCK)
response = await client.get(
url=prepped.url,
headers=prepped.headers,
)
# LOGGING
if logging_obj is not None:
logging_obj.post_call(
input=invocation_arn,
api_key="",
original_response=response,
additional_args={
"complete_input_dict": {"invocation_arn": invocation_arn}
},
)
# Parse response
if response.status_code == 200:
return response.json()
else:
raise Exception(
f"Failed to get async invoke status: {response.status_code} - {response.text}"
)
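
# Illustrative call-site sketch (assumes AWS credentials are configured in the
# environment; the model name and region below are examples only): "bedrock/..."
# embedding models are routed through the BedrockEmbedding class above via
# litellm's public embedding API.
if __name__ == "__main__":
    import litellm

    response = litellm.embedding(
        model="bedrock/amazon.titan-embed-text-v2:0",
        input=["hello world"],
        dimensions=512,
        aws_region_name="us-west-2",
    )
    print(response.data[0]["embedding"][:4])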

File: twelvelabs_marengo_transformation.py

@@ -0,0 +1,304 @@
"""
Transformation logic from the OpenAI /v1/embeddings format to the Bedrock TwelveLabs Marengo /invoke and /async-invoke formats.
Kept in a separate file to make the transformation logic easy to follow.
Docs - https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-marengo.html
"""
from typing import List, Optional, Union, cast
from litellm.types.llms.bedrock import (
TWELVELABS_EMBEDDING_INPUT_TYPES,
TwelveLabsAsyncInvokeRequest,
TwelveLabsMarengoEmbeddingRequest,
TwelveLabsOutputDataConfig,
TwelveLabsS3Location,
TwelveLabsS3OutputDataConfig,
)
from litellm.types.utils import Embedding, EmbeddingResponse, Usage
class TwelveLabsMarengoEmbeddingConfig:
"""
Reference - https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-marengo.html
Supports text, image, video, and audio inputs.
- InvokeModel: text and image inputs
- StartAsyncInvoke: video, audio, image, and text inputs
"""
def __init__(self) -> None:
pass
def get_supported_openai_params(self) -> List[str]:
return [
"encoding_format",
"textTruncate",
"embeddingOption",
"startSec",
"lengthSec",
"useFixedLengthSec",
"minClipSec",
"input_type",
]
def map_openai_params(
self, non_default_params: dict, optional_params: dict
) -> dict:
for k, v in non_default_params.items():
if k == "encoding_format":
# TwelveLabs doesn't have encoding_format, but we can map it to embeddingOption
if v == "float":
optional_params["embeddingOption"] = ["visual-text", "visual-image"]
elif k == "textTruncate":
optional_params["textTruncate"] = v
elif k == "embeddingOption":
optional_params["embeddingOption"] = v
elif k == "input_type":
# Map input_type to inputType for Bedrock
optional_params["inputType"] = v
elif k in ["startSec", "lengthSec", "useFixedLengthSec", "minClipSec"]:
optional_params[k] = v
return optional_params
def _extract_bucket_owner_from_params(self, inference_params: dict) -> str:
"""
Extract bucket owner from inference parameters.
"""
return inference_params.get("bucketOwner", "")
def _is_s3_url(self, input: str) -> bool:
"""Check if input is an S3 URL."""
return input.startswith("s3://")
def _transform_request(
self,
input: str,
inference_params: dict,
async_invoke_route: bool = False,
model_id: Optional[str] = None,
output_s3_uri: Optional[str] = None,
) -> Union[TwelveLabsMarengoEmbeddingRequest, TwelveLabsAsyncInvokeRequest]:
"""
Transform OpenAI-style input to TwelveLabs Marengo format/async-invoke format.
Supports:
- Text inputs (for both invoke and async-invoke)
- Image inputs (for both invoke and async-invoke)
- Video inputs (async-invoke only)
- Audio inputs (async-invoke only)
- S3 URLs for all media types (async-invoke only)
"""
# Get input_type or default to "text"
input_type = cast(
TWELVELABS_EMBEDDING_INPUT_TYPES,
inference_params.get("inputType")
or inference_params.get("input_type")
or "text",
)
# Validate that async-invoke is used for video/audio
if input_type in ["video", "audio"] and not async_invoke_route:
raise ValueError(
f"Input type '{input_type}' requires async_invoke route. "
f"Use model format: 'bedrock/async_invoke/model_id'"
)
transformed_request: TwelveLabsMarengoEmbeddingRequest = {
"inputType": input_type
}
if input_type == "text":
transformed_request["inputText"] = input
# Set default textTruncate if not specified
if "textTruncate" not in inference_params:
transformed_request["textTruncate"] = "end"
elif input_type in ["image", "video", "audio"]:
if self._is_s3_url(input):
# S3 URL input
s3_location: TwelveLabsS3Location = {"uri": input}
bucket_owner = self._extract_bucket_owner_from_params(inference_params)
if bucket_owner:
s3_location["bucketOwner"] = bucket_owner
transformed_request["mediaSource"] = {"s3Location": s3_location}
else:
# Base64 encoded input
if input.startswith("data:"):
# Extract base64 data from data URL
b64_str = input.split(",", 1)[1] if "," in input else input
else:
# Direct base64 string
from litellm.utils import get_base64_str
b64_str = get_base64_str(input)
transformed_request["mediaSource"] = {"base64String": b64_str}
# Apply any additional inference parameters
for k, v in inference_params.items():
if k not in [
"inputType",
"input_type", # Exclude both camelCase and snake_case
"inputText",
"mediaSource",
"bucketOwner", # Don't include bucketOwner in the request
]: # Don't override core fields
transformed_request[k] = v # type: ignore
# If async invoke route, wrap in the async invoke format
if async_invoke_route and model_id:
return self._wrap_async_invoke_request(
model_input=transformed_request,
model_id=model_id,
output_s3_uri=output_s3_uri,
)
return transformed_request
def _wrap_async_invoke_request(
self,
model_input: TwelveLabsMarengoEmbeddingRequest,
model_id: str,
output_s3_uri: Optional[str] = None,
) -> TwelveLabsAsyncInvokeRequest:
"""
Wrap the transformed request in the correct AWS Bedrock async invoke format.
Args:
model_input: The transformed TwelveLabs Marengo embedding request
model_id: The model identifier (without async_invoke prefix)
output_s3_uri: Optional S3 URI for output data config
Returns:
TwelveLabsAsyncInvokeRequest: The wrapped async invoke request
"""
import urllib.parse
# Clean the model ID
unquoted_model_id = urllib.parse.unquote(model_id)
if unquoted_model_id.startswith("async_invoke/"):
unquoted_model_id = unquoted_model_id.replace("async_invoke/", "")
# Validate that the S3 URI is not empty
if not output_s3_uri or output_s3_uri.strip() == "":
raise ValueError("output_s3_uri cannot be empty for async invoke requests")
return TwelveLabsAsyncInvokeRequest(
modelId=unquoted_model_id,
modelInput=model_input,
outputDataConfig=TwelveLabsOutputDataConfig(
s3OutputDataConfig=TwelveLabsS3OutputDataConfig(s3Uri=output_s3_uri)
),
)
def _transform_response(
self, response_list: List[dict], model: str
) -> EmbeddingResponse:
"""
Transform TwelveLabs response to OpenAI format.
Handles the actual TwelveLabs response format: {"data": [{"embedding": [...]}]}
"""
embeddings: List[Embedding] = []
total_tokens = 0
for response in response_list:
# TwelveLabs response format has a "data" field containing the embeddings
if "data" in response and isinstance(response["data"], list):
for item in response["data"]:
if "embedding" in item:
# Single embedding response
embedding = Embedding(
embedding=item["embedding"],
index=len(embeddings),
object="embedding",
)
embeddings.append(embedding)
# Estimate token count (rough approximation)
if "inputTextTokenCount" in item:
total_tokens += item["inputTextTokenCount"]
else:
# Rough estimate: 1 token per 4 characters for text, or use embedding size
total_tokens += len(item["embedding"]) // 4
elif "embedding" in response:
# Direct embedding response (fallback for other formats)
embedding = Embedding(
embedding=response["embedding"],
index=len(embeddings),
object="embedding",
)
embeddings.append(embedding)
# Estimate token count (rough approximation)
if "inputTextTokenCount" in response:
total_tokens += response["inputTextTokenCount"]
else:
# Rough estimate: 1 token per 4 characters for text
total_tokens += len(response.get("inputText", "")) // 4
elif "embeddings" in response:
# Multiple embeddings response (from video/audio)
for i, emb in enumerate(response["embeddings"]):
embedding = Embedding(
embedding=emb["embedding"],
index=len(embeddings),
object="embedding",
)
embeddings.append(embedding)
total_tokens += len(emb["embedding"]) // 4 # Rough estimate
usage = Usage(prompt_tokens=total_tokens, total_tokens=total_tokens)
return EmbeddingResponse(data=embeddings, model=model, usage=usage)
def _transform_async_invoke_response(
self, response: dict, model: str
) -> EmbeddingResponse:
"""
Transform async invoke response (invocation ARN) to OpenAI format.
AWS async invoke returns:
{
"invocationArn": "arn:aws:bedrock:us-east-1:123456789012:async-invoke/abc123"
}
We transform this to a placeholder embedding response (empty embedding
vector, zero usage) and expose the invocation ARN via hidden params
(`_invocation_arn`) so callers can poll the async job for completion.
"""
invocation_arn = response.get("invocationArn", "")
# Create a placeholder embedding object for the job
embedding = Embedding(
embedding=[], # Empty embedding for async jobs
index=0,
object="embedding",
)
# Create usage object (empty for async jobs)
usage = Usage(prompt_tokens=0, total_tokens=0)
# Create hidden params with job ID
from litellm.types.llms.base import HiddenParams
hidden_params = HiddenParams()
setattr(hidden_params, "_invocation_arn", invocation_arn)
return EmbeddingResponse(
data=[embedding],
model=model,
usage=usage,
hidden_params=hidden_params,
)
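
# Minimal usage sketch: text goes through the synchronous invoke shape, while
# video must use the async route and gets wrapped for StartAsyncInvoke. The
# model ID and S3 URIs below are examples only.
if __name__ == "__main__":
    config = TwelveLabsMarengoEmbeddingConfig()
    text_request = config._transform_request(input="hello world", inference_params={})
    # text_request == {"inputType": "text", "inputText": "hello world",
    #                  "textTruncate": "end"}
    wrapped = config._transform_request(
        input="s3://my-bucket/clip.mp4",
        inference_params={"input_type": "video"},
        async_invoke_route=True,
        model_id="async_invoke/twelvelabs.marengo-embed-2-7-v1:0",
        output_s3_uri="s3://my-bucket/output/",
    )
    # wrapped == {
    #     "modelId": "twelvelabs.marengo-embed-2-7-v1:0",
    #     "modelInput": {"inputType": "video",
    #                    "mediaSource": {"s3Location": {"uri": "s3://my-bucket/clip.mp4"}}},
    #     "outputDataConfig": {"s3OutputDataConfig": {"s3Uri": "s3://my-bucket/output/"}},
    # }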