chore: initial public snapshot for github upload
This commit is contained in:
@@ -0,0 +1,183 @@
|
||||
"""
|
||||
Vertex AI BGE (BAAI General Embedding) Configuration
|
||||
|
||||
BGE models deployed on Vertex AI require different input/output format:
|
||||
- Request: Use "prompt" instead of "content" as the input field
|
||||
- Response: Embeddings are returned directly as arrays, not wrapped in objects
|
||||
|
||||
Model name handling:
|
||||
- Model names like "bge/endpoint_id" are automatically transformed in common_utils._get_vertex_url()
|
||||
- This module focuses on request/response transformation only
|
||||
"""
|
||||
|
||||
from typing import List, Optional, Union
|
||||
|
||||
from litellm.types.utils import EmbeddingResponse, Usage
|
||||
|
||||
from .types import (
|
||||
EmbeddingParameters,
|
||||
TaskType,
|
||||
TextEmbeddingBGEInput,
|
||||
VertexEmbeddingRequest,
|
||||
)
|
||||
|
||||
|
||||
class VertexBGEConfig:
    """
    Configuration and transformation logic for BGE (BAAI General Embedding)
    models deployed on Vertex AI.

    BGE models differ from standard Vertex embedding models in two ways:
    - Request: the input field is named "prompt" instead of "content".
    - Response: embeddings are returned directly as arrays in "predictions",
      not wrapped in objects.

    Supported model patterns (after provider split in main.py):
    - "bge-small-en-v1.5" (model name)
    - "bge/204379420394258432" (endpoint ID pattern)

    Note: Model name transformation (bge/ -> numeric ID) is handled automatically
    in common_utils._get_vertex_url(). This class focuses on request/response
    format only.
    """

    @staticmethod
    def is_bge_model(model: str) -> bool:
        """
        Check if the model is a BGE (BAAI General Embedding) model.

        After provider split in main.py, supports:
        - "bge-small-en-v1.5" (model name)
        - "bge/204379420394258432" (endpoint ID pattern)

        Args:
            model: The model name after provider split

        Returns:
            bool: True if the model is a BGE model
        """
        # A single substring test covers both patterns: the previous explicit
        # startswith("bge/") check was redundant, since any "bge/..." string
        # also contains "bge".
        return "bge" in model.lower()

    @staticmethod
    def transform_request(
        input: Union[list, str], optional_params: dict, model: str
    ) -> VertexEmbeddingRequest:
        """
        Transforms an OpenAI request to a Vertex BGE embedding request.

        BGE models use "prompt" instead of "content" as the input field.

        Args:
            input: The input text(s) to embed
            optional_params: Optional parameters for the request
            model: The model name

        Returns:
            VertexEmbeddingRequest: The transformed request
        """
        vertex_request: VertexEmbeddingRequest = VertexEmbeddingRequest()
        vertex_text_embedding_input_list: List[TextEmbeddingBGEInput] = []
        task_type: Optional[TaskType] = optional_params.get("task_type")
        title = optional_params.get("title")

        # Normalize a single string into a list for uniform processing.
        if isinstance(input, str):
            input = [input]

        for text in input:
            embedding_input = VertexBGEConfig._create_embedding_input(
                prompt=text, task_type=task_type, title=title
            )
            vertex_text_embedding_input_list.append(embedding_input)

        vertex_request["instances"] = vertex_text_embedding_input_list
        vertex_request["parameters"] = EmbeddingParameters(**optional_params)

        return vertex_request

    @staticmethod
    def _create_embedding_input(
        prompt: str,
        task_type: Optional[TaskType] = None,
        title: Optional[str] = None,
    ) -> TextEmbeddingBGEInput:
        """
        Creates a TextEmbeddingBGEInput object for BGE models.

        BGE models use "prompt" instead of "content" as the input field.

        Args:
            prompt: The prompt to be embedded
            task_type: The type of task to be performed
            title: The title of the document to be embedded

        Returns:
            TextEmbeddingBGEInput: A TextEmbeddingBGEInput object
        """
        text_embedding_input = TextEmbeddingBGEInput(prompt=prompt)
        # Only include optional keys when provided, so they are omitted from
        # the serialized request rather than sent as null.
        if task_type is not None:
            text_embedding_input["task_type"] = task_type
        if title is not None:
            text_embedding_input["title"] = title
        return text_embedding_input

    @staticmethod
    def transform_response(
        response: dict, model: str, model_response: EmbeddingResponse
    ) -> EmbeddingResponse:
        """
        Transforms a Vertex BGE embedding response to OpenAI format.

        BGE models return embeddings directly as arrays in predictions:
        {
            "predictions": [
                [0.002, 0.021, ...],
                [0.003, 0.022, ...]
            ]
        }

        Args:
            response: The raw response from Vertex AI
            model: The model name
            model_response: The EmbeddingResponse object to populate

        Returns:
            EmbeddingResponse: The transformed response in OpenAI format

        Raises:
            KeyError: If response doesn't contain 'predictions'
            ValueError: If predictions is not a list or contains invalid data
        """
        if "predictions" not in response:
            raise KeyError("Response missing 'predictions' field")

        _predictions = response["predictions"]

        if not isinstance(_predictions, list):
            raise ValueError(
                f"Expected 'predictions' to be a list, got {type(_predictions)}"
            )

        embedding_response = []
        # BGE models don't return token counts, so we estimate or set to 0
        input_tokens = 0

        for idx, embedding_values in enumerate(_predictions):
            # Each prediction must be a bare array of floats (see docstring).
            if not isinstance(embedding_values, list):
                raise ValueError(
                    f"Expected embedding at index {idx} to be a list, got {type(embedding_values)}"
                )

            embedding_response.append(
                {
                    "object": "embedding",
                    "index": idx,
                    "embedding": embedding_values,
                }
            )

        model_response.object = "list"
        model_response.data = embedding_response
        model_response.model = model
        usage = Usage(
            prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
        )
        setattr(model_response, "usage", usage)
        return model_response
|
||||
@@ -0,0 +1,232 @@
|
||||
from typing import Literal, Optional, Union
|
||||
|
||||
import httpx
|
||||
|
||||
import litellm
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObject
|
||||
from litellm.llms.custom_httpx.http_handler import (
|
||||
AsyncHTTPHandler,
|
||||
HTTPHandler,
|
||||
_get_httpx_client,
|
||||
get_async_httpx_client,
|
||||
)
|
||||
from litellm.llms.vertex_ai.vertex_ai_non_gemini import VertexAIError
|
||||
from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
|
||||
from litellm.types.llms.vertex_ai import *
|
||||
from litellm.types.utils import EmbeddingResponse
|
||||
|
||||
from .types import *
|
||||
|
||||
|
||||
class VertexEmbedding(VertexBase):
    """
    Handler for Vertex AI embedding calls (sync and async).

    Flow for both paths: resolve credentials and the request URL via
    VertexBase helpers, transform the OpenAI-style input into Vertex's
    request format, POST it, then convert the raw Vertex response back
    into an OpenAI-compatible EmbeddingResponse.
    """

    def __init__(self) -> None:
        super().__init__()

    def embedding(
        self,
        model: str,
        input: Union[list, str],
        print_verbose,
        model_response: EmbeddingResponse,
        optional_params: dict,
        logging_obj: LiteLLMLoggingObject,
        custom_llm_provider: Literal[
            "vertex_ai", "vertex_ai_beta", "gemini"
        ],  # if it's vertex_ai or gemini (google ai studio)
        timeout: Optional[Union[float, httpx.Timeout]],
        api_key: Optional[str] = None,
        encoding=None,
        aembedding: Optional[bool] = False,
        api_base: Optional[str] = None,
        client: Optional[Union[AsyncHTTPHandler, HTTPHandler]] = None,
        vertex_project: Optional[str] = None,
        vertex_location: Optional[str] = None,
        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
        gemini_api_key: Optional[str] = None,
        extra_headers: Optional[dict] = None,
    ) -> EmbeddingResponse:
        """
        Synchronous embedding entry point.

        When ``aembedding`` is True, delegates to :meth:`async_embedding`
        and returns its coroutine (hence the ``type: ignore`` below — the
        declared return type only holds for the sync path).

        Args:
            model: Vertex model name (after provider split).
            input: Single string or list of strings to embed.
            print_verbose: Verbose-print callable (unused here; kept for interface parity).
            model_response: EmbeddingResponse object to populate.
            optional_params: Provider-specific params; may carry
                "use_psc_endpoint_format" and v1beta1 feature flags.
            logging_obj: litellm logging object for pre/post-call hooks.
            custom_llm_provider: Which Vertex/Gemini provider flavor is in use.
            timeout: Per-request timeout passed to the HTTP client.

        Returns:
            EmbeddingResponse: OpenAI-format embedding response.

        Raises:
            VertexAIError: On HTTP error status or request timeout.
        """
        if aembedding is True:
            return self.async_embedding(  # type: ignore
                model=model,
                input=input,
                logging_obj=logging_obj,
                model_response=model_response,
                optional_params=optional_params,
                encoding=encoding,
                custom_llm_provider=custom_llm_provider,
                timeout=timeout,
                api_base=api_base,
                vertex_project=vertex_project,
                vertex_location=vertex_location,
                vertex_credentials=vertex_credentials,
                gemini_api_key=gemini_api_key,
                extra_headers=extra_headers,
            )

        should_use_v1beta1_features = self.is_using_v1beta1_features(
            optional_params=optional_params
        )

        # Resolve credentials first; _ensure_access_token may also infer the
        # project id from the credentials when none was passed explicitly.
        _auth_header, vertex_project = self._ensure_access_token(
            credentials=vertex_credentials,
            project_id=vertex_project,
            custom_llm_provider=custom_llm_provider,
        )
        # Extract use_psc_endpoint_format from optional_params
        use_psc_endpoint_format = optional_params.get("use_psc_endpoint_format", False)

        auth_header, api_base = self._get_token_and_url(
            model=model,
            gemini_api_key=gemini_api_key,
            auth_header=_auth_header,
            vertex_project=vertex_project,
            vertex_location=vertex_location,
            vertex_credentials=vertex_credentials,
            stream=False,
            custom_llm_provider=custom_llm_provider,
            api_base=api_base,
            should_use_v1beta1_features=should_use_v1beta1_features,
            mode="embedding",
            use_psc_endpoint_format=use_psc_endpoint_format,
        )
        headers = self.set_headers(auth_header=auth_header, extra_headers=extra_headers)
        # Build the Vertex-format request body (handles BGE / fine-tuned /
        # standard model formats internally).
        vertex_request: VertexEmbeddingRequest = litellm.vertexAITextEmbeddingConfig.transform_openai_request_to_vertex_embedding_request(
            input=input, optional_params=optional_params, model=model
        )

        _client_params = {}
        if timeout:
            _client_params["timeout"] = timeout
        # Reuse a caller-supplied sync client only if it is the right type;
        # otherwise build a fresh one.
        if client is None or not isinstance(client, HTTPHandler):
            client = _get_httpx_client(params=_client_params)
        else:
            client = client  # type: ignore
        ## LOGGING
        logging_obj.pre_call(
            input=vertex_request,
            api_key="",
            additional_args={
                "complete_input_dict": vertex_request,
                "api_base": api_base,
                "headers": headers,
            },
        )

        try:
            response = client.post(url=api_base, headers=headers, json=vertex_request)  # type: ignore
            response.raise_for_status()
        except httpx.HTTPStatusError as err:
            error_code = err.response.status_code
            raise VertexAIError(status_code=error_code, message=err.response.text)
        except httpx.TimeoutException:
            raise VertexAIError(status_code=408, message="Timeout error occurred.")

        _json_response = response.json()
        ## LOGGING POST-CALL
        logging_obj.post_call(
            input=input, api_key=None, original_response=_json_response
        )

        # Convert the raw Vertex predictions into OpenAI embedding format.
        model_response = (
            litellm.vertexAITextEmbeddingConfig.transform_vertex_response_to_openai(
                response=_json_response, model=model, model_response=model_response
            )
        )

        return model_response

    async def async_embedding(
        self,
        model: str,
        input: Union[list, str],
        model_response: EmbeddingResponse,
        logging_obj: LiteLLMLoggingObject,
        optional_params: dict,
        custom_llm_provider: Literal[
            "vertex_ai", "vertex_ai_beta", "gemini"
        ],  # if it's vertex_ai or gemini (google ai studio)
        timeout: Optional[Union[float, httpx.Timeout]],
        api_base: Optional[str] = None,
        client: Optional[AsyncHTTPHandler] = None,
        vertex_project: Optional[str] = None,
        vertex_location: Optional[str] = None,
        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
        gemini_api_key: Optional[str] = None,
        extra_headers: Optional[dict] = None,
        encoding=None,
    ) -> EmbeddingResponse:
        """
        Async embedding implementation

        Mirrors :meth:`embedding` step for step (auth -> URL -> transform ->
        POST -> transform response), using the async token fetch and async
        HTTP client. Keep the two paths in sync when changing either.

        Raises:
            VertexAIError: On HTTP error status or request timeout.
        """
        should_use_v1beta1_features = self.is_using_v1beta1_features(
            optional_params=optional_params
        )
        _auth_header, vertex_project = await self._ensure_access_token_async(
            credentials=vertex_credentials,
            project_id=vertex_project,
            custom_llm_provider=custom_llm_provider,
        )
        # Extract use_psc_endpoint_format from optional_params
        use_psc_endpoint_format = optional_params.get("use_psc_endpoint_format", False)

        auth_header, api_base = self._get_token_and_url(
            model=model,
            gemini_api_key=gemini_api_key,
            auth_header=_auth_header,
            vertex_project=vertex_project,
            vertex_location=vertex_location,
            vertex_credentials=vertex_credentials,
            stream=False,
            custom_llm_provider=custom_llm_provider,
            api_base=api_base,
            should_use_v1beta1_features=should_use_v1beta1_features,
            mode="embedding",
            use_psc_endpoint_format=use_psc_endpoint_format,
        )
        headers = self.set_headers(auth_header=auth_header, extra_headers=extra_headers)
        # Build the Vertex-format request body (handles BGE / fine-tuned /
        # standard model formats internally).
        vertex_request: VertexEmbeddingRequest = litellm.vertexAITextEmbeddingConfig.transform_openai_request_to_vertex_embedding_request(
            input=input, optional_params=optional_params, model=model
        )

        _async_client_params = {}
        if timeout:
            _async_client_params["timeout"] = timeout
        # Reuse a caller-supplied async client only if it is the right type;
        # otherwise obtain one from litellm's shared client factory.
        if client is None or not isinstance(client, AsyncHTTPHandler):
            client = get_async_httpx_client(
                params=_async_client_params, llm_provider=litellm.LlmProviders.VERTEX_AI
            )
        else:
            client = client  # type: ignore
        ## LOGGING
        logging_obj.pre_call(
            input=vertex_request,
            api_key="",
            additional_args={
                "complete_input_dict": vertex_request,
                "api_base": api_base,
                "headers": headers,
            },
        )

        try:
            response = await client.post(api_base, headers=headers, json=vertex_request)  # type: ignore
            response.raise_for_status()
        except httpx.HTTPStatusError as err:
            error_code = err.response.status_code
            raise VertexAIError(status_code=error_code, message=err.response.text)
        except httpx.TimeoutException:
            raise VertexAIError(status_code=408, message="Timeout error occurred.")

        _json_response = response.json()
        ## LOGGING POST-CALL
        logging_obj.post_call(
            input=input, api_key=None, original_response=_json_response
        )

        # Convert the raw Vertex predictions into OpenAI embedding format.
        model_response = (
            litellm.vertexAITextEmbeddingConfig.transform_vertex_response_to_openai(
                response=_json_response, model=model, model_response=model_response
            )
        )

        return model_response
|
||||
@@ -0,0 +1,285 @@
|
||||
import types
|
||||
from typing import List, Literal, Optional, Union
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from litellm.types.utils import EmbeddingResponse, Usage
|
||||
|
||||
from .types import *
|
||||
|
||||
|
||||
class VertexAITextEmbeddingConfig(BaseModel):
    """
    Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#TextEmbeddingInput

    Config + request/response transformation for Vertex AI text embeddings.
    Dispatches to three request formats: standard text-embedding models,
    BGE models (via VertexBGEConfig), and fine-tuned endpoints (numeric
    model IDs).

    Args:
        auto_truncate: Optional(bool) If True, will truncate input text to fit within the model's max input length.
        task_type: Optional(str) The type of task to be performed. The default is "RETRIEVAL_QUERY".
        title: Optional(str) The title of the document to be embedded. (only valid with task_type=RETRIEVAL_DOCUMENT).
    """

    auto_truncate: Optional[bool] = None
    task_type: Optional[
        Literal[
            "RETRIEVAL_QUERY",
            "RETRIEVAL_DOCUMENT",
            "SEMANTIC_SIMILARITY",
            "CLASSIFICATION",
            "CLUSTERING",
            "QUESTION_ANSWERING",
            "FACT_VERIFICATION",
        ]
    ] = None
    title: Optional[str] = None

    def __init__(
        self,
        auto_truncate: Optional[bool] = None,
        task_type: Optional[
            Literal[
                "RETRIEVAL_QUERY",
                "RETRIEVAL_DOCUMENT",
                "SEMANTIC_SIMILARITY",
                "CLASSIFICATION",
                "CLUSTERING",
                "QUESTION_ANSWERING",
                "FACT_VERIFICATION",
            ]
        ] = None,
        title: Optional[str] = None,
    ) -> None:
        # NOTE: sets the provided values on the *class*, not the instance,
        # so they are shared across all instances — this is the established
        # litellm config-class pattern; get_config() reads them back below.
        locals_ = locals().copy()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        """Return the non-None, non-callable class attributes as a dict
        (i.e. the config values set via __init__)."""
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }

    def get_supported_openai_params(self):
        """OpenAI embedding params this provider can map (see map_openai_params)."""
        return ["dimensions"]

    def map_openai_params(
        self, non_default_params: dict, optional_params: dict, kwargs: dict
    ):
        """
        Map supported OpenAI params onto Vertex param names.

        - "dimensions" -> "outputDimensionality"
        - kwargs["input_type"] (if present) is popped and mapped to "task_type"

        Returns:
            Tuple of (optional_params, kwargs), both possibly mutated.
        """
        for param, value in non_default_params.items():
            if param == "dimensions":
                optional_params["outputDimensionality"] = value

        if "input_type" in kwargs:
            optional_params["task_type"] = kwargs.pop("input_type")
        return optional_params, kwargs

    def get_mapped_special_auth_params(self) -> dict:
        """
        Common auth params across bedrock/vertex_ai/azure/watsonx
        """
        return {"project": "vertex_project", "region_name": "vertex_location"}

    def map_special_auth_params(self, non_default_params: dict, optional_params: dict):
        """Translate generic auth params (e.g. "project") into their
        Vertex-specific names inside optional_params."""
        mapped_params = self.get_mapped_special_auth_params()

        for param, value in non_default_params.items():
            if param in mapped_params:
                optional_params[mapped_params[param]] = value
        return optional_params

    def transform_openai_request_to_vertex_embedding_request(
        self, input: Union[list, str], optional_params: dict, model: str
    ) -> VertexEmbeddingRequest:
        """
        Transforms an openai request to a vertex embedding request.

        Dispatch order matters: purely-numeric model names are treated as
        fine-tuned endpoint IDs first, then BGE models, then the standard
        text-embedding format.
        """
        # Import here to avoid circular import issues with litellm.__init__
        from litellm.llms.vertex_ai.vertex_embeddings.bge import VertexBGEConfig

        if model.isdigit():
            return self._transform_openai_request_to_fine_tuned_embedding_request(
                input, optional_params, model
            )
        if VertexBGEConfig.is_bge_model(model):
            return VertexBGEConfig.transform_request(
                input=input, optional_params=optional_params, model=model
            )

        vertex_request: VertexEmbeddingRequest = VertexEmbeddingRequest()
        vertex_text_embedding_input_list: List[TextEmbeddingInput] = []
        task_type: Optional[TaskType] = optional_params.get("task_type")
        title = optional_params.get("title")

        if isinstance(input, str):
            input = [input]  # Convert single string to list for uniform processing

        for text in input:
            embedding_input = self.create_embedding_input(
                content=text, task_type=task_type, title=title
            )
            vertex_text_embedding_input_list.append(embedding_input)

        vertex_request["instances"] = vertex_text_embedding_input_list
        vertex_request["parameters"] = EmbeddingParameters(**optional_params)

        return vertex_request

    def _transform_openai_request_to_fine_tuned_embedding_request(
        self, input: Union[list, str], optional_params: dict, model: str
    ) -> VertexEmbeddingRequest:
        """
        Transforms an openai request to a vertex fine-tuned embedding request.

        Vertex Doc: https://console.cloud.google.com/vertex-ai/model-garden?hl=en&project=adroit-crow-413218&pageState=(%22galleryStateKey%22:(%22f%22:(%22g%22:%5B%5D,%22o%22:%5B%5D),%22s%22:%22%22))
        Sample Request:

        ```json
        {
            "instances" : [
                {
                    "inputs": "How would the Future of AI in 10 Years look?",
                    "parameters": {
                        "max_new_tokens": 128,
                        "temperature": 1.0,
                        "top_p": 0.9,
                        "top_k": 10
                    }
                }
            ]
        }
        ```
        """
        vertex_request: VertexEmbeddingRequest = VertexEmbeddingRequest()
        vertex_text_embedding_input_list: List[TextEmbeddingFineTunedInput] = []
        if isinstance(input, str):
            input = [input]  # Convert single string to list for uniform processing

        for text in input:
            embedding_input = TextEmbeddingFineTunedInput(inputs=text)
            vertex_text_embedding_input_list.append(embedding_input)

        vertex_request["instances"] = vertex_text_embedding_input_list
        vertex_request["parameters"] = TextEmbeddingFineTunedParameters(
            **optional_params
        )
        # Remove 'shared_session' from parameters if present
        # (internal litellm kwarg that must not be sent to the endpoint)
        if (
            vertex_request["parameters"] is not None
            and "shared_session" in vertex_request["parameters"]
        ):
            del vertex_request["parameters"]["shared_session"]  # type: ignore[typeddict-item]

        return vertex_request

    def create_embedding_input(
        self,
        content: str,
        task_type: Optional[TaskType] = None,
        title: Optional[str] = None,
    ) -> TextEmbeddingInput:
        """
        Creates a TextEmbeddingInput object.

        Vertex requires a List of TextEmbeddingInput objects. This helper function creates a single TextEmbeddingInput object.

        Args:
            content (str): The content to be embedded.
            task_type (Optional[TaskType]): The type of task to be performed.
            title (Optional[str]): The title of the document to be embedded.

        Returns:
            TextEmbeddingInput: A TextEmbeddingInput object.
        """
        text_embedding_input = TextEmbeddingInput(content=content)
        # Optional keys are only added when provided so they are omitted
        # from the serialized request rather than sent as null.
        if task_type is not None:
            text_embedding_input["task_type"] = task_type
        if title is not None:
            text_embedding_input["title"] = title
        return text_embedding_input

    def transform_vertex_response_to_openai(
        self, response: dict, model: str, model_response: EmbeddingResponse
    ) -> EmbeddingResponse:
        """
        Transforms a vertex embedding response to an openai response.

        Dispatches to the fine-tuned-model transform for numeric model IDs
        and to VertexBGEConfig for BGE models; otherwise expects the
        standard shape where each prediction is
        {"embeddings": {"values": [...], "statistics": {"token_count": N}}}.
        """
        if model.isdigit():
            return self._transform_vertex_response_to_openai_for_fine_tuned_models(
                response, model, model_response
            )

        # Import here to avoid circular import issues with litellm.__init__
        from litellm.llms.vertex_ai.vertex_embeddings.bge import VertexBGEConfig

        if VertexBGEConfig.is_bge_model(model):
            return VertexBGEConfig.transform_response(
                response=response, model=model, model_response=model_response
            )

        _predictions = response["predictions"]

        embedding_response = []
        input_tokens: int = 0
        for idx, element in enumerate(_predictions):
            embedding = element["embeddings"]
            embedding_response.append(
                {
                    "object": "embedding",
                    "index": idx,
                    "embedding": embedding["values"],
                }
            )
            # Standard models report per-input token counts in statistics.
            input_tokens += embedding["statistics"]["token_count"]

        model_response.object = "list"
        model_response.data = embedding_response
        model_response.model = model
        usage = Usage(
            prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
        )
        setattr(model_response, "usage", usage)
        return model_response

    def _transform_vertex_response_to_openai_for_fine_tuned_models(
        self, response: dict, model: str, model_response: EmbeddingResponse
    ) -> EmbeddingResponse:
        """
        Transforms a vertex fine-tuned model embedding response to an openai response format.
        """
        _predictions = response["predictions"]

        embedding_response = []
        # For fine-tuned models, we don't get token counts in the response
        input_tokens = 0

        for idx, embedding_values in enumerate(_predictions):
            embedding_response.append(
                {
                    "object": "embedding",
                    "index": idx,
                    "embedding": embedding_values[
                        0
                    ],  # The embedding values are nested one level deeper
                }
            )

        model_response.object = "list"
        model_response.data = embedding_response
        model_response.model = model
        usage = Usage(
            prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
        )
        setattr(model_response, "usage", usage)
        return model_response
|
||||
@@ -0,0 +1,74 @@
|
||||
"""
|
||||
Types for Vertex Embeddings Requests
|
||||
"""
|
||||
|
||||
from enum import Enum
|
||||
from typing import List, Optional, Union
|
||||
|
||||
from typing_extensions import TypedDict
|
||||
|
||||
|
||||
class TaskType(str, Enum):
    """Allowed values for the ``task_type`` field of a Vertex embedding input.

    Inherits from ``str`` so members serialize directly as their string
    values in the JSON request body.
    """

    RETRIEVAL_QUERY = "RETRIEVAL_QUERY"
    RETRIEVAL_DOCUMENT = "RETRIEVAL_DOCUMENT"
    SEMANTIC_SIMILARITY = "SEMANTIC_SIMILARITY"
    CLASSIFICATION = "CLASSIFICATION"
    CLUSTERING = "CLUSTERING"
    QUESTION_ANSWERING = "QUESTION_ANSWERING"
    FACT_VERIFICATION = "FACT_VERIFICATION"
    CODE_RETRIEVAL_QUERY = "CODE_RETRIEVAL_QUERY"
|
||||
|
||||
|
||||
class TextEmbeddingInput(TypedDict, total=False):
    """One instance for standard Vertex text-embedding models.

    All keys are optional (total=False) so absent values are omitted from
    the serialized request instead of being sent as null.
    """

    content: str  # the text to embed
    task_type: Optional[TaskType]  # embedding task hint; see TaskType
    title: Optional[str]  # document title (pairs with RETRIEVAL_DOCUMENT task -- TODO confirm)
|
||||
|
||||
|
||||
class TextEmbeddingBGEInput(TypedDict, total=False):
    """One instance for BGE (BAAI General Embedding) models on Vertex AI.

    Identical to TextEmbeddingInput except the text field is named
    "prompt" instead of "content".
    """

    prompt: str  # the text to embed (BGE's name for the input field)
    task_type: Optional[TaskType]  # embedding task hint; see TaskType
    title: Optional[str]  # document title, when provided
|
||||
|
||||
|
||||
# Fine-tuned models require a different input format
# Ref: https://console.cloud.google.com/vertex-ai/model-garden?hl=en&project=adroit-crow-413218&pageState=(%22galleryStateKey%22:(%22f%22:(%22g%22:%5B%5D,%22o%22:%5B%5D),%22s%22:%22%22))
class TextEmbeddingFineTunedInput(TypedDict, total=False):
    """One instance for fine-tuned Vertex endpoints (numeric model IDs)."""

    inputs: str  # the text to embed (fine-tuned endpoints use "inputs")
|
||||
|
||||
|
||||
class TextEmbeddingFineTunedParameters(TypedDict, total=False):
    """Request parameters accepted by fine-tuned Vertex endpoints.

    These are generation-style knobs (see the sample request in the
    fine-tuned transform); all keys are optional.
    """

    max_new_tokens: Optional[int]
    temperature: Optional[float]
    top_p: Optional[float]
    top_k: Optional[int]
|
||||
|
||||
|
||||
class EmbeddingParameters(TypedDict, total=False):
    """Request-level parameters for standard Vertex embedding models."""

    auto_truncate: Optional[bool]  # presumably truncates over-long inputs server-side -- verify against API docs
    output_dimensionality: Optional[int]  # requested size of the returned embedding vectors
|
||||
|
||||
|
||||
class VertexEmbeddingRequest(TypedDict, total=False):
    """Top-level Vertex embedding request body.

    Shape: {"instances": [...], "parameters": {...}}. The instance and
    parameter types must correspond to the same model family (standard,
    BGE, or fine-tuned) — the transform functions enforce this pairing.
    """

    instances: Union[
        List[TextEmbeddingInput],
        List[TextEmbeddingBGEInput],
        List[TextEmbeddingFineTunedInput],
    ]
    parameters: Optional[Union[EmbeddingParameters, TextEmbeddingFineTunedParameters]]
|
||||
|
||||
|
||||
# Example usage:
|
||||
# example_request: VertexEmbeddingRequest = {
|
||||
# "instances": [
|
||||
# {
|
||||
# "content": "I would like embeddings for this text!",
|
||||
# "task_type": "RETRIEVAL_DOCUMENT",
|
||||
# "title": "document title"
|
||||
# }
|
||||
# ],
|
||||
# "parameters": {
|
||||
# "auto_truncate": True,
|
||||
# "output_dimensionality": None
|
||||
# }
|
||||
# }
|
||||
Reference in New Issue
Block a user