chore: initial public snapshot for github upload

This commit is contained in:
Your Name
2026-03-26 20:06:14 +08:00
commit 0e5ecd930e
3497 changed files with 1586236 additions and 0 deletions

View File

@@ -0,0 +1,183 @@
"""
Vertex AI BGE (BAAI General Embedding) Configuration
BGE models deployed on Vertex AI require different input/output format:
- Request: Use "prompt" instead of "content" as the input field
- Response: Embeddings are returned directly as arrays, not wrapped in objects
Model name handling:
- Model names like "bge/endpoint_id" are automatically transformed in common_utils._get_vertex_url()
- This module focuses on request/response transformation only
"""
from typing import List, Optional, Union
from litellm.types.utils import EmbeddingResponse, Usage
from .types import (
EmbeddingParameters,
TaskType,
TextEmbeddingBGEInput,
VertexEmbeddingRequest,
)
class VertexBGEConfig:
    """
    Request/response transformation for BGE (BAAI General Embedding) models
    deployed on Vertex AI.

    BGE endpoints differ from the standard Vertex embedding API in two ways:
    - the per-instance input field is named "prompt" rather than "content"
    - predictions come back as bare float arrays, not wrapped objects

    Model-name handling (e.g. "bge/<endpoint_id>" -> numeric endpoint ID) is
    performed in common_utils._get_vertex_url(); this class only reshapes
    payloads.

    Supported model patterns (after provider split in main.py):
    - "bge-small-en-v1.5" (model name)
    - "bge/204379420394258432" (endpoint ID pattern)
    """

    @staticmethod
    def is_bge_model(model: str) -> bool:
        """
        Return True when ``model`` names a BGE (BAAI General Embedding) model.

        Matches both plain model names ("bge-small-en-v1.5") and endpoint
        patterns ("bge/204379420394258432").

        Args:
            model: The model name after provider split.

        Returns:
            bool: True if the model is a BGE model.
        """
        # A "bge/" endpoint prefix necessarily contains "bge", so a single
        # substring test covers both supported patterns.
        return "bge" in model.lower()

    @staticmethod
    def transform_request(
        input: Union[list, str], optional_params: dict, model: str
    ) -> VertexEmbeddingRequest:
        """
        Convert an OpenAI-style embedding request into the BGE Vertex format.

        BGE models use "prompt" instead of "content" as the input field.

        Args:
            input: The input text(s) to embed.
            optional_params: Optional parameters; ``task_type`` and ``title``
                are copied onto every instance.
            model: The model name (unused here; kept for interface parity).

        Returns:
            VertexEmbeddingRequest: The transformed request.
        """
        texts = [input] if isinstance(input, str) else input
        task_type: Optional[TaskType] = optional_params.get("task_type")
        title = optional_params.get("title")

        instances: List[TextEmbeddingBGEInput] = [
            VertexBGEConfig._create_embedding_input(
                prompt=text, task_type=task_type, title=title
            )
            for text in texts
        ]

        request: VertexEmbeddingRequest = VertexEmbeddingRequest()
        request["instances"] = instances
        request["parameters"] = EmbeddingParameters(**optional_params)
        return request

    @staticmethod
    def _create_embedding_input(
        prompt: str,
        task_type: Optional[TaskType] = None,
        title: Optional[str] = None,
    ) -> TextEmbeddingBGEInput:
        """
        Build a single BGE instance dict, omitting optional fields left unset.

        Args:
            prompt: The prompt to be embedded.
            task_type: The type of task to be performed.
            title: The title of the document to be embedded.

        Returns:
            TextEmbeddingBGEInput: A TextEmbeddingBGEInput object.
        """
        entry = TextEmbeddingBGEInput(prompt=prompt)
        for key, value in (("task_type", task_type), ("title", title)):
            if value is not None:
                entry[key] = value  # type: ignore[literal-required]
        return entry

    @staticmethod
    def transform_response(
        response: dict, model: str, model_response: EmbeddingResponse
    ) -> EmbeddingResponse:
        """
        Convert a raw BGE Vertex response into the OpenAI embedding format.

        BGE models return embeddings directly as arrays in predictions:
            {"predictions": [[0.002, 0.021, ...], [0.003, 0.022, ...]]}

        Args:
            response: The raw response from Vertex AI.
            model: The model name.
            model_response: The EmbeddingResponse object to populate.

        Returns:
            EmbeddingResponse: The transformed response in OpenAI format.

        Raises:
            KeyError: If response doesn't contain 'predictions'.
            ValueError: If predictions is not a list or contains invalid data.
        """
        if "predictions" not in response:
            raise KeyError("Response missing 'predictions' field")
        predictions = response["predictions"]
        if not isinstance(predictions, list):
            raise ValueError(
                f"Expected 'predictions' to be a list, got {type(predictions)}"
            )

        data = []
        for idx, embedding_values in enumerate(predictions):
            if not isinstance(embedding_values, list):
                raise ValueError(
                    f"Expected embedding at index {idx} to be a list, got {type(embedding_values)}"
                )
            data.append(
                {
                    "object": "embedding",
                    "index": idx,
                    "embedding": embedding_values,
                }
            )

        # BGE endpoints do not report token counts, so usage is zeroed out.
        input_tokens = 0
        model_response.object = "list"
        model_response.data = data
        model_response.model = model
        setattr(
            model_response,
            "usage",
            Usage(
                prompt_tokens=input_tokens,
                completion_tokens=0,
                total_tokens=input_tokens,
            ),
        )
        return model_response

View File

@@ -0,0 +1,232 @@
from typing import Literal, Optional, Union
import httpx
import litellm
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObject
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
_get_httpx_client,
get_async_httpx_client,
)
from litellm.llms.vertex_ai.vertex_ai_non_gemini import VertexAIError
from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
from litellm.types.llms.vertex_ai import *
from litellm.types.utils import EmbeddingResponse
from .types import *
class VertexEmbedding(VertexBase):
    """HTTP transport layer for Vertex AI embedding calls (sync and async).

    Responsibilities:
    - resolve the access token / project id via VertexBase credential helpers
    - build the endpoint URL (including PSC-style endpoints when requested)
    - transform the OpenAI-shaped request via litellm.vertexAITextEmbeddingConfig
    - POST the payload and map the JSON reply back onto ``model_response``
    """

    def __init__(self) -> None:
        super().__init__()

    def embedding(
        self,
        model: str,
        input: Union[list, str],
        print_verbose,
        model_response: EmbeddingResponse,
        optional_params: dict,
        logging_obj: LiteLLMLoggingObject,
        custom_llm_provider: Literal[
            "vertex_ai", "vertex_ai_beta", "gemini"
        ],  # if it's vertex_ai or gemini (google ai studio)
        timeout: Optional[Union[float, httpx.Timeout]],
        api_key: Optional[str] = None,
        encoding=None,
        aembedding: Optional[bool] = False,
        api_base: Optional[str] = None,
        client: Optional[Union[AsyncHTTPHandler, HTTPHandler]] = None,
        vertex_project: Optional[str] = None,
        vertex_location: Optional[str] = None,
        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
        gemini_api_key: Optional[str] = None,
        extra_headers: Optional[dict] = None,
    ) -> EmbeddingResponse:
        """Synchronous embedding entry point.

        When ``aembedding`` is True this delegates to :meth:`async_embedding`
        and returns its coroutine (the upstream caller awaits it, hence the
        ``# type: ignore`` on the return).

        Raises:
            VertexAIError: on a non-2xx HTTP response or a request timeout.
        """
        if aembedding is True:
            return self.async_embedding(  # type: ignore
                model=model,
                input=input,
                logging_obj=logging_obj,
                model_response=model_response,
                optional_params=optional_params,
                encoding=encoding,
                custom_llm_provider=custom_llm_provider,
                timeout=timeout,
                api_base=api_base,
                vertex_project=vertex_project,
                vertex_location=vertex_location,
                vertex_credentials=vertex_credentials,
                gemini_api_key=gemini_api_key,
                extra_headers=extra_headers,
            )

        should_use_v1beta1_features = self.is_using_v1beta1_features(
            optional_params=optional_params
        )
        # Resolve credentials first; this may also fill in the project id.
        _auth_header, vertex_project = self._ensure_access_token(
            credentials=vertex_credentials,
            project_id=vertex_project,
            custom_llm_provider=custom_llm_provider,
        )
        # Extract use_psc_endpoint_format from optional_params
        use_psc_endpoint_format = optional_params.get("use_psc_endpoint_format", False)
        auth_header, api_base = self._get_token_and_url(
            model=model,
            gemini_api_key=gemini_api_key,
            auth_header=_auth_header,
            vertex_project=vertex_project,
            vertex_location=vertex_location,
            vertex_credentials=vertex_credentials,
            stream=False,
            custom_llm_provider=custom_llm_provider,
            api_base=api_base,
            should_use_v1beta1_features=should_use_v1beta1_features,
            mode="embedding",
            use_psc_endpoint_format=use_psc_endpoint_format,
        )
        headers = self.set_headers(auth_header=auth_header, extra_headers=extra_headers)
        # OpenAI-shaped input -> Vertex "instances"/"parameters" payload.
        vertex_request: VertexEmbeddingRequest = litellm.vertexAITextEmbeddingConfig.transform_openai_request_to_vertex_embedding_request(
            input=input, optional_params=optional_params, model=model
        )

        _client_params = {}
        if timeout:
            _client_params["timeout"] = timeout
        # Reuse the caller's sync client if one was supplied; otherwise build one.
        if client is None or not isinstance(client, HTTPHandler):
            client = _get_httpx_client(params=_client_params)
        else:
            client = client  # type: ignore
        ## LOGGING
        logging_obj.pre_call(
            input=vertex_request,
            api_key="",
            additional_args={
                "complete_input_dict": vertex_request,
                "api_base": api_base,
                "headers": headers,
            },
        )

        try:
            response = client.post(url=api_base, headers=headers, json=vertex_request)  # type: ignore
            response.raise_for_status()
        except httpx.HTTPStatusError as err:
            # Surface the provider's status code and body to the caller.
            error_code = err.response.status_code
            raise VertexAIError(status_code=error_code, message=err.response.text)
        except httpx.TimeoutException:
            raise VertexAIError(status_code=408, message="Timeout error occurred.")

        _json_response = response.json()
        ## LOGGING POST-CALL
        logging_obj.post_call(
            input=input, api_key=None, original_response=_json_response
        )

        # Vertex predictions -> OpenAI embedding list on model_response.
        model_response = (
            litellm.vertexAITextEmbeddingConfig.transform_vertex_response_to_openai(
                response=_json_response, model=model, model_response=model_response
            )
        )

        return model_response

    async def async_embedding(
        self,
        model: str,
        input: Union[list, str],
        model_response: EmbeddingResponse,
        logging_obj: LiteLLMLoggingObject,
        optional_params: dict,
        custom_llm_provider: Literal[
            "vertex_ai", "vertex_ai_beta", "gemini"
        ],  # if it's vertex_ai or gemini (google ai studio)
        timeout: Optional[Union[float, httpx.Timeout]],
        api_base: Optional[str] = None,
        client: Optional[AsyncHTTPHandler] = None,
        vertex_project: Optional[str] = None,
        vertex_location: Optional[str] = None,
        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
        gemini_api_key: Optional[str] = None,
        extra_headers: Optional[dict] = None,
        encoding=None,
    ) -> EmbeddingResponse:
        """
        Async embedding implementation.

        Mirrors :meth:`embedding` step for step, but resolves credentials and
        performs the HTTP POST asynchronously.

        Raises:
            VertexAIError: on a non-2xx HTTP response or a request timeout.
        """
        should_use_v1beta1_features = self.is_using_v1beta1_features(
            optional_params=optional_params
        )
        # Async credential resolution; may also fill in the project id.
        _auth_header, vertex_project = await self._ensure_access_token_async(
            credentials=vertex_credentials,
            project_id=vertex_project,
            custom_llm_provider=custom_llm_provider,
        )
        # Extract use_psc_endpoint_format from optional_params
        use_psc_endpoint_format = optional_params.get("use_psc_endpoint_format", False)
        auth_header, api_base = self._get_token_and_url(
            model=model,
            gemini_api_key=gemini_api_key,
            auth_header=_auth_header,
            vertex_project=vertex_project,
            vertex_location=vertex_location,
            vertex_credentials=vertex_credentials,
            stream=False,
            custom_llm_provider=custom_llm_provider,
            api_base=api_base,
            should_use_v1beta1_features=should_use_v1beta1_features,
            mode="embedding",
            use_psc_endpoint_format=use_psc_endpoint_format,
        )
        headers = self.set_headers(auth_header=auth_header, extra_headers=extra_headers)
        # OpenAI-shaped input -> Vertex "instances"/"parameters" payload.
        vertex_request: VertexEmbeddingRequest = litellm.vertexAITextEmbeddingConfig.transform_openai_request_to_vertex_embedding_request(
            input=input, optional_params=optional_params, model=model
        )

        _async_client_params = {}
        if timeout:
            _async_client_params["timeout"] = timeout
        # Reuse the caller's async client if one was supplied; otherwise build one.
        if client is None or not isinstance(client, AsyncHTTPHandler):
            client = get_async_httpx_client(
                params=_async_client_params, llm_provider=litellm.LlmProviders.VERTEX_AI
            )
        else:
            client = client  # type: ignore
        ## LOGGING
        logging_obj.pre_call(
            input=vertex_request,
            api_key="",
            additional_args={
                "complete_input_dict": vertex_request,
                "api_base": api_base,
                "headers": headers,
            },
        )

        try:
            response = await client.post(api_base, headers=headers, json=vertex_request)  # type: ignore
            response.raise_for_status()
        except httpx.HTTPStatusError as err:
            # Surface the provider's status code and body to the caller.
            error_code = err.response.status_code
            raise VertexAIError(status_code=error_code, message=err.response.text)
        except httpx.TimeoutException:
            raise VertexAIError(status_code=408, message="Timeout error occurred.")

        _json_response = response.json()
        ## LOGGING POST-CALL
        logging_obj.post_call(
            input=input, api_key=None, original_response=_json_response
        )

        # Vertex predictions -> OpenAI embedding list on model_response.
        model_response = (
            litellm.vertexAITextEmbeddingConfig.transform_vertex_response_to_openai(
                response=_json_response, model=model, model_response=model_response
            )
        )

        return model_response

View File

@@ -0,0 +1,285 @@
import types
from typing import List, Literal, Optional, Union
from pydantic import BaseModel
from litellm.types.utils import EmbeddingResponse, Usage
from .types import *
class VertexAITextEmbeddingConfig(BaseModel):
    """
    Request/response transformation config for Vertex AI text-embedding models.

    Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#TextEmbeddingInput

    Args:
        auto_truncate: Optional(bool) If True, will truncate input text to fit within the model's max input length.
        task_type: Optional(str) The type of task to be performed. The default is "RETRIEVAL_QUERY".
        title: Optional(str) The title of the document to be embedded. (only valid with task_type=RETRIEVAL_DOCUMENT).
    """

    auto_truncate: Optional[bool] = None
    task_type: Optional[
        Literal[
            "RETRIEVAL_QUERY",
            "RETRIEVAL_DOCUMENT",
            "SEMANTIC_SIMILARITY",
            "CLASSIFICATION",
            "CLUSTERING",
            "QUESTION_ANSWERING",
            "FACT_VERIFICATION",
        ]
    ] = None
    title: Optional[str] = None

    def __init__(
        self,
        auto_truncate: Optional[bool] = None,
        task_type: Optional[
            Literal[
                "RETRIEVAL_QUERY",
                "RETRIEVAL_DOCUMENT",
                "SEMANTIC_SIMILARITY",
                "CLASSIFICATION",
                "CLUSTERING",
                "QUESTION_ANSWERING",
                "FACT_VERIFICATION",
            ]
        ] = None,
        title: Optional[str] = None,
    ) -> None:
        # NOTE(review): this writes the provided values onto the *class*
        # (shared by every instance) and never calls BaseModel.__init__ --
        # presumably the config-registry pattern used across this codebase;
        # confirm before changing.
        locals_ = locals().copy()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        """Return the non-None, non-callable attributes set on the class."""
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }

    def get_supported_openai_params(self):
        """OpenAI embedding params this provider can map (only 'dimensions')."""
        return ["dimensions"]

    def map_openai_params(
        self, non_default_params: dict, optional_params: dict, kwargs: dict
    ):
        """Map OpenAI params onto Vertex params.

        Mutates (and returns) ``optional_params``; pops ``input_type`` out of
        ``kwargs`` and forwards it as Vertex's ``task_type``.
        """
        for param, value in non_default_params.items():
            if param == "dimensions":
                optional_params["outputDimensionality"] = value

        if "input_type" in kwargs:
            optional_params["task_type"] = kwargs.pop("input_type")
        return optional_params, kwargs

    def get_mapped_special_auth_params(self) -> dict:
        """
        Common auth params across bedrock/vertex_ai/azure/watsonx
        """
        return {"project": "vertex_project", "region_name": "vertex_location"}

    def map_special_auth_params(self, non_default_params: dict, optional_params: dict):
        """Copy cross-provider auth params into their Vertex-specific names."""
        mapped_params = self.get_mapped_special_auth_params()

        for param, value in non_default_params.items():
            if param in mapped_params:
                optional_params[mapped_params[param]] = value
        return optional_params

    def transform_openai_request_to_vertex_embedding_request(
        self, input: Union[list, str], optional_params: dict, model: str
    ) -> VertexEmbeddingRequest:
        """
        Transforms an openai request to a vertex embedding request.

        Dispatch order: all-digit model names are treated as fine-tuned
        endpoints, BGE models get the "prompt"-keyed format, everything else
        uses the standard "content"-keyed format.
        """
        # Import here to avoid circular import issues with litellm.__init__
        from litellm.llms.vertex_ai.vertex_embeddings.bge import VertexBGEConfig

        if model.isdigit():
            return self._transform_openai_request_to_fine_tuned_embedding_request(
                input, optional_params, model
            )

        if VertexBGEConfig.is_bge_model(model):
            return VertexBGEConfig.transform_request(
                input=input, optional_params=optional_params, model=model
            )

        vertex_request: VertexEmbeddingRequest = VertexEmbeddingRequest()
        vertex_text_embedding_input_list: List[TextEmbeddingInput] = []
        task_type: Optional[TaskType] = optional_params.get("task_type")
        title = optional_params.get("title")
        if isinstance(input, str):
            input = [input]  # Convert single string to list for uniform processing

        for text in input:
            embedding_input = self.create_embedding_input(
                content=text, task_type=task_type, title=title
            )
            vertex_text_embedding_input_list.append(embedding_input)

        vertex_request["instances"] = vertex_text_embedding_input_list
        vertex_request["parameters"] = EmbeddingParameters(**optional_params)

        return vertex_request

    def _transform_openai_request_to_fine_tuned_embedding_request(
        self, input: Union[list, str], optional_params: dict, model: str
    ) -> VertexEmbeddingRequest:
        """
        Transforms an openai request to a vertex fine-tuned embedding request.

        Fine-tuned endpoints expect "inputs"-keyed instances instead of
        "content"-keyed ones.

        Vertex Doc: https://console.cloud.google.com/vertex-ai/model-garden?hl=en&project=adroit-crow-413218&pageState=(%22galleryStateKey%22:(%22f%22:(%22g%22:%5B%5D,%22o%22:%5B%5D),%22s%22:%22%22))
        Sample Request:

        ```json
        {
            "instances" : [
                {
                    "inputs": "How would the Future of AI in 10 Years look?",
                    "parameters": {
                        "max_new_tokens": 128,
                        "temperature": 1.0,
                        "top_p": 0.9,
                        "top_k": 10
                    }
                }
            ]
        }
        ```
        """
        vertex_request: VertexEmbeddingRequest = VertexEmbeddingRequest()
        vertex_text_embedding_input_list: List[TextEmbeddingFineTunedInput] = []
        if isinstance(input, str):
            input = [input]  # Convert single string to list for uniform processing

        for text in input:
            embedding_input = TextEmbeddingFineTunedInput(inputs=text)
            vertex_text_embedding_input_list.append(embedding_input)

        vertex_request["instances"] = vertex_text_embedding_input_list
        vertex_request["parameters"] = TextEmbeddingFineTunedParameters(
            **optional_params
        )

        # Remove 'shared_session' from parameters if present
        # (internal litellm plumbing that must not reach the provider)
        if (
            vertex_request["parameters"] is not None
            and "shared_session" in vertex_request["parameters"]
        ):
            del vertex_request["parameters"]["shared_session"]  # type: ignore[typeddict-item]

        return vertex_request

    def create_embedding_input(
        self,
        content: str,
        task_type: Optional[TaskType] = None,
        title: Optional[str] = None,
    ) -> TextEmbeddingInput:
        """
        Creates a TextEmbeddingInput object.

        Vertex requires a List of TextEmbeddingInput objects. This helper function creates a single TextEmbeddingInput object.

        Optional fields are only included when set, so they are omitted from
        the serialized request rather than sent as null.

        Args:
            content (str): The content to be embedded.
            task_type (Optional[TaskType]): The type of task to be performed.
            title (Optional[str]): The title of the document to be embedded.

        Returns:
            TextEmbeddingInput: A TextEmbeddingInput object.
        """
        text_embedding_input = TextEmbeddingInput(content=content)
        if task_type is not None:
            text_embedding_input["task_type"] = task_type
        if title is not None:
            text_embedding_input["title"] = title

        return text_embedding_input

    def transform_vertex_response_to_openai(
        self, response: dict, model: str, model_response: EmbeddingResponse
    ) -> EmbeddingResponse:
        """
        Transforms a vertex embedding response to an openai response.

        Dispatches to the fine-tuned / BGE handlers for those model types;
        the standard path expects predictions shaped as
        {"embeddings": {"values": [...], "statistics": {"token_count": N}}}.
        """
        if model.isdigit():
            return self._transform_vertex_response_to_openai_for_fine_tuned_models(
                response, model, model_response
            )

        # Import here to avoid circular import issues with litellm.__init__
        from litellm.llms.vertex_ai.vertex_embeddings.bge import VertexBGEConfig

        if VertexBGEConfig.is_bge_model(model):
            return VertexBGEConfig.transform_response(
                response=response, model=model, model_response=model_response
            )

        _predictions = response["predictions"]

        embedding_response = []
        input_tokens: int = 0
        for idx, element in enumerate(_predictions):
            embedding = element["embeddings"]
            embedding_response.append(
                {
                    "object": "embedding",
                    "index": idx,
                    "embedding": embedding["values"],
                }
            )
            # Standard models report per-input token statistics; accumulate them.
            input_tokens += embedding["statistics"]["token_count"]

        model_response.object = "list"
        model_response.data = embedding_response
        model_response.model = model

        usage = Usage(
            prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
        )
        setattr(model_response, "usage", usage)

        return model_response

    def _transform_vertex_response_to_openai_for_fine_tuned_models(
        self, response: dict, model: str, model_response: EmbeddingResponse
    ) -> EmbeddingResponse:
        """
        Transforms a vertex fine-tuned model embedding response to an openai response format.
        """
        _predictions = response["predictions"]

        embedding_response = []
        # For fine-tuned models, we don't get token counts in the response
        input_tokens = 0

        for idx, embedding_values in enumerate(_predictions):
            embedding_response.append(
                {
                    "object": "embedding",
                    "index": idx,
                    "embedding": embedding_values[
                        0
                    ],  # The embedding values are nested one level deeper
                }
            )

        model_response.object = "list"
        model_response.data = embedding_response
        model_response.model = model

        usage = Usage(
            prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
        )
        setattr(model_response, "usage", usage)

        return model_response

View File

@@ -0,0 +1,74 @@
"""
Types for Vertex Embeddings Requests
"""
from enum import Enum
from typing import List, Optional, Union
from typing_extensions import TypedDict
class TaskType(str, Enum):
    """Embedding task types accepted by the Vertex text-embeddings API."""

    RETRIEVAL_QUERY = "RETRIEVAL_QUERY"
    RETRIEVAL_DOCUMENT = "RETRIEVAL_DOCUMENT"
    SEMANTIC_SIMILARITY = "SEMANTIC_SIMILARITY"
    CLASSIFICATION = "CLASSIFICATION"
    CLUSTERING = "CLUSTERING"
    QUESTION_ANSWERING = "QUESTION_ANSWERING"
    FACT_VERIFICATION = "FACT_VERIFICATION"
    CODE_RETRIEVAL_QUERY = "CODE_RETRIEVAL_QUERY"
class TextEmbeddingInput(TypedDict, total=False):
    """Single instance for standard Vertex text-embedding models."""

    content: str  # text to embed
    task_type: Optional[TaskType]
    title: Optional[str]  # only valid with task_type=RETRIEVAL_DOCUMENT
class TextEmbeddingBGEInput(TypedDict, total=False):
    """Single instance for BGE models, which key the input text as "prompt"."""

    prompt: str  # text to embed ("prompt" instead of "content")
    task_type: Optional[TaskType]
    title: Optional[str]
# Fine-tuned models require a different input format
# Ref: https://console.cloud.google.com/vertex-ai/model-garden?hl=en&project=adroit-crow-413218&pageState=(%22galleryStateKey%22:(%22f%22:(%22g%22:%5B%5D,%22o%22:%5B%5D),%22s%22:%22%22))
class TextEmbeddingFineTunedInput(TypedDict, total=False):
    """Single instance for fine-tuned endpoints, which key the text as "inputs"."""

    inputs: str  # text to embed
class TextEmbeddingFineTunedParameters(TypedDict, total=False):
    """Request parameters accepted by fine-tuned embedding endpoints."""

    max_new_tokens: Optional[int]
    temperature: Optional[float]
    top_p: Optional[float]
    top_k: Optional[int]
class EmbeddingParameters(TypedDict, total=False):
    """Request parameters for standard (and BGE) embedding models."""

    auto_truncate: Optional[bool]  # truncate over-long input instead of erroring
    output_dimensionality: Optional[int]  # requested embedding vector size
class VertexEmbeddingRequest(TypedDict, total=False):
    """Top-level Vertex embedding payload: a list of instances plus parameters.

    The instance type depends on the model family (standard / BGE / fine-tuned).
    """

    instances: Union[
        List[TextEmbeddingInput],
        List[TextEmbeddingBGEInput],
        List[TextEmbeddingFineTunedInput],
    ]
    parameters: Optional[Union[EmbeddingParameters, TextEmbeddingFineTunedParameters]]
# Example usage:
# example_request: VertexEmbeddingRequest = {
# "instances": [
# {
# "content": "I would like embeddings for this text!",
# "task_type": "RETRIEVAL_DOCUMENT",
# "title": "document title"
# }
# ],
# "parameters": {
# "auto_truncate": True,
# "output_dimensionality": None
# }
# }