chore: initial public snapshot for github upload

This commit is contained in:
Your Name
2026-03-26 20:06:14 +08:00
commit 0e5ecd930e
3497 changed files with 1586236 additions and 0 deletions

View File

@@ -0,0 +1,183 @@
"""
Vertex AI BGE (BAAI General Embedding) Configuration
BGE models deployed on Vertex AI require different input/output format:
- Request: Use "prompt" instead of "content" as the input field
- Response: Embeddings are returned directly as arrays, not wrapped in objects
Model name handling:
- Model names like "bge/endpoint_id" are automatically transformed in common_utils._get_vertex_url()
- This module focuses on request/response transformation only
"""
from typing import List, Optional, Union
from litellm.types.utils import EmbeddingResponse, Usage
from .types import (
EmbeddingParameters,
TaskType,
TextEmbeddingBGEInput,
VertexEmbeddingRequest,
)
class VertexBGEConfig:
    """
    Request/response transformation for BGE (BAAI General Embedding) models
    deployed on Vertex AI.

    BGE endpoints differ from the standard Vertex embedding API in two ways:
    - the per-instance input field is named "prompt" rather than "content"
    - predictions come back as bare float arrays, not wrapped objects

    Model-name handling (e.g. "bge/<endpoint_id>" -> numeric endpoint ID) is
    performed in common_utils._get_vertex_url(); this class only reshapes
    payloads.

    Supported model patterns (after provider split in main.py):
    - "bge-small-en-v1.5" (model name)
    - "bge/204379420394258432" (endpoint ID pattern)
    """

    @staticmethod
    def is_bge_model(model: str) -> bool:
        """
        Return True when ``model`` names a BGE (BAAI General Embedding) model.

        Matches both plain model names ("bge-small-en-v1.5") and endpoint
        patterns ("bge/204379420394258432").

        Args:
            model: The model name after provider split.

        Returns:
            bool: True if the model is a BGE model.
        """
        # A "bge/" endpoint prefix necessarily contains "bge", so a single
        # substring test covers both supported patterns.
        return "bge" in model.lower()

    @staticmethod
    def transform_request(
        input: Union[list, str], optional_params: dict, model: str
    ) -> VertexEmbeddingRequest:
        """
        Convert an OpenAI-style embedding request into the BGE Vertex format.

        BGE models use "prompt" instead of "content" as the input field.

        Args:
            input: The input text(s) to embed.
            optional_params: Optional parameters; ``task_type`` and ``title``
                are copied onto every instance.
            model: The model name (unused here; kept for interface parity).

        Returns:
            VertexEmbeddingRequest: The transformed request.
        """
        texts = [input] if isinstance(input, str) else input
        task_type: Optional[TaskType] = optional_params.get("task_type")
        title = optional_params.get("title")

        instances: List[TextEmbeddingBGEInput] = [
            VertexBGEConfig._create_embedding_input(
                prompt=text, task_type=task_type, title=title
            )
            for text in texts
        ]

        request: VertexEmbeddingRequest = VertexEmbeddingRequest()
        request["instances"] = instances
        request["parameters"] = EmbeddingParameters(**optional_params)
        return request

    @staticmethod
    def _create_embedding_input(
        prompt: str,
        task_type: Optional[TaskType] = None,
        title: Optional[str] = None,
    ) -> TextEmbeddingBGEInput:
        """
        Build a single BGE instance dict, omitting optional fields left unset.

        Args:
            prompt: The prompt to be embedded.
            task_type: The type of task to be performed.
            title: The title of the document to be embedded.

        Returns:
            TextEmbeddingBGEInput: A TextEmbeddingBGEInput object.
        """
        entry = TextEmbeddingBGEInput(prompt=prompt)
        for key, value in (("task_type", task_type), ("title", title)):
            if value is not None:
                entry[key] = value  # type: ignore[literal-required]
        return entry

    @staticmethod
    def transform_response(
        response: dict, model: str, model_response: EmbeddingResponse
    ) -> EmbeddingResponse:
        """
        Convert a raw BGE Vertex response into the OpenAI embedding format.

        BGE models return embeddings directly as arrays in predictions:
            {"predictions": [[0.002, 0.021, ...], [0.003, 0.022, ...]]}

        Args:
            response: The raw response from Vertex AI.
            model: The model name.
            model_response: The EmbeddingResponse object to populate.

        Returns:
            EmbeddingResponse: The transformed response in OpenAI format.

        Raises:
            KeyError: If response doesn't contain 'predictions'.
            ValueError: If predictions is not a list or contains invalid data.
        """
        if "predictions" not in response:
            raise KeyError("Response missing 'predictions' field")
        predictions = response["predictions"]
        if not isinstance(predictions, list):
            raise ValueError(
                f"Expected 'predictions' to be a list, got {type(predictions)}"
            )

        data = []
        for idx, embedding_values in enumerate(predictions):
            if not isinstance(embedding_values, list):
                raise ValueError(
                    f"Expected embedding at index {idx} to be a list, got {type(embedding_values)}"
                )
            data.append(
                {
                    "object": "embedding",
                    "index": idx,
                    "embedding": embedding_values,
                }
            )

        # BGE endpoints do not report token counts, so usage is zeroed out.
        input_tokens = 0
        model_response.object = "list"
        model_response.data = data
        model_response.model = model
        setattr(
            model_response,
            "usage",
            Usage(
                prompt_tokens=input_tokens,
                completion_tokens=0,
                total_tokens=input_tokens,
            ),
        )
        return model_response

View File

@@ -0,0 +1,232 @@
from typing import Literal, Optional, Union
import httpx
import litellm
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObject
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
_get_httpx_client,
get_async_httpx_client,
)
from litellm.llms.vertex_ai.vertex_ai_non_gemini import VertexAIError
from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
from litellm.types.llms.vertex_ai import *
from litellm.types.utils import EmbeddingResponse
from .types import *
class VertexEmbedding(VertexBase):
    """HTTP transport layer for Vertex AI embedding calls (sync and async).

    Responsibilities:
    - resolve the access token / project id via VertexBase credential helpers
    - build the endpoint URL (including PSC-style endpoints when requested)
    - transform the OpenAI-shaped request via litellm.vertexAITextEmbeddingConfig
    - POST the payload and map the JSON reply back onto ``model_response``
    """

    def __init__(self) -> None:
        super().__init__()

    def embedding(
        self,
        model: str,
        input: Union[list, str],
        print_verbose,
        model_response: EmbeddingResponse,
        optional_params: dict,
        logging_obj: LiteLLMLoggingObject,
        custom_llm_provider: Literal[
            "vertex_ai", "vertex_ai_beta", "gemini"
        ],  # if it's vertex_ai or gemini (google ai studio)
        timeout: Optional[Union[float, httpx.Timeout]],
        api_key: Optional[str] = None,
        encoding=None,
        aembedding: Optional[bool] = False,
        api_base: Optional[str] = None,
        client: Optional[Union[AsyncHTTPHandler, HTTPHandler]] = None,
        vertex_project: Optional[str] = None,
        vertex_location: Optional[str] = None,
        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
        gemini_api_key: Optional[str] = None,
        extra_headers: Optional[dict] = None,
    ) -> EmbeddingResponse:
        """Synchronous embedding entry point.

        When ``aembedding`` is True this delegates to :meth:`async_embedding`
        and returns its coroutine (the upstream caller awaits it, hence the
        ``# type: ignore`` on the return).

        Raises:
            VertexAIError: on a non-2xx HTTP response or a request timeout.
        """
        if aembedding is True:
            return self.async_embedding(  # type: ignore
                model=model,
                input=input,
                logging_obj=logging_obj,
                model_response=model_response,
                optional_params=optional_params,
                encoding=encoding,
                custom_llm_provider=custom_llm_provider,
                timeout=timeout,
                api_base=api_base,
                vertex_project=vertex_project,
                vertex_location=vertex_location,
                vertex_credentials=vertex_credentials,
                gemini_api_key=gemini_api_key,
                extra_headers=extra_headers,
            )

        should_use_v1beta1_features = self.is_using_v1beta1_features(
            optional_params=optional_params
        )
        # Resolve credentials first; this may also fill in the project id.
        _auth_header, vertex_project = self._ensure_access_token(
            credentials=vertex_credentials,
            project_id=vertex_project,
            custom_llm_provider=custom_llm_provider,
        )
        # Extract use_psc_endpoint_format from optional_params
        use_psc_endpoint_format = optional_params.get("use_psc_endpoint_format", False)
        auth_header, api_base = self._get_token_and_url(
            model=model,
            gemini_api_key=gemini_api_key,
            auth_header=_auth_header,
            vertex_project=vertex_project,
            vertex_location=vertex_location,
            vertex_credentials=vertex_credentials,
            stream=False,
            custom_llm_provider=custom_llm_provider,
            api_base=api_base,
            should_use_v1beta1_features=should_use_v1beta1_features,
            mode="embedding",
            use_psc_endpoint_format=use_psc_endpoint_format,
        )
        headers = self.set_headers(auth_header=auth_header, extra_headers=extra_headers)
        # OpenAI-shaped input -> Vertex "instances"/"parameters" payload.
        vertex_request: VertexEmbeddingRequest = litellm.vertexAITextEmbeddingConfig.transform_openai_request_to_vertex_embedding_request(
            input=input, optional_params=optional_params, model=model
        )

        _client_params = {}
        if timeout:
            _client_params["timeout"] = timeout
        # Reuse the caller's sync client if one was supplied; otherwise build one.
        if client is None or not isinstance(client, HTTPHandler):
            client = _get_httpx_client(params=_client_params)
        else:
            client = client  # type: ignore
        ## LOGGING
        logging_obj.pre_call(
            input=vertex_request,
            api_key="",
            additional_args={
                "complete_input_dict": vertex_request,
                "api_base": api_base,
                "headers": headers,
            },
        )

        try:
            response = client.post(url=api_base, headers=headers, json=vertex_request)  # type: ignore
            response.raise_for_status()
        except httpx.HTTPStatusError as err:
            # Surface the provider's status code and body to the caller.
            error_code = err.response.status_code
            raise VertexAIError(status_code=error_code, message=err.response.text)
        except httpx.TimeoutException:
            raise VertexAIError(status_code=408, message="Timeout error occurred.")

        _json_response = response.json()
        ## LOGGING POST-CALL
        logging_obj.post_call(
            input=input, api_key=None, original_response=_json_response
        )

        # Vertex predictions -> OpenAI embedding list on model_response.
        model_response = (
            litellm.vertexAITextEmbeddingConfig.transform_vertex_response_to_openai(
                response=_json_response, model=model, model_response=model_response
            )
        )

        return model_response

    async def async_embedding(
        self,
        model: str,
        input: Union[list, str],
        model_response: EmbeddingResponse,
        logging_obj: LiteLLMLoggingObject,
        optional_params: dict,
        custom_llm_provider: Literal[
            "vertex_ai", "vertex_ai_beta", "gemini"
        ],  # if it's vertex_ai or gemini (google ai studio)
        timeout: Optional[Union[float, httpx.Timeout]],
        api_base: Optional[str] = None,
        client: Optional[AsyncHTTPHandler] = None,
        vertex_project: Optional[str] = None,
        vertex_location: Optional[str] = None,
        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
        gemini_api_key: Optional[str] = None,
        extra_headers: Optional[dict] = None,
        encoding=None,
    ) -> EmbeddingResponse:
        """
        Async embedding implementation.

        Mirrors :meth:`embedding` step for step, but resolves credentials and
        performs the HTTP POST asynchronously.

        Raises:
            VertexAIError: on a non-2xx HTTP response or a request timeout.
        """
        should_use_v1beta1_features = self.is_using_v1beta1_features(
            optional_params=optional_params
        )
        # Async credential resolution; may also fill in the project id.
        _auth_header, vertex_project = await self._ensure_access_token_async(
            credentials=vertex_credentials,
            project_id=vertex_project,
            custom_llm_provider=custom_llm_provider,
        )
        # Extract use_psc_endpoint_format from optional_params
        use_psc_endpoint_format = optional_params.get("use_psc_endpoint_format", False)
        auth_header, api_base = self._get_token_and_url(
            model=model,
            gemini_api_key=gemini_api_key,
            auth_header=_auth_header,
            vertex_project=vertex_project,
            vertex_location=vertex_location,
            vertex_credentials=vertex_credentials,
            stream=False,
            custom_llm_provider=custom_llm_provider,
            api_base=api_base,
            should_use_v1beta1_features=should_use_v1beta1_features,
            mode="embedding",
            use_psc_endpoint_format=use_psc_endpoint_format,
        )
        headers = self.set_headers(auth_header=auth_header, extra_headers=extra_headers)
        # OpenAI-shaped input -> Vertex "instances"/"parameters" payload.
        vertex_request: VertexEmbeddingRequest = litellm.vertexAITextEmbeddingConfig.transform_openai_request_to_vertex_embedding_request(
            input=input, optional_params=optional_params, model=model
        )

        _async_client_params = {}
        if timeout:
            _async_client_params["timeout"] = timeout
        # Reuse the caller's async client if one was supplied; otherwise build one.
        if client is None or not isinstance(client, AsyncHTTPHandler):
            client = get_async_httpx_client(
                params=_async_client_params, llm_provider=litellm.LlmProviders.VERTEX_AI
            )
        else:
            client = client  # type: ignore
        ## LOGGING
        logging_obj.pre_call(
            input=vertex_request,
            api_key="",
            additional_args={
                "complete_input_dict": vertex_request,
                "api_base": api_base,
                "headers": headers,
            },
        )

        try:
            response = await client.post(api_base, headers=headers, json=vertex_request)  # type: ignore
            response.raise_for_status()
        except httpx.HTTPStatusError as err:
            # Surface the provider's status code and body to the caller.
            error_code = err.response.status_code
            raise VertexAIError(status_code=error_code, message=err.response.text)
        except httpx.TimeoutException:
            raise VertexAIError(status_code=408, message="Timeout error occurred.")

        _json_response = response.json()
        ## LOGGING POST-CALL
        logging_obj.post_call(
            input=input, api_key=None, original_response=_json_response
        )

        # Vertex predictions -> OpenAI embedding list on model_response.
        model_response = (
            litellm.vertexAITextEmbeddingConfig.transform_vertex_response_to_openai(
                response=_json_response, model=model, model_response=model_response
            )
        )

        return model_response

View File

@@ -0,0 +1,285 @@
import types
from typing import List, Literal, Optional, Union
from pydantic import BaseModel
from litellm.types.utils import EmbeddingResponse, Usage
from .types import *
class VertexAITextEmbeddingConfig(BaseModel):
    """
    Request/response transformation config for Vertex AI text-embedding models.

    Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#TextEmbeddingInput

    Args:
        auto_truncate: Optional(bool) If True, will truncate input text to fit within the model's max input length.
        task_type: Optional(str) The type of task to be performed. The default is "RETRIEVAL_QUERY".
        title: Optional(str) The title of the document to be embedded. (only valid with task_type=RETRIEVAL_DOCUMENT).
    """

    auto_truncate: Optional[bool] = None
    task_type: Optional[
        Literal[
            "RETRIEVAL_QUERY",
            "RETRIEVAL_DOCUMENT",
            "SEMANTIC_SIMILARITY",
            "CLASSIFICATION",
            "CLUSTERING",
            "QUESTION_ANSWERING",
            "FACT_VERIFICATION",
        ]
    ] = None
    title: Optional[str] = None

    def __init__(
        self,
        auto_truncate: Optional[bool] = None,
        task_type: Optional[
            Literal[
                "RETRIEVAL_QUERY",
                "RETRIEVAL_DOCUMENT",
                "SEMANTIC_SIMILARITY",
                "CLASSIFICATION",
                "CLUSTERING",
                "QUESTION_ANSWERING",
                "FACT_VERIFICATION",
            ]
        ] = None,
        title: Optional[str] = None,
    ) -> None:
        # NOTE(review): this writes the provided values onto the *class*
        # (shared by every instance) and never calls BaseModel.__init__ --
        # presumably the config-registry pattern used across this codebase;
        # confirm before changing.
        locals_ = locals().copy()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        """Return the non-None, non-callable attributes set on the class."""
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }

    def get_supported_openai_params(self):
        """OpenAI embedding params this provider can map (only 'dimensions')."""
        return ["dimensions"]

    def map_openai_params(
        self, non_default_params: dict, optional_params: dict, kwargs: dict
    ):
        """Map OpenAI params onto Vertex params.

        Mutates (and returns) ``optional_params``; pops ``input_type`` out of
        ``kwargs`` and forwards it as Vertex's ``task_type``.
        """
        for param, value in non_default_params.items():
            if param == "dimensions":
                optional_params["outputDimensionality"] = value

        if "input_type" in kwargs:
            optional_params["task_type"] = kwargs.pop("input_type")
        return optional_params, kwargs

    def get_mapped_special_auth_params(self) -> dict:
        """
        Common auth params across bedrock/vertex_ai/azure/watsonx
        """
        return {"project": "vertex_project", "region_name": "vertex_location"}

    def map_special_auth_params(self, non_default_params: dict, optional_params: dict):
        """Copy cross-provider auth params into their Vertex-specific names."""
        mapped_params = self.get_mapped_special_auth_params()

        for param, value in non_default_params.items():
            if param in mapped_params:
                optional_params[mapped_params[param]] = value
        return optional_params

    def transform_openai_request_to_vertex_embedding_request(
        self, input: Union[list, str], optional_params: dict, model: str
    ) -> VertexEmbeddingRequest:
        """
        Transforms an openai request to a vertex embedding request.

        Dispatch order: all-digit model names are treated as fine-tuned
        endpoints, BGE models get the "prompt"-keyed format, everything else
        uses the standard "content"-keyed format.
        """
        # Import here to avoid circular import issues with litellm.__init__
        from litellm.llms.vertex_ai.vertex_embeddings.bge import VertexBGEConfig

        if model.isdigit():
            return self._transform_openai_request_to_fine_tuned_embedding_request(
                input, optional_params, model
            )

        if VertexBGEConfig.is_bge_model(model):
            return VertexBGEConfig.transform_request(
                input=input, optional_params=optional_params, model=model
            )

        vertex_request: VertexEmbeddingRequest = VertexEmbeddingRequest()
        vertex_text_embedding_input_list: List[TextEmbeddingInput] = []
        task_type: Optional[TaskType] = optional_params.get("task_type")
        title = optional_params.get("title")
        if isinstance(input, str):
            input = [input]  # Convert single string to list for uniform processing

        for text in input:
            embedding_input = self.create_embedding_input(
                content=text, task_type=task_type, title=title
            )
            vertex_text_embedding_input_list.append(embedding_input)

        vertex_request["instances"] = vertex_text_embedding_input_list
        vertex_request["parameters"] = EmbeddingParameters(**optional_params)

        return vertex_request

    def _transform_openai_request_to_fine_tuned_embedding_request(
        self, input: Union[list, str], optional_params: dict, model: str
    ) -> VertexEmbeddingRequest:
        """
        Transforms an openai request to a vertex fine-tuned embedding request.

        Fine-tuned endpoints expect "inputs"-keyed instances instead of
        "content"-keyed ones.

        Vertex Doc: https://console.cloud.google.com/vertex-ai/model-garden?hl=en&project=adroit-crow-413218&pageState=(%22galleryStateKey%22:(%22f%22:(%22g%22:%5B%5D,%22o%22:%5B%5D),%22s%22:%22%22))
        Sample Request:

        ```json
        {
            "instances" : [
                {
                    "inputs": "How would the Future of AI in 10 Years look?",
                    "parameters": {
                        "max_new_tokens": 128,
                        "temperature": 1.0,
                        "top_p": 0.9,
                        "top_k": 10
                    }
                }
            ]
        }
        ```
        """
        vertex_request: VertexEmbeddingRequest = VertexEmbeddingRequest()
        vertex_text_embedding_input_list: List[TextEmbeddingFineTunedInput] = []
        if isinstance(input, str):
            input = [input]  # Convert single string to list for uniform processing

        for text in input:
            embedding_input = TextEmbeddingFineTunedInput(inputs=text)
            vertex_text_embedding_input_list.append(embedding_input)

        vertex_request["instances"] = vertex_text_embedding_input_list
        vertex_request["parameters"] = TextEmbeddingFineTunedParameters(
            **optional_params
        )

        # Remove 'shared_session' from parameters if present
        # (internal litellm plumbing that must not reach the provider)
        if (
            vertex_request["parameters"] is not None
            and "shared_session" in vertex_request["parameters"]
        ):
            del vertex_request["parameters"]["shared_session"]  # type: ignore[typeddict-item]

        return vertex_request

    def create_embedding_input(
        self,
        content: str,
        task_type: Optional[TaskType] = None,
        title: Optional[str] = None,
    ) -> TextEmbeddingInput:
        """
        Creates a TextEmbeddingInput object.

        Vertex requires a List of TextEmbeddingInput objects. This helper function creates a single TextEmbeddingInput object.

        Optional fields are only included when set, so they are omitted from
        the serialized request rather than sent as null.

        Args:
            content (str): The content to be embedded.
            task_type (Optional[TaskType]): The type of task to be performed.
            title (Optional[str]): The title of the document to be embedded.

        Returns:
            TextEmbeddingInput: A TextEmbeddingInput object.
        """
        text_embedding_input = TextEmbeddingInput(content=content)
        if task_type is not None:
            text_embedding_input["task_type"] = task_type
        if title is not None:
            text_embedding_input["title"] = title

        return text_embedding_input

    def transform_vertex_response_to_openai(
        self, response: dict, model: str, model_response: EmbeddingResponse
    ) -> EmbeddingResponse:
        """
        Transforms a vertex embedding response to an openai response.

        Dispatches to the fine-tuned / BGE handlers for those model types;
        the standard path expects predictions shaped as
        {"embeddings": {"values": [...], "statistics": {"token_count": N}}}.
        """
        if model.isdigit():
            return self._transform_vertex_response_to_openai_for_fine_tuned_models(
                response, model, model_response
            )

        # Import here to avoid circular import issues with litellm.__init__
        from litellm.llms.vertex_ai.vertex_embeddings.bge import VertexBGEConfig

        if VertexBGEConfig.is_bge_model(model):
            return VertexBGEConfig.transform_response(
                response=response, model=model, model_response=model_response
            )

        _predictions = response["predictions"]

        embedding_response = []
        input_tokens: int = 0
        for idx, element in enumerate(_predictions):
            embedding = element["embeddings"]
            embedding_response.append(
                {
                    "object": "embedding",
                    "index": idx,
                    "embedding": embedding["values"],
                }
            )
            # Standard models report per-input token statistics; accumulate them.
            input_tokens += embedding["statistics"]["token_count"]

        model_response.object = "list"
        model_response.data = embedding_response
        model_response.model = model

        usage = Usage(
            prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
        )
        setattr(model_response, "usage", usage)

        return model_response

    def _transform_vertex_response_to_openai_for_fine_tuned_models(
        self, response: dict, model: str, model_response: EmbeddingResponse
    ) -> EmbeddingResponse:
        """
        Transforms a vertex fine-tuned model embedding response to an openai response format.
        """
        _predictions = response["predictions"]

        embedding_response = []
        # For fine-tuned models, we don't get token counts in the response
        input_tokens = 0

        for idx, embedding_values in enumerate(_predictions):
            embedding_response.append(
                {
                    "object": "embedding",
                    "index": idx,
                    "embedding": embedding_values[
                        0
                    ],  # The embedding values are nested one level deeper
                }
            )

        model_response.object = "list"
        model_response.data = embedding_response
        model_response.model = model

        usage = Usage(
            prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
        )
        setattr(model_response, "usage", usage)

        return model_response

View File

@@ -0,0 +1,74 @@
"""
Types for Vertex Embeddings Requests
"""
from enum import Enum
from typing import List, Optional, Union
from typing_extensions import TypedDict
class TaskType(str, Enum):
    """Embedding task types accepted by the Vertex text-embeddings API."""

    RETRIEVAL_QUERY = "RETRIEVAL_QUERY"
    RETRIEVAL_DOCUMENT = "RETRIEVAL_DOCUMENT"
    SEMANTIC_SIMILARITY = "SEMANTIC_SIMILARITY"
    CLASSIFICATION = "CLASSIFICATION"
    CLUSTERING = "CLUSTERING"
    QUESTION_ANSWERING = "QUESTION_ANSWERING"
    FACT_VERIFICATION = "FACT_VERIFICATION"
    CODE_RETRIEVAL_QUERY = "CODE_RETRIEVAL_QUERY"
class TextEmbeddingInput(TypedDict, total=False):
    """Single instance for standard Vertex text-embedding models."""

    content: str  # text to embed
    task_type: Optional[TaskType]
    title: Optional[str]  # only valid with task_type=RETRIEVAL_DOCUMENT
class TextEmbeddingBGEInput(TypedDict, total=False):
    """Single instance for BGE models, which key the input text as "prompt"."""

    prompt: str  # text to embed ("prompt" instead of "content")
    task_type: Optional[TaskType]
    title: Optional[str]
# Fine-tuned models require a different input format
# Ref: https://console.cloud.google.com/vertex-ai/model-garden?hl=en&project=adroit-crow-413218&pageState=(%22galleryStateKey%22:(%22f%22:(%22g%22:%5B%5D,%22o%22:%5B%5D),%22s%22:%22%22))
class TextEmbeddingFineTunedInput(TypedDict, total=False):
    """Single instance for fine-tuned endpoints, which key the text as "inputs"."""

    inputs: str  # text to embed
class TextEmbeddingFineTunedParameters(TypedDict, total=False):
    """Request parameters accepted by fine-tuned embedding endpoints."""

    max_new_tokens: Optional[int]
    temperature: Optional[float]
    top_p: Optional[float]
    top_k: Optional[int]
class EmbeddingParameters(TypedDict, total=False):
    """Request parameters for standard (and BGE) embedding models."""

    auto_truncate: Optional[bool]  # truncate over-long input instead of erroring
    output_dimensionality: Optional[int]  # requested embedding vector size
class VertexEmbeddingRequest(TypedDict, total=False):
    """Top-level Vertex embedding payload: a list of instances plus parameters.

    The instance type depends on the model family (standard / BGE / fine-tuned).
    """

    instances: Union[
        List[TextEmbeddingInput],
        List[TextEmbeddingBGEInput],
        List[TextEmbeddingFineTunedInput],
    ]
    parameters: Optional[Union[EmbeddingParameters, TextEmbeddingFineTunedParameters]]
# Example usage:
# example_request: VertexEmbeddingRequest = {
# "instances": [
# {
# "content": "I would like embeddings for this text!",
# "task_type": "RETRIEVAL_DOCUMENT",
# "title": "document title"
# }
# ],
# "parameters": {
# "auto_truncate": True,
# "output_dimensionality": None
# }
# }