chore: initial public snapshot for github upload

2026-03-26 20:06:14 +08:00
commit 0e5ecd930e
3497 changed files with 1586236 additions and 0 deletions
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/llms/vertex_ai/gemini_embeddings/batch_embed_content_handler.py
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/llms/vertex_ai/gemini_embeddings/batch_embed_content_handler.py
@@ -0,0 +1,347 @@
+"""
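+Google AI Studio /batchEmbedContents embeddings endpoint handler.
+
+Multimodal inputs and Vertex AI requests are sent in the single-request
+embedContent format instead of the batch format.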
+"""
+
+import json
+from typing import Any, Dict, Literal, Optional, Union
+
+import httpx
+
+import litellm
+from litellm.llms.custom_httpx.http_handler import (
+    AsyncHTTPHandler,
+    HTTPHandler,
+    get_async_httpx_client,
+)
+from litellm.types.llms.openai import EmbeddingInput
+from litellm.types.llms.vertex_ai import (
+    VertexAIBatchEmbeddingsRequestBody,
+    VertexAIBatchEmbeddingsResponseObject,
+)
+from litellm.types.utils import EmbeddingResponse
+
+from ..gemini.vertex_and_google_ai_studio_gemini import VertexLLM
+from .batch_embed_content_transformation import (
+    _is_file_reference,
+    _is_multimodal_input,
+    process_embed_content_response,
+    process_response,
+    transform_openai_input_gemini_content,
+    transform_openai_input_gemini_embed_content,
+)
+
+
+class GoogleBatchEmbeddings(VertexLLM):
+    """
+    Embedding handler for Gemini models (``gemini`` and ``vertex_ai`` providers).
+
+    Text-only ``gemini`` requests are sent in the batch request format;
+    multimodal inputs and every ``vertex_ai`` request use the single
+    embedContent request format.
+    """
+
+    @staticmethod
+    def _client_timeout_params(timeout: Any) -> Dict[str, Any]:
+        """
+        Build the keyword arguments used to construct an HTTP client.
+
+        Args:
+            timeout: None, a number of seconds, or an ``httpx.Timeout``
+
+        Returns:
+            Dict with a ``timeout`` entry, or an empty dict when ``timeout``
+            is of an unrecognised type (the client then uses its own default)
+        """
+        if timeout is None:
+            # No timeout requested: allow long-running embedding calls but
+            # still fail fast when the connection cannot be established.
+            return {"timeout": httpx.Timeout(timeout=600.0, connect=5.0)}
+        if isinstance(timeout, (float, int)):
+            return {"timeout": httpx.Timeout(timeout)}
+        if isinstance(timeout, httpx.Timeout):
+            # Respect a fully specified timeout instead of silently dropping it.
+            return {"timeout": timeout}
+        return {}
+
+    @staticmethod
+    def _parse_file_metadata(element: str, response: Any) -> Dict[str, str]:
+        """
+        Turn the response of a file lookup into ``{mime_type, uri}``.
+
+        Args:
+            element: The file reference that was looked up (files/...)
+            response: HTTP response returned by the lookup request
+
+        Returns:
+            Dict with ``mime_type`` and ``uri`` keys
+
+        Raises:
+            Exception: If the lookup did not return HTTP 200
+        """
+        if response.status_code != 200:
+            raise Exception(
+                f"Error fetching file {element}: {response.status_code} {response.text}"
+            )
+
+        file_data = response.json()
+        return {
+            "mime_type": file_data.get("mimeType", ""),
+            # Fall back to the reference itself when no uri is returned.
+            "uri": file_data.get("uri", element),
+        }
+
+    @staticmethod
+    def _to_embedding_response(
+        response: Any,
+        use_embed_content: bool,
+        input: EmbeddingInput,
+        model: str,
+        model_response: EmbeddingResponse,
+    ) -> EmbeddingResponse:
+        """
+        Validate the HTTP response and populate ``model_response`` from it.
+
+        Shared by the sync and async paths so both handle responses identically.
+
+        Raises:
+            Exception: If the endpoint did not return HTTP 200
+        """
+        if response.status_code != 200:
+            raise Exception(f"Error: {response.status_code} {response.text}")
+
+        _json_response = response.json()
+
+        if use_embed_content:
+            return process_embed_content_response(
+                input=input,
+                model_response=model_response,
+                model=model,
+                response_json=_json_response,
+            )
+
+        _predictions = VertexAIBatchEmbeddingsResponseObject(**_json_response)  # type: ignore
+        return process_response(
+            model=model,
+            model_response=model_response,
+            _predictions=_predictions,
+            input=input,
+        )
+
+    def _resolve_file_references(
+        self,
+        input: EmbeddingInput,
+        api_key: str,
+        sync_handler: HTTPHandler,
+    ) -> Dict[str, Dict[str, str]]:
+        """
+        Resolve Gemini file references (files/...) to get mime_type and uri.
+
+        Each distinct reference is looked up once, even if it appears several
+        times in ``input``.
+
+        Args:
+            input: EmbeddingInput that may contain file references
+            api_key: Gemini API key
+            sync_handler: HTTP client
+
+        Returns:
+            Dict mapping file name to {mime_type, uri}
+
+        Raises:
+            Exception: If a lookup does not return HTTP 200
+        """
+        input_list = [input] if isinstance(input, str) else input
+        resolved_files: Dict[str, Dict[str, str]] = {}
+
+        for element in input_list:
+            if not (isinstance(element, str) and _is_file_reference(element)):
+                continue
+            if element in resolved_files:
+                # Same file referenced again; the earlier lookup is reused.
+                continue
+
+            response = sync_handler.get(
+                url=f"https://generativelanguage.googleapis.com/v1beta/{element}",
+                headers={"x-goog-api-key": api_key},
+            )
+            resolved_files[element] = self._parse_file_metadata(element, response)
+
+        return resolved_files
+
+    async def _async_resolve_file_references(
+        self,
+        input: EmbeddingInput,
+        api_key: str,
+        async_handler: AsyncHTTPHandler,
+    ) -> Dict[str, Dict[str, str]]:
+        """
+        Async version of _resolve_file_references.
+
+        Args:
+            input: EmbeddingInput that may contain file references
+            api_key: Gemini API key
+            async_handler: Async HTTP client
+
+        Returns:
+            Dict mapping file name to {mime_type, uri}
+
+        Raises:
+            Exception: If a lookup does not return HTTP 200
+        """
+        input_list = [input] if isinstance(input, str) else input
+        resolved_files: Dict[str, Dict[str, str]] = {}
+
+        for element in input_list:
+            if not (isinstance(element, str) and _is_file_reference(element)):
+                continue
+            if element in resolved_files:
+                # Same file referenced again; the earlier lookup is reused.
+                continue
+
+            response = await async_handler.get(
+                url=f"https://generativelanguage.googleapis.com/v1beta/{element}",
+                headers={"x-goog-api-key": api_key},
+            )
+            resolved_files[element] = self._parse_file_metadata(element, response)
+
+        return resolved_files
+
+    def batch_embeddings(
+        self,
+        model: str,
+        input: EmbeddingInput,
+        print_verbose,
+        model_response: EmbeddingResponse,
+        custom_llm_provider: Literal["gemini", "vertex_ai"],
+        optional_params: dict,
+        logging_obj: Any,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+        encoding=None,
+        vertex_project=None,
+        vertex_location=None,
+        vertex_credentials=None,
+        aembedding: Optional[bool] = False,
+        timeout=300,
+        client=None,
+        extra_headers: Optional[dict] = None,
+    ) -> EmbeddingResponse:
+        """
+        Embed ``input`` and populate ``model_response`` with the result.
+
+        Args:
+            model: Model name
+            input: Text, data URIs, GCS URLs or file references to embed
+            print_verbose: Not used by this method
+            model_response: EmbeddingResponse to populate
+            custom_llm_provider: "gemini" or "vertex_ai"
+            optional_params: Extra request parameters (``dimensions`` is
+                translated by the transformation helpers)
+            logging_obj: Object whose ``pre_call`` is invoked before the request
+            api_key: Gemini API key; also used to resolve file references
+            api_base: Optional base URL override
+            encoding: Not used by this method
+            vertex_project / vertex_location / vertex_credentials: Vertex AI
+                settings forwarded to the auth and URL helpers
+            aembedding: When True, return the coroutine from
+                ``async_batch_embeddings`` instead of doing a blocking call
+            timeout: None, seconds, or ``httpx.Timeout``; only applied when
+                this method creates the HTTP client itself
+            client: Optional pre-built sync HTTP client
+            extra_headers: Extra headers merged over the generated ones
+
+        Returns:
+            The populated EmbeddingResponse (or a coroutine resolving to it
+            when ``aembedding`` is True)
+
+        Raises:
+            Exception: If a file lookup or the embedding request does not
+                return HTTP 200
+        """
+        _auth_header, vertex_project = self._ensure_access_token(
+            credentials=vertex_credentials,
+            project_id=vertex_project,
+            custom_llm_provider=custom_llm_provider,
+        )
+
+        optional_params = optional_params or {}
+
+        # Multimodal input and all Vertex AI calls use the embedContent format.
+        is_multimodal = _is_multimodal_input(input)
+        use_embed_content = is_multimodal or (custom_llm_provider == "vertex_ai")
+        mode: Literal["embedding", "batch_embedding"]
+        if use_embed_content:
+            mode = "embedding"
+        else:
+            mode = "batch_embedding"
+
+        auth_header, url = self._get_token_and_url(
+            model=model,
+            auth_header=_auth_header,
+            gemini_api_key=api_key,
+            vertex_project=vertex_project,
+            vertex_location=vertex_location,
+            vertex_credentials=vertex_credentials,
+            stream=None,
+            custom_llm_provider=custom_llm_provider,
+            api_base=api_base,
+            should_use_v1beta1_features=False,
+            mode=mode,
+        )
+
+        headers = {
+            "Content-Type": "application/json; charset=utf-8",
+        }
+        if auth_header is not None:
+            if isinstance(auth_header, dict):
+                headers.update(auth_header)
+            else:
+                headers["Authorization"] = f"Bearer {auth_header}"
+        if extra_headers is not None:
+            headers.update(extra_headers)
+
+        if aembedding is True:
+            return self.async_batch_embeddings(  # type: ignore
+                model=model,
+                api_base=api_base,
+                url=url,
+                data=None,
+                model_response=model_response,
+                timeout=timeout,
+                headers=headers,
+                input=input,
+                use_embed_content=use_embed_content,
+                api_key=api_key,
+                optional_params=optional_params,
+                logging_obj=logging_obj,
+            )
+
+        # The sync client is only created once we know the call is synchronous,
+        # so the async path does not allocate an HTTP client it never uses.
+        if client is None:
+            sync_handler: HTTPHandler = HTTPHandler(
+                **self._client_timeout_params(timeout)
+            )  # type: ignore
+        else:
+            sync_handler = client  # type: ignore
+
+        ### TRANSFORMATION (sync path) ###
+        request_data: Any
+        if use_embed_content:
+            resolved_files: Dict[str, Dict[str, str]] = {}
+            if api_key:
+                resolved_files = self._resolve_file_references(
+                    input=input, api_key=api_key, sync_handler=sync_handler
+                )
+            request_data = transform_openai_input_gemini_embed_content(
+                input=input,
+                model=model,
+                optional_params=optional_params,
+                resolved_files=resolved_files,
+            )
+        else:
+            request_data = transform_openai_input_gemini_content(
+                input=input, model=model, optional_params=optional_params
+            )
+
+        ## LOGGING
+        logging_obj.pre_call(
+            input=input,
+            api_key="",
+            additional_args={
+                "complete_input_dict": request_data,
+                "api_base": url,
+                "headers": headers,
+            },
+        )
+
+        response = sync_handler.post(
+            url=url,
+            headers=headers,
+            data=json.dumps(request_data),
+        )
+
+        return self._to_embedding_response(
+            response=response,
+            use_embed_content=use_embed_content,
+            input=input,
+            model=model,
+            model_response=model_response,
+        )
+
+    async def async_batch_embeddings(
+        self,
+        model: str,
+        api_base: Optional[str],
+        url: str,
+        data: Optional[Union[VertexAIBatchEmbeddingsRequestBody, dict]],
+        model_response: EmbeddingResponse,
+        input: EmbeddingInput,
+        timeout: Optional[Union[float, httpx.Timeout]],
+        headers: Optional[dict] = None,
+        client: Optional[AsyncHTTPHandler] = None,
+        use_embed_content: bool = False,
+        api_key: Optional[str] = None,
+        optional_params: Optional[dict] = None,
+        logging_obj: Optional[Any] = None,
+    ) -> EmbeddingResponse:
+        """
+        Async counterpart of ``batch_embeddings``.
+
+        Args:
+            model: Model name
+            api_base: Not used by this method
+            url: Fully resolved endpoint URL
+            data: Ignored; the request body is always rebuilt from ``input``
+            model_response: EmbeddingResponse to populate
+            input: Text, data URIs, GCS URLs or file references to embed
+            timeout: None, seconds, or ``httpx.Timeout``; only applied when
+                this method obtains the HTTP client itself
+            headers: Request headers (defaults to no headers)
+            client: Optional pre-built async HTTP client
+            use_embed_content: Use the embedContent format instead of the
+                batch format
+            api_key: Gemini API key used to resolve file references
+            optional_params: Extra request parameters
+            logging_obj: Optional object whose ``pre_call`` is invoked before
+                the request
+
+        Returns:
+            The populated EmbeddingResponse
+
+        Raises:
+            Exception: If a file lookup or the embedding request does not
+                return HTTP 200
+        """
+        request_headers = headers if headers is not None else {}
+        request_params = optional_params or {}
+
+        if client is None:
+            # Pass the normalised timeout so that timeout=None gets the long
+            # default rather than being forwarded as None.
+            async_handler: AsyncHTTPHandler = get_async_httpx_client(
+                llm_provider=litellm.LlmProviders.VERTEX_AI,
+                params=self._client_timeout_params(timeout),
+            )
+        else:
+            async_handler = client  # type: ignore
+
+        ### TRANSFORMATION (async path) ###
+        if use_embed_content:
+            resolved_files: Dict[str, Dict[str, str]] = {}
+            if api_key:
+                resolved_files = await self._async_resolve_file_references(
+                    input=input, api_key=api_key, async_handler=async_handler
+                )
+            data = transform_openai_input_gemini_embed_content(
+                input=input,
+                model=model,
+                optional_params=request_params,
+                resolved_files=resolved_files,
+            )
+        else:
+            data = transform_openai_input_gemini_content(
+                input=input, model=model, optional_params=request_params
+            )
+
+        ## LOGGING
+        if logging_obj is not None:
+            logging_obj.pre_call(
+                input=input,
+                api_key="",
+                additional_args={
+                    "complete_input_dict": data,
+                    "api_base": url,
+                    "headers": request_headers,
+                },
+            )
+
+        response = await async_handler.post(
+            url=url,
+            headers=request_headers,
+            data=json.dumps(data),
+        )
+
+        return self._to_embedding_response(
+            response=response,
+            use_embed_content=use_embed_content,
+            input=input,
+            model=model,
+            model_response=model_response,
+        )
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/llms/vertex_ai/gemini_embeddings/batch_embed_content_transformation.py
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/llms/vertex_ai/gemini_embeddings/batch_embed_content_transformation.py
@@ -0,0 +1,308 @@
+"""
+Transformation logic from the OpenAI /v1/embeddings format to the Google AI Studio
+/batchEmbedContents format (text) and the embedContent format (multimodal input).
+
+Why a separate file? To make it easy to see how the transformation works.
+"""
+
+from typing import Dict, List, Optional, Tuple
+
+from litellm.types.llms.openai import EmbeddingInput
+from litellm.types.llms.vertex_ai import (
+    BlobType,
+    ContentType,
+    EmbedContentRequest,
+    FileDataType,
+    PartType,
+    VertexAIBatchEmbeddingsRequestBody,
+    VertexAIBatchEmbeddingsResponseObject,
+)
+from litellm.types.utils import Embedding, EmbeddingResponse, Usage
+from litellm.utils import get_formatted_prompt, token_counter
+
+# MIME types accepted for inline (data URL) embedding inputs.
+# _parse_data_url rejects any data URL whose media type is not listed here.
+SUPPORTED_EMBEDDING_MIME_TYPES = {
+    "image/png",
+    "image/jpeg",
+    "audio/mpeg",
+    "audio/wav",
+    "video/mp4",
+    "video/quicktime",
+    "application/pdf",
+}
+
+
+def _is_file_reference(s: str) -> bool:
+    """Return True when ``s`` is a string with the Gemini file prefix ``files/``."""
+    if not isinstance(s, str):
+        return False
+    return s.startswith("files/")
+
+
+def _is_gcs_url(s: str) -> bool:
+    """Return True when ``s`` is a string with the GCS scheme prefix ``gs://``."""
+    if not isinstance(s, str):
+        return False
+    return s.startswith("gs://")
+
+
+def _infer_mime_type_from_gcs_url(gcs_url: str) -> str:
+    """
+    Map the file extension at the end of a GCS URL to a MIME type.
+
+    The comparison is case-insensitive and looks only at how the URL ends.
+
+    Args:
+        gcs_url: GCS URL such as gs://bucket/path/to/file.png
+
+    Returns:
+        str: MIME type for the matching extension
+
+    Raises:
+        ValueError: If the URL ends with none of the known extensions
+    """
+    extension_to_mime = {
+        ".png": "image/png",
+        ".jpg": "image/jpeg",
+        ".jpeg": "image/jpeg",
+        ".mp3": "audio/mpeg",
+        ".wav": "audio/wav",
+        ".mp4": "video/mp4",
+        ".mov": "video/quicktime",
+        ".pdf": "application/pdf",
+    }
+
+    lowered_url = gcs_url.lower()
+    # First extension (in declaration order) that the URL ends with, if any.
+    matched_mime = next(
+        (
+            mime
+            for suffix, mime in extension_to_mime.items()
+            if lowered_url.endswith(suffix)
+        ),
+        None,
+    )
+    if matched_mime is not None:
+        return matched_mime
+
+    raise ValueError(
+        f"Unable to infer MIME type from GCS URL: {gcs_url}. "
+        f"Supported extensions: {', '.join(extension_to_mime.keys())}"
+    )
+
+
+def _parse_data_url(data_url: str) -> Tuple[str, str]:
+    """
+    Parse a data URL to extract the media type and base64 data.
+
+    The media type is matched case-insensitively (media types are not
+    case-sensitive) and is returned in lower case.
+
+    Args:
+        data_url: Data URL in format: data:image/jpeg;base64,/9j/4AAQ...
+
+    Returns:
+        tuple: (media_type, base64_data)
+            media_type: e.g., "image/jpeg", "video/mp4", "audio/mpeg"
+            base64_data: Everything after the first comma, unchanged
+
+    Raises:
+        ValueError: If data URL format is invalid or MIME type is unsupported
+    """
+    if not data_url.startswith("data:"):
+        raise ValueError(f"Invalid data URL format: {data_url[:50]}...")
+
+    # Only the first comma separates metadata from payload.
+    metadata, separator, base64_data = data_url.partition(",")
+    if not separator:
+        raise ValueError(f"Invalid data URL format (missing comma): {data_url[:50]}...")
+
+    # metadata looks like "data:<media type>[;param...]"; keep the media type
+    # only and normalise it so "IMAGE/PNG" is treated the same as "image/png".
+    media_type = metadata[len("data:") :].split(";", 1)[0].strip().lower()
+
+    if media_type not in SUPPORTED_EMBEDDING_MIME_TYPES:
+        raise ValueError(
+            f"Unsupported MIME type for embedding: {media_type}. "
+            f"Supported types: {', '.join(sorted(SUPPORTED_EMBEDDING_MIME_TYPES))}"
+        )
+
+    return media_type, base64_data
+
+
+def _is_multimodal_input(input: EmbeddingInput) -> bool:
+    """
+    Tell whether any element of the input refers to non-text content.
+
+    A string element counts as multimodal when it is a base64 data URI
+    (starts with ``data:`` and contains ``;base64,``), a Gemini file
+    reference, or a GCS URL. Non-string elements are ignored.
+
+    Args:
+        input: EmbeddingInput (a single string or a list)
+
+    Returns:
+        bool: True if at least one element is multimodal, else False
+    """
+    candidates = [input] if isinstance(input, str) else input
+
+    return any(
+        isinstance(item, str)
+        and (
+            (item.startswith("data:") and ";base64," in item)
+            or _is_file_reference(item)
+            or _is_gcs_url(item)
+        )
+        for item in candidates
+    )
+
+
+def transform_openai_input_gemini_content(
+    input: EmbeddingInput, model: str, optional_params: dict
+) -> VertexAIBatchEmbeddingsRequestBody:
+    """
+    Build the batch request body: one embed request per input element.
+
+    Each element is placed in a single text part. A ``dimensions`` entry in
+    ``optional_params`` is sent as ``outputDimensionality``; every other
+    entry is copied onto each request unchanged.
+
+    Args:
+        input: A single string or a list of elements to embed
+        model: Model name (sent as ``models/<model>``)
+        optional_params: Additional per-request parameters
+
+    Returns:
+        VertexAIBatchEmbeddingsRequestBody holding the list of requests
+    """
+    request_params = optional_params.copy()
+    if "dimensions" in request_params:
+        request_params["outputDimensionality"] = request_params.pop("dimensions")
+
+    qualified_model = "models/{}".format(model)
+
+    # A bare string is treated as a batch of one.
+    texts = [input] if isinstance(input, str) else input
+
+    requests: List[EmbedContentRequest] = [
+        EmbedContentRequest(
+            model=qualified_model,
+            content=ContentType(parts=[PartType(text=text)]),
+            **request_params,
+        )
+        for text in texts
+    ]
+
+    return VertexAIBatchEmbeddingsRequestBody(requests=requests)
+
+
+def transform_openai_input_gemini_embed_content(
+    input: EmbeddingInput,
+    model: str,
+    optional_params: dict,
+    resolved_files: Optional[Dict[str, Dict[str, str]]] = None,
+) -> dict:
+    """
+    Build an embedContent request body whose parts mirror the input elements.
+
+    Each string element becomes one part: base64 data URIs become inline
+    data, GCS URLs and resolved file references become file data, and
+    anything else becomes text.
+
+    Args:
+        input: EmbeddingInput (str or List[str]) with text, data URIs, GCS URLs
+            or file references
+        model: Model name (not included in the returned body)
+        optional_params: Additional parameters; ``dimensions`` is sent as
+            ``outputDimensionality``
+        resolved_files: Dict mapping file names (files/abc) to {mime_type, uri}
+
+    Returns:
+        dict: Request body with ``content`` plus the extra parameters
+
+    Raises:
+        ValueError: If an element is not a string, a data URL or GCS URL is
+            unsupported, or a file reference is missing from ``resolved_files``
+    """
+    file_lookup = resolved_files or {}
+
+    extra_params = optional_params.copy()
+    if "dimensions" in extra_params:
+        extra_params["outputDimensionality"] = extra_params.pop("dimensions")
+
+    elements = [input] if isinstance(input, str) else input
+    parts: List[PartType] = []
+
+    for item in elements:
+        if not isinstance(item, str):
+            raise ValueError(f"Unsupported input type: {type(item)}")
+
+        # Inline base64 payload.
+        if item.startswith("data:") and ";base64," in item:
+            mime_type, base64_data = _parse_data_url(item)
+            blob: BlobType = {"mime_type": mime_type, "data": base64_data}
+            parts.append(PartType(inline_data=blob))
+            continue
+
+        # GCS object: MIME type comes from the file extension.
+        if _is_gcs_url(item):
+            gcs_file: FileDataType = {
+                "mime_type": _infer_mime_type_from_gcs_url(item),
+                "file_uri": item,
+            }
+            parts.append(PartType(file_data=gcs_file))
+            continue
+
+        # Uploaded file: metadata must have been resolved by the caller.
+        if _is_file_reference(item):
+            if item not in file_lookup:
+                raise ValueError(f"File reference {item} not resolved")
+            file_info = file_lookup[item]
+            uploaded_file: FileDataType = {
+                "mime_type": file_info["mime_type"],
+                "file_uri": file_info["uri"],
+            }
+            parts.append(PartType(file_data=uploaded_file))
+            continue
+
+        parts.append(PartType(text=item))
+
+    request_body: dict = {"content": ContentType(parts=parts)}
+    request_body.update(extra_params)
+    return request_body
+
+
+def process_embed_content_response(
+    input: EmbeddingInput,
+    model_response: EmbeddingResponse,
+    model: str,
+    response_json: dict,
+) -> EmbeddingResponse:
+    """
+    Fill ``model_response`` from an embedContent response (one embedding).
+
+    Usage is reported as zero prompt tokens for multimodal input; for plain
+    text the prompt tokens are counted locally with ``token_counter``.
+
+    Args:
+        input: Original input
+        model_response: EmbeddingResponse to populate (mutated and returned)
+        model: Model name
+        response_json: Raw JSON response from the embedContent endpoint
+
+    Returns:
+        EmbeddingResponse containing a single embedding at index 0
+
+    Raises:
+        ValueError: If the response has no ``embedding`` field
+    """
+    if "embedding" not in response_json:
+        raise ValueError(
+            f"embedContent response missing 'embedding' field: {response_json}"
+        )
+
+    values = response_json["embedding"]["values"]
+    model_response.data = [Embedding(embedding=values, index=0, object="embedding")]
+    model_response.model = model
+
+    prompt_tokens = 0
+    if not _is_multimodal_input(input):
+        # Text-only input: count tokens from the formatted prompt.
+        formatted = get_formatted_prompt(data={"input": input}, call_type="embedding")
+        prompt_tokens = token_counter(model=model, text=formatted)
+
+    model_response.usage = Usage(
+        prompt_tokens=prompt_tokens, total_tokens=prompt_tokens
+    )
+    return model_response
+
+
+def process_response(
+    input: EmbeddingInput,
+    model_response: EmbeddingResponse,
+    model: str,
+    _predictions: VertexAIBatchEmbeddingsResponseObject,
+) -> EmbeddingResponse:
+    """
+    Fill ``model_response`` from a batch embeddings response.
+
+    Args:
+        input: Original input, used to count prompt tokens locally
+        model_response: EmbeddingResponse to populate (mutated and returned)
+        model: Model name
+        _predictions: Parsed batch response holding an ``embeddings`` list
+
+    Returns:
+        EmbeddingResponse with one embedding per entry in the batch response,
+        each carrying its position in that list as ``index``
+    """
+    # Each embedding gets its own position so callers can match results to
+    # inputs; previously every entry was labelled index 0.
+    openai_embeddings: List[Embedding] = [
+        Embedding(
+            embedding=embedding["values"],
+            index=position,
+            object="embedding",
+        )
+        for position, embedding in enumerate(_predictions["embeddings"])
+    ]
+
+    model_response.data = openai_embeddings
+    model_response.model = model
+
+    input_text = get_formatted_prompt(data={"input": input}, call_type="embedding")
+    prompt_tokens = token_counter(model=model, text=input_text)
+    model_response.usage = Usage(
+        prompt_tokens=prompt_tokens, total_tokens=prompt_tokens
+    )
+
+    return model_response