chore: initial public snapshot for github upload

2026-03-26 20:06:14 +08:00
commit 0e5ecd930e
3497 changed files with 1586236 additions and 0 deletions
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/llms/vertex_ai/ocr/init.py
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/llms/vertex_ai/ocr/init.py
@@ -0,0 +1,4 @@
+"""Vertex AI OCR module: re-exports VertexAIOCRConfig from the transformation module."""
+from .transformation import VertexAIOCRConfig
+
+# Public API of this package; only the config class imported above is exported.
+__all__ = ["VertexAIOCRConfig"]
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/llms/vertex_ai/ocr/common_utils.py
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/llms/vertex_ai/ocr/common_utils.py
@@ -0,0 +1,41 @@
+"""
+Common utilities for Vertex AI OCR providers.
+
+This module provides routing logic to determine which OCR configuration to use
+based on the model name.
+"""
+
+from typing import TYPE_CHECKING, Optional
+
+if TYPE_CHECKING:
+    from litellm.llms.base_llm.ocr.transformation import BaseOCRConfig
+
+
+def get_vertex_ai_ocr_config(model: str) -> Optional["BaseOCRConfig"]:
+    """
+    Determine which Vertex AI OCR configuration to use based on the model name.
+
+    Vertex AI supports multiple OCR services:
+    - Vertex AI OCR: vertex_ai/<model>
+
+    Args:
+        model: The model name (e.g., "vertex_ai/ocr/<model>")
+
+    Returns:
+        OCR configuration instance for the specified model
+
+    Examples:
+        >>> get_vertex_ai_ocr_config("vertex_ai/deepseek-ai/deepseek-ocr-maas")
+        <VertexAIDeepSeekOCRConfig object>
+
+        >>> get_vertex_ai_ocr_config("vertex_ai/ocr/mistral-ocr-maas")
+        <VertexAIOCRConfig object>
+    """
+    from litellm.llms.vertex_ai.ocr.deepseek_transformation import (
+        VertexAIDeepSeekOCRConfig,
+    )
+    from litellm.llms.vertex_ai.ocr.transformation import VertexAIOCRConfig
+
+    if "deepseek" in model:
+        return VertexAIDeepSeekOCRConfig()
+    return VertexAIOCRConfig()
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/llms/vertex_ai/ocr/deepseek_transformation.py
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/llms/vertex_ai/ocr/deepseek_transformation.py
@@ -0,0 +1,394 @@
+"""
+Vertex AI DeepSeek OCR transformation implementation.
+"""
+import json
+from typing import TYPE_CHECKING, Any, Dict, Optional
+
+import httpx
+
+from litellm._logging import verbose_logger
+from litellm.llms.base_llm.ocr.transformation import (
+    BaseOCRConfig,
+    DocumentType,
+    OCRPage,
+    OCRRequestData,
+    OCRResponse,
+    OCRUsageInfo,
+)
+from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
+
+if TYPE_CHECKING:
+    from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+else:
+    LiteLLMLoggingObj = Any
+
+
+class VertexAIDeepSeekOCRConfig(BaseOCRConfig):
+    """
+    Vertex AI DeepSeek OCR transformation configuration.
+
+    Vertex AI DeepSeek OCR uses the chat completion API format through the openapi endpoint.
+    This transformation converts OCR requests to chat completion format and vice versa.
+    """
+
+    def __init__(self) -> None:
+        """Initialize the base OCR config and create the VertexBase helper."""
+        super().__init__()
+        # Kept on the instance; validate_environment uses it to obtain an access token.
+        self.vertex_base = VertexBase()
+
+    def validate_environment(
+        self,
+        headers: Dict,
+        model: str,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+        litellm_params: Optional[dict] = None,
+        **kwargs,
+    ) -> Dict:
+        """
+        Validate environment and return headers for Vertex AI OCR.
+
+        Vertex AI uses Bearer token authentication with access token from credentials.
+        """
+        # Extract Vertex AI parameters using safe helpers from VertexBase
+        # Use safe_get_* methods that don't mutate litellm_params dict
+        litellm_params = litellm_params or {}
+
+        vertex_project = VertexBase.safe_get_vertex_ai_project(
+            litellm_params=litellm_params
+        )
+        vertex_credentials = VertexBase.safe_get_vertex_ai_credentials(
+            litellm_params=litellm_params
+        )
+
+        # Get access token from Vertex credentials
+        access_token, project_id = self.vertex_base.get_access_token(
+            credentials=vertex_credentials,
+            project_id=vertex_project,
+        )
+
+        headers = {
+            "Authorization": f"Bearer {access_token}",
+            "Content-Type": "application/json",
+            **headers,
+        }
+
+        return headers
+
+    def get_complete_url(
+        self,
+        api_base: Optional[str],
+        model: str,
+        optional_params: dict,
+        litellm_params: Optional[dict] = None,
+        **kwargs,
+    ) -> str:
+        """
+        Get complete URL for Vertex AI DeepSeek OCR endpoint.
+
+        Vertex AI endpoint format:
+        https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/endpoints/openapi/chat/completions
+
+        Args:
+            api_base: Vertex AI API base URL (optional)
+            model: Model name (e.g., "deepseek-ai/deepseek-ocr-maas")
+            optional_params: Optional parameters
+            litellm_params: LiteLLM parameters containing vertex_project, vertex_location
+
+        Returns: Complete URL for Vertex AI OCR endpoint
+        """
+        # Extract Vertex AI parameters using safe helpers from VertexBase
+        # Use safe_get_* methods that don't mutate litellm_params dict
+        litellm_params = litellm_params or {}
+
+        vertex_project = VertexBase.safe_get_vertex_ai_project(
+            litellm_params=litellm_params
+        )
+        vertex_location = VertexBase.safe_get_vertex_ai_location(
+            litellm_params=litellm_params
+        )
+
+        if vertex_project is None:
+            raise ValueError(
+                "Missing vertex_project - Set VERTEXAI_PROJECT environment variable or pass vertex_project parameter"
+            )
+
+        if vertex_location is None:
+            vertex_location = "us-central1"
+
+        # Get API base URL
+        if api_base is None:
+            api_base = "https://aiplatform.googleapis.com"
+
+        # Ensure no trailing slash
+        api_base = api_base.rstrip("/")
+
+        # Vertex AI DeepSeek OCR endpoint format
+        # Format: https://{region}-aiplatform.googleapis.com/v1/projects/{project}/locations/{region}/endpoints/openapi/chat/completions
+        return f"{api_base}/v1/projects/{vertex_project}/locations/{vertex_location}/endpoints/openapi/chat/completions"
+
+    def transform_ocr_request(
+        self,
+        model: str,
+        document: DocumentType,
+        optional_params: dict,
+        headers: dict,
+        **kwargs,
+    ) -> OCRRequestData:
+        """
+        Transform OCR request to chat completion format for Vertex AI DeepSeek OCR.
+
+        Converts OCR document format to chat completion messages format:
+        - Input: {"type": "image_url", "image_url": "gs://..."}
+        - Output: {"model": "deepseek-ai/deepseek-ocr-maas", "messages": [{"role": "user", "content": [{"type": "image_url", "image_url": "gs://..."}]}]}
+
+        Args:
+            model: Model name (e.g., "deepseek-ai/deepseek-ocr-maas")
+            document: Document dict from user (Mistral OCR format)
+            optional_params: Already mapped optional parameters
+            headers: Request headers
+            **kwargs: Additional arguments
+
+        Returns:
+            OCRRequestData with JSON data in chat completion format
+        """
+        verbose_logger.debug(
+            "Vertex AI DeepSeek OCR transform_ocr_request (sync) called"
+        )
+
+        if not isinstance(document, dict):
+            raise ValueError(f"Expected document dict, got {type(document)}")
+
+        # Extract document type and URL
+        doc_type = document.get("type")
+        image_url = None
+        document_url = None
+
+        if doc_type == "image_url":
+            image_url = document.get("image_url", "")
+        elif doc_type == "document_url":
+            document_url = document.get("document_url", "")
+        else:
+            raise ValueError(
+                f"Unsupported document type: {doc_type}. Expected 'image_url' or 'document_url'"
+            )
+
+        # Build chat completion message content
+        content_item = {}
+        if image_url:
+            content_item = {"type": "image_url", "image_url": image_url}
+        elif document_url:
+            # For document URLs, we use image_url type as well (Vertex AI supports both)
+            content_item = {"type": "image_url", "image_url": document_url}
+
+        # Build chat completion request
+        data = {
+            "model": "deepseek-ai/" + model,
+            "messages": [{"role": "user", "content": [content_item]}],
+        }
+
+        # Add optional parameters (stream, temperature, etc.)
+        # Filter out OCR-specific params that don't apply to chat completion
+        chat_completion_params = {}
+        for key, value in optional_params.items():
+            # Include common chat completion params
+            if key in ["stream", "temperature", "max_tokens", "top_p", "n", "stop"]:
+                chat_completion_params[key] = value
+
+        data.update(chat_completion_params)
+
+        verbose_logger.debug(
+            "Vertex AI DeepSeek OCR: Transformed request to chat completion format"
+        )
+
+        return OCRRequestData(data=data, files=None)
+
+    async def async_transform_ocr_request(
+        self,
+        model: str,
+        document: DocumentType,
+        optional_params: dict,
+        headers: dict,
+        **kwargs,
+    ) -> OCRRequestData:
+        """
+        Transform OCR request to chat completion format for Vertex AI DeepSeek OCR (async).
+
+        Same as sync version - no async-specific logic needed. The coroutine
+        awaits nothing; it calls transform_ocr_request and returns its result,
+        so any exception raised there propagates unchanged.
+
+        Args:
+            model: Model name
+            document: Document dict from user
+            optional_params: Already mapped optional parameters
+            headers: Request headers
+            **kwargs: Additional arguments, forwarded to the sync method
+
+        Returns:
+            OCRRequestData with JSON data in chat completion format
+        """
+        # Delegate to the sync implementation with every argument passed through as-is.
+        return self.transform_ocr_request(
+            model=model,
+            document=document,
+            optional_params=optional_params,
+            headers=headers,
+            **kwargs,
+        )
+
+    def transform_ocr_response(
+        self,
+        model: str,
+        raw_response: httpx.Response,
+        logging_obj: LiteLLMLoggingObj,
+        **kwargs,
+    ) -> OCRResponse:
+        """
+        Transform chat completion response to OCR format.
+
+        Vertex AI DeepSeek OCR returns chat completion format:
+        {
+            "id": "...",
+            "object": "chat.completion",
+            "choices": [{
+                "message": {
+                    "role": "assistant",
+                    "content": "<OCR result as JSON string or markdown>"
+                }
+            }],
+            "usage": {...}
+        }
+
+        We need to extract the content and convert it to OCRResponse format.
+        Only the first choice is read. The content is interpreted as follows:
+        - a string starting with "{" is parsed as JSON; if parsing fails it is
+          treated as markdown
+        - a dict is used as the OCR data directly
+        - anything else becomes the markdown of a single page with index 0
+        OCR data without a "pages" key is wrapped into a single page as well.
+
+        Args:
+            model: Model name, used when the OCR data carries no "model" key
+            raw_response: Raw HTTP response from Vertex AI
+            logging_obj: Logging object (not read by this method)
+            **kwargs: Additional arguments (ignored)
+
+        Returns:
+            OCRResponse in standard format
+
+        Raises:
+            ValueError: If the response has no choices or the first message has
+                no content
+            Exception: Any other error raised while parsing is logged and re-raised
+        """
+        verbose_logger.debug("Vertex AI DeepSeek OCR transform_ocr_response called")
+        # NOTE(review): the full response body is written to the debug log.
+        verbose_logger.debug(f"Raw response: {raw_response.text}")
+
+        try:
+            response_json = raw_response.json()
+
+            # Extract content from chat completion response
+            choices = response_json.get("choices", [])
+            if not choices:
+                raise ValueError("No choices in chat completion response")
+
+            # Only the first choice is considered.
+            message = choices[0].get("message", {})
+            content = message.get("content", "")
+
+            if not content:
+                raise ValueError("No content in chat completion response")
+
+            # Try to parse content as JSON (OCR result might be JSON string)
+            ocr_data = None
+            try:
+                # If content is a JSON string, parse it
+                if isinstance(content, str) and content.strip().startswith("{"):
+                    ocr_data = json.loads(content)
+                elif isinstance(content, dict):
+                    ocr_data = content
+                else:
+                    # If content is markdown text, create a single page with the markdown
+                    # The chat completion "usage" dict is carried over as "usage_info".
+                    ocr_data = {
+                        "pages": [{"index": 0, "markdown": content}],
+                        "model": model,
+                        "usage_info": response_json.get("usage", {}),
+                    }
+            except json.JSONDecodeError:
+                # If JSON parsing fails, treat content as markdown
+                ocr_data = {
+                    "pages": [{"index": 0, "markdown": content}],
+                    "model": model,
+                    "usage_info": response_json.get("usage", {}),
+                }
+
+            # Ensure we have the expected structure
+            if "pages" not in ocr_data:
+                # If OCR data doesn't have pages, wrap the content in a page
+                # "model" and "usage_info" from the parsed data win over the fallbacks.
+                ocr_data = {
+                    "pages": [
+                        {
+                            "index": 0,
+                            "markdown": content
+                            if isinstance(content, str)
+                            else json.dumps(content),
+                        }
+                    ],
+                    "model": ocr_data.get("model", model),
+                    "usage_info": ocr_data.get(
+                        "usage_info", response_json.get("usage", {})
+                    ),
+                }
+
+            # Convert usage info if present
+            # Parsed OCR data that has "pages" but no "usage_info" leaves this as
+            # None; the response-level "usage" is not consulted on that path.
+            usage_info = None
+            if "usage_info" in ocr_data:
+                usage_dict = ocr_data["usage_info"]
+                if isinstance(usage_dict, dict):
+                    # NOTE(review): the dict keys are passed straight through as
+                    # keyword arguments - confirm OCRUsageInfo accepts them.
+                    usage_info = OCRUsageInfo(**usage_dict)
+
+            # Build OCRResponse
+            pages = []
+            for page_data in ocr_data.get("pages", []):
+                # Ensure page has required fields
+                # Entries that are not dicts are skipped.
+                if isinstance(page_data, dict):
+                    page = OCRPage(
+                        index=page_data.get("index", 0),
+                        markdown=page_data.get("markdown", ""),
+                        images=page_data.get("images"),
+                        dimensions=page_data.get("dimensions"),
+                    )
+                    pages.append(page)
+
+            if not pages:
+                # Create a default page if none exist
+                pages = [
+                    OCRPage(
+                        index=0, markdown=content if isinstance(content, str) else ""
+                    )
+                ]
+
+            return OCRResponse(
+                pages=pages,
+                model=ocr_data.get("model", model),
+                document_annotation=ocr_data.get("document_annotation"),
+                usage_info=usage_info,
+                object="ocr",
+            )
+
+        except Exception as e:
+            # Log for diagnosis, then let the caller see the original exception.
+            verbose_logger.error(f"Error parsing Vertex AI DeepSeek OCR response: {e}")
+            raise e
+
+    async def async_transform_ocr_response(
+        self,
+        model: str,
+        raw_response: httpx.Response,
+        logging_obj: LiteLLMLoggingObj,
+        **kwargs,
+    ) -> OCRResponse:
+        """
+        Async transform chat completion response to OCR format.
+
+        Same as sync version - no async-specific logic needed. The coroutine
+        awaits nothing; it calls transform_ocr_response and returns its result,
+        so any exception raised there propagates unchanged.
+
+        Args:
+            model: Model name
+            raw_response: Raw HTTP response
+            logging_obj: Logging object
+            **kwargs: Additional arguments, forwarded to the sync method
+
+        Returns:
+            OCRResponse in standard format
+        """
+        # Delegate to the sync implementation with every argument passed through as-is.
+        return self.transform_ocr_response(
+            model=model,
+            raw_response=raw_response,
+            logging_obj=logging_obj,
+            **kwargs,
+        )
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/llms/vertex_ai/ocr/transformation.py
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/llms/vertex_ai/ocr/transformation.py
@@ -0,0 +1,301 @@
+"""
+Vertex AI Mistral OCR transformation implementation.
+"""
+from typing import Dict, Optional
+
+from litellm._logging import verbose_logger
+from litellm.litellm_core_utils.prompt_templates.image_handling import (
+    async_convert_url_to_base64,
+    convert_url_to_base64,
+)
+from litellm.llms.base_llm.ocr.transformation import DocumentType, OCRRequestData
+from litellm.llms.mistral.ocr.transformation import MistralOCRConfig
+from litellm.llms.vertex_ai.common_utils import get_vertex_base_url
+from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
+
+
+class VertexAIOCRConfig(MistralOCRConfig):
+    """
+    Vertex AI Mistral OCR transformation configuration.
+
+    Vertex AI uses Mistral's OCR API format through the Mistral publisher endpoint.
+    Inherits transformation logic from MistralOCRConfig since they use the same format.
+
+    Reference: Vertex AI Mistral OCR documentation
+
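+    Important: Vertex AI OCR only supports base64 data URIs (data:image/..., data:application/pdf;base64,...).
+    Regular URLs are not supported by the service, so this class fetches them and converts them
+    to base64 data URIs before the request is built.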
+    """
+
+    def __init__(self) -> None:
+        """Initialize the Mistral OCR config and create the VertexBase helper."""
+        super().__init__()
+        # Kept on the instance; validate_environment uses it to obtain an access token.
+        self.vertex_base = VertexBase()
+
+    def validate_environment(
+        self,
+        headers: Dict,
+        model: str,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+        litellm_params: Optional[dict] = None,
+        **kwargs,
+    ) -> Dict:
+        """
+        Validate environment and return headers for Vertex AI OCR.
+
+        Vertex AI uses Bearer token authentication with access token from credentials.
+        """
+        # Extract Vertex AI parameters using safe helpers from VertexBase
+        # Use safe_get_* methods that don't mutate litellm_params dict
+        litellm_params = litellm_params or {}
+
+        vertex_project = VertexBase.safe_get_vertex_ai_project(
+            litellm_params=litellm_params
+        )
+        vertex_credentials = VertexBase.safe_get_vertex_ai_credentials(
+            litellm_params=litellm_params
+        )
+
+        # Get access token from Vertex credentials
+        access_token, project_id = self.vertex_base.get_access_token(
+            credentials=vertex_credentials,
+            project_id=vertex_project,
+        )
+
+        headers = {
+            "Authorization": f"Bearer {access_token}",
+            "Content-Type": "application/json",
+            **headers,
+        }
+
+        return headers
+
+    def get_complete_url(
+        self,
+        api_base: Optional[str],
+        model: str,
+        optional_params: dict,
+        litellm_params: Optional[dict] = None,
+        **kwargs,
+    ) -> str:
+        """
+        Get complete URL for Vertex AI OCR endpoint.
+
+        Vertex AI endpoint format:
+        https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/publishers/mistralai/ocr
+
+        Args:
+            api_base: Vertex AI API base URL (optional)
+            model: Model name (not used in URL construction)
+            optional_params: Optional parameters
+            litellm_params: LiteLLM parameters containing vertex_project, vertex_location
+
+        Returns: Complete URL for Vertex AI OCR endpoint
+        """
+        # Extract Vertex AI parameters using safe helpers from VertexBase
+        # Use safe_get_* methods that don't mutate litellm_params dict
+        litellm_params = litellm_params or {}
+
+        vertex_project = VertexBase.safe_get_vertex_ai_project(
+            litellm_params=litellm_params
+        )
+        vertex_location = VertexBase.safe_get_vertex_ai_location(
+            litellm_params=litellm_params
+        )
+
+        if vertex_project is None:
+            raise ValueError(
+                "Missing vertex_project - Set VERTEXAI_PROJECT environment variable or pass vertex_project parameter"
+            )
+
+        if vertex_location is None:
+            vertex_location = "us-central1"
+
+        # Get API base URL
+        if api_base is None:
+            api_base = get_vertex_base_url(vertex_location)
+
+        # Ensure no trailing slash
+        api_base = api_base.rstrip("/")
+
+        # Vertex AI OCR endpoint format for Mistral publisher
+        # Format: https://{region}-aiplatform.googleapis.com/v1/projects/{project}/locations/{region}/publishers/mistralai/models/{model}:rawPredict
+        return f"{api_base}/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/mistralai/models/{model}:rawPredict"
+
+    def _convert_url_to_data_uri_sync(self, url: str) -> str:
+        """
+        Synchronously convert a URL to a base64 data URI.
+
+        Vertex AI OCR doesn't have internet access, so we need to fetch URLs
+        and convert them to base64 data URIs. The work is done by
+        convert_url_to_base64; nothing is caught here, so its exceptions
+        propagate to the caller.
+
+        Args:
+            url: The URL to convert
+
+        Returns:
+            Base64 data URI string
+        """
+        # NOTE(review): the full URL is written to the debug log.
+        verbose_logger.debug(
+            f"Vertex AI OCR: Converting URL to base64 data URI (sync): {url}"
+        )
+
+        # Fetch and convert to base64 data URI
+        # convert_url_to_base64 already returns a full data URI like "data:image/jpeg;base64,..."
+        data_uri = convert_url_to_base64(url=url)
+
+        # Only the length is logged, not the encoded payload itself.
+        verbose_logger.debug(
+            f"Vertex AI OCR: Converted URL to data URI (length: {len(data_uri)})"
+        )
+
+        return data_uri
+
+    async def _convert_url_to_data_uri_async(self, url: str) -> str:
+        """
+        Asynchronously convert a URL to a base64 data URI.
+
+        Vertex AI OCR doesn't have internet access, so we need to fetch URLs
+        and convert them to base64 data URIs. The work is done by awaiting
+        async_convert_url_to_base64; nothing is caught here, so its exceptions
+        propagate to the caller.
+
+        Args:
+            url: The URL to convert
+
+        Returns:
+            Base64 data URI string
+        """
+        # NOTE(review): the full URL is written to the debug log.
+        verbose_logger.debug(
+            f"Vertex AI OCR: Converting URL to base64 data URI (async): {url}"
+        )
+
+        # Fetch and convert to base64 data URI asynchronously
+        # async_convert_url_to_base64 already returns a full data URI like "data:image/jpeg;base64,..."
+        data_uri = await async_convert_url_to_base64(url=url)
+
+        # Only the length is logged, not the encoded payload itself.
+        verbose_logger.debug(
+            f"Vertex AI OCR: Converted URL to data URI (length: {len(data_uri)})"
+        )
+
+        return data_uri
+
+    def transform_ocr_request(
+        self,
+        model: str,
+        document: DocumentType,
+        optional_params: dict,
+        headers: dict,
+        **kwargs,
+    ) -> OCRRequestData:
+        """
+        Transform OCR request for Vertex AI, converting URLs to base64 data URIs (sync).
+
+        Vertex AI OCR doesn't have internet access, so we automatically fetch
+        any URLs and convert them to base64 data URIs synchronously.
+
+        Args:
+            model: Model name
+            document: Document dict from user
+            optional_params: Already mapped optional parameters
+            headers: Request headers
+            **kwargs: Additional arguments
+
+        Returns:
+            OCRRequestData with JSON data
+        """
+        verbose_logger.debug("Vertex AI OCR transform_ocr_request (sync) called")
+
+        if not isinstance(document, dict):
+            raise ValueError(f"Expected document dict, got {type(document)}")
+
+        # Check if we need to convert URL to base64
+        doc_type = document.get("type")
+        transformed_document = document.copy()
+
+        if doc_type == "document_url":
+            document_url = document.get("document_url", "")
+            # If it's not already a data URI, convert it
+            if document_url and not document_url.startswith("data:"):
+                verbose_logger.debug(
+                    "Vertex AI OCR: Converting document URL to base64 data URI (sync)"
+                )
+                data_uri = self._convert_url_to_data_uri_sync(url=document_url)
+                transformed_document["document_url"] = data_uri
+        elif doc_type == "image_url":
+            image_url = document.get("image_url", "")
+            # If it's not already a data URI, convert it
+            if image_url and not image_url.startswith("data:"):
+                verbose_logger.debug(
+                    "Vertex AI OCR: Converting image URL to base64 data URI (sync)"
+                )
+                data_uri = self._convert_url_to_data_uri_sync(url=image_url)
+                transformed_document["image_url"] = data_uri
+
+        # Call parent's transform to build the request
+        return super().transform_ocr_request(
+            model=model,
+            document=transformed_document,
+            optional_params=optional_params,
+            headers=headers,
+            **kwargs,
+        )
+
+    async def async_transform_ocr_request(
+        self,
+        model: str,
+        document: DocumentType,
+        optional_params: dict,
+        headers: dict,
+        **kwargs,
+    ) -> OCRRequestData:
+        """
+        Transform OCR request for Vertex AI, converting URLs to base64 data URIs (async).
+
+        Vertex AI OCR doesn't have internet access, so we automatically fetch
+        any URLs and convert them to base64 data URIs asynchronously.
+
+        Args:
+            model: Model name
+            document: Document dict from user
+            optional_params: Already mapped optional parameters
+            headers: Request headers
+            **kwargs: Additional arguments
+
+        Returns:
+            OCRRequestData with JSON data
+        """
+        verbose_logger.debug(
+            f"Vertex AI OCR async_transform_ocr_request - model: {model}"
+        )
+
+        if not isinstance(document, dict):
+            raise ValueError(f"Expected document dict, got {type(document)}")
+
+        # Check if we need to convert URL to base64
+        doc_type = document.get("type")
+        transformed_document = document.copy()
+
+        if doc_type == "document_url":
+            document_url = document.get("document_url", "")
+            # If it's not already a data URI, convert it
+            if document_url and not document_url.startswith("data:"):
+                verbose_logger.debug(
+                    "Vertex AI OCR: Converting document URL to base64 data URI (async)"
+                )
+                data_uri = await self._convert_url_to_data_uri_async(url=document_url)
+                transformed_document["document_url"] = data_uri
+        elif doc_type == "image_url":
+            image_url = document.get("image_url", "")
+            # If it's not already a data URI, convert it
+            if image_url and not image_url.startswith("data:"):
+                verbose_logger.debug(
+                    "Vertex AI OCR: Converting image URL to base64 data URI (async)"
+                )
+                data_uri = await self._convert_url_to_data_uri_async(url=image_url)
+                transformed_document["image_url"] = data_uri
+
+        # Call parent's transform to build the request
+        return super().transform_ocr_request(
+            model=model,
+            document=transformed_document,
+            optional_params=optional_params,
+            headers=headers,
+            **kwargs,
+        )