chore: initial public snapshot for github upload
This commit is contained in:
@@ -0,0 +1,4 @@
|
||||
"""Vertex AI OCR module."""
|
||||
from .transformation import VertexAIOCRConfig
|
||||
|
||||
__all__ = ["VertexAIOCRConfig"]
|
||||
@@ -0,0 +1,41 @@
|
||||
"""
|
||||
Common utilities for Vertex AI OCR providers.
|
||||
|
||||
This module provides routing logic to determine which OCR configuration to use
|
||||
based on the model name.
|
||||
"""
|
||||
|
||||
from typing import TYPE_CHECKING, Optional
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from litellm.llms.base_llm.ocr.transformation import BaseOCRConfig
|
||||
|
||||
|
||||
def get_vertex_ai_ocr_config(model: str) -> Optional["BaseOCRConfig"]:
    """
    Determine which Vertex AI OCR configuration to use based on the model name.

    Vertex AI supports multiple OCR services:
    - Vertex AI OCR: vertex_ai/<model>

    Args:
        model: The model name (e.g., "vertex_ai/ocr/<model>")

    Returns:
        OCR configuration instance for the specified model

    Examples:
        >>> get_vertex_ai_ocr_config("vertex_ai/deepseek-ai/deepseek-ocr-maas")
        <VertexAIDeepSeekOCRConfig object>

        >>> get_vertex_ai_ocr_config("vertex_ai/ocr/mistral-ocr-maas")
        <VertexAIOCRConfig object>
    """
    # Imported lazily inside the function to avoid circular imports at
    # module load time.
    from litellm.llms.vertex_ai.ocr.deepseek_transformation import (
        VertexAIDeepSeekOCRConfig,
    )
    from litellm.llms.vertex_ai.ocr.transformation import VertexAIOCRConfig

    # DeepSeek models route to the chat-completion-based config; everything
    # else falls back to the Mistral-format OCR config.
    is_deepseek = "deepseek" in model
    return VertexAIDeepSeekOCRConfig() if is_deepseek else VertexAIOCRConfig()
|
||||
@@ -0,0 +1,394 @@
|
||||
"""
|
||||
Vertex AI DeepSeek OCR transformation implementation.
|
||||
"""
|
||||
import json
|
||||
from typing import TYPE_CHECKING, Any, Dict, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.llms.base_llm.ocr.transformation import (
|
||||
BaseOCRConfig,
|
||||
DocumentType,
|
||||
OCRPage,
|
||||
OCRRequestData,
|
||||
OCRResponse,
|
||||
OCRUsageInfo,
|
||||
)
|
||||
from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
|
||||
else:
|
||||
LiteLLMLoggingObj = Any
|
||||
|
||||
|
||||
class VertexAIDeepSeekOCRConfig(BaseOCRConfig):
    """
    Vertex AI DeepSeek OCR transformation configuration.

    Vertex AI DeepSeek OCR uses the chat completion API format through the openapi endpoint.
    This transformation converts OCR requests to chat completion format and vice versa.
    """

    # Publisher prefix the Vertex AI openapi chat endpoint expects in the
    # "model" field (e.g. "deepseek-ai/deepseek-ocr-maas").
    DEEPSEEK_PUBLISHER_PREFIX = "deepseek-ai/"

    def __init__(self) -> None:
        super().__init__()
        # VertexBase provides credential resolution and access-token helpers
        # shared by all Vertex AI providers.
        self.vertex_base = VertexBase()

    def validate_environment(
        self,
        headers: Dict,
        model: str,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        litellm_params: Optional[dict] = None,
        **kwargs,
    ) -> Dict:
        """
        Validate environment and return headers for Vertex AI OCR.

        Vertex AI uses Bearer token authentication with access token from credentials.

        Args:
            headers: Caller-supplied headers; they take precedence over defaults.
            model: Model name (part of the shared interface; unused here).
            api_key: Unused — Vertex AI auth is credential-based.
            api_base: Unused here.
            litellm_params: May carry vertex_project / vertex_credentials.

        Returns:
            Headers dict including the Authorization bearer token.
        """
        # Extract Vertex AI parameters using safe helpers from VertexBase.
        # Use safe_get_* methods that don't mutate the litellm_params dict.
        litellm_params = litellm_params or {}

        vertex_project = VertexBase.safe_get_vertex_ai_project(
            litellm_params=litellm_params
        )
        vertex_credentials = VertexBase.safe_get_vertex_ai_credentials(
            litellm_params=litellm_params
        )

        # Exchange credentials for an OAuth access token.
        access_token, project_id = self.vertex_base.get_access_token(
            credentials=vertex_credentials,
            project_id=vertex_project,
        )

        # Spread caller headers last so explicit values win over defaults.
        headers = {
            "Authorization": f"Bearer {access_token}",
            "Content-Type": "application/json",
            **headers,
        }

        return headers

    def get_complete_url(
        self,
        api_base: Optional[str],
        model: str,
        optional_params: dict,
        litellm_params: Optional[dict] = None,
        **kwargs,
    ) -> str:
        """
        Get complete URL for Vertex AI DeepSeek OCR endpoint.

        Vertex AI endpoint format:
        https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/endpoints/openapi/chat/completions

        Args:
            api_base: Vertex AI API base URL (optional)
            model: Model name (e.g., "deepseek-ai/deepseek-ocr-maas")
            optional_params: Optional parameters
            litellm_params: LiteLLM parameters containing vertex_project, vertex_location

        Returns: Complete URL for Vertex AI OCR endpoint

        Raises:
            ValueError: If no vertex_project can be resolved.
        """
        # Extract Vertex AI parameters using safe helpers from VertexBase.
        # Use safe_get_* methods that don't mutate the litellm_params dict.
        litellm_params = litellm_params or {}

        vertex_project = VertexBase.safe_get_vertex_ai_project(
            litellm_params=litellm_params
        )
        vertex_location = VertexBase.safe_get_vertex_ai_location(
            litellm_params=litellm_params
        )

        if vertex_project is None:
            raise ValueError(
                "Missing vertex_project - Set VERTEXAI_PROJECT environment variable or pass vertex_project parameter"
            )

        # Default region when none was configured.
        if vertex_location is None:
            vertex_location = "us-central1"

        # Get API base URL.
        if api_base is None:
            api_base = "https://aiplatform.googleapis.com"

        # Ensure no trailing slash before joining path segments.
        api_base = api_base.rstrip("/")

        # Vertex AI DeepSeek OCR endpoint format:
        # https://{region}-aiplatform.googleapis.com/v1/projects/{project}/locations/{region}/endpoints/openapi/chat/completions
        return f"{api_base}/v1/projects/{vertex_project}/locations/{vertex_location}/endpoints/openapi/chat/completions"

    def transform_ocr_request(
        self,
        model: str,
        document: DocumentType,
        optional_params: dict,
        headers: dict,
        **kwargs,
    ) -> OCRRequestData:
        """
        Transform OCR request to chat completion format for Vertex AI DeepSeek OCR.

        Converts OCR document format to chat completion messages format:
        - Input: {"type": "image_url", "image_url": "gs://..."}
        - Output: {"model": "deepseek-ai/deepseek-ocr-maas", "messages": [{"role": "user", "content": [{"type": "image_url", "image_url": "gs://..."}]}]}

        Args:
            model: Model name (e.g., "deepseek-ai/deepseek-ocr-maas")
            document: Document dict from user (Mistral OCR format)
            optional_params: Already mapped optional parameters
            headers: Request headers
            **kwargs: Additional arguments

        Returns:
            OCRRequestData with JSON data in chat completion format

        Raises:
            ValueError: If document is not a dict or has an unsupported type.
        """
        verbose_logger.debug(
            "Vertex AI DeepSeek OCR transform_ocr_request (sync) called"
        )

        if not isinstance(document, dict):
            raise ValueError(f"Expected document dict, got {type(document)}")

        # Extract document type and URL.
        doc_type = document.get("type")
        image_url = None
        document_url = None

        if doc_type == "image_url":
            image_url = document.get("image_url", "")
        elif doc_type == "document_url":
            document_url = document.get("document_url", "")
        else:
            raise ValueError(
                f"Unsupported document type: {doc_type}. Expected 'image_url' or 'document_url'"
            )

        # Build chat completion message content.
        content_item = {}
        if image_url:
            content_item = {"type": "image_url", "image_url": image_url}
        elif document_url:
            # For document URLs, we use image_url type as well (Vertex AI supports both)
            content_item = {"type": "image_url", "image_url": document_url}

        # BUGFIX: prepend the publisher prefix only when it is missing.
        # Callers may already pass the qualified name
        # "deepseek-ai/deepseek-ocr-maas" (see docstring above); blindly
        # concatenating produced "deepseek-ai/deepseek-ai/...".
        if model.startswith(self.DEEPSEEK_PUBLISHER_PREFIX):
            qualified_model = model
        else:
            qualified_model = self.DEEPSEEK_PUBLISHER_PREFIX + model

        # Build chat completion request.
        data = {
            "model": qualified_model,
            "messages": [{"role": "user", "content": [content_item]}],
        }

        # Add optional parameters (stream, temperature, etc.).
        # Filter out OCR-specific params that don't apply to chat completion.
        chat_completion_params = {}
        for key, value in optional_params.items():
            # Include common chat completion params only.
            if key in ["stream", "temperature", "max_tokens", "top_p", "n", "stop"]:
                chat_completion_params[key] = value

        data.update(chat_completion_params)

        verbose_logger.debug(
            "Vertex AI DeepSeek OCR: Transformed request to chat completion format"
        )

        return OCRRequestData(data=data, files=None)

    async def async_transform_ocr_request(
        self,
        model: str,
        document: DocumentType,
        optional_params: dict,
        headers: dict,
        **kwargs,
    ) -> OCRRequestData:
        """
        Transform OCR request to chat completion format for Vertex AI DeepSeek OCR (async).

        Same as sync version - no async-specific logic needed.

        Args:
            model: Model name
            document: Document dict from user
            optional_params: Already mapped optional parameters
            headers: Request headers
            **kwargs: Additional arguments

        Returns:
            OCRRequestData with JSON data in chat completion format
        """
        # Delegate to the sync implementation; no awaitable work is required.
        return self.transform_ocr_request(
            model=model,
            document=document,
            optional_params=optional_params,
            headers=headers,
            **kwargs,
        )

    def transform_ocr_response(
        self,
        model: str,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
        **kwargs,
    ) -> OCRResponse:
        """
        Transform chat completion response to OCR format.

        Vertex AI DeepSeek OCR returns chat completion format:
        {
          "id": "...",
          "object": "chat.completion",
          "choices": [{
            "message": {
              "role": "assistant",
              "content": "<OCR result as JSON string or markdown>"
            }
          }],
          "usage": {...}
        }

        We need to extract the content and convert it to OCRResponse format.

        Args:
            model: Model name
            raw_response: Raw HTTP response from Vertex AI
            logging_obj: Logging object
            **kwargs: Additional arguments

        Returns:
            OCRResponse in standard format

        Raises:
            ValueError: If the response has no choices or no content.
        """
        verbose_logger.debug("Vertex AI DeepSeek OCR transform_ocr_response called")
        verbose_logger.debug(f"Raw response: {raw_response.text}")

        try:
            response_json = raw_response.json()

            # Extract content from the chat completion response.
            choices = response_json.get("choices", [])
            if not choices:
                raise ValueError("No choices in chat completion response")

            message = choices[0].get("message", {})
            content = message.get("content", "")

            if not content:
                raise ValueError("No content in chat completion response")

            # Try to parse content as JSON (OCR result might be a JSON string).
            ocr_data = None
            try:
                if isinstance(content, str) and content.strip().startswith("{"):
                    # Content looks like a JSON string — parse it.
                    ocr_data = json.loads(content)
                elif isinstance(content, dict):
                    ocr_data = content
                else:
                    # Content is markdown text — wrap it in a single page.
                    ocr_data = {
                        "pages": [{"index": 0, "markdown": content}],
                        "model": model,
                        "usage_info": response_json.get("usage", {}),
                    }
            except json.JSONDecodeError:
                # JSON parsing failed — treat the content as markdown.
                ocr_data = {
                    "pages": [{"index": 0, "markdown": content}],
                    "model": model,
                    "usage_info": response_json.get("usage", {}),
                }

            # Ensure we have the expected structure: if the parsed OCR data
            # has no "pages", wrap the original content in a single page.
            if "pages" not in ocr_data:
                ocr_data = {
                    "pages": [
                        {
                            "index": 0,
                            "markdown": content
                            if isinstance(content, str)
                            else json.dumps(content),
                        }
                    ],
                    "model": ocr_data.get("model", model),
                    "usage_info": ocr_data.get(
                        "usage_info", response_json.get("usage", {})
                    ),
                }

            # Convert usage info if present and well-formed.
            usage_info = None
            if "usage_info" in ocr_data:
                usage_dict = ocr_data["usage_info"]
                if isinstance(usage_dict, dict):
                    usage_info = OCRUsageInfo(**usage_dict)

            # Build OCRResponse pages, skipping malformed (non-dict) entries.
            pages = []
            for page_data in ocr_data.get("pages", []):
                if isinstance(page_data, dict):
                    page = OCRPage(
                        index=page_data.get("index", 0),
                        markdown=page_data.get("markdown", ""),
                        images=page_data.get("images"),
                        dimensions=page_data.get("dimensions"),
                    )
                    pages.append(page)

            if not pages:
                # Create a default page if none exist.
                pages = [
                    OCRPage(
                        index=0, markdown=content if isinstance(content, str) else ""
                    )
                ]

            return OCRResponse(
                pages=pages,
                model=ocr_data.get("model", model),
                document_annotation=ocr_data.get("document_annotation"),
                usage_info=usage_info,
                object="ocr",
            )

        except Exception as e:
            verbose_logger.error(f"Error parsing Vertex AI DeepSeek OCR response: {e}")
            raise e

    async def async_transform_ocr_response(
        self,
        model: str,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
        **kwargs,
    ) -> OCRResponse:
        """
        Async transform chat completion response to OCR format.

        Same as sync version - no async-specific logic needed.

        Args:
            model: Model name
            raw_response: Raw HTTP response
            logging_obj: Logging object
            **kwargs: Additional arguments

        Returns:
            OCRResponse in standard format
        """
        # Delegate to the sync implementation; response parsing is CPU-only.
        return self.transform_ocr_response(
            model=model,
            raw_response=raw_response,
            logging_obj=logging_obj,
            **kwargs,
        )
|
||||
@@ -0,0 +1,301 @@
|
||||
"""
|
||||
Vertex AI Mistral OCR transformation implementation.
|
||||
"""
|
||||
from typing import Dict, Optional
|
||||
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.litellm_core_utils.prompt_templates.image_handling import (
|
||||
async_convert_url_to_base64,
|
||||
convert_url_to_base64,
|
||||
)
|
||||
from litellm.llms.base_llm.ocr.transformation import DocumentType, OCRRequestData
|
||||
from litellm.llms.mistral.ocr.transformation import MistralOCRConfig
|
||||
from litellm.llms.vertex_ai.common_utils import get_vertex_base_url
|
||||
from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
|
||||
|
||||
|
||||
class VertexAIOCRConfig(MistralOCRConfig):
    """
    Vertex AI Mistral OCR transformation configuration.

    Vertex AI uses Mistral's OCR API format through the Mistral publisher endpoint.
    Inherits transformation logic from MistralOCRConfig since they use the same format.

    Reference: Vertex AI Mistral OCR documentation

    Important: Vertex AI OCR only supports base64 data URIs (data:image/..., data:application/pdf;base64,...).
    Regular URLs are not supported.
    """

    def __init__(self) -> None:
        super().__init__()
        # VertexBase provides credential resolution and access-token helpers
        # shared by all Vertex AI providers.
        self.vertex_base = VertexBase()

    def validate_environment(
        self,
        headers: Dict,
        model: str,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        litellm_params: Optional[dict] = None,
        **kwargs,
    ) -> Dict:
        """
        Validate environment and return headers for Vertex AI OCR.

        Vertex AI uses Bearer token authentication with access token from credentials.

        Args:
            headers: Caller-supplied headers; they take precedence over defaults.
            model: Model name (part of the shared interface; unused here).
            api_key: Unused — Vertex AI auth is credential-based.
            api_base: Unused here.
            litellm_params: May carry vertex_project / vertex_credentials.

        Returns:
            Headers dict including the Authorization bearer token.
        """
        # Extract Vertex AI parameters using safe helpers from VertexBase.
        # Use safe_get_* methods that don't mutate the litellm_params dict.
        litellm_params = litellm_params or {}

        vertex_project = VertexBase.safe_get_vertex_ai_project(
            litellm_params=litellm_params
        )
        vertex_credentials = VertexBase.safe_get_vertex_ai_credentials(
            litellm_params=litellm_params
        )

        # Exchange credentials for an OAuth access token.
        access_token, project_id = self.vertex_base.get_access_token(
            credentials=vertex_credentials,
            project_id=vertex_project,
        )

        # Spread caller headers last so explicit values win over defaults.
        headers = {
            "Authorization": f"Bearer {access_token}",
            "Content-Type": "application/json",
            **headers,
        }

        return headers

    def get_complete_url(
        self,
        api_base: Optional[str],
        model: str,
        optional_params: dict,
        litellm_params: Optional[dict] = None,
        **kwargs,
    ) -> str:
        """
        Get complete URL for Vertex AI OCR endpoint.

        Vertex AI endpoint format:
        https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/publishers/mistralai/ocr

        Args:
            api_base: Vertex AI API base URL (optional)
            model: Model name (not used in URL construction)
            optional_params: Optional parameters
            litellm_params: LiteLLM parameters containing vertex_project, vertex_location

        Returns: Complete URL for Vertex AI OCR endpoint

        Raises:
            ValueError: If no vertex_project can be resolved.
        """
        # Extract Vertex AI parameters using safe helpers from VertexBase.
        # Use safe_get_* methods that don't mutate the litellm_params dict.
        litellm_params = litellm_params or {}

        vertex_project = VertexBase.safe_get_vertex_ai_project(
            litellm_params=litellm_params
        )
        vertex_location = VertexBase.safe_get_vertex_ai_location(
            litellm_params=litellm_params
        )

        if vertex_project is None:
            raise ValueError(
                "Missing vertex_project - Set VERTEXAI_PROJECT environment variable or pass vertex_project parameter"
            )

        # Default region when none was configured.
        if vertex_location is None:
            vertex_location = "us-central1"

        # Get API base URL.
        if api_base is None:
            api_base = get_vertex_base_url(vertex_location)

        # Ensure no trailing slash before joining path segments.
        api_base = api_base.rstrip("/")

        # Vertex AI OCR endpoint format for Mistral publisher:
        # https://{region}-aiplatform.googleapis.com/v1/projects/{project}/locations/{region}/publishers/mistralai/models/{model}:rawPredict
        return f"{api_base}/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/mistralai/models/{model}:rawPredict"

    def _convert_url_to_data_uri_sync(self, url: str) -> str:
        """
        Synchronously convert a URL to a base64 data URI.

        Vertex AI OCR doesn't have internet access, so we need to fetch URLs
        and convert them to base64 data URIs.

        Args:
            url: The URL to convert

        Returns:
            Base64 data URI string
        """
        verbose_logger.debug(
            f"Vertex AI OCR: Converting URL to base64 data URI (sync): {url}"
        )

        # convert_url_to_base64 already returns a full data URI like
        # "data:image/jpeg;base64,...".
        data_uri = convert_url_to_base64(url=url)

        verbose_logger.debug(
            f"Vertex AI OCR: Converted URL to data URI (length: {len(data_uri)})"
        )

        return data_uri

    async def _convert_url_to_data_uri_async(self, url: str) -> str:
        """
        Asynchronously convert a URL to a base64 data URI.

        Vertex AI OCR doesn't have internet access, so we need to fetch URLs
        and convert them to base64 data URIs.

        Args:
            url: The URL to convert

        Returns:
            Base64 data URI string
        """
        verbose_logger.debug(
            f"Vertex AI OCR: Converting URL to base64 data URI (async): {url}"
        )

        # async_convert_url_to_base64 already returns a full data URI like
        # "data:image/jpeg;base64,...".
        data_uri = await async_convert_url_to_base64(url=url)

        verbose_logger.debug(
            f"Vertex AI OCR: Converted URL to data URI (length: {len(data_uri)})"
        )

        return data_uri

    @staticmethod
    def _find_url_to_convert(document: dict) -> Optional[str]:
        """
        Return the document key holding a URL that needs base64 conversion.

        The document schema stores the URL under a key named after its type
        ("document_url" or "image_url"). URLs already expressed as data URIs
        need no conversion.

        Args:
            document: Document dict from the user.

        Returns:
            "document_url" or "image_url" when that field holds a plain URL,
            otherwise None.
        """
        doc_type = document.get("type")
        if doc_type not in ("document_url", "image_url"):
            return None
        url = document.get(doc_type, "")
        if url and not url.startswith("data:"):
            return doc_type
        return None

    def transform_ocr_request(
        self,
        model: str,
        document: DocumentType,
        optional_params: dict,
        headers: dict,
        **kwargs,
    ) -> OCRRequestData:
        """
        Transform OCR request for Vertex AI, converting URLs to base64 data URIs (sync).

        Vertex AI OCR doesn't have internet access, so we automatically fetch
        any URLs and convert them to base64 data URIs synchronously.

        Args:
            model: Model name
            document: Document dict from user
            optional_params: Already mapped optional parameters
            headers: Request headers
            **kwargs: Additional arguments

        Returns:
            OCRRequestData with JSON data

        Raises:
            ValueError: If document is not a dict.
        """
        verbose_logger.debug("Vertex AI OCR transform_ocr_request (sync) called")

        if not isinstance(document, dict):
            raise ValueError(f"Expected document dict, got {type(document)}")

        # Copy so the caller's document dict is never mutated.
        transformed_document = document.copy()

        url_field = self._find_url_to_convert(document)
        if url_field is not None:
            # Keep the historical log wording for each field.
            label = "document URL" if url_field == "document_url" else "image URL"
            verbose_logger.debug(
                f"Vertex AI OCR: Converting {label} to base64 data URI (sync)"
            )
            transformed_document[url_field] = self._convert_url_to_data_uri_sync(
                url=document[url_field]
            )

        # Call parent's transform to build the request.
        return super().transform_ocr_request(
            model=model,
            document=transformed_document,
            optional_params=optional_params,
            headers=headers,
            **kwargs,
        )

    async def async_transform_ocr_request(
        self,
        model: str,
        document: DocumentType,
        optional_params: dict,
        headers: dict,
        **kwargs,
    ) -> OCRRequestData:
        """
        Transform OCR request for Vertex AI, converting URLs to base64 data URIs (async).

        Vertex AI OCR doesn't have internet access, so we automatically fetch
        any URLs and convert them to base64 data URIs asynchronously.

        Args:
            model: Model name
            document: Document dict from user
            optional_params: Already mapped optional parameters
            headers: Request headers
            **kwargs: Additional arguments

        Returns:
            OCRRequestData with JSON data

        Raises:
            ValueError: If document is not a dict.
        """
        verbose_logger.debug(
            f"Vertex AI OCR async_transform_ocr_request - model: {model}"
        )

        if not isinstance(document, dict):
            raise ValueError(f"Expected document dict, got {type(document)}")

        # Copy so the caller's document dict is never mutated.
        transformed_document = document.copy()

        url_field = self._find_url_to_convert(document)
        if url_field is not None:
            # Keep the historical log wording for each field.
            label = "document URL" if url_field == "document_url" else "image URL"
            verbose_logger.debug(
                f"Vertex AI OCR: Converting {label} to base64 data URI (async)"
            )
            transformed_document[url_field] = await self._convert_url_to_data_uri_async(
                url=document[url_field]
            )

        # Call parent's transform to build the request.
        return super().transform_ocr_request(
            model=model,
            document=transformed_document,
            optional_params=optional_params,
            headers=headers,
            **kwargs,
        )
|
||||
Reference in New Issue
Block a user