chore: initial public snapshot for github upload

2026-03-26 20:06:14 +08:00
commit 0e5ecd930e
3497 changed files with 1586236 additions and 0 deletions
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/ocr/main.py
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/ocr/main.py
@@ -0,0 +1,444 @@
+"""
+Main OCR function for LiteLLM.
+"""
+import asyncio
+import base64
+import contextvars
+import mimetypes
+import os
+import re
+from functools import partial
+from io import IOBase
+from pathlib import Path
+from typing import Any, Coroutine, Dict, Optional, Union
+
+import httpx
+
+import litellm
+from litellm._logging import verbose_logger
+from litellm.constants import request_timeout
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.llms.base_llm.ocr.transformation import BaseOCRConfig, OCRResponse
+from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
+from litellm.types.router import GenericLiteLLMParams
+from litellm.utils import ProviderConfigManager, client
+
+####### ENVIRONMENT VARIABLES ###################
+base_llm_http_handler = BaseLLMHTTPHandler()
+#################################################
+
+
+@client
+async def aocr(
+    model: str,
+    document: Dict[str, Any],
+    api_key: Optional[str] = None,
+    api_base: Optional[str] = None,
+    timeout: Optional[Union[float, httpx.Timeout]] = None,
+    custom_llm_provider: Optional[str] = None,
+    extra_headers: Optional[Dict[str, Any]] = None,
+    **kwargs,
+) -> OCRResponse:
+    """
+    Async OCR function.
+
+    Args:
+        model: Model name (e.g., "mistral/mistral-ocr-latest")
+        document: Document to process in Mistral format:
+            {"type": "document_url", "document_url": "https://..."} for PDFs/docs,
+            {"type": "image_url", "image_url": "https://..."} for images, or
+            {"type": "file", "file": <path/bytes/file-obj>} for local files
+        api_key: Optional API key
+        api_base: Optional API base URL
+        timeout: Optional timeout
+        custom_llm_provider: Optional custom LLM provider
+        extra_headers: Optional extra headers
+        **kwargs: Additional parameters (e.g., include_image_base64, pages, image_limit)
+
+    Returns:
+        OCRResponse in Mistral OCR format with pages, model, usage_info, etc.
+
+    Example:
+        ```python
+        import litellm
+
+        # OCR with PDF
+        response = await litellm.aocr(
+            model="mistral/mistral-ocr-latest",
+            document={
+                "type": "document_url",
+                "document_url": "https://arxiv.org/pdf/2201.04234"
+            },
+            include_image_base64=True
+        )
+
+        # OCR with image
+        response = await litellm.aocr(
+            model="mistral/mistral-ocr-latest",
+            document={
+                "type": "image_url",
+                "image_url": "https://example.com/image.png"
+            }
+        )
+
+        # OCR with base64 encoded PDF
+        response = await litellm.aocr(
+            model="mistral/mistral-ocr-latest",
+            document={
+                "type": "document_url",
+                "document_url": f"data:application/pdf;base64,{base64_pdf}"
+            }
+        )
+
+        # OCR with local file
+        response = await litellm.aocr(
+            model="mistral/mistral-ocr-latest",
+            document={"type": "file", "file": "/path/to/document.pdf"}
+        )
+        ```
+    """
+    local_vars = locals()
+    try:
+        loop = asyncio.get_event_loop()
+        kwargs["aocr"] = True
+
+        # Get custom llm provider
+        if custom_llm_provider is None:
+            _, custom_llm_provider, _, _ = litellm.get_llm_provider(
+                model=model, api_base=api_base
+            )
+
+        func = partial(
+            ocr,
+            model=model,
+            document=document,
+            api_key=api_key,
+            api_base=api_base,
+            timeout=timeout,
+            custom_llm_provider=custom_llm_provider,
+            extra_headers=extra_headers,
+            **kwargs,
+        )
+
+        ctx = contextvars.copy_context()
+        func_with_context = partial(ctx.run, func)
+        init_response = await loop.run_in_executor(None, func_with_context)
+
+        if asyncio.iscoroutine(init_response):
+            response = await init_response
+        else:
+            response = init_response
+
+        if response is None:
+            raise ValueError(
+                f"Got an unexpected None response from the OCR API: {response}"
+            )
+
+        return response
+    except Exception as e:
+        raise litellm.exception_type(
+            model=model,
+            custom_llm_provider=custom_llm_provider,
+            original_exception=e,
+            completion_kwargs=local_vars,
+            extra_kwargs=kwargs,
+        )
+
+
+@client
+def ocr(
+    model: str,
+    document: Dict[str, Any],
+    api_key: Optional[str] = None,
+    api_base: Optional[str] = None,
+    timeout: Optional[Union[float, httpx.Timeout]] = None,
+    custom_llm_provider: Optional[str] = None,
+    extra_headers: Optional[Dict[str, Any]] = None,
+    **kwargs,
+) -> Union[OCRResponse, Coroutine[Any, Any, OCRResponse]]:
+    """
+    Synchronous OCR function.
+
+    Args:
+        model: Model name (e.g., "mistral/mistral-ocr-latest")
+        document: Document to process in Mistral format:
+            {"type": "document_url", "document_url": "https://..."} for PDFs/docs,
+            {"type": "image_url", "image_url": "https://..."} for images, or
+            {"type": "file", "file": <path/bytes/file-obj>} for local files
+        api_key: Optional API key
+        api_base: Optional API base URL
+        timeout: Optional timeout
+        custom_llm_provider: Optional custom LLM provider
+        extra_headers: Optional extra headers
+        **kwargs: Additional parameters (e.g., include_image_base64, pages, image_limit)
+
+    Returns:
+        OCRResponse in Mistral OCR format with pages, model, usage_info, etc.
+
+    Example:
+        ```python
+        import litellm
+
+        # OCR with PDF
+        response = litellm.ocr(
+            model="mistral/mistral-ocr-latest",
+            document={
+                "type": "document_url",
+                "document_url": "https://arxiv.org/pdf/2201.04234"
+            },
+            include_image_base64=True
+        )
+
+        # OCR with image
+        response = litellm.ocr(
+            model="mistral/mistral-ocr-latest",
+            document={
+                "type": "image_url",
+                "image_url": "https://example.com/image.png"
+            }
+        )
+
+        # OCR with base64 encoded PDF
+        response = litellm.ocr(
+            model="mistral/mistral-ocr-latest",
+            document={
+                "type": "document_url",
+                "document_url": f"data:application/pdf;base64,{base64_pdf}"
+            }
+        )
+
+        # OCR with local file
+        response = litellm.ocr(
+            model="mistral/mistral-ocr-latest",
+            document={"type": "file", "file": "/path/to/document.pdf"}
+        )
+
+        # Access pages
+        for page in response.pages:
+            print(f"Page {page.index}: {page.markdown}")
+        ```
+    """
+    local_vars = locals()
+    try:
+        litellm_logging_obj: LiteLLMLoggingObj = kwargs.pop("litellm_logging_obj")  # type: ignore
+        litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None)
+        _is_async = kwargs.pop("aocr", False) is True
+
+        # Validate document parameter format
+        if not isinstance(document, dict):
+            raise ValueError(
+                f"document must be a dict with 'type' and URL/file field, got {type(document)}"
+            )
+
+        doc_type = document.get("type")
+
+        # Handle file type: convert to document_url/image_url with base64 data URI
+        if doc_type == "file":
+            document = convert_file_document_to_url_document(document)
+            doc_type = document.get("type")
+
+        if doc_type not in ["document_url", "image_url"]:
+            raise ValueError(
+                f"Invalid document type: {doc_type}. "
+                "Must be 'document_url', 'image_url', or 'file'"
+            )
+
+        (
+            model,
+            custom_llm_provider,
+            dynamic_api_key,
+            dynamic_api_base,
+        ) = litellm.get_llm_provider(
+            model=model,
+            custom_llm_provider=custom_llm_provider,
+            api_base=api_base,
+            api_key=api_key,
+        )
+
+        # Update with dynamic values if available
+        if dynamic_api_key:
+            api_key = dynamic_api_key
+        if dynamic_api_base:
+            api_base = dynamic_api_base
+
+        # Get provider config
+        ocr_provider_config: Optional[
+            BaseOCRConfig
+        ] = ProviderConfigManager.get_provider_ocr_config(
+            model=model,
+            provider=litellm.LlmProviders(custom_llm_provider),
+        )
+
+        if ocr_provider_config is None:
+            raise ValueError(
+                f"OCR is not supported for provider: {custom_llm_provider}"
+            )
+
+        verbose_logger.debug(
+            f"OCR call - model: {model}, provider: {custom_llm_provider}"
+        )
+
+        # Get litellm params using GenericLiteLLMParams (same as responses API)
+        litellm_params = GenericLiteLLMParams(**kwargs)
+
+        # Extract OCR-specific parameters from kwargs
+        supported_params = ocr_provider_config.get_supported_ocr_params(model=model)
+        non_default_params = {}
+        for param in supported_params:
+            if param in kwargs:
+                non_default_params[param] = kwargs.pop(param)
+
+        # Map parameters to provider-specific format
+        optional_params = ocr_provider_config.map_ocr_params(
+            non_default_params=non_default_params,
+            optional_params={},
+            model=model,
+        )
+
+        verbose_logger.debug(f"OCR optional_params after mapping: {optional_params}")
+
+        # Pre Call logging
+        litellm_logging_obj.update_environment_variables(
+            model=model,
+            optional_params=optional_params,
+            litellm_params={
+                "litellm_call_id": litellm_call_id,
+                "api_base": api_base,
+            },
+            custom_llm_provider=custom_llm_provider,
+        )
+
+        # Call the handler - pass document dict directly
+        response = base_llm_http_handler.ocr(
+            model=model,
+            document=document,  # Pass the entire document dict
+            optional_params=optional_params,
+            timeout=timeout or request_timeout,
+            logging_obj=litellm_logging_obj,
+            api_key=api_key,
+            api_base=api_base,
+            custom_llm_provider=custom_llm_provider,
+            aocr=_is_async,
+            headers=extra_headers,
+            provider_config=ocr_provider_config,
+            litellm_params=dict(litellm_params),
+        )
+
+        return response
+    except Exception as e:
+        raise litellm.exception_type(
+            model=model,
+            custom_llm_provider=custom_llm_provider,
+            original_exception=e,
+            completion_kwargs=local_vars,
+            extra_kwargs=kwargs,
+        )
+
+
+#################################################
+# Public utilities — used by the SDK and the proxy
+#################################################
+
+_MIME_PATTERN = re.compile(r"^[\w.+-]+/[\w.+-]+$")
+
+_MIME_TYPE_MAP = {
+    ".pdf": "application/pdf",
+    ".png": "image/png",
+    ".jpg": "image/jpeg",
+    ".jpeg": "image/jpeg",
+    ".gif": "image/gif",
+    ".webp": "image/webp",
+    ".tiff": "image/tiff",
+    ".tif": "image/tiff",
+    ".bmp": "image/bmp",
+}
+
+
+def get_mime_type(file_path: str) -> str:
+    """
+    Determine MIME type from file path extension.
+
+    Falls back to mimetypes.guess_type, then to 'application/octet-stream'.
+    """
+    ext = os.path.splitext(file_path)[1].lower()
+    mime = _MIME_TYPE_MAP.get(ext)
+    if mime:
+        return mime
+    guessed, _ = mimetypes.guess_type(file_path)
+    return guessed or "application/octet-stream"
+
+
+def convert_file_document_to_url_document(document: Dict[str, Any]) -> Dict[str, str]:
+    """
+    Convert a file-type document dict to a document_url-type document dict
+    with an inline base64 data URI.
+
+    Accepts document dicts like:
+        {"type": "file", "file": "/path/to/document.pdf"}        # file path string
+        {"type": "file", "file": Path("/path/to/doc.pdf")}       # pathlib.Path
+        {"type": "file", "file": <binary file-like object>}      # file-like object (BinaryIO)
+        {"type": "file", "file": b"raw bytes"}                   # raw bytes
+
+    Returns:
+        {"type": "document_url", "document_url": "data:<mime>;base64,<data>"}
+        or {"type": "image_url", "image_url": "data:<mime>;base64,<data>"}
+    """
+    file_input = document.get("file")
+    if file_input is None:
+        raise ValueError(
+            "document with type='file' must include a 'file' field containing "
+            "a file path (str), pathlib.Path, file-like object, or bytes"
+        )
+
+    file_bytes: bytes
+    mime_type: str = "application/octet-stream"
+    file_name: Optional[str] = None
+
+    if isinstance(file_input, (str, Path)):
+        file_path = str(file_input)
+        if not os.path.isfile(file_path):
+            raise FileNotFoundError(f"File not found: {file_path}")
+        mime_type = get_mime_type(file_path)
+        file_name = os.path.basename(file_path)
+        with open(file_path, "rb") as f:
+            file_bytes = f.read()
+    elif isinstance(file_input, bytes):
+        file_bytes = file_input
+    elif isinstance(file_input, IOBase) or hasattr(file_input, "read"):
+        if hasattr(file_input, "name"):
+            file_name = getattr(file_input, "name", None)
+            if file_name:
+                mime_type = get_mime_type(file_name)
+        file_bytes = file_input.read()
+        if isinstance(file_bytes, str):
+            file_bytes = file_bytes.encode("utf-8")
+    else:
+        raise ValueError(
+            f"Unsupported file input type: {type(file_input)}. "
+            "Expected str (file path), pathlib.Path, bytes, or a file-like object."
+        )
+
+    if not file_bytes:
+        raise ValueError("File is empty or could not be read")
+
+    if "mime_type" in document:
+        mime_type = document["mime_type"]
+
+    if not _MIME_PATTERN.match(mime_type):
+        raise ValueError(f"Invalid MIME type: {mime_type}")
+
+    base64_data = base64.b64encode(file_bytes).decode("utf-8")
+    data_uri = f"data:{mime_type};base64,{base64_data}"
+
+    if mime_type.startswith("image/"):
+        verbose_logger.debug(
+            f"OCR file input: Converted file to image_url data URI "
+            f"(mime={mime_type}, size={len(file_bytes)} bytes, name={file_name})"
+        )
+        return {"type": "image_url", "image_url": data_uri}
+    else:
+        verbose_logger.debug(
+            f"OCR file input: Converted file to document_url data URI "
+            f"(mime={mime_type}, size={len(file_bytes)} bytes, name={file_name})"
+        )
+        return {"type": "document_url", "document_url": data_uri}