chore: initial snapshot for gitea/github upload

This commit is contained in:
Your Name
2026-03-26 16:04:46 +08:00
commit a699a1ac98
3497 changed files with 1586237 additions and 0 deletions

View File

@@ -0,0 +1,325 @@
"""
Azure Blob Storage backend implementation for file storage.
This module implements the Azure Blob Storage backend for storing files
in Azure Data Lake Storage Gen2. It inherits from AzureBlobStorageLogger
to reuse all authentication and Azure Storage operations.
"""
import time
from typing import Optional
from urllib.parse import quote
from litellm._logging import verbose_logger
from litellm._uuid import uuid
from .storage_backend import BaseFileStorageBackend
from litellm.integrations.azure_storage.azure_storage import AzureBlobStorageLogger
class AzureBlobStorageBackend(BaseFileStorageBackend, AzureBlobStorageLogger):
    """
    Azure Blob Storage backend implementation.

    Inherits from AzureBlobStorageLogger to reuse:
    - Authentication (account key and Azure AD)
    - Service client management
    - Token management
    - All Azure Storage helper methods

    Reads configuration from the same environment variables as AzureBlobStorageLogger.
    """

    def __init__(self, **kwargs):
        """
        Initialize Azure Blob Storage backend.

        Inherits all functionality from AzureBlobStorageLogger which handles:
        - Reading environment variables
        - Authentication (account key and Azure AD)
        - Service client management
        - Token management

        Environment variables (same as AzureBlobStorageLogger):
        - AZURE_STORAGE_ACCOUNT_NAME (required)
        - AZURE_STORAGE_FILE_SYSTEM (required)
        - AZURE_STORAGE_ACCOUNT_KEY (optional, if using account key auth)
        - AZURE_STORAGE_TENANT_ID (optional, if using Azure AD)
        - AZURE_STORAGE_CLIENT_ID (optional, if using Azure AD)
        - AZURE_STORAGE_CLIENT_SECRET (optional, if using Azure AD)

        Note: We skip periodic_flush since we're not using this as a logger.
        """
        # Initialize AzureBlobStorageLogger (handles all auth and config).
        # Called directly (not via super()) so only the logger branch of the
        # multiple-inheritance chain runs its initializer.
        AzureBlobStorageLogger.__init__(self, **kwargs)
        # Disable logging functionality - we're only using this for file storage
        # The periodic_flush task will be created but will do nothing since we override it

    async def periodic_flush(self):
        """
        Override to do nothing - we're not using this as a logger.
        This prevents the periodic flush task from doing any work.
        """
        # Do nothing - this class is used for file storage, not logging
        return

    async def async_log_success_event(self, *args, **kwargs):
        """
        Override to do nothing - we're not using this as a logger.
        """
        # Do nothing - this class is used for file storage, not logging
        pass

    async def async_log_failure_event(self, *args, **kwargs):
        """
        Override to do nothing - we're not using this as a logger.
        """
        # Do nothing - this class is used for file storage, not logging
        pass

    def _generate_file_name(
        self, original_filename: str, file_naming_strategy: str
    ) -> str:
        """
        Generate file name based on naming strategy.

        Strategies:
        - "original_filename": percent-encode the original name (safe="" so
          even "/" is encoded, preventing accidental directory nesting)
        - "timestamp": millisecond epoch timestamp, keeping the extension
        - anything else: UUID4, keeping the extension (default)
        """
        if file_naming_strategy == "original_filename":
            # Use original filename, but sanitize it
            return quote(original_filename, safe="")
        elif file_naming_strategy == "timestamp":
            # Use timestamp
            extension = (
                original_filename.split(".")[-1] if "." in original_filename else ""
            )
            timestamp = int(time.time() * 1000)  # milliseconds
            return f"{timestamp}.{extension}" if extension else str(timestamp)
        else:  # default to "uuid"
            # Use UUID
            extension = (
                original_filename.split(".")[-1] if "." in original_filename else ""
            )
            file_uuid = str(uuid.uuid4())
            return f"{file_uuid}.{extension}" if extension else file_uuid

    async def upload_file(
        self,
        file_content: bytes,
        filename: str,
        content_type: str,
        path_prefix: Optional[str] = None,
        file_naming_strategy: str = "uuid",
    ) -> str:
        """
        Upload a file to Azure Blob Storage.

        Returns the blob URL in format: https://{account}.blob.core.windows.net/{container}/{path}

        NOTE(review): content_type is accepted for interface compatibility with
        BaseFileStorageBackend but is not forwarded to Azure in either upload
        path — confirm whether a Content-Type should be set on the blob.
        """
        try:
            # Generate file name
            file_name = self._generate_file_name(filename, file_naming_strategy)
            # Build full path
            if path_prefix:
                # Remove leading/trailing slashes and normalize
                prefix = path_prefix.strip("/")
                full_path = f"{prefix}/{file_name}"
            else:
                full_path = file_name
            # Choose upload transport based on configured auth mode
            if self.azure_storage_account_key:
                # Use Azure SDK with account key (reuse logger's method)
                storage_url = await self._upload_file_with_account_key(
                    file_content=file_content,
                    full_path=full_path,
                )
            else:
                # Use REST API with Azure AD token (reuse logger's methods)
                storage_url = await self._upload_file_with_azure_ad(
                    file_content=file_content,
                    full_path=full_path,
                )
            verbose_logger.debug(
                f"Successfully uploaded file to Azure Blob Storage: {storage_url}"
            )
            return storage_url
        except Exception as e:
            # Log with traceback, then propagate so callers can handle/report
            verbose_logger.exception(
                f"Error uploading file to Azure Blob Storage: {str(e)}"
            )
            raise

    async def _upload_file_with_account_key(
        self, file_content: bytes, full_path: str
    ) -> str:
        """Upload file using Azure SDK with account key authentication."""
        # Reuse the logger's service client method
        service_client = await self.get_service_client()
        file_system_client = service_client.get_file_system_client(
            file_system=self.azure_storage_file_system
        )
        # Create filesystem (container) if it doesn't exist
        if not await file_system_client.exists():
            await file_system_client.create_file_system()
            verbose_logger.debug(
                f"Created filesystem: {self.azure_storage_file_system}"
            )
        # Extract directory and filename (similar to logger's pattern)
        path_parts = full_path.split("/")
        if len(path_parts) > 1:
            directory_path = "/".join(path_parts[:-1])
            file_name = path_parts[-1]
            # Create directory if needed (like logger does)
            directory_client = file_system_client.get_directory_client(directory_path)
            if not await directory_client.exists():
                await directory_client.create_directory()
                verbose_logger.debug(f"Created directory: {directory_path}")
            # Get file client from directory (same pattern as logger)
            file_client = directory_client.get_file_client(file_name)
        else:
            # No directory, create file directly in root
            file_client = file_system_client.get_file_client(full_path)
        # Create, append, and flush (same pattern as logger's upload_to_azure_data_lake_with_azure_account_key)
        await file_client.create_file()
        await file_client.append_data(
            data=file_content, offset=0, length=len(file_content)
        )
        await file_client.flush_data(position=len(file_content), offset=0)
        # Return blob URL (not DFS URL)
        blob_url = f"https://{self.azure_storage_account_name}.blob.core.windows.net/{self.azure_storage_file_system}/{full_path}"
        return blob_url

    async def _upload_file_with_azure_ad(
        self, file_content: bytes, full_path: str
    ) -> str:
        """Upload file using REST API with Azure AD authentication."""
        # Reuse the logger's token management (refreshes self.azure_auth_token)
        await self.set_valid_azure_ad_token()
        from litellm.llms.custom_httpx.http_handler import (
            get_async_httpx_client,
            httpxSpecialProvider,
        )
        async_client = get_async_httpx_client(
            llm_provider=httpxSpecialProvider.LoggingCallback
        )
        # Use DFS endpoint for upload
        base_url = f"https://{self.azure_storage_account_name}.dfs.core.windows.net/{self.azure_storage_file_system}/{full_path}"
        # Execute 3-step upload process: create, append, flush
        # Reuse the logger's helper methods
        await self._create_file(async_client, base_url)
        # Append data - logger's _append_data expects string, so we create our own for bytes
        await self._append_data_bytes(async_client, base_url, file_content)
        await self._flush_data(async_client, base_url, len(file_content))
        # Return blob URL (not DFS URL)
        blob_url = f"https://{self.azure_storage_account_name}.blob.core.windows.net/{self.azure_storage_file_system}/{full_path}"
        return blob_url

    async def _append_data_bytes(self, client, base_url: str, file_content: bytes):
        """
        Append binary data to file using REST API.

        Single PATCH at position=0; caller is responsible for the subsequent
        flush. Assumes the whole payload fits in one request — TODO confirm
        against Azure's append size limits for very large files.
        """
        from litellm.constants import AZURE_STORAGE_MSFT_VERSION
        headers = {
            "x-ms-version": AZURE_STORAGE_MSFT_VERSION,
            "Content-Type": "application/octet-stream",
            "Authorization": f"Bearer {self.azure_auth_token}",
        }
        response = await client.patch(
            f"{base_url}?action=append&position=0",
            headers=headers,
            content=file_content,
        )
        response.raise_for_status()

    async def download_file(self, storage_url: str) -> bytes:
        """
        Download a file from Azure Blob Storage.

        Args:
            storage_url: Blob URL in format: https://{account}.blob.core.windows.net/{container}/{path}

        Returns:
            bytes: File content

        Raises:
            ValueError: If storage_url is not a recognizable blob URL.
        """
        try:
            # Parse blob URL to extract path
            # URL format: https://{account}.blob.core.windows.net/{container}/{path}
            if ".blob.core.windows.net/" not in storage_url:
                raise ValueError(f"Invalid Azure Blob Storage URL: {storage_url}")
            # Extract path after container name
            container_and_path = storage_url.split(".blob.core.windows.net/", 1)[1]
            path_parts = container_and_path.split("/", 1)
            if len(path_parts) < 2:
                raise ValueError(
                    f"Invalid Azure Blob Storage URL format: {storage_url}"
                )
            file_path = path_parts[1]  # Path after container name
            # Mirror the upload transports: SDK for account key, REST for AAD
            if self.azure_storage_account_key:
                # Use Azure SDK (reuse logger's service client)
                return await self._download_file_with_account_key(file_path)
            else:
                # Use REST API (reuse logger's token management)
                return await self._download_file_with_azure_ad(file_path)
        except Exception as e:
            verbose_logger.exception(
                f"Error downloading file from Azure Blob Storage: {str(e)}"
            )
            raise

    async def _download_file_with_account_key(self, file_path: str) -> bytes:
        """Download file using Azure SDK with account key."""
        # Reuse the logger's service client method
        service_client = await self.get_service_client()
        file_system_client = service_client.get_file_system_client(
            file_system=self.azure_storage_file_system
        )
        # Ensure filesystem exists (should already exist, but check for safety)
        if not await file_system_client.exists():
            raise ValueError(
                f"Filesystem {self.azure_storage_file_system} does not exist"
            )
        file_client = file_system_client.get_file_client(file_path)
        # Download file
        download_response = await file_client.download_file()
        file_content = await download_response.readall()
        return file_content

    async def _download_file_with_azure_ad(self, file_path: str) -> bytes:
        """Download file using REST API with Azure AD token."""
        # Reuse the logger's token management
        await self.set_valid_azure_ad_token()
        from litellm.llms.custom_httpx.http_handler import (
            get_async_httpx_client,
            httpxSpecialProvider,
        )
        from litellm.constants import AZURE_STORAGE_MSFT_VERSION
        async_client = get_async_httpx_client(
            llm_provider=httpxSpecialProvider.LoggingCallback
        )
        # Use blob endpoint for download (simpler than DFS)
        blob_url = f"https://{self.azure_storage_account_name}.blob.core.windows.net/{self.azure_storage_file_system}/{file_path}"
        headers = {
            "x-ms-version": AZURE_STORAGE_MSFT_VERSION,
            "Authorization": f"Bearer {self.azure_auth_token}",
        }
        response = await async_client.get(blob_url, headers=headers)
        response.raise_for_status()
        return response.content

View File

@@ -0,0 +1,78 @@
"""
Base storage backend interface for file storage backends.
This module defines the abstract base class that all file storage backends
(e.g., Azure Blob Storage, S3, GCS) must implement.
"""
from abc import ABC, abstractmethod
from typing import Optional
class BaseFileStorageBackend(ABC):
    """
    Abstract base class for file storage backends.

    Concrete backends (Azure Blob Storage, S3, GCS, ...) implement this
    interface so callers can upload and download files without knowing
    which provider sits underneath.
    """

    @abstractmethod
    async def upload_file(
        self,
        file_content: bytes,
        filename: str,
        content_type: str,
        path_prefix: Optional[str] = None,
        file_naming_strategy: str = "uuid",
    ) -> str:
        """
        Store ``file_content`` in the backend and return its storage URL.

        Args:
            file_content: Raw bytes of the file.
            filename: Original filename; may feed the naming strategy.
            content_type: MIME type of the file.
            path_prefix: Optional prefix used to organize stored files.
            file_naming_strategy: One of "uuid", "timestamp", or
                "original_filename".

        Returns:
            str: URL from which the stored file can later be accessed.

        Raises:
            Exception: If the upload fails.
        """
        ...

    @abstractmethod
    async def download_file(self, storage_url: str) -> bytes:
        """
        Fetch a previously uploaded file.

        Args:
            storage_url: The URL returned by :meth:`upload_file`.

        Returns:
            bytes: The stored file content.

        Raises:
            Exception: If the download fails.
        """
        ...

    async def delete_file(self, storage_url: str) -> None:
        """
        Remove a file from the backend.

        Deletion is optional; this base implementation is a no-op so
        backends without delete support keep working unchanged.

        Args:
            storage_url: The storage URL of the file to delete.

        Raises:
            Exception: If a backend override fails to delete.
        """
        # Intentionally a no-op; subclasses override when they support deletion.
        return None

View File

@@ -0,0 +1,40 @@
"""
Factory for creating storage backend instances.
This module provides a factory function to instantiate the correct storage backend
based on the backend type. Backends use the same configuration as their corresponding
callbacks (e.g., azure_storage uses the same env vars as AzureBlobStorageLogger).
"""
from litellm._logging import verbose_logger
from .azure_blob_storage_backend import AzureBlobStorageBackend
from .storage_backend import BaseFileStorageBackend
def get_storage_backend(backend_type: str) -> BaseFileStorageBackend:
    """
    Factory function to create a storage backend instance.

    Backends read their configuration from the same environment variables
    as their corresponding callbacks; e.g. "azure_storage" uses the same
    env vars as AzureBlobStorageLogger, so no config is passed here.

    Args:
        backend_type: Backend type identifier (e.g., "azure_storage")

    Returns:
        BaseFileStorageBackend: Instance of the appropriate storage backend

    Raises:
        ValueError: If backend_type is not supported
    """
    verbose_logger.debug(f"Creating storage backend: type={backend_type}")
    # Guard clause: reject anything we don't recognize up front.
    if backend_type != "azure_storage":
        raise ValueError(
            f"Unsupported storage backend type: {backend_type}. "
            f"Supported types: azure_storage"
        )
    return AzureBlobStorageBackend()

View File

@@ -0,0 +1,261 @@
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
import httpx
from openai.types.file_deleted import FileDeleted
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.files import TwoStepFileUploadConfig
from litellm.types.llms.openai import (
AllMessageValues,
CreateFileRequest,
FileContentRequest,
OpenAICreateFileRequestOptionalParams,
OpenAIFileObject,
OpenAIFilesPurpose,
)
from litellm.types.utils import LlmProviders, ModelResponse
from ..chat.transformation import BaseConfig
# Import heavy/project-internal names only for static type checkers; at
# runtime the aliases degrade to ``Any`` so annotations stay evaluable
# without the real imports (presumably avoiding import cycles — TODO confirm).
if TYPE_CHECKING:
    from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
    from litellm.router import Router as _Router
    from litellm.types.llms.openai import HttpxBinaryResponseContent

    LiteLLMLoggingObj = _LiteLLMLoggingObj
    Span = Any
    Router = _Router
else:
    # Runtime fallbacks for the names used in annotations below.
    LiteLLMLoggingObj = Any
    Span = Any
    Router = Any
class BaseFilesConfig(BaseConfig):
    """
    Base transformation config for OpenAI-compatible /files endpoints.

    Provider integrations subclass this and implement the abstract
    ``transform_*`` hooks to translate OpenAI-style file requests and
    responses to and from the provider's native API.
    """

    @property
    @abstractmethod
    def custom_llm_provider(self) -> LlmProviders:
        """Provider identifier this config belongs to."""
        pass

    @property
    def file_upload_http_method(self) -> str:
        """
        HTTP method to use for file uploads.

        Override this in provider configs if they need different methods.
        Default is POST (used by most providers like OpenAI, Anthropic).
        S3-based providers like Bedrock should return "PUT".
        """
        return "POST"

    @abstractmethod
    def get_supported_openai_params(
        self, model: str
    ) -> List[OpenAICreateFileRequestOptionalParams]:
        """Return the OpenAI create-file optional params this provider supports."""
        pass

    def get_complete_file_url(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: str,
        optional_params: dict,
        litellm_params: dict,
        data: CreateFileRequest,
    ):
        """
        Build the full URL for a file-upload request.

        Defaults to the generic ``get_complete_url``; providers whose file
        endpoint depends on the request ``data`` should override this.
        """
        return self.get_complete_url(
            api_base=api_base,
            api_key=api_key,
            model=model,
            optional_params=optional_params,
            litellm_params=litellm_params,
        )

    @abstractmethod
    def transform_create_file_request(
        self,
        model: str,
        create_file_data: CreateFileRequest,
        optional_params: dict,
        litellm_params: dict,
    ) -> Union[dict, str, bytes, "TwoStepFileUploadConfig"]:
        """
        Transform OpenAI-style file creation request into provider-specific format.

        Returns:
            - dict: For pre-signed single-step uploads (e.g., Bedrock S3)
            - str/bytes: For traditional file uploads
            - TwoStepFileUploadConfig: For two-step upload process (e.g., Manus, GCS)
        """
        pass

    @abstractmethod
    def transform_create_file_response(
        self,
        model: Optional[str],
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
        litellm_params: dict,
    ) -> OpenAIFileObject:
        """Transform provider file-creation response into OpenAI format."""
        pass

    @abstractmethod
    def transform_retrieve_file_request(
        self,
        file_id: str,
        optional_params: dict,
        litellm_params: dict,
    ) -> tuple[str, dict]:
        """Transform file retrieve request into provider-specific format."""
        pass

    @abstractmethod
    def transform_retrieve_file_response(
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
        litellm_params: dict,
    ) -> OpenAIFileObject:
        """Transform file retrieve response into OpenAI format."""
        pass

    @abstractmethod
    def transform_delete_file_request(
        self,
        file_id: str,
        optional_params: dict,
        litellm_params: dict,
    ) -> tuple[str, dict]:
        """Transform file delete request into provider-specific format."""
        pass

    @abstractmethod
    def transform_delete_file_response(
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
        litellm_params: dict,
    ) -> "FileDeleted":
        """Transform file delete response into OpenAI format."""
        pass

    @abstractmethod
    def transform_list_files_request(
        self,
        purpose: Optional[str],
        optional_params: dict,
        litellm_params: dict,
    ) -> tuple[str, dict]:
        """Transform file list request into provider-specific format."""
        pass

    @abstractmethod
    def transform_list_files_response(
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
        litellm_params: dict,
    ) -> List[OpenAIFileObject]:
        """Transform file list response into OpenAI format."""
        pass

    @abstractmethod
    def transform_file_content_request(
        self,
        file_content_request: "FileContentRequest",
        optional_params: dict,
        litellm_params: dict,
    ) -> tuple[str, dict]:
        """Transform file content request into provider-specific format."""
        pass

    @abstractmethod
    def transform_file_content_response(
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
        litellm_params: dict,
    ) -> "HttpxBinaryResponseContent":
        """Transform file content response into OpenAI format."""
        pass

    def transform_request(
        self,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        headers: dict,
    ) -> dict:
        """Not applicable: file configs use transform_create_file_request instead."""
        # FIX: message previously said "AudioTranscriptionConfig" — copy-paste
        # from the audio transcription base class; name the right class.
        raise NotImplementedError(
            "BaseFilesConfig does not need a request transformation for file endpoints"
        )

    def transform_response(
        self,
        model: str,
        raw_response: httpx.Response,
        model_response: ModelResponse,
        logging_obj: LiteLLMLoggingObj,
        request_data: dict,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        encoding: Any,
        api_key: Optional[str] = None,
        json_mode: Optional[bool] = None,
    ) -> ModelResponse:
        """Not applicable: file configs use the transform_*_response hooks instead."""
        # FIX: same copy-paste error as transform_request above.
        raise NotImplementedError(
            "BaseFilesConfig does not need a response transformation for file endpoints"
        )
class BaseFileEndpoints(ABC):
    """
    Abstract interface for OpenAI-style file endpoint handlers.

    Implementations provide the async CRUD surface for files
    (create, retrieve, list, delete, content).
    """

    @abstractmethod
    async def acreate_file(
        self,
        create_file_request: CreateFileRequest,
        llm_router: Router,
        target_model_names_list: List[str],
        litellm_parent_otel_span: Span,
        user_api_key_dict: UserAPIKeyAuth,
    ) -> OpenAIFileObject:
        """Create (upload) a file and return its OpenAI file object."""
        ...

    @abstractmethod
    async def afile_retrieve(
        self,
        file_id: str,
        litellm_parent_otel_span: Optional[Span],
        llm_router: Optional[Router] = None,
    ) -> OpenAIFileObject:
        """Retrieve metadata for a single file by id."""
        ...

    @abstractmethod
    async def afile_list(
        self,
        purpose: Optional[OpenAIFilesPurpose],
        litellm_parent_otel_span: Optional[Span],
        **data: Dict,
    ) -> List[OpenAIFileObject]:
        """List files, optionally filtered by purpose."""
        ...

    @abstractmethod
    async def afile_delete(
        self,
        file_id: str,
        litellm_parent_otel_span: Optional[Span],
        llm_router: Router,
        **data: Dict,
    ) -> OpenAIFileObject:
        """Delete a file by id."""
        ...

    @abstractmethod
    async def afile_content(
        self,
        file_id: str,
        litellm_parent_otel_span: Optional[Span],
        llm_router: Router,
        **data: Dict,
    ) -> "HttpxBinaryResponseContent":
        """Download the raw content of a file by id."""
        ...