chore: initial snapshot for gitea/github upload
@@ -0,0 +1,182 @@
"""
Translate from OpenAI's `/v1/chat/completions` to VLLM's `/v1/chat/completions`
"""

from typing import Any, Coroutine, List, Literal, Optional, Tuple, Union, cast, overload

from litellm.litellm_core_utils.prompt_templates.common_utils import (
    _get_image_mime_type_from_url,
)
from litellm.litellm_core_utils.prompt_templates.factory import _parse_mime_type
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import (
    AllMessageValues,
    ChatCompletionFileObject,
    ChatCompletionVideoObject,
    ChatCompletionVideoUrlObject,
)

from ....utils import _remove_additional_properties, _remove_strict_from_schema
from ...openai.chat.gpt_transformation import OpenAIGPTConfig


class HostedVLLMChatConfig(OpenAIGPTConfig):
    def get_supported_openai_params(self, model: str) -> List[str]:
        params = super().get_supported_openai_params(model)
        params.extend(["reasoning_effort", "thinking"])
        return params

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        _tools = non_default_params.pop("tools", None)
        if _tools is not None:
            # remove 'additionalProperties' from tools
            _tools = _remove_additional_properties(_tools)
            # remove 'strict' from tools
            _tools = _remove_strict_from_schema(_tools)
            non_default_params["tools"] = _tools

        # Handle the 'thinking' parameter - convert Anthropic-style 'thinking'
        # to OpenAI-style 'reasoning_effort'. vLLM is OpenAI-compatible, so it
        # understands reasoning_effort, not thinking.
        # Reference: https://github.com/BerriAI/litellm/issues/19761
        thinking = non_default_params.pop("thinking", None)
        if thinking is not None and isinstance(thinking, dict):
            if thinking.get("type") == "enabled":
                # Only convert if reasoning_effort is not already set
                if "reasoning_effort" not in non_default_params:
                    budget_tokens = thinking.get("budget_tokens", 0)
                    # Map budget_tokens to a reasoning_effort level, using the
                    # same thresholds as the Anthropic adapter
                    # (translate_anthropic_thinking_to_reasoning_effort)
                    if budget_tokens >= 10000:
                        non_default_params["reasoning_effort"] = "high"
                    elif budget_tokens >= 5000:
                        non_default_params["reasoning_effort"] = "medium"
                    elif budget_tokens >= 2000:
                        non_default_params["reasoning_effort"] = "low"
                    else:
                        non_default_params["reasoning_effort"] = "minimal"

        return super().map_openai_params(
            non_default_params, optional_params, model, drop_params
        )

    def _get_openai_compatible_provider_info(
        self, api_base: Optional[str], api_key: Optional[str]
    ) -> Tuple[Optional[str], Optional[str]]:
        api_base = api_base or get_secret_str("HOSTED_VLLM_API_BASE")  # type: ignore
        dynamic_api_key = (
            api_key or get_secret_str("HOSTED_VLLM_API_KEY") or "fake-api-key"
        )  # vllm does not require an api key
        return api_base, dynamic_api_key

    def _is_video_file(self, content_item: ChatCompletionFileObject) -> bool:
        """
        Check if the file is a video.

        - format: video/<extension>
        - file_data: base64 encoded video data
        - file_id: infer mp4 from extension
        """
        if content_item.get("type") != "file":
            return False
        file = content_item.get("file", {})
        format = file.get("format")
        file_data = file.get("file_data")
        file_id = file.get("file_id")
        if format and format.startswith("video/"):
            return True
        elif file_data:
            mime_type = _parse_mime_type(file_data)
            if mime_type and mime_type.startswith("video/"):
                return True
        elif file_id:
            mime_type = _get_image_mime_type_from_url(file_id)
            if mime_type and mime_type.startswith("video/"):
                return True
        return False

    def _convert_file_to_video_url(
        self, content_item: ChatCompletionFileObject
    ) -> ChatCompletionVideoObject:
        file = content_item.get("file", {})
        file_id = file.get("file_id")
        file_data = file.get("file_data")

        if file_id:
            return ChatCompletionVideoObject(
                type="video_url", video_url=ChatCompletionVideoUrlObject(url=file_id)
            )
        elif file_data:
            return ChatCompletionVideoObject(
                type="video_url", video_url=ChatCompletionVideoUrlObject(url=file_data)
            )
        raise ValueError("file_id or file_data is required")

    @overload
    def _transform_messages(
        self, messages: List[AllMessageValues], model: str, is_async: Literal[True]
    ) -> Coroutine[Any, Any, List[AllMessageValues]]:
        ...

    @overload
    def _transform_messages(
        self,
        messages: List[AllMessageValues],
        model: str,
        is_async: Literal[False] = False,
    ) -> List[AllMessageValues]:
        ...

    def _transform_messages(
        self, messages: List[AllMessageValues], model: str, is_async: bool = False
    ) -> Union[List[AllMessageValues], Coroutine[Any, Any, List[AllMessageValues]]]:
        """
        Support translating:
        - video files from file_id or file_data to video_url
        - thinking_blocks on assistant messages to content blocks
        """
        for message in messages:
            if message["role"] == "assistant":
                thinking_blocks = message.pop("thinking_blocks", None)  # type: ignore
                if thinking_blocks:
                    # Re-emit thinking blocks as plain content blocks
                    new_content: list = [
                        {"type": block["type"], "thinking": block.get("thinking", "")}
                        if block.get("type") == "thinking"
                        else {"type": block["type"], "data": block.get("data", "")}
                        for block in thinking_blocks
                    ]
                    existing_content = message.get("content")
                    if isinstance(existing_content, str):
                        new_content.append({"type": "text", "text": existing_content})
                    elif isinstance(existing_content, list):
                        new_content.extend(existing_content)
                    message["content"] = new_content  # type: ignore
            elif message["role"] == "user":
                message_content = message.get("content")
                if message_content and isinstance(message_content, list):
                    replaced_content_items: List[
                        Tuple[int, ChatCompletionFileObject]
                    ] = []
                    for idx, content_item in enumerate(message_content):
                        if content_item.get("type") == "file":
                            content_item = cast(ChatCompletionFileObject, content_item)
                            if self._is_video_file(content_item):
                                replaced_content_items.append((idx, content_item))
                    for idx, content_item in replaced_content_items:
                        message_content[idx] = self._convert_file_to_video_url(
                            content_item
                        )
        if is_async:
            return super()._transform_messages(
                messages, model, is_async=cast(Literal[True], True)
            )
        else:
            return super()._transform_messages(
                messages, model, is_async=cast(Literal[False], False)
            )
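For reference, a minimal usage sketch of the `thinking` → `reasoning_effort` translation above, assuming a local vLLM server; the model name and api_base are placeholders:

```python
import litellm

# Anthropic-style `thinking` is translated by HostedVLLMChatConfig.map_openai_params:
# budget_tokens=10000 crosses the "high" threshold, so vLLM receives
# reasoning_effort="high" instead of the `thinking` dict.
response = litellm.completion(
    model="hosted_vllm/Qwen/Qwen3-8B",    # placeholder model name
    api_base="http://localhost:8000/v1",  # placeholder server URL
    messages=[{"role": "user", "content": "Why is the sky blue?"}],
    thinking={"type": "enabled", "budget_tokens": 10000},
)
print(response.choices[0].message.content)
```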
@@ -0,0 +1,5 @@
No transformation is required for hosted_vllm embedding.

VLLM's `/v1/embeddings` endpoint is a superset of OpenAI's `embedding` endpoint.

To pass provider-specific parameters, see [this guide](https://docs.litellm.ai/docs/completion/provider_specific_params).
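A minimal sketch of a hosted_vllm embedding call, assuming a local vLLM server; the model name and api_base are placeholders:

```python
import litellm

response = litellm.embedding(
    model="hosted_vllm/BAAI/bge-m3",      # placeholder model name
    api_base="http://localhost:8000/v1",  # placeholder server URL
    input=["hello world"],
)
print(len(response.data[0]["embedding"]))
```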
@@ -0,0 +1,180 @@
"""
Hosted VLLM Embedding API Configuration.

This module provides the configuration for hosted VLLM's Embedding API.
VLLM is OpenAI-compatible and supports embeddings via the /v1/embeddings endpoint.

Docs: https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html
"""

from typing import TYPE_CHECKING, Any, List, Optional, Union

import httpx

from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.llms.base_llm.embedding.transformation import BaseEmbeddingConfig
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import AllEmbeddingInputValues, AllMessageValues
from litellm.types.utils import EmbeddingResponse
from litellm.utils import convert_to_model_response_object

if TYPE_CHECKING:
    from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj

    LiteLLMLoggingObj = _LiteLLMLoggingObj
else:
    LiteLLMLoggingObj = Any


class HostedVLLMEmbeddingError(BaseLLMException):
    """Exception class for Hosted VLLM Embedding errors."""

    pass


class HostedVLLMEmbeddingConfig(BaseEmbeddingConfig):
    """
    Configuration for Hosted VLLM's Embedding API.

    Reference: https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html
    """

    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        """
        Validate the environment and set up headers for the Hosted VLLM API.
        """
        if api_key is None:
            api_key = get_secret_str("HOSTED_VLLM_API_KEY") or "fake-api-key"

        default_headers = {
            "Content-Type": "application/json",
        }

        # Only add an Authorization header if api_key is not "fake-api-key"
        if api_key and api_key != "fake-api-key":
            default_headers["Authorization"] = f"Bearer {api_key}"

        # Merge with existing headers (the user's headers take priority)
        return {**default_headers, **headers}

    def get_complete_url(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: str,
        optional_params: dict,
        litellm_params: dict,
        stream: Optional[bool] = None,
    ) -> str:
        """
        Get the complete URL for the Hosted VLLM Embedding API endpoint.
        """
        if api_base is None:
            api_base = get_secret_str("HOSTED_VLLM_API_BASE")
        if api_base is None:
            raise ValueError("api_base is required for hosted_vllm embeddings")

        # Remove trailing slashes
        api_base = api_base.rstrip("/")

        # Ensure the URL ends with /embeddings
        if not api_base.endswith("/embeddings"):
            api_base = f"{api_base}/embeddings"

        return api_base

    def transform_embedding_request(
        self,
        model: str,
        input: AllEmbeddingInputValues,
        optional_params: dict,
        headers: dict,
    ) -> dict:
        """
        Transform the embedding request to Hosted VLLM format (OpenAI-compatible).
        """
        # Ensure input is a list
        if isinstance(input, str):
            input = [input]

        # Strip the 'hosted_vllm/' prefix if present
        if model.startswith("hosted_vllm/"):
            model = model.replace("hosted_vllm/", "", 1)

        return {
            "model": model,
            "input": input,
            **optional_params,
        }

    def transform_embedding_response(
        self,
        model: str,
        raw_response: httpx.Response,
        model_response: EmbeddingResponse,
        logging_obj: LiteLLMLoggingObj,
        api_key: Optional[str],
        request_data: dict,
        optional_params: dict,
        litellm_params: dict,
    ) -> EmbeddingResponse:
        """
        Transform the embedding response from Hosted VLLM format (OpenAI-compatible).
        """
        logging_obj.post_call(original_response=raw_response.text)

        # VLLM returns a standard OpenAI-compatible embedding response
        response_json = raw_response.json()

        return convert_to_model_response_object(
            response_object=response_json,
            model_response_object=model_response,
            response_type="embedding",
        )

    def get_supported_openai_params(self, model: str) -> list:
        """
        Get the list of supported OpenAI parameters for Hosted VLLM embeddings.
        """
        return [
            "timeout",
            "dimensions",
            "encoding_format",
            "user",
        ]

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """
        Map OpenAI parameters to Hosted VLLM format.
        """
        for param, value in non_default_params.items():
            if param in self.get_supported_openai_params(model):
                optional_params[param] = value
        return optional_params

    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
    ) -> BaseLLMException:
        """
        Get the error class for Hosted VLLM errors.
        """
        return HostedVLLMEmbeddingError(
            message=error_message,
            status_code=status_code,
            headers=headers,
        )
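A small sketch of the URL normalization performed by `get_complete_url` above; the URL is a placeholder:

```python
config = HostedVLLMEmbeddingConfig()

# Trailing slashes are stripped and "/embeddings" is appended exactly once.
url = config.get_complete_url(
    api_base="http://localhost:8000/v1/",
    api_key=None,
    model="hosted_vllm/BAAI/bge-m3",
    optional_params={},
    litellm_params={},
)
assert url == "http://localhost:8000/v1/embeddings"
```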
@@ -0,0 +1,216 @@
"""
Transformation logic for Hosted VLLM rerank
"""

from typing import Any, Dict, List, Optional, Union

import httpx

from litellm._uuid import uuid
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.llms.base_llm.rerank.transformation import BaseRerankConfig
from litellm.secret_managers.main import get_secret_str
from litellm.types.rerank import (
    OptionalRerankParams,
    RerankBilledUnits,
    RerankRequest,
    RerankResponse,
    RerankResponseDocument,
    RerankResponseMeta,
    RerankResponseResult,
    RerankTokens,
)


class HostedVLLMRerankError(BaseLLMException):
    def __init__(
        self,
        status_code: int,
        message: str,
        headers: Optional[Union[dict, httpx.Headers]] = None,
    ):
        super().__init__(status_code=status_code, message=message, headers=headers)


class HostedVLLMRerankConfig(BaseRerankConfig):
    def __init__(self) -> None:
        pass

    def get_complete_url(
        self,
        api_base: Optional[str],
        model: str,
        optional_params: Optional[dict] = None,
    ) -> str:
        if api_base:
            # Remove trailing slashes and ensure a clean base URL
            api_base = api_base.rstrip("/")
            # Preserve backward compatibility: rewrite a legacy /v1/rerank suffix
            if api_base.endswith("/v1/rerank"):
                api_base = api_base.replace("/v1/rerank", "/rerank")
            elif not api_base.endswith("/rerank"):
                api_base = f"{api_base}/rerank"
            return api_base
        raise ValueError("api_base must be provided for Hosted VLLM rerank")

    def get_supported_cohere_rerank_params(self, model: str) -> list:
        return [
            "query",
            "documents",
            "top_n",
            "rank_fields",
            "return_documents",
        ]

    def map_cohere_rerank_params(
        self,
        non_default_params: Optional[dict],
        model: str,
        drop_params: bool,
        query: str,
        documents: List[Union[str, Dict[str, Any]]],
        custom_llm_provider: Optional[str] = None,
        top_n: Optional[int] = None,
        rank_fields: Optional[List[str]] = None,
        return_documents: Optional[bool] = True,
        max_chunks_per_doc: Optional[int] = None,
        max_tokens_per_doc: Optional[int] = None,
    ) -> Dict:
        """
        Map parameters for Hosted VLLM rerank
        """
        if max_chunks_per_doc is not None:
            raise ValueError("Hosted VLLM does not support max_chunks_per_doc")

        return dict(
            OptionalRerankParams(
                query=query,
                documents=documents,
                top_n=top_n,
                rank_fields=rank_fields,
                return_documents=return_documents,
            )
        )

    def validate_environment(
        self,
        headers: dict,
        model: str,
        api_key: Optional[str] = None,
        optional_params: Optional[dict] = None,
    ) -> dict:
        if api_key is None:
            api_key = get_secret_str("HOSTED_VLLM_API_KEY") or "fake-api-key"

        default_headers = {
            "Authorization": f"Bearer {api_key}",
            "accept": "application/json",
            "content-type": "application/json",
        }

        # If 'Authorization' is provided in headers, it overrides the default.
        if "Authorization" in headers:
            default_headers["Authorization"] = headers["Authorization"]

        # Merge other headers, overriding any default ones except Authorization
        return {**default_headers, **headers}

    def transform_rerank_request(
        self,
        model: str,
        optional_rerank_params: Dict,
        headers: dict,
    ) -> dict:
        if "query" not in optional_rerank_params:
            raise ValueError("query is required for Hosted VLLM rerank")
        if "documents" not in optional_rerank_params:
            raise ValueError("documents is required for Hosted VLLM rerank")

        rerank_request = RerankRequest(
            model=model,
            query=optional_rerank_params["query"],
            documents=optional_rerank_params["documents"],
            top_n=optional_rerank_params.get("top_n", None),
            rank_fields=optional_rerank_params.get("rank_fields", None),
            return_documents=optional_rerank_params.get("return_documents", None),
        )
        return rerank_request.model_dump(exclude_none=True)

    def transform_rerank_response(
        self,
        model: str,
        raw_response: httpx.Response,
        model_response: RerankResponse,
        logging_obj: LiteLLMLoggingObj,
        api_key: Optional[str] = None,
        request_data: dict = {},
        optional_params: dict = {},
        litellm_params: dict = {},
    ) -> RerankResponse:
        """
        Process the response from the Hosted VLLM rerank API
        """
        try:
            raw_response_json = raw_response.json()
        except Exception:
            raise ValueError(
                f"Error parsing response: {raw_response.text}, status_code={raw_response.status_code}"
            )

        return self._transform_response(raw_response_json)

    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
    ) -> BaseLLMException:
        return HostedVLLMRerankError(
            message=error_message, status_code=status_code, headers=headers
        )

    def _transform_response(self, response: dict) -> RerankResponse:
        # Extract usage information
        usage_data = response.get("usage", {})
        _billed_units = RerankBilledUnits(
            total_tokens=usage_data.get("total_tokens", 0)
        )
        _tokens = RerankTokens(input_tokens=usage_data.get("total_tokens", 0))
        rerank_meta = RerankResponseMeta(billed_units=_billed_units, tokens=_tokens)

        # Extract results
        _results: Optional[List[dict]] = response.get("results")

        if _results is None:
            raise ValueError(f"No results found in the response={response}")

        rerank_results: List[RerankResponseResult] = []

        for result in _results:
            # Validate that the required fields exist
            if not all(key in result for key in ["index", "relevance_score"]):
                raise ValueError(f"Missing required fields in the result={result}")

            # Get the document data if it exists
            document_data = result.get("document", {})
            document = (
                RerankResponseDocument(text=str(document_data.get("text", "")))
                if document_data
                else None
            )

            # Create the typed result
            rerank_result = RerankResponseResult(
                index=int(result["index"]),
                relevance_score=float(result["relevance_score"]),
            )

            # Only attach the document if it exists
            if document:
                rerank_result["document"] = document

            rerank_results.append(rerank_result)

        return RerankResponse(
            id=response.get("id") or str(uuid.uuid4()),
            results=rerank_results,
            meta=rerank_meta,
        )
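A minimal rerank usage sketch, assuming a vLLM server running a rerank-capable model; the model name and api_base are placeholders:

```python
import litellm

response = litellm.rerank(
    model="hosted_vllm/BAAI/bge-reranker-base",  # placeholder model name
    api_base="http://localhost:8000",            # "/rerank" is appended automatically
    query="What is the capital of France?",
    documents=["Paris is the capital of France.", "Berlin is in Germany."],
    top_n=1,
)
print(response.results[0]["relevance_score"])
```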
@@ -0,0 +1,75 @@
"""
Responses API transformation for the Hosted VLLM provider.

vLLM natively supports the OpenAI-compatible /v1/responses endpoint,
so this config enables direct routing instead of falling back to
the chat completions → responses conversion pipeline.
"""

from typing import Optional

from litellm.llms.openai.responses.transformation import OpenAIResponsesAPIConfig
from litellm.secret_managers.main import get_secret_str
from litellm.types.router import GenericLiteLLMParams
from litellm.types.utils import LlmProviders


class HostedVLLMResponsesAPIConfig(OpenAIResponsesAPIConfig):
    """
    Configuration for Hosted VLLM Responses API support.

    Extends OpenAI's config since vLLM follows OpenAI's API spec,
    but uses HOSTED_VLLM_API_BASE for the base URL and defaults
    to "fake-api-key" when no API key is provided (vLLM does not
    require authentication by default).
    """

    @property
    def custom_llm_provider(self) -> LlmProviders:
        return LlmProviders.HOSTED_VLLM

    def validate_environment(
        self,
        headers: dict,
        model: str,
        litellm_params: Optional[GenericLiteLLMParams],
    ) -> dict:
        litellm_params = litellm_params or GenericLiteLLMParams()
        api_key = (
            litellm_params.api_key
            or get_secret_str("HOSTED_VLLM_API_KEY")
            or "fake-api-key"
        )  # vllm does not require an api key
        headers.update(
            {
                "Authorization": f"Bearer {api_key}",
            }
        )
        return headers

    def get_complete_url(
        self,
        api_base: Optional[str],
        litellm_params: dict,
    ) -> str:
        api_base = api_base or get_secret_str("HOSTED_VLLM_API_BASE")

        if api_base is None:
            raise ValueError(
                "api_base not set for Hosted VLLM responses API. "
                "Set via the api_base parameter or the HOSTED_VLLM_API_BASE environment variable."
            )

        # Remove trailing slashes
        api_base = api_base.rstrip("/")

        # If api_base already ends with /v1, append /responses;
        # otherwise append /v1/responses
        if api_base.endswith("/v1"):
            return f"{api_base}/responses"

        return f"{api_base}/v1/responses"

    def supports_native_websocket(self) -> bool:
        """Hosted vLLM does not support native WebSocket for the Responses API"""
        return False
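A small sketch of the URL construction above; both base-URL forms resolve to the same endpoint (URLs are placeholders):

```python
config = HostedVLLMResponsesAPIConfig()

# "/v1/responses" is appended without doubling an existing "/v1" suffix.
assert (
    config.get_complete_url(api_base="http://localhost:8000", litellm_params={})
    == "http://localhost:8000/v1/responses"
)
assert (
    config.get_complete_url(api_base="http://localhost:8000/v1", litellm_params={})
    == "http://localhost:8000/v1/responses"
)
```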
@@ -0,0 +1,65 @@
"""
Transformation logic for Hosted VLLM audio transcription
"""

from typing import Optional, Union

import httpx

from litellm.llms.base_llm.audio_transcription.transformation import (
    AudioTranscriptionRequestData,
)
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.llms.openai.transcriptions.whisper_transformation import (
    OpenAIWhisperAudioTranscriptionConfig,
)
from litellm.types.utils import FileTypes


class HostedVLLMAudioTranscriptionError(BaseLLMException):
    def __init__(
        self,
        status_code: int,
        message: str,
        headers: Optional[Union[dict, httpx.Headers]] = None,
    ):
        super().__init__(status_code=status_code, message=message, headers=headers)


class HostedVLLMAudioTranscriptionConfig(OpenAIWhisperAudioTranscriptionConfig):
    def __init__(self) -> None:
        pass

    def get_complete_url(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: str,
        optional_params: dict,
        litellm_params: dict,
        stream: Optional[bool] = None,
    ) -> str:
        if api_base:
            # Remove trailing slashes and ensure a clean base URL
            api_base = api_base.rstrip("/")
            if not api_base.endswith("/v1/audio/transcriptions"):
                api_base = f"{api_base}/v1/audio/transcriptions"
            return api_base
        raise ValueError(
            "api_base must be provided for Hosted VLLM audio transcription"
        )

    def transform_audio_transcription_request(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
        litellm_params: dict,
    ) -> AudioTranscriptionRequestData:
        """
        Transform the audio transcription request
        """
        data = {"model": model, "file": audio_file, **optional_params}

        return AudioTranscriptionRequestData(
            data=data,
        )
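A minimal transcription usage sketch, assuming a vLLM server running a Whisper-compatible model; the model name, file path, and api_base are placeholders:

```python
import litellm

with open("speech.wav", "rb") as audio_file:
    response = litellm.transcription(
        model="hosted_vllm/openai/whisper-large-v3",  # placeholder model name
        api_base="http://localhost:8000",             # placeholder server URL
        file=audio_file,
    )
print(response.text)
```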