chore: initial public snapshot for github upload

2026-03-26 20:06:14 +08:00
commit 0e5ecd930e
3497 changed files with 1586236 additions and 0 deletions
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/llms/minimax/text_to_speech/init.py
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/llms/minimax/text_to_speech/init.py
@@ -0,0 +1,7 @@
+"""
+MiniMax Text-to-Speech module
+"""
+
+from .transformation import MinimaxException, MinimaxTextToSpeechConfig
+
+__all__ = ["MinimaxTextToSpeechConfig", "MinimaxException"]
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/llms/minimax/text_to_speech/transformation.py
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/llms/minimax/text_to_speech/transformation.py
@@ -0,0 +1,414 @@
+"""
+MiniMax Text-to-Speech transformation
+
+Maps OpenAI TTS spec to MiniMax TTS API (WebSocket-based HTTP API)
+Reference: https://platform.minimax.io/docs
+"""
+
+from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
+
+import httpx
+from httpx import Headers
+
+import litellm
+from litellm.llms.base_llm.chat.transformation import BaseLLMException
+from litellm.llms.base_llm.text_to_speech.transformation import (
+    BaseTextToSpeechConfig,
+    TextToSpeechRequestData,
+)
+from litellm.secret_managers.main import get_secret_str
+
+if TYPE_CHECKING:
+    from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+    from litellm.types.llms.openai import HttpxBinaryResponseContent
+else:
+    LiteLLMLoggingObj = Any
+    HttpxBinaryResponseContent = Any
+
+
+class MinimaxException(BaseLLMException):
+    """Custom exception for MiniMax API errors"""
+
+    def __init__(
+        self,
+        status_code: int,
+        message: str,
+        headers: Optional[Union[dict, Headers]] = None,
+    ):
+        super().__init__(status_code=status_code, message=message, headers=headers)
+
+
+class MinimaxTextToSpeechConfig(BaseTextToSpeechConfig):
+    """
+    Configuration for MiniMax Text-to-Speech
+
+    Reference: https://platform.minimax.io/docs
+
+    MiniMax TTS API supports both WebSocket and HTTP endpoints.
+    This implementation uses the HTTP endpoint for simplicity.
+    """
+
+    TTS_BASE_URL = "https://api.minimax.io"
+    TTS_ENDPOINT_PATH = "/v1/t2a_v2"
+
+    # Voice mappings from OpenAI-style voices to MiniMax voice IDs
+    # MiniMax supports many voices, these are common mappings
+    VOICE_MAPPINGS = {
+        "alloy": "male-qn-qingse",
+        "echo": "male-qn-jingying",
+        "fable": "female-shaonv",
+        "onyx": "male-qn-badao",
+        "nova": "female-yujie",
+        "shimmer": "female-tianmei",
+    }
+
+    # Response format mappings from OpenAI to MiniMax
+    FORMAT_MAPPINGS = {
+        "mp3": "mp3",
+        "pcm": "pcm",
+        "wav": "wav",
+        "flac": "flac",
+    }
+
+    def get_supported_openai_params(self, model: str) -> list:
+        """
+        MiniMax TTS supports these OpenAI parameters
+        """
+        return ["voice", "response_format", "speed"]
+
+    def _extract_voice_id(self, voice: str) -> str:
+        """
+        Normalize the provided voice information into a MiniMax voice_id.
+        """
+        normalized_voice = voice.strip()
+        mapped_voice = self.VOICE_MAPPINGS.get(normalized_voice.lower())
+        return mapped_voice or normalized_voice
+
+    def _resolve_voice_id(
+        self,
+        voice: Optional[Union[str, Dict[str, Any]]],
+        params: Dict[str, Any],
+    ) -> str:
+        """
+        Determine the MiniMax voice_id based on provided voice input or parameters.
+        """
+        mapped_voice: Optional[str] = None
+
+        if isinstance(voice, str) and voice.strip():
+            mapped_voice = self._extract_voice_id(voice)
+        elif isinstance(voice, dict):
+            for key in ("voice_id", "id", "name"):
+                candidate = voice.get(key)
+                if isinstance(candidate, str) and candidate.strip():
+                    mapped_voice = self._extract_voice_id(candidate)
+                    break
+        elif voice is not None:
+            mapped_voice = self._extract_voice_id(str(voice))
+
+        if mapped_voice is None:
+            voice_override = params.pop("voice_id", None)
+            if isinstance(voice_override, str) and voice_override.strip():
+                mapped_voice = self._extract_voice_id(voice_override)
+
+        if mapped_voice is None:
+            # Default to a common voice if not specified
+            mapped_voice = "male-qn-qingse"
+
+        return mapped_voice
+
+    def map_openai_params(
+        self,
+        model: str,
+        optional_params: Dict,
+        voice: Optional[Union[str, Dict]] = None,
+        drop_params: bool = False,
+        kwargs: Optional[Dict[str, Any]] = None,
+    ) -> Tuple[Optional[str], Dict]:
+        """
+        Map OpenAI parameters to MiniMax TTS parameters
+        """
+        mapped_params: Dict[str, Any] = {}
+
+        # Work on a copy so we don't mutate the caller's dictionary
+        params = dict(optional_params) if optional_params else {}
+
+        # Extract voice identifier
+        mapped_voice = self._resolve_voice_id(voice, params)
+
+        # Response/output format
+        response_format = params.pop("response_format", None)
+        if isinstance(response_format, str):
+            mapped_format = self.FORMAT_MAPPINGS.get(response_format, "mp3")
+            mapped_params["format"] = mapped_format
+        else:
+            mapped_params["format"] = "mp3"  # Default format
+
+        # Speed parameter (MiniMax supports speed from 0.5 to 2.0)
+        speed = params.pop("speed", None)
+        if speed is not None:
+            try:
+                speed_value = float(speed)
+                # Clamp speed to MiniMax's supported range
+                speed_value = max(0.5, min(2.0, speed_value))
+                mapped_params["speed"] = speed_value
+            except (TypeError, ValueError):
+                mapped_params["speed"] = 1.0
+        else:
+            mapped_params["speed"] = 1.0
+
+        # Instructions parameter is OpenAI-specific; omit to prevent API errors
+        params.pop("instructions", None)
+
+        # Store voice_id for later use in request construction
+        mapped_params["voice_id"] = mapped_voice
+
+        # Handle extra_body for additional MiniMax-specific parameters
+        extra_body = params.pop("extra_body", None)
+        if isinstance(extra_body, dict):
+            for key, value in extra_body.items():
+                if value is not None:
+                    mapped_params[key] = value
+
+        # Pass through any remaining parameters
+        for key, value in params.items():
+            if value is not None:
+                mapped_params[key] = value
+
+        return mapped_voice, mapped_params
+
+    def validate_environment(
+        self,
+        headers: dict,
+        model: str,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+    ) -> dict:
+        """
+        Validate MiniMax environment and set up authentication headers
+        """
+        api_key = api_key or litellm.api_key or get_secret_str("MINIMAX_API_KEY")
+
+        if api_key is None:
+            raise ValueError(
+                "MiniMax API key is required. Set MINIMAX_API_KEY environment variable or pass api_key parameter."
+            )
+
+        headers.update(
+            {
+                "Authorization": f"Bearer {api_key}",
+                "Content-Type": "application/json",
+            }
+        )
+
+        return headers
+
+    def get_error_class(
+        self, error_message: str, status_code: int, headers: Union[dict, Headers]
+    ) -> BaseLLMException:
+        return MinimaxException(
+            message=error_message, status_code=status_code, headers=headers
+        )
+
+    def transform_text_to_speech_request(
+        self,
+        model: str,
+        input: str,
+        voice: Optional[str],
+        optional_params: Dict,
+        litellm_params: Dict,
+        headers: dict,
+    ) -> TextToSpeechRequestData:
+        """
+        Build the MiniMax TTS request payload.
+
+        MiniMax uses a different structure than OpenAI:
+        - model: The TTS model to use
+        - text: The input text
+        - voice_setting: Voice configuration
+        - audio_setting: Audio output configuration
+        """
+        params = dict(optional_params) if optional_params else {}
+
+        # Extract parameters
+        voice_id = params.pop("voice_id", voice or "male-qn-qingse")
+        speed = params.pop("speed", 1.0)
+        audio_format = params.pop("format", "mp3")
+
+        # Extract additional voice settings
+        vol = params.pop("vol", 1.0)  # Volume (0.1 to 10)
+        pitch = params.pop("pitch", 0)  # Pitch adjustment (-12 to 12)
+
+        # Extract audio settings
+        sample_rate = params.pop("sample_rate", 32000)  # 16000, 24000, 32000
+        bitrate = params.pop(
+            "bitrate", 128000
+        )  # For MP3: 64000, 128000, 192000, 256000
+        channel = params.pop("channel", 1)  # 1 for mono, 2 for stereo
+
+        # Output format: 'url' or 'hex' (default is 'hex')
+        output_format = params.pop("output_format", "hex")
+
+        request_body: Dict[str, Any] = {
+            "model": model,
+            "text": input,
+            "stream": False,  # HTTP endpoint doesn't support streaming
+            "output_format": output_format,  # 'url' or 'hex'
+            "voice_setting": {
+                "voice_id": voice_id,
+                "speed": speed,
+                "vol": vol,
+                "pitch": pitch,
+            },
+            "audio_setting": {
+                "sample_rate": sample_rate,
+                "bitrate": bitrate,
+                "format": audio_format,
+                "channel": channel,
+            },
+        }
+
+        # Handle any remaining parameters from extra_body
+        extra_body = params.pop("extra_body", None)
+        if isinstance(extra_body, dict):
+            for key, value in extra_body.items():
+                if value is not None and key not in request_body:
+                    request_body[key] = value
+
+        return TextToSpeechRequestData(
+            dict_body=request_body,
+            headers={"Content-Type": "application/json"},
+        )
+
+    def transform_text_to_speech_response(
+        self,
+        model: str,
+        raw_response: httpx.Response,
+        logging_obj: LiteLLMLoggingObj,
+    ) -> "HttpxBinaryResponseContent":
+        """
+        Transform MiniMax response to standard format.
+
+        MiniMax returns JSON with base64-encoded audio data:
+        {
+            "base_resp": {"status_code": 0, "status_msg": "success"},
+            "audio_file": "<base64_encoded_audio>",
+            "extra_info": {...}
+        }
+
+        We need to decode the base64 audio and return it as binary content.
+        """
+        import base64
+        import json
+
+        from litellm.types.llms.openai import HttpxBinaryResponseContent
+
+        try:
+            # Parse JSON response
+            response_json = raw_response.json()
+
+            # MiniMax API response format check
+            # The API can return different structures:
+            # 1. {"data": {"audio": "..."}, "status": 0, ...} for HTTP endpoint
+            # 2. {"base_resp": {"status_code": 0, ...}, "audio_file": "..."} for older versions
+
+            # Check for errors - MiniMax uses "status" field in HTTP endpoint response
+            # status: 0 = success, 2 = invalid api key, etc.
+            status = response_json.get("status")
+            if status is not None and status != 0:
+                ced = response_json.get("ced", "Unknown error")
+                error_detail = ced if ced else f"API returned status {status}"
+                raise MinimaxException(
+                    status_code=raw_response.status_code,
+                    message=f"MiniMax TTS error: {error_detail}",
+                    headers=dict(raw_response.headers),
+                )
+
+            # Extract audio data
+            # MiniMax returns audio in "data" field
+            data = response_json.get("data", {})
+
+            # Check if response contains a URL (output_format='url')
+            audio_url = data.get("audio_url", None)
+            if audio_url:
+                # If URL format is used, we need to fetch the audio from the URL
+                # For now, return a response indicating URL mode (TODO: fetch audio from URL)
+                raise MinimaxException(
+                    status_code=500,
+                    message=f"URL output format is not yet supported. Use 'hex' format or fetch from URL: {audio_url}",
+                    headers=dict(raw_response.headers),
+                )
+
+            # Get hex-encoded audio data
+            audio_hex = data.get("audio", "") or response_json.get("audio_file", "")
+
+            if not audio_hex:
+                raise MinimaxException(
+                    status_code=500,
+                    message=f"No audio data in MiniMax response. Response keys: {list(response_json.keys())}",
+                    headers=dict(raw_response.headers),
+                )
+
+            # MiniMax returns hex-encoded audio by default
+            # Try hex decoding first, fall back to base64 if that fails
+            try:
+                audio_bytes = bytes.fromhex(audio_hex)
+            except ValueError:
+                # If hex decoding fails, try base64 (for older API versions)
+                try:
+                    audio_bytes = base64.b64decode(audio_hex)
+                except Exception as e:
+                    raise MinimaxException(
+                        status_code=500,
+                        message=f"Failed to decode audio data: {str(e)}",
+                        headers=dict(raw_response.headers),
+                    )
+
+            # Create a new response with binary audio content
+            # We need to create a response that contains the decoded audio bytes
+            # Remove gzip encoding headers to avoid decompression issues
+            clean_headers = dict(raw_response.headers)
+            clean_headers.pop("content-encoding", None)
+            clean_headers.pop("transfer-encoding", None)
+            clean_headers["content-length"] = str(len(audio_bytes))
+
+            # Create a new response object with the binary content
+            binary_response = httpx.Response(
+                status_code=200,
+                headers=clean_headers,
+                content=audio_bytes,
+                request=raw_response.request,
+            )
+
+            return HttpxBinaryResponseContent(binary_response)
+
+        except json.JSONDecodeError as e:
+            raise MinimaxException(
+                status_code=500,
+                message=f"Failed to parse MiniMax response: {str(e)}",
+                headers=dict(raw_response.headers),
+            )
+        except Exception as e:
+            if isinstance(e, MinimaxException):
+                raise
+            raise MinimaxException(
+                status_code=500,
+                message=f"Error processing MiniMax response: {str(e)}",
+                headers=dict(raw_response.headers),
+            )
+
+    def get_complete_url(
+        self,
+        model: str,
+        api_base: Optional[str],
+        litellm_params: dict,
+    ) -> str:
+        """
+        Construct the MiniMax endpoint URL.
+        """
+        base_url = api_base or get_secret_str("MINIMAX_API_BASE") or self.TTS_BASE_URL
+        base_url = base_url.rstrip("/")
+
+        # MiniMax uses a simple endpoint path
+        url = f"{base_url}{self.TTS_ENDPOINT_PATH}"
+
+        return url