chore: initial public snapshot for github upload

2026-03-26 20:06:14 +08:00
commit 0e5ecd930e
3497 changed files with 1586236 additions and 0 deletions
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/llms/openai/speech/guardrail_translation/README.md
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/llms/openai/speech/guardrail_translation/README.md
@@ -0,0 +1,178 @@
+# OpenAI Text-to-Speech Guardrail Translation Handler
+
+Handler for processing OpenAI's text-to-speech endpoint (`/v1/audio/speech`) with guardrails.
+
+## Overview
+
+This handler processes text-to-speech requests by:
+1. Extracting the input text from the request
+2. Applying guardrails to the input text
+3. Updating the request with the guardrailed text
+4. Returning the output unchanged (audio is binary, not text)
+
+## Data Format
+
+### Input Format
+
+```json
+{
+  "model": "tts-1",
+  "input": "The quick brown fox jumped over the lazy dog.",
+  "voice": "alloy",
+  "response_format": "mp3",
+  "speed": 1.0
+}
+```
+
+### Output Format
+
+The output is binary audio data (MP3, WAV, etc.), not text, so it cannot be guardrailed.
+
+## Usage
+
+The handler is automatically discovered and applied when guardrails are used with the text-to-speech endpoint.
+
+### Example: Using Guardrails with Text-to-Speech
+
+```bash
+curl -X POST 'http://localhost:4000/v1/audio/speech' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer your-api-key' \
+-d '{
+    "model": "tts-1",
+    "input": "The quick brown fox jumped over the lazy dog.",
+    "voice": "alloy",
+    "guardrails": ["content_moderation"]
+}' \
+--output speech.mp3
+```
+
+The guardrail will be applied to the input text before the text-to-speech conversion.
+
+### Example: PII Masking in TTS Input
+
+```bash
+curl -X POST 'http://localhost:4000/v1/audio/speech' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer your-api-key' \
+-d '{
+    "model": "tts-1",
+    "input": "Please call John Doe at john@example.com",
+    "voice": "nova",
+    "guardrails": ["mask_pii"]
+}' \
+--output speech.mp3
+```
+
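+With a guardrail that redacts names and email addresses, the audio will say something like: "Please call [NAME_REDACTED] at [EMAIL_REDACTED]" (the exact placeholders depend on the guardrail).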
+
+### Example: Content Filtering Before TTS
+
+```bash
+curl -X POST 'http://localhost:4000/v1/audio/speech' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer your-api-key' \
+-d '{
+    "model": "tts-1-hd",
+    "input": "This is the text that will be spoken",
+    "voice": "shimmer",
+    "guardrails": ["content_filter"]
+}' \
+--output speech.mp3
+```
+
+## Implementation Details
+
+### Input Processing
+
+- **Field**: `input` (string)
+- **Processing**: Applies guardrail to input text
+- **Result**: Updated input text in request
+- **Missing or non-string `input`**: Request is returned unchanged and no guardrail is applied
+
+### Output Processing
+
+- **Processing**: Not applicable (audio is binary data)
+- **Result**: Response returned unchanged
+
+## Use Cases
+
+1. **PII Protection**: Remove personally identifiable information before converting to speech
+2. **Content Filtering**: Remove inappropriate content before TTS conversion
+3. **Compliance**: Ensure text meets requirements before voice synthesis
+4. **Text Sanitization**: Clean up text before audio generation
+
+## Extension
+
+Subclass `OpenAITextToSpeechHandler` and override these methods to customize behavior:
+
+- `process_input_messages()`: Customize how input text is processed
+- `process_output_response()`: Currently a no-op, but can be overridden if needed
+
+## Supported Call Types
+
+- `CallTypes.speech` - Synchronous text-to-speech
+- `CallTypes.aspeech` - Asynchronous text-to-speech
+
+## Notes
+
+- Only the input text is processed by guardrails
+- Output processing is a no-op since audio cannot be text-guardrailed
+- Both sync and async call types use the same handler
+- Works with all TTS models (tts-1, tts-1-hd, etc.)
+- Works with all voice options
+
+## Common Patterns
+
+### Remove PII Before TTS
+
+```python
+import litellm
+from pathlib import Path
+
+speech_file_path = Path(__file__).parent / "speech.mp3"
+response = litellm.speech(
+    model="tts-1",
+    voice="alloy",
+    input="Hi, this is John Doe calling from john@company.com",
+    guardrails=["mask_pii"],
+)
+response.stream_to_file(speech_file_path)
+# Audio will have PII masked
+```
+
+### Content Moderation Before TTS
+
+```python
+import litellm
+from pathlib import Path
+
+speech_file_path = Path(__file__).parent / "speech.mp3"
+response = litellm.speech(
+    model="tts-1-hd",
+    voice="nova",
+    input="Your text here",
+    guardrails=["content_moderation"],
+)
+response.stream_to_file(speech_file_path)
+```
+
+### Async TTS with Guardrails
+
+```python
+import litellm
+import asyncio
+from pathlib import Path
+
+async def generate_speech():
+    speech_file_path = Path(__file__).parent / "speech.mp3"
+    response = await litellm.aspeech(
+        model="tts-1",
+        voice="echo",
+        input="Text to convert to speech",
+        guardrails=["mask_pii"],
+    )
+    response.stream_to_file(speech_file_path)
+
+asyncio.run(generate_speech())
+```
+
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/llms/openai/speech/guardrail_translation/init.py
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/llms/openai/speech/guardrail_translation/init.py
@@ -0,0 +1,13 @@
+"""OpenAI Text-to-Speech handler for Unified Guardrails."""
+
+from litellm.llms.openai.speech.guardrail_translation.handler import (
+    OpenAITextToSpeechHandler,
+)
+from litellm.types.utils import CallTypes
+
+guardrail_translation_mappings = {
+    CallTypes.speech: OpenAITextToSpeechHandler,
+    CallTypes.aspeech: OpenAITextToSpeechHandler,
+}
+
+__all__ = ["guardrail_translation_mappings", "OpenAITextToSpeechHandler"]
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/llms/openai/speech/guardrail_translation/handler.py
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/llms/openai/speech/guardrail_translation/handler.py
@@ -0,0 +1,108 @@
+"""
+OpenAI Text-to-Speech Handler for Unified Guardrails
+
+This module provides guardrail translation support for OpenAI's text-to-speech endpoint.
+The handler processes the 'input' text parameter (output is audio, so no text to guardrail).
+"""
+
+from typing import TYPE_CHECKING, Any, Optional
+
+from litellm._logging import verbose_proxy_logger
+from litellm.llms.base_llm.guardrail_translation.base_translation import BaseTranslation
+from litellm.types.utils import GenericGuardrailAPIInputs
+
+if TYPE_CHECKING:
+    from litellm.integrations.custom_guardrail import CustomGuardrail
+    from litellm.types.llms.openai import HttpxBinaryResponseContent
+
+
+class OpenAITextToSpeechHandler(BaseTranslation):
+    """
+    Handler for processing OpenAI text-to-speech requests with guardrails.
+
+    This class provides methods to:
+    1. Process input text (pre-call hook)
+
+    Note: Output processing is not applicable since the output is audio (binary),
+    not text. Only the input text is processed.
+    """
+
+    async def process_input_messages(
+        self,
+        data: dict,
+        guardrail_to_apply: "CustomGuardrail",
+        litellm_logging_obj: Optional[Any] = None,
+    ) -> Any:
+        """
+        Process input text by applying guardrails.
+
+        Args:
+            data: Request data dictionary containing 'input' parameter
+            guardrail_to_apply: The guardrail instance to apply
+
+        Returns:
+            Modified data with guardrails applied to input text
+        """
+        input_text = data.get("input")
+        if input_text is None:
+            verbose_proxy_logger.debug(
+                "OpenAI Text-to-Speech: No input text found in request data"
+            )
+            return data
+
+        if isinstance(input_text, str):
+            inputs = GenericGuardrailAPIInputs(texts=[input_text])
+            # Include model information if available (voice model)
+            model = data.get("model")
+            if model:
+                inputs["model"] = model
+            guardrailed_inputs = await guardrail_to_apply.apply_guardrail(
+                inputs=inputs,
+                request_data=data,
+                input_type="request",
+                logging_obj=litellm_logging_obj,
+            )
+            guardrailed_texts = guardrailed_inputs.get("texts", [])
+            data["input"] = guardrailed_texts[0] if guardrailed_texts else input_text
+
+            verbose_proxy_logger.debug(
+                "OpenAI Text-to-Speech: Applied guardrail to input text. "
+                "Original length: %d, New length: %d",
+                len(input_text),
+                len(data["input"]),
+            )
+        else:
+            verbose_proxy_logger.debug(
+                "OpenAI Text-to-Speech: Unexpected input type: %s. Expected string.",
+                type(input_text),
+            )
+
+        return data
+
+    async def process_output_response(
+        self,
+        response: "HttpxBinaryResponseContent",
+        guardrail_to_apply: "CustomGuardrail",
+        litellm_logging_obj: Optional[Any] = None,
+        user_api_key_dict: Optional[Any] = None,
+    ) -> Any:
+        """
+        Process output - not applicable for text-to-speech.
+
+        The output is audio (binary data), not text, so there's nothing to apply
+        guardrails to. This method returns the response unchanged.
+
+        Args:
+            response: Binary audio response
+            guardrail_to_apply: The guardrail instance (unused)
+            litellm_logging_obj: Optional logging object (unused)
+            user_api_key_dict: User API key metadata (unused)
+
+        Returns:
+            Unmodified response (audio data doesn't need text guardrails)
+        """
+        verbose_proxy_logger.debug(
+            "OpenAI Text-to-Speech: Output processing not applicable "
+            "(output is audio data, not text)"
+        )
+        return response