chore: initial snapshot for gitea/github upload

2026-03-26 16:04:46 +08:00
commit a699a1ac98
3497 changed files with 1586237 additions and 0 deletions
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/llms/sap/chat/transformation.py
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/llms/sap/chat/transformation.py
@@ -0,0 +1,351 @@
+"""
+Translate from OpenAI's `/v1/chat/completions` to SAP Generative AI Hub's Orchestration Service `/v2/completion`
+"""
+from typing import (
+    List,
+    Optional,
+    Union,
+    Dict,
+    Tuple,
+    Any,
+    TYPE_CHECKING,
+    Iterator,
+    AsyncIterator,
+)
+from functools import cached_property
+import litellm
+import httpx
+
+
+from litellm.types.llms.openai import AllMessageValues
+from litellm.types.utils import ModelResponse
+
+from ...openai.chat.gpt_transformation import OpenAIGPTConfig
+
+if TYPE_CHECKING:
+    from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
+
+    LiteLLMLoggingObj = _LiteLLMLoggingObj
+else:
+    LiteLLMLoggingObj = Any
+
+from ..credentials import get_token_creator
+from .models import (
+    SAPMessage,
+    SAPAssistantMessage,
+    SAPToolChatMessage,
+    ChatCompletionTool,
+    ResponseFormatJSONSchema,
+    ResponseFormat,
+    SAPUserMessage,
+)
+from .handler import (
+    GenAIHubOrchestrationError,
+    AsyncSAPStreamIterator,
+    SAPStreamIterator,
+)
+
+
+def validate_dict(data: dict, model) -> dict:
+    """Validate ``data`` against ``model`` and return it as a plain dict.
+
+    ``model`` is called with ``data`` unpacked as keyword arguments, and the
+    resulting object's ``model_dump(by_alias=True)`` output is returned.
+    """
+    validated = model(**data)
+    return validated.model_dump(by_alias=True)
+
+
+class GenAIHubOrchestrationConfig(OpenAIGPTConfig):
+    """Translate OpenAI-style chat completion calls for SAP's orchestration service.
+
+    Builds the ``/v2/completion`` request payload, resolves the deployment URL
+    and request headers, and converts the service response into a
+    ``ModelResponse``. The token creator, base URL and resource group are
+    resolved lazily through ``get_token_creator`` the first time one of them
+    is needed.
+    """
+
+    frequency_penalty: Optional[int] = None
+    function_call: Optional[Union[str, dict]] = None
+    functions: Optional[list] = None
+    logit_bias: Optional[dict] = None
+    max_tokens: Optional[int] = None
+    n: Optional[int] = None
+    presence_penalty: Optional[int] = None
+    stop: Optional[Union[str, list]] = None
+    temperature: Optional[int] = None
+    top_p: Optional[int] = None
+    response_format: Optional[dict] = None
+    tools: Optional[list] = None
+    tool_choice: Optional[Union[str, dict]] = None
+    # Never forwarded as a model param: ``transform_request`` excludes it and
+    # sends it as the model "version" instead.
+    model_version: str = "latest"
+
+    def __init__(
+        self,
+        frequency_penalty: Optional[int] = None,
+        function_call: Optional[Union[str, dict]] = None,
+        functions: Optional[list] = None,
+        logit_bias: Optional[dict] = None,
+        max_tokens: Optional[int] = None,
+        n: Optional[int] = None,
+        presence_penalty: Optional[int] = None,
+        stop: Optional[Union[str, list]] = None,
+        temperature: Optional[int] = None,
+        top_p: Optional[int] = None,
+        response_format: Optional[dict] = None,
+        tools: Optional[list] = None,
+        tool_choice: Optional[Union[str, dict]] = None,
+    ) -> None:
+        """Record every non-``None`` argument and reset the lazy credential state.
+
+        NOTE: the values are set on the class (``self.__class__``), not on the
+        instance, so they are visible to every instance of this config.
+        """
+        # Must stay the first statement: ``locals()`` has to contain only the
+        # call arguments (plus ``self``).
+        locals_ = locals().copy()
+        for key, value in locals_.items():
+            if key != "self" and value is not None:
+                setattr(self.__class__, key, value)
+        self.token_creator = None
+        self._base_url = None
+        self._resource_group = None
+
+    def run_env_setup(self, service_key: Optional[str] = None) -> None:
+        """Resolve the token creator, base URL and resource group.
+
+        Args:
+            service_key: Optional service key forwarded to ``get_token_creator``.
+
+        Raises:
+            GenAIHubOrchestrationError: With status 400 when
+                ``get_token_creator`` raises ``ValueError``.
+        """
+        try:
+            self.token_creator, self._base_url, self._resource_group = get_token_creator(service_key)  # type: ignore
+        except ValueError as err:
+            # str(err) is safe even when the ValueError carries no arguments.
+            raise GenAIHubOrchestrationError(status_code=400, message=str(err)) from err
+
+    @property
+    def headers(self) -> Dict[str, str]:
+        """Return the request headers; every access calls the token creator."""
+        if self.token_creator is None:
+            self.run_env_setup()
+        access_token = self.token_creator()  # type: ignore
+        return {
+            "Authorization": access_token,
+            "AI-Resource-Group": self.resource_group,
+            "Content-Type": "application/json",
+            "AI-Client-Type": "LiteLLM",
+        }
+
+    @property
+    def base_url(self) -> str:
+        """Return the base URL, running the environment setup on first use."""
+        if self._base_url is None:
+            self.run_env_setup()
+        return self._base_url  # type: ignore
+
+    @property
+    def resource_group(self) -> str:
+        """Return the resource group, running the environment setup on first use."""
+        if self._resource_group is None:
+            self.run_env_setup()
+        return self._resource_group  # type: ignore
+
+    @cached_property
+    def deployment_url(self) -> str:
+        """Return the URL of the most recently created orchestration deployment.
+
+        Lists ``{base_url}/lm/deployments`` and keeps the deployments whose
+        ``scenarioId`` is ``"orchestration"`` and whose configuration reports
+        ``executableId == "orchestration"``. The deployment with the greatest
+        ``createdAt`` wins. The result is cached on the instance.
+
+        Raises:
+            GenAIHubOrchestrationError: With status 404 when no deployment
+                matches.
+        """
+        client = litellm.module_level_client
+        # Resolve once: each ``self.headers`` access invokes the token creator.
+        base_url = self.base_url
+        headers = self.headers
+        deployments = client.get(f"{base_url}/lm/deployments", headers=headers).json()
+        valid: List[Tuple[str, str]] = []
+        for dep in deployments.get("resources", []):
+            if dep.get("scenarioId") != "orchestration":
+                continue
+            cfg = client.get(
+                f'{base_url}/lm/configurations/{dep["configurationId"]}',
+                headers=headers,
+            ).json()
+            if cfg.get("executableId") == "orchestration":
+                valid.append((dep["deploymentUrl"], dep["createdAt"]))
+        if not valid:
+            raise GenAIHubOrchestrationError(
+                status_code=404,
+                message=(
+                    "No deployment with scenarioId/executableId 'orchestration' "
+                    f"was found at {base_url}/lm/deployments"
+                ),
+            )
+        # Newest deployment wins.
+        return max(valid, key=lambda item: item[1])[0]
+
+    @classmethod
+    def get_config(cls):
+        """Delegate to the parent class's ``get_config``."""
+        return super().get_config()
+
+    def get_supported_openai_params(self, model):
+        """Return the OpenAI parameter names accepted for ``model``.
+
+        ``response_format`` is dropped for ``amazon*``, ``cohere*``,
+        ``alephalpha*`` and exactly ``gpt-4``; ``tool_choice`` is dropped for
+        ``gemini*`` and ``amazon*``.
+        """
+        params = [
+            "frequency_penalty",
+            "logit_bias",
+            "logprobs",
+            "top_logprobs",
+            "max_tokens",
+            "max_completion_tokens",
+            "prediction",
+            "n",
+            "presence_penalty",
+            "seed",
+            "stop",
+            "stream",
+            "stream_options",
+            "temperature",
+            "top_p",
+            "tools",
+            "tool_choice",
+            "function_call",
+            "functions",
+            "extra_headers",
+            "parallel_tool_calls",
+            "response_format",
+            "timeout",
+        ]
+        # Remove response_format for providers that don't support it on SAP GenAI Hub
+        if model.startswith(("amazon", "cohere", "alephalpha")) or model == "gpt-4":
+            params.remove("response_format")
+        if model.startswith(("gemini", "amazon")):
+            params.remove("tool_choice")
+        return params
+
+    def validate_environment(
+        self,
+        headers: dict,
+        model: str,
+        messages: List[AllMessageValues],
+        optional_params: dict,
+        litellm_params: dict,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+    ) -> dict:
+        """Return the request headers, re-running the setup when ``api_key`` is given.
+
+        A truthy ``api_key`` is passed to ``run_env_setup`` as the service key.
+        The incoming ``headers`` argument is not used.
+        """
+        if api_key:
+            self.run_env_setup(api_key)
+        return self.headers
+
+    def get_complete_url(
+        self,
+        api_base: Optional[str],
+        api_key: Optional[str],
+        model: str,
+        optional_params: dict,
+        litellm_params: dict,
+        stream: Optional[bool] = None,
+    ):
+        """Return the ``/v2/completion`` endpoint of the resolved deployment."""
+        return f"{self.deployment_url}/v2/completion"
+
+    def transform_request(
+        self,
+        model: str,
+        messages: List[Dict[str, str]],  # type: ignore
+        optional_params: dict,
+        litellm_params: dict,
+        headers: dict,
+    ) -> dict:
+        """Build the orchestration ``config`` payload for a chat completion.
+
+        Args:
+            model: Model name placed in the payload; names starting with
+                ``gpt`` additionally have ``strict`` removed from the params.
+            messages: Chat messages; each is validated with the SAP message
+                model matching its ``role``.
+            optional_params: Request parameters. This dict is only read, never
+                modified.
+            litellm_params: Unused.
+            headers: Unused.
+
+        Returns:
+            A dict of the form ``{"config": {"modules": ..., "stream": ...}}``.
+        """
+        # Filter out parameters that are not valid model params for SAP Orchestration API
+        # - tools, model_version, deployment_url: handled separately
+        excluded_params = {"tools", "model_version", "deployment_url"}
+
+        # Filter strict for GPT models only - SAP AI Core doesn't accept it as a model param
+        # LangChain agents pass strict=true at top level, which fails for GPT models
+        # Anthropic models accept strict, so preserve it for them
+        if model.startswith("gpt"):
+            excluded_params.add("strict")
+
+        model_params = {
+            k: v for k, v in optional_params.items() if k not in excluded_params
+        }
+
+        # Read with .get() so the caller's optional_params stays intact.
+        model_version = optional_params.get("model_version", "latest")
+
+        role_models = {
+            "user": SAPUserMessage,
+            "assistant": SAPAssistantMessage,
+            "tool": SAPToolChatMessage,
+        }
+        template = [
+            validate_dict(message, role_models.get(message["role"], SAPMessage))
+            for message in messages
+        ]
+
+        # ``tools`` may be missing or None; both mean "no tools".
+        tools_ = [
+            validate_dict(tool, ChatCompletionTool)
+            for tool in optional_params.get("tools") or []
+        ]
+        tools = {"tools": tools_} if tools_ else {}
+
+        # Only a response_format carrying a "type" is forwarded; anything else
+        # (missing, None, or a dict without "type") adds nothing to the prompt.
+        response_format = model_params.pop("response_format", None) or {}
+        prompt_response_format = {}
+        resp_type = response_format.get("type", None)
+        if resp_type:
+            schema_model = (
+                ResponseFormatJSONSchema if resp_type == "json_schema" else ResponseFormat
+            )
+            prompt_response_format = {
+                "response_format": validate_dict(response_format, schema_model)
+            }
+
+        # "stream" itself is not a model param for the orchestration service.
+        model_params.pop("stream", False)
+        stream_config: Dict[str, Any] = {}
+        if "stream_options" in model_params:
+            stream_options = model_params.pop("stream_options") or {}
+            stream_config["chunk_size"] = stream_options.get("chunk_size", 100)
+            if "delimiters" in stream_options:
+                stream_config["delimiters"] = stream_options.get("delimiters")
+
+        return {
+            "config": {
+                "modules": {
+                    "prompt_templating": {
+                        "prompt": {
+                            "template": template,
+                            **tools,
+                            **prompt_response_format,
+                        },
+                        "model": {
+                            "name": model,
+                            "params": model_params,
+                            "version": model_version,
+                        },
+                    },
+                },
+                "stream": stream_config,
+            }
+        }
+
+    def transform_response(
+        self,
+        model: str,
+        raw_response: httpx.Response,
+        model_response: ModelResponse,
+        logging_obj: LiteLLMLoggingObj,
+        request_data: dict,
+        messages: List[AllMessageValues],
+        optional_params: dict,
+        litellm_params: dict,
+        encoding: Any,
+        api_key: Optional[str] = None,
+        json_mode: Optional[bool] = None,
+    ) -> ModelResponse:
+        """Convert the orchestration response into a ``ModelResponse``.
+
+        Logs the raw response through ``logging_obj.post_call``, validates the
+        ``final_result`` object of the response body, and strips a markdown
+        code fence from the content when a JSON ``response_format`` was
+        requested for a model whose name starts with ``anthropic``.
+        """
+        logging_obj.post_call(
+            input=messages,
+            api_key=api_key,
+            original_response=raw_response.text,
+            additional_args={"complete_input_dict": request_data},
+        )
+        response = ModelResponse.model_validate(raw_response.json()["final_result"])
+
+        # Strip markdown code blocks if JSON response_format was used with Anthropic models
+        # SAP GenAI Hub with Anthropic models sometimes wraps JSON in ```json ... ```
+        # based on prompt phrasing. GPT/Gemini models don't exhibit this behavior,
+        # so we gate the stripping to avoid accidentally modifying valid responses.
+        response_format = optional_params.get("response_format")
+        # A missing, None or non-dict response_format simply disables stripping.
+        format_type = (
+            response_format.get("type") if isinstance(response_format, dict) else None
+        )
+        if format_type in ("json_object", "json_schema") and model.startswith(
+            "anthropic"
+        ):
+            response = self._strip_markdown_json(response)
+
+        return response
+
+    def _strip_markdown_json(self, response: ModelResponse) -> ModelResponse:
+        """Strip markdown code block wrapper from JSON content if present.
+
+        SAP GenAI Hub with Anthropic models sometimes returns JSON wrapped in
+        markdown code blocks (```json ... ```) depending on prompt phrasing.
+        This method strips that wrapper to ensure consistent JSON output.
+        The choices are modified in place and the same ``response`` is returned.
+        """
+        import re
+
+        for choice in response.choices or []:
+            if choice.message and choice.message.content:
+                content = choice.message.content.strip()
+                # Match ```json ... ``` or ``` ... ```
+                match = re.match(r"^```(?:json)?\s*\n?(.*?)\n?```$", content, re.DOTALL)
+                if match:
+                    choice.message.content = match.group(1).strip()
+
+        return response
+
+    def get_model_response_iterator(
+        self,
+        streaming_response: Union[Iterator[str], AsyncIterator[str], "ModelResponse"],
+        sync_stream: bool,
+        json_mode: Optional[bool] = False,
+    ):
+        """Wrap ``streaming_response`` in the sync or async SAP stream iterator."""
+        if sync_stream:
+            return SAPStreamIterator(response=streaming_response)  # type: ignore
+        return AsyncSAPStreamIterator(response=streaming_response)  # type: ignore