chore: initial snapshot for gitea/github upload

This commit is contained in:
Your Name
2026-03-26 16:04:46 +08:00
commit a699a1ac98
3497 changed files with 1586237 additions and 0 deletions

View File

@@ -0,0 +1,260 @@
from __future__ import annotations
import json
import time
from typing import AsyncIterator, Iterator, Optional
import httpx
from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
from litellm.types.llms.openai import OpenAIChatCompletionChunk
from ...custom_httpx.llm_http_handler import BaseLLMHTTPHandler
# -------------------------------
# Errors
# -------------------------------
class GenAIHubOrchestrationError(BaseLLMException):
    """Exception for SAP GenAI Hub Orchestration Service failures.

    Carries the HTTP status code and message so callers can map the failure
    back to an API-style error response.
    """

    def __init__(self, status_code: int, message: str):
        super().__init__(status_code=status_code, message=message)
        # Keep explicit local copies; the base class presumably also stores
        # them, but these guarantee the attributes exist on this type.
        self.status_code = status_code
        self.message = message
# -------------------------------
# Stream parsing helpers
# -------------------------------
def _now_ts() -> int:
return int(time.time())
def _is_terminal_chunk(chunk: OpenAIChatCompletionChunk) -> bool:
"""OpenAI-shaped chunk is terminal if any choice has a non-None finish_reason."""
try:
for ch in chunk.choices or []:
if ch.finish_reason is not None:
return True
except Exception:
pass
return False
class _StreamParser:
"""Normalize orchestration streaming events into OpenAI-like chunks."""
@staticmethod
def _from_orchestration_result(evt: dict) -> Optional[OpenAIChatCompletionChunk]:
"""
Accepts orchestration_result shape and maps it to an OpenAI-like *chunk*.
"""
orc = evt.get("orchestration_result") or {}
if not orc:
return None
return OpenAIChatCompletionChunk.model_validate(
{
"id": orc.get("id") or evt.get("request_id") or "stream-chunk",
"object": orc.get("object") or "chat.completion.chunk",
"created": orc.get("created") or evt.get("created") or _now_ts(),
"model": orc.get("model") or "unknown",
"choices": [
{
"index": c.get("index", 0),
"delta": c.get("delta") or {},
"finish_reason": c.get("finish_reason"),
}
for c in (orc.get("choices") or [])
],
}
)
@staticmethod
def to_openai_chunk(event_obj: dict) -> Optional[OpenAIChatCompletionChunk]:
"""
Accepts:
- {"final_result": <openai-style CHUNK>} (IMPORTANT: this is just another chunk, NOT terminal)
- {"orchestration_result": {...}} (map to chunk)
- already-openai-shaped chunks
- other events (ignored)
Raises:
- ValueError for in-stream error objects
"""
# In-stream error per spec (surface as exception)
if "code" in event_obj or "error" in event_obj:
raise ValueError(json.dumps(event_obj))
# FINAL RESULT IS *NOT* TERMINAL: treat it as the next chunk
if "final_result" in event_obj:
fr = event_obj["final_result"] or {}
# ensure it looks like an OpenAI chunk
if "object" not in fr:
fr["object"] = "chat.completion.chunk"
return OpenAIChatCompletionChunk.model_validate(fr)
# Orchestration incremental delta
if "orchestration_result" in event_obj:
return _StreamParser._from_orchestration_result(event_obj)
# Already an OpenAI-like chunk
if "choices" in event_obj and "object" in event_obj:
return OpenAIChatCompletionChunk.model_validate(event_obj)
# Unknown / heartbeat / metrics
return None
# -------------------------------
# Iterators
# -------------------------------
class SAPStreamIterator:
    """Synchronous iterator over a streamed response, yielding
    OpenAIChatCompletionChunk objects.

    Accepts both SSE-framed (`data: ...`) and raw JSON lines. Iteration ends
    on the `[DONE]` sentinel, after a terminal chunk, or when the underlying
    line iterator is exhausted.
    """

    def __init__(
        self,
        response: Iterator,
        event_prefix: str = "data: ",
        final_msg: str = "[DONE]",
    ):
        self._resp = response
        self._iter = response
        self._prefix = event_prefix
        self._final = final_msg
        self._done = False

    def __iter__(self) -> "Iterator[OpenAIChatCompletionChunk]":
        return self

    def __next__(self) -> "OpenAIChatCompletionChunk":
        if self._done:
            raise StopIteration
        while True:
            try:
                raw = next(self._iter)
            except StopIteration:
                # Underlying stream exhausted without a sentinel.
                self._safe_close()
                raise
            line = (raw or "").strip()
            if not line:
                continue
            # Strip the SSE prefix when present; otherwise take the line raw.
            payload = (
                line[len(self._prefix):] if line.startswith(self._prefix) else line
            )
            if payload == self._final:
                self._safe_close()
                raise StopIteration
            try:
                event = json.loads(payload)
            except Exception:
                # Ignore non-JSON noise between events.
                continue
            try:
                chunk = _StreamParser.to_openai_chunk(event)
            except ValueError:
                # In-stream error object: stop the stream, surface the error.
                self._safe_close()
                raise
            if chunk is None:
                continue
            if _is_terminal_chunk(chunk):
                # Mark finished before handing back the terminal chunk.
                self._safe_close()
            return chunk

    def _safe_close(self) -> None:
        # Idempotent: flip the flag so subsequent __next__ calls stop at once.
        self._done = True
class AsyncSAPStreamIterator:
    """Async counterpart of SAPStreamIterator.

    Wraps an async line iterator, understands SSE `data: ...` framing and raw
    JSON lines, and yields OpenAI-style chunks. The stream finishes on the
    `[DONE]` sentinel, after a terminal chunk, or when the transport ends.
    """

    sync_stream = False

    def __init__(
        self,
        response: AsyncIterator,
        event_prefix: str = "data: ",
        final_msg: str = "[DONE]",
    ):
        self._resp = response
        self._prefix = event_prefix
        self._final = final_msg
        self._line_iter = None
        self._done = False

    def __aiter__(self):
        return self

    async def __anext__(self):
        if self._done:
            raise StopAsyncIteration
        if self._line_iter is None:
            # Bind lazily so construction never touches the transport.
            self._line_iter = self._resp
        while True:
            try:
                raw = await self._line_iter.__anext__()
            except (StopAsyncIteration, httpx.ReadError, OSError):
                # Transport errors are treated like a normal end of stream.
                await self._aclose()
                raise StopAsyncIteration
            line = (raw or "").strip()
            if not line:
                continue
            payload = (
                line[len(self._prefix):] if line.startswith(self._prefix) else line
            )
            if payload == self._final:
                await self._aclose()
                raise StopAsyncIteration
            try:
                event = json.loads(payload)
            except Exception:
                # Skip non-JSON noise (comments, partial frames).
                continue
            try:
                chunk = _StreamParser.to_openai_chunk(event)
            except ValueError as err:
                await self._aclose()
                raise GenAIHubOrchestrationError(502, str(err))
            if chunk is None:
                continue
            # Close BEFORE returning a terminal chunk; the next __anext__()
            # then stops immediately.
            if any(c.finish_reason is not None for c in (chunk.choices or [])):
                await self._aclose()
            return chunk

    async def _aclose(self):
        # Idempotent shutdown: just flip the done flag.
        self._done = True
# -------------------------------
# LLM handler
# -------------------------------
class GenAIHubOrchestration(BaseLLMHTTPHandler):
    """HTTP handler for SAP GenAI Hub's Orchestration Service."""

    def _add_stream_param_to_request_body(
        self, data: dict, provider_config: BaseConfig, fake_stream: bool
    ):
        """Force streaming on in the orchestration request body.

        Ensures ``data["config"]["stream"]["enabled"]`` is True, creating the
        "config" and "stream" sub-dicts when absent instead of assuming they
        exist (the previous else-branch raised KeyError when "config" was
        missing from ``data``).

        Returns the mutated ``data`` dict.
        """
        config = data.setdefault("config", {})
        stream_cfg = config.get("stream")
        if stream_cfg is not None:
            stream_cfg["enabled"] = True
        else:
            config["stream"] = {"enabled": True}
        return data

View File

@@ -0,0 +1,130 @@
from typing import Union, Literal
from pydantic import BaseModel, Field, field_validator
def validate_different_content(v: Union[str, dict, list]) -> str:
    """Coerce OpenAI-style message `content` into a plain string.

    Accepts:
      - empty containers ((), {}, []): returned as ""
      - {"text": ...} dicts: the "text" value
      - lists of text parts (dicts with "text", or plain strings): joined with
        newlines, skipping falsy dict texts
      - plain strings: returned unchanged

    Raises:
        ValueError: for any other content shape.
    """
    if v in ((), {}, []):
        return ""
    elif isinstance(v, dict) and "text" in v:
        return v["text"]
    elif isinstance(v, list):
        parts = []
        for item in v:
            if isinstance(item, dict) and "text" in item:
                if item["text"]:
                    parts.append(item["text"])
            elif isinstance(item, str):
                parts.append(item)
        return "\n".join(parts)
    elif isinstance(v, str):
        return v
    # NOTE: a trailing `return v` after this raise was unreachable; removed.
    raise ValueError("Content must be a string")
class TextContent(BaseModel):
    # OpenAI-style "text" content part; `type` is serialized via the alias.
    type_: Literal["text"] = Field(default="text", alias="type")
    text: str
class ImageURLContent(BaseModel):
    # Image reference for multimodal user messages.
    url: str
    detail: str = "auto"  # OpenAI image-detail hint; "auto" by default
class ImageContent(BaseModel):
    # OpenAI-style "image_url" content part wrapping an ImageURLContent.
    type_: Literal["image_url"] = Field(default="image_url", alias="type")
    image_url: ImageURLContent
class FunctionObj(BaseModel):
    # A tool/function invocation: function name plus JSON-encoded arguments.
    name: str
    arguments: str
class FunctionTool(BaseModel):
    """Function declaration for the SAP Orchestration tools payload."""

    description: str = ""
    name: str
    parameters: dict = {"type": "object", "properties": {}}
    strict: bool = False

    @field_validator("parameters", mode="before")
    @classmethod
    def ensure_object_type(cls, v: dict) -> dict:
        """Ensure parameters has type='object' as required by SAP Orchestration Service.

        Operates on a shallow copy so the caller's dict is never mutated
        (previously ``v["properties"] = {}`` wrote into the input dict when
        "type" was already present).
        """
        if not v:
            return {"type": "object", "properties": {}}
        if "type" not in v:
            v = {"type": "object", **v}
        else:
            # Copy before mutating below so we don't alter the caller's dict.
            v = dict(v)
        if "properties" not in v:
            v["properties"] = {}
        return v
class ChatCompletionTool(BaseModel):
    # OpenAI tool wrapper: always type="function" with the function spec inside.
    type_: Literal["function"] = Field(default="function", alias="type")
    function: FunctionTool
class MessageToolCall(BaseModel):
    # A tool call recorded on an assistant message.
    id: str
    type_: Literal["function"] = Field(default="function", alias="type")
    function: FunctionObj
class SAPMessage(BaseModel):
    """
    Model for SystemChatMessage and DeveloperChatMessage.

    `content` is normalized to a plain string by `validate_different_content`.
    """

    role: Literal["system", "developer"] = "system"
    content: str
    _content_validator = field_validator("content", mode="before")(
        validate_different_content
    )
class SAPUserMessage(BaseModel):
    # User message; content may be a plain string, a single content part, or
    # a list of text/image parts (passed through without normalization).
    role: Literal["user"] = "user"
    content: Union[
        str, TextContent, ImageContent, list[Union[TextContent, ImageContent]]
    ]
class SAPAssistantMessage(BaseModel):
    # Assistant message; content is normalized to a string. NOTE: pydantic
    # deep-copies mutable field defaults, so the [] default is safe here.
    role: Literal["assistant"] = "assistant"
    content: str = ""
    refusal: str = ""
    tool_calls: list[MessageToolCall] = []
    _content_validator = field_validator("content", mode="before")(
        validate_different_content
    )
class SAPToolChatMessage(BaseModel):
    # Tool-result message tied back to its originating call via tool_call_id.
    role: Literal["tool"] = "tool"
    tool_call_id: str
    content: str
    _content_validator = field_validator("content", mode="before")(
        validate_different_content
    )
class ResponseFormat(BaseModel):
    # Plain response-format selector: "text" (default) or "json_object".
    type_: Literal["text", "json_object"] = Field(default="text", alias="type")
class JSONResponseSchema(BaseModel):
    # JSON-schema payload for structured output; `schema_` is aliased to
    # "schema" to avoid shadowing BaseModel's own attribute.
    description: str = ""
    name: str
    schema_: dict = Field(default_factory=dict, alias="schema")
    strict: bool = False
class ResponseFormatJSONSchema(BaseModel):
    # response_format variant carrying a JSON schema for structured output.
    type_: Literal["json_schema"] = Field(default="json_schema", alias="type")
    json_schema: JSONResponseSchema

View File

@@ -0,0 +1,351 @@
"""
Translate from OpenAI's `/v1/chat/completions` to SAP Generative AI Hub's Orchestration Service`v2/completion`
"""
from typing import (
List,
Optional,
Union,
Dict,
Tuple,
Any,
TYPE_CHECKING,
Iterator,
AsyncIterator,
)
from functools import cached_property
import litellm
import httpx
from litellm.types.llms.openai import AllMessageValues
from litellm.types.utils import ModelResponse
from ...openai.chat.gpt_transformation import OpenAIGPTConfig
if TYPE_CHECKING:
from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
LiteLLMLoggingObj = _LiteLLMLoggingObj
else:
LiteLLMLoggingObj = Any
from ..credentials import get_token_creator
from .models import (
SAPMessage,
SAPAssistantMessage,
SAPToolChatMessage,
ChatCompletionTool,
ResponseFormatJSONSchema,
ResponseFormat,
SAPUserMessage,
)
from .handler import (
GenAIHubOrchestrationError,
AsyncSAPStreamIterator,
SAPStreamIterator,
)
def validate_dict(data: dict, model) -> dict:
    """Validate `data` against the given pydantic model class and dump it
    back to a dict using field aliases."""
    validated = model(**data)
    return validated.model_dump(by_alias=True)
class GenAIHubOrchestrationConfig(OpenAIGPTConfig):
    """OpenAI `/v1/chat/completions` to SAP GenAI Hub Orchestration Service
    `v2/completion` request/response translator."""

    # Supported OpenAI-style parameters; populated onto the class by __init__
    # following the litellm provider-config convention.
    frequency_penalty: Optional[int] = None
    function_call: Optional[Union[str, dict]] = None
    functions: Optional[list] = None
    logit_bias: Optional[dict] = None
    max_tokens: Optional[int] = None
    n: Optional[int] = None
    presence_penalty: Optional[int] = None
    stop: Optional[Union[str, list]] = None
    temperature: Optional[int] = None
    top_p: Optional[int] = None
    response_format: Optional[dict] = None
    tools: Optional[list] = None
    tool_choice: Optional[Union[str, dict]] = None
    # SAP model version selector; "latest" unless overridden per request.
    model_version: str = "latest"
    def __init__(
        self,
        frequency_penalty: Optional[int] = None,
        function_call: Optional[Union[str, dict]] = None,
        functions: Optional[list] = None,
        logit_bias: Optional[dict] = None,
        max_tokens: Optional[int] = None,
        n: Optional[int] = None,
        presence_penalty: Optional[int] = None,
        stop: Optional[Union[str, list]] = None,
        temperature: Optional[int] = None,
        top_p: Optional[int] = None,
        response_format: Optional[dict] = None,
        tools: Optional[list] = None,
        tool_choice: Optional[Union[str, dict]] = None,
    ) -> None:
        """Record explicitly passed OpenAI params and defer credential setup.

        NOTE(review): non-None params are written to the *class*, not the
        instance (presumably the litellm provider-config convention), so all
        instances share these values — confirm this is intended.
        """
        locals_ = locals().copy()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)
        # Credentials are resolved lazily by run_env_setup() on first use.
        self.token_creator = None
        self._base_url = None
        self._resource_group = None
    def run_env_setup(self, service_key: Optional[str] = None) -> None:
        """Resolve SAP AI Core credentials: token creator, base URL, resource group.

        Args:
            service_key: optional service key; resolution (including any
                environment fallback) is handled by get_token_creator.

        Raises:
            GenAIHubOrchestrationError: status 400 when resolution fails.
        """
        try:
            self.token_creator, self._base_url, self._resource_group = get_token_creator(service_key)  # type: ignore
        except ValueError as err:
            raise GenAIHubOrchestrationError(status_code=400, message=err.args[0])
    @property
    def headers(self) -> Dict[str, str]:
        """Auth and routing headers for SAP AI Core requests.

        Calls token_creator() on every access; any token caching/refresh is
        presumably handled inside the credentials module — confirm there.
        """
        if self.token_creator is None:
            self.run_env_setup()
        access_token = self.token_creator()  # type: ignore
        return {
            "Authorization": access_token,
            "AI-Resource-Group": self.resource_group,
            "Content-Type": "application/json",
            "AI-Client-Type": "LiteLLM",
        }
    @property
    def base_url(self) -> str:
        """SAP AI Core API base URL; credentials are resolved lazily on first access."""
        if self._base_url is None:
            self.run_env_setup()
        return self._base_url  # type: ignore
    @property
    def resource_group(self) -> str:
        """AI-Resource-Group header value; resolved lazily on first access."""
        if self._resource_group is None:
            self.run_env_setup()
        return self._resource_group  # type: ignore
@cached_property
def deployment_url(self) -> str:
# Keep a short, tight client lifecycle here to avoid fd leaks
client = litellm.module_level_client
# with httpx.Client(timeout=30) as client:
deployments = client.get(
f"{self.base_url}/lm/deployments", headers=self.headers
).json()
valid: List[Tuple[str, str]] = []
for dep in deployments.get("resources", []):
if dep.get("scenarioId") == "orchestration":
cfg = client.get(
f'{self.base_url}/lm/configurations/{dep["configurationId"]}',
headers=self.headers,
).json()
if cfg.get("executableId") == "orchestration":
valid.append((dep["deploymentUrl"], dep["createdAt"]))
# newest first
return sorted(valid, key=lambda x: x[1], reverse=True)[0][0]
    @classmethod
    def get_config(cls):
        """Delegate to the OpenAIGPTConfig implementation."""
        return super().get_config()
def get_supported_openai_params(self, model):
params = [
"frequency_penalty",
"logit_bias",
"logprobs",
"top_logprobs",
"max_tokens",
"max_completion_tokens",
"prediction",
"n",
"presence_penalty",
"seed",
"stop",
"stream",
"stream_options",
"temperature",
"top_p",
"tools",
"tool_choice",
"function_call",
"functions",
"extra_headers",
"parallel_tool_calls",
"response_format",
"timeout",
]
# Remove response_format for providers that don't support it on SAP GenAI Hub
if (
model.startswith("amazon")
or model.startswith("cohere")
or model.startswith("alephalpha")
or model == "gpt-4"
):
params.remove("response_format")
if model.startswith("gemini") or model.startswith("amazon"):
params.remove("tool_choice")
return params
    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        """Build request headers; `api_key` is treated as an SAP service key
        and triggers credential resolution when provided."""
        if api_key:
            self.run_env_setup(api_key)
        return self.headers
def get_complete_url(
self,
api_base: Optional[str],
api_key: Optional[str],
model: str,
optional_params: dict,
litellm_params: dict,
stream: Optional[bool] = None,
):
api_base_ = f"{self.deployment_url}/v2/completion"
return api_base_
    def transform_request(
        self,
        model: str,
        messages: List[Dict[str, str]],  # type: ignore
        optional_params: dict,
        litellm_params: dict,
        headers: dict,
    ) -> dict:
        """Translate an OpenAI chat-completions request into the SAP
        Orchestration Service `v2/completion` body.

        Returns a {"config": {...}} dict with prompt templating, model params,
        tools, response_format, and stream chunking options filled in.
        """
        # Filter out parameters that are not valid model params for SAP Orchestration API
        # - tools, model_version, deployment_url: handled separately
        excluded_params = {"tools", "model_version", "deployment_url"}
        # Filter strict for GPT models only - SAP AI Core doesn't accept it as a model param
        # LangChain agents pass strict=true at top level, which fails for GPT models
        # Anthropic models accept strict, so preserve it for them
        if model.startswith("gpt"):
            excluded_params.add("strict")
        model_params = {
            k: v for k, v in optional_params.items() if k not in excluded_params
        }
        model_version = optional_params.pop("model_version", "latest")
        # Map each OpenAI message onto its SAP message model; validation also
        # normalizes content into plain strings where required.
        template = []
        for message in messages:
            if message["role"] == "user":
                template.append(validate_dict(message, SAPUserMessage))
            elif message["role"] == "assistant":
                template.append(validate_dict(message, SAPAssistantMessage))
            elif message["role"] == "tool":
                template.append(validate_dict(message, SAPToolChatMessage))
            else:
                # system / developer messages
                template.append(validate_dict(message, SAPMessage))
        tools_ = optional_params.pop("tools", [])
        tools_ = [validate_dict(tool, ChatCompletionTool) for tool in tools_]
        if tools_ != []:
            tools = {"tools": tools_}
        else:
            tools = {}
        # json_schema gets its dedicated model; other typed formats use the
        # plain ResponseFormat model. NOTE(review): a response_format dict
        # without a "type" key is spread into the prompt unchanged — confirm
        # that is intended.
        response_format = model_params.pop("response_format", {})
        resp_type = response_format.get("type", None)
        if resp_type:
            if resp_type == "json_schema":
                response_format = validate_dict(
                    response_format, ResponseFormatJSONSchema
                )
            else:
                response_format = validate_dict(response_format, ResponseFormat)
            response_format = {"response_format": response_format}
        # The stream flag itself is toggled by the handler; only chunking
        # options are carried over into the stream config here.
        model_params.pop("stream", False)
        stream_config = {}
        if "stream_options" in model_params:
            stream_options = model_params.pop("stream_options", {})
            stream_config["chunk_size"] = stream_options.get("chunk_size", 100)
            if "delimiters" in stream_options:
                stream_config["delimiters"] = stream_options.get("delimiters")
        config = {
            "config": {
                "modules": {
                    "prompt_templating": {
                        "prompt": {"template": template, **tools, **response_format},
                        "model": {
                            "name": model,
                            "params": model_params,
                            "version": model_version,
                        },
                    },
                },
                "stream": stream_config,
            }
        }
        return config
    def transform_response(
        self,
        model: str,
        raw_response: httpx.Response,
        model_response: ModelResponse,
        logging_obj: LiteLLMLoggingObj,
        request_data: dict,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        encoding: Any,
        api_key: Optional[str] = None,
        json_mode: Optional[bool] = None,
    ) -> ModelResponse:
        """Convert the raw orchestration HTTP response into a ModelResponse.

        The orchestration payload nests the OpenAI-shaped completion under
        "final_result"; that sub-object is validated directly.
        """
        logging_obj.post_call(
            input=messages,
            api_key=api_key,
            original_response=raw_response.text,
            additional_args={"complete_input_dict": request_data},
        )
        response = ModelResponse.model_validate(raw_response.json()["final_result"])
        # Strip markdown code blocks if JSON response_format was used with Anthropic models
        # SAP GenAI Hub with Anthropic models sometimes wraps JSON in ```json ... ```
        # based on prompt phrasing. GPT/Gemini models don't exhibit this behavior,
        # so we gate the stripping to avoid accidentally modifying valid responses.
        response_format = optional_params.get("response_format", {})
        if response_format.get("type") in ("json_object", "json_schema"):
            if model.startswith("anthropic"):
                response = self._strip_markdown_json(response)
        return response
def _strip_markdown_json(self, response: ModelResponse) -> ModelResponse:
"""Strip markdown code block wrapper from JSON content if present.
SAP GenAI Hub with Anthropic models sometimes returns JSON wrapped in
markdown code blocks (```json ... ```) depending on prompt phrasing.
This method strips that wrapper to ensure consistent JSON output.
"""
import re
for choice in response.choices or []:
if choice.message and choice.message.content:
content = choice.message.content.strip()
# Match ```json ... ``` or ``` ... ```
match = re.match(r"^```(?:json)?\s*\n?(.*?)\n?```$", content, re.DOTALL)
if match:
choice.message.content = match.group(1).strip()
return response
def get_model_response_iterator(
self,
streaming_response: Union[Iterator[str], AsyncIterator[str], "ModelResponse"],
sync_stream: bool,
json_mode: Optional[bool] = False,
):
if sync_stream:
return SAPStreamIterator(response=streaming_response) # type: ignore
else:
return AsyncSAPStreamIterator(response=streaming_response) # type: ignore