chore: initial public snapshot for github upload

This commit is contained in:
Your Name
2026-03-26 20:06:14 +08:00
commit 0e5ecd930e
3497 changed files with 1586236 additions and 0 deletions

View File

@@ -0,0 +1,66 @@
from typing import TYPE_CHECKING
from litellm.types.guardrails import SupportedGuardrailIntegrations
from .noma import NomaGuardrail
from .noma_v2 import NomaV2Guardrail
if TYPE_CHECKING:
from litellm.types.guardrails import Guardrail, LitellmParams
def initialize_guardrail(litellm_params: "LitellmParams", guardrail: "Guardrail"):
    """Build and register the legacy Noma guardrail callback.

    Dispatches to the v2 initializer when the ``use_v2`` param is truthy;
    the flag may arrive either as a bool or as the string "true"/"false".
    """
    v2_flag = getattr(litellm_params, "use_v2", False)
    v2_enabled = v2_flag.lower() == "true" if isinstance(v2_flag, str) else v2_flag
    if v2_enabled:
        return initialize_guardrail_v2(
            litellm_params=litellm_params, guardrail=guardrail
        )

    import litellm

    callback = NomaGuardrail(
        guardrail_name=guardrail.get("guardrail_name", ""),
        api_key=litellm_params.api_key,
        api_base=litellm_params.api_base,
        application_id=litellm_params.application_id,
        monitor_mode=litellm_params.monitor_mode,
        block_failures=litellm_params.block_failures,
        anonymize_input=litellm_params.anonymize_input,
        event_hook=litellm_params.mode,
        default_on=litellm_params.default_on,
    )
    litellm.logging_callback_manager.add_litellm_callback(callback)
    return callback
def initialize_guardrail_v2(litellm_params: "LitellmParams", guardrail: "Guardrail"):
    """Build the Noma v2 guardrail callback and register it with litellm."""
    import litellm

    callback = NomaV2Guardrail(
        guardrail_name=guardrail.get("guardrail_name", ""),
        api_key=litellm_params.api_key,
        api_base=litellm_params.api_base,
        application_id=litellm_params.application_id,
        monitor_mode=litellm_params.monitor_mode,
        block_failures=litellm_params.block_failures,
        event_hook=litellm_params.mode,
        default_on=litellm_params.default_on,
    )
    litellm.logging_callback_manager.add_litellm_callback(callback)
    return callback
# Maps integration name -> initializer the proxy calls at startup to build
# and register the guardrail callback.
guardrail_initializer_registry = {
    SupportedGuardrailIntegrations.NOMA.value: initialize_guardrail,
    SupportedGuardrailIntegrations.NOMA_V2.value: initialize_guardrail_v2,
}

# Maps integration name -> guardrail class (presumably used where the class
# itself is needed without the registration side effects — verify against callers).
guardrail_class_registry = {
    SupportedGuardrailIntegrations.NOMA.value: NomaGuardrail,
    SupportedGuardrailIntegrations.NOMA_V2.value: NomaV2Guardrail,
}

View File

@@ -0,0 +1,914 @@
# +-------------------------------------------------------------+
#
# Noma Security Guardrail Integration for LiteLLM
# https://noma.security
#
# +-------------------------------------------------------------+
import asyncio
import json
import os
import warnings
from datetime import datetime
from typing import (
TYPE_CHECKING,
Any,
AsyncGenerator,
Dict,
Final,
List,
Literal,
Optional,
Type,
Union,
)
from urllib.parse import urljoin
from fastapi import HTTPException
import litellm
from litellm import DualCache, ModelResponse
from litellm._logging import verbose_proxy_logger
from litellm.completion_extras.litellm_responses_transformation.transformation import (
LiteLLMResponsesTransformationHandler,
)
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.llms.base_llm.base_model_iterator import MockResponseIterator
from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client,
httpxSpecialProvider,
)
from litellm.main import stream_chunk_builder
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.guardrails import GuardrailEventHooks
from litellm.types.utils import (
CallTypes,
CallTypesLiteral,
EmbeddingResponse,
GuardrailStatus,
ImageResponse,
ModelResponseStream,
TextCompletionResponse,
)
# Constants
USER_ROLE: Final[Literal["user"]] = "user"
ASSISTANT_ROLE: Final[Literal["assistant"]] = "assistant"
# Classification keys that represent sensitive-data findings (PII/PCI/secrets);
# these may be anonymized instead of blocking the request.
SENSITIVE_DATA_DETECTOR_KEYS: Final[list[str]] = ["sensitiveData", "dataDetector"]

# Type aliases
MessageRole = Literal["user", "assistant"]
LLMResponse = Union[Any, ModelResponse, EmbeddingResponse, ImageResponse]

# Module-level flag so the legacy-API deprecation warning fires only once per
# process, no matter how many NomaGuardrail instances are constructed.
_LEGACY_NOMA_DEPRECATION_WARNED = False

if TYPE_CHECKING:
    from litellm.types.proxy.guardrails.guardrail_hooks.base import GuardrailConfigModel
class NomaBlockedMessage(HTTPException):
    """HTTP 400 raised when the Noma guardrail blocks a message.

    The raw classification response is carried in the exception detail so
    callers can surface why the content was rejected.
    """

    def __init__(self, classification_response: dict):
        error_detail = {
            "error": "Request blocked by Noma guardrail",
            "details": classification_response,
        }
        super().__init__(status_code=400, detail=error_detail)
    def _is_result_true(self, result_obj: Optional[Dict[str, Any]]) -> bool:
        """
        Check if a result object has a "result" field that is True.

        Args:
            result_obj: A dictionary that may contain a "result" field

        Returns:
            True if the "result" field exists and is True, False otherwise
        """
        # NOTE(review): this duplicates NomaGuardrail._is_result_true below —
        # consider consolidating into a single module-level helper.
        if not result_obj or not isinstance(result_obj, dict):
            return False
        return result_obj.get("result") is True
class NomaGuardrail(CustomGuardrail):
    """
    Noma Security Guardrail for LiteLLM

    This guardrail integrates with Noma Security's AI-DR API to provide
    content moderation and safety checks for LLM inputs and outputs.
    """

    # Default SaaS endpoint, used when neither api_base nor NOMA_API_BASE is set.
    _DEFAULT_API_BASE = "https://api.noma.security/"
    # AI-DR prompt-scan endpoint path, joined onto api_base by _call_noma_api.
    _AIDR_ENDPOINT = "/ai-dr/v2/prompt/scan"
    def __init__(
        self,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        application_id: Optional[str] = None,
        monitor_mode: Optional[bool] = None,
        block_failures: Optional[bool] = None,
        anonymize_input: Optional[bool] = None,
        **kwargs,
    ):
        """Configure the legacy Noma guardrail.

        Args:
            api_key: Noma API key; falls back to env NOMA_API_KEY.
            api_base: API base URL; falls back to env NOMA_API_BASE, then the
                Noma SaaS default.
            application_id: Application id sent with scans; falls back to env
                NOMA_APPLICATION_ID.
            monitor_mode: When True, scans only log and never block traffic;
                falls back to env NOMA_MONITOR_MODE (default False).
            block_failures: When True, technical scan failures abort the
                request; falls back to env NOMA_BLOCK_FAILURES (default True).
            anonymize_input: When True, sensitive-data findings are replaced
                with anonymized text instead of blocking; falls back to env
                NOMA_ANONYMIZE_INPUT (default False).
            **kwargs: Forwarded to CustomGuardrail.
        """
        global _LEGACY_NOMA_DEPRECATION_WARNED
        # Emit the deprecation warning only once per process.
        if not _LEGACY_NOMA_DEPRECATION_WARNED:
            warnings.warn(
                "Guardrail provider 'noma' is deprecated. "
                "Please migrate to 'noma_v2'. "
                "The legacy 'noma' API will no longer be supported after March 31, 2026.",
                DeprecationWarning,
                stacklevel=2,
            )
            _LEGACY_NOMA_DEPRECATION_WARNED = True
        self.async_handler = get_async_httpx_client(
            llm_provider=httpxSpecialProvider.GuardrailCallback
        )
        self._responses_transform_handler = LiteLLMResponsesTransformationHandler()
        self.api_key = api_key or os.environ.get("NOMA_API_KEY")
        self.api_base = api_base or os.environ.get(
            "NOMA_API_BASE", NomaGuardrail._DEFAULT_API_BASE
        )
        self.application_id = application_id or os.environ.get("NOMA_APPLICATION_ID")
        # Last-resort application id when nothing else resolves (_call_noma_api).
        self.default_application_id = "litellm"
        # Env-var fallbacks follow the "true"/"false" string convention.
        if monitor_mode is None:
            self.monitor_mode = (
                os.environ.get("NOMA_MONITOR_MODE", "false").lower() == "true"
            )
        else:
            self.monitor_mode = monitor_mode
        if block_failures is None:
            self.block_failures = (
                os.environ.get("NOMA_BLOCK_FAILURES", "true").lower() == "true"
            )
        else:
            self.block_failures = block_failures
        if anonymize_input is None:
            self.anonymize_input = (
                os.environ.get("NOMA_ANONYMIZE_INPUT", "false").lower() == "true"
            )
        else:
            self.anonymize_input = anonymize_input
        super().__init__(**kwargs)
def _create_background_noma_check(
self,
coro,
) -> None:
"""Create a background task for Noma API calls without blocking the main flow"""
try:
asyncio.create_task(coro)
except Exception as e:
verbose_proxy_logger.error(
f"Failed to create background Noma task: {str(e)}"
)
    async def _process_user_message_check(
        self,
        request_data: dict,
        user_auth: UserAPIKeyAuth,
        event_type: Optional[GuardrailEventHooks] = None,
    ) -> Optional[str]:
        """Shared logic for processing user message checks.

        Converts the chat messages into Responses-API input items, scans them
        via the Noma API, records guardrail telemetry, and then either logs
        only (monitor mode), anonymizes the user message in-place, or raises
        NomaBlockedMessage via _check_verdict on an unsafe verdict.

        Returns:
            The scanned input serialized as JSON (or the anonymized text),
            or None when there is nothing to scan.
        """
        start_time = datetime.now()
        extra_data = self.get_guardrail_dynamic_request_body_params(request_data)
        messages = request_data.get("messages") or []
        if not messages:
            return None
        input_items, instructions = self._responses_transform_handler.convert_chat_completion_messages_to_responses_api(  # type: ignore[arg-type]
            messages
        )
        # System instructions come back separately; re-attach them as the
        # first input item so they are scanned as well.
        if instructions:
            system_message = {
                "type": "message",
                "role": "system",
                "content": [
                    {"type": "input_text", "text": instructions},
                ],
            }
            input_items.insert(0, system_message)
        if not input_items:
            return None
        payload = {"input": input_items}
        response_json = await self._call_noma_api(
            payload=payload,
            llm_request_id=None,
            request_data=request_data,
            user_auth=user_auth,
            extra_data=extra_data,
        )
        end_time = datetime.now()
        duration = (end_time - start_time).total_seconds()
        # Determine guardrail status based on response
        guardrail_status = self._determine_guardrail_status(response_json)
        # Always log guardrail information for consistency
        self.add_standard_logging_guardrail_information_to_request_data(
            guardrail_provider="noma",
            guardrail_json_response=response_json,
            request_data=request_data,
            guardrail_status=guardrail_status,
            start_time=start_time.timestamp(),
            end_time=end_time.timestamp(),
            duration=duration,
            event_type=event_type,
        )
        # Monitor mode: log the verdict only; never block or rewrite.
        if self.monitor_mode:
            await self._handle_verdict_background(
                USER_ROLE, json.dumps(input_items), response_json
            )
            return json.dumps(input_items)
        # Check if we should anonymize content
        if self._should_anonymize(response_json, USER_ROLE):
            anonymized_content = self._extract_anonymized_content(
                response_json, USER_ROLE
            )
            if anonymized_content:
                # Replace the user message content with anonymized version
                self._replace_user_message_content(request_data, anonymized_content)
                verbose_proxy_logger.debug(
                    f"Noma guardrail anonymized user message: {anonymized_content}"
                )
                return anonymized_content
        # Raises NomaBlockedMessage when the scan marked the input unsafe.
        await self._check_verdict(USER_ROLE, json.dumps(input_items), response_json)
        return json.dumps(input_items)
    async def _process_llm_response_check(
        self,
        request_data: dict,
        response: LLMResponse,
        user_auth: UserAPIKeyAuth,
        event_type: Optional[GuardrailEventHooks] = None,
    ) -> Optional[str]:
        """Shared logic for processing LLM response checks.

        Scans the first non-empty assistant message via the Noma API, records
        guardrail telemetry, and then either logs only (monitor mode),
        anonymizes the response in-place, or raises NomaBlockedMessage via
        _check_verdict on an unsafe verdict.

        Returns:
            The (possibly anonymized) response content, or None when the
            response carries no string content to scan.
        """
        start_time = datetime.now()
        extra_data = self.get_guardrail_dynamic_request_body_params(request_data)
        if not isinstance(response, litellm.ModelResponse):
            return None
        # Only the first choice with content is scanned.
        content = None
        for choice in response.choices:
            if isinstance(choice, litellm.Choices) and choice.message.content:
                content = choice.message.content
                break
        if not content or not isinstance(content, str):
            return None
        payload = {
            "input": [
                {
                    "type": "message",
                    "role": "assistant",
                    "content": [{"type": "input_text", "text": content}],
                }
            ]
        }
        response_json = await self._call_noma_api(
            payload=payload,
            llm_request_id=response.id,
            request_data=request_data,
            user_auth=user_auth,
            extra_data=extra_data,
        )
        end_time = datetime.now()
        duration = (end_time - start_time).total_seconds()
        # Determine guardrail status based on response
        guardrail_status = self._determine_guardrail_status(response_json)
        # Always log guardrail information for consistency
        self.add_standard_logging_guardrail_information_to_request_data(
            guardrail_provider="noma",
            guardrail_json_response=response_json,
            request_data=request_data,
            guardrail_status=guardrail_status,
            start_time=start_time.timestamp(),
            end_time=end_time.timestamp(),
            duration=duration,
            event_type=event_type,
        )
        # Monitor mode: log the verdict only; never block or rewrite.
        if self.monitor_mode:
            await self._handle_verdict_background(
                ASSISTANT_ROLE, json.dumps(content), response_json
            )
            return content
        # Check if we should anonymize content
        if self._should_anonymize(response_json, ASSISTANT_ROLE):
            anonymized_content = self._extract_anonymized_content(
                response_json, ASSISTANT_ROLE
            )
            if anonymized_content:
                # Replace the LLM response content with anonymized version
                self._replace_llm_response_content(response, anonymized_content)
                verbose_proxy_logger.debug(
                    f"Noma guardrail anonymized LLM response: {anonymized_content}"
                )
                return anonymized_content
        # Raises NomaBlockedMessage when the scan marked the output unsafe.
        await self._check_verdict(ASSISTANT_ROLE, content, response_json)
        return content
def _determine_guardrail_status(self, response_json: dict) -> GuardrailStatus:
"""
Determine the guardrail status based on NOMA API response.
Args:
response_json: Response from NOMA API
Returns:
"success": Content allowed through with no violations
"guardrail_intervened": Content blocked due to policy violations
"guardrail_failed_to_respond": Technical error or API failure
"""
try:
# Check if we got a valid response structure
if not isinstance(response_json, dict):
return "guardrail_failed_to_respond"
# Get the aggregatedScanResult from the response
# aggregatedScanResult=True means unsafe (block), False means safe (allow)
aggregated_scan_result = response_json.get("aggregatedScanResult", False)
# If aggregatedScanResult is False, content is safe/allowed
if aggregated_scan_result is False:
return "success"
# If aggregatedScanResult is True, content is blocked/flagged
if aggregated_scan_result is True:
return "guardrail_intervened"
# If aggregatedScanResult is missing or invalid, treat as failure
return "guardrail_failed_to_respond"
except Exception as e:
verbose_proxy_logger.error(
f"Error determining NOMA guardrail status: {str(e)}"
)
return "guardrail_failed_to_respond"
def _should_only_sensitive_data_failed(self, classification_obj: dict) -> bool:
"""
Check if only sensitive data detectors (PII, PCI, secrets) have result=true in the classification.
Args:
classification_obj: The prompt or response classification object from Noma API
Returns:
True if only sensitiveData detectors have result=true, False otherwise
"""
if not classification_obj:
return False
# Track which detectors have result=true (detected violations)
failed_detectors = []
sensitive_data_detected = False
for key, value in classification_obj.items():
if key in SENSITIVE_DATA_DETECTOR_KEYS and isinstance(value, dict):
# Check if any sensitive data detector has result=true
for data_type, data_result in value.items():
if self._is_result_true(data_result):
sensitive_data_detected = True
# Don't add to failed_detectors as we want to allow these
elif isinstance(value, dict) and "result" in value:
# Check other detectors - these should NOT have result=true
if self._is_result_true(value):
failed_detectors.append(key)
elif isinstance(value, dict):
# Handle nested detectors
for nested_key, nested_value in value.items():
if self._is_result_true(nested_value):
failed_detectors.append(f"{key}.{nested_key}")
# Return True only if sensitive data was detected AND no other detectors have result=true
return sensitive_data_detected and len(failed_detectors) == 0
def _extract_anonymized_content(
self, response_json: dict, message_type: MessageRole
) -> Optional[str]:
"""
Extract anonymized content from Noma API response.
Args:
response_json: The full response from Noma API
message_type: Either 'user' or 'assistant' to determine which content to extract
Returns:
The anonymized content string if available, None otherwise
"""
# Extract from new scanResult structure
scan_result = response_json.get("scanResult", [])
if not scan_result:
return None
# Find the scan result matching the message type (role)
for result_item in scan_result:
if result_item.get("role") == message_type:
return (
result_item.get("results", {})
.get("anonymizedContent", {})
.get("anonymized", "")
)
return None
def _should_anonymize(self, response_json: dict, message_type: MessageRole) -> bool:
"""
Determine if content should be anonymized based on Noma API response.
Logic:
- If aggregatedScanResult=False: Content is safe, anonymize if anonymized version exists
- If aggregatedScanResult=True: Check if only sensitiveData detectors have result=True
- If yes: Anonymize
- If no: Block (other violations detected)
Args:
response_json: The full response from Noma API
message_type: Either 'user' or 'assistant' to determine which classification to check
Returns:
True if content should be anonymized, False if it should be blocked
"""
# Only anonymize in blocking mode when anonymize_input is enabled
if self.monitor_mode or not self.anonymize_input:
return False
# aggregatedScanResult=False means safe, True means unsafe
aggregated_scan_result = response_json.get("aggregatedScanResult", False)
# If aggregatedScanResult is False, content is safe - anonymize if available
if not aggregated_scan_result:
return True
# If aggregatedScanResult is True (unsafe), check if only sensitive data detectors triggered
scan_result = response_json.get("scanResult", [])
if not scan_result:
return False
if not isinstance(scan_result, list) or len(scan_result) == 0:
return False
for result_item in scan_result:
if result_item.get("role") == message_type:
return self._should_only_sensitive_data_failed(
result_item.get("results", {})
)
return False
def _is_result_true(self, result_obj: Optional[Dict[str, Any]]) -> bool:
"""
Check if a result object has a "result" field that is True.
Args:
result_obj: A dictionary that may contain a "result" field
Returns:
True if the "result" field exists and is True, False otherwise
"""
if not result_obj or not isinstance(result_obj, dict):
return False
return result_obj.get("result") is True
def _replace_user_message_content(
self, request_data: dict, anonymized_content: str
):
"""
Replace the user message content in request data with anonymized version.
Args:
request_data: The original request data
anonymized_content: The anonymized content to replace with
"""
messages = request_data.get("messages", [])
if not messages:
return
# Find and replace the last user message
for i in range(len(messages) - 1, -1, -1):
if messages[i].get("role") == USER_ROLE:
messages[i]["content"] = anonymized_content
break
def _replace_llm_response_content(
self, response: LLMResponse, anonymized_content: str
):
"""
Replace the LLM response content with anonymized version.
Args:
response: The original LLM response
anonymized_content: The anonymized content to replace with
"""
if not isinstance(response, litellm.ModelResponse):
return
# Replace content in all choices
for choice in response.choices:
if isinstance(choice, litellm.Choices) and choice.message.content:
choice.message.content = anonymized_content
    async def _check_user_message_background(
        self,
        request_data: dict,
        user_auth: UserAPIKeyAuth,
    ) -> None:
        """Check user message in background for monitor mode - non-blocking.

        Any failure (including a block verdict raised by the check) is logged
        and swallowed so monitoring never affects live traffic.
        """
        try:
            await self._process_user_message_check(request_data, user_auth)
        except Exception as e:
            verbose_proxy_logger.error(
                f"Noma background user message check failed: {str(e)}"
            )
    async def _check_llm_response_background(
        self,
        request_data: dict,
        response: LLMResponse,
        user_auth: UserAPIKeyAuth,
    ) -> None:
        """Check LLM response in background for monitor mode - non-blocking.

        Any failure (including a block verdict raised by the check) is logged
        and swallowed so monitoring never affects live traffic.
        """
        try:
            await self._process_llm_response_check(request_data, response, user_auth)
        except Exception as e:
            verbose_proxy_logger.error(
                f"Noma background response check failed: {str(e)}"
            )
async def _handle_verdict_background(
self,
type: MessageRole,
message: str,
response_json: dict,
) -> None:
"""Handle aggregatedScanResult from Noma API in background - logging only, never blocks
aggregatedScanResult=True means unsafe, False means safe
"""
try:
# aggregatedScanResult=True means blocked, False means allowed
aggregated_scan_result = response_json.get("aggregatedScanResult", False)
if aggregated_scan_result: # True = unsafe
msg = f"Noma guardrail blocked {type} message: {message}"
verbose_proxy_logger.warning(msg)
else: # False = safe
msg = f"Noma guardrail allowed {type} message: {message}"
verbose_proxy_logger.info(msg)
except Exception as e:
verbose_proxy_logger.error(
f"Noma background verdict handling failed: {str(e)}"
)
async def async_pre_call_hook(
self,
user_api_key_dict: UserAPIKeyAuth,
cache: DualCache,
data: dict,
call_type: CallTypesLiteral,
) -> Optional[Union[Exception, str, dict]]:
verbose_proxy_logger.debug("Running Noma pre-call hook")
event_type = GuardrailEventHooks.pre_call
if call_type == CallTypes.call_mcp_tool.value:
event_type = GuardrailEventHooks.pre_mcp_call
if self.should_run_guardrail(data=data, event_type=event_type) is False:
return data
# In monitor mode, run Noma check in background and return immediately
if self.monitor_mode:
try:
self._create_background_noma_check(
self._check_user_message_background(data, user_api_key_dict)
)
except Exception as e:
verbose_proxy_logger.error(
f"Failed to start background Noma pre-call check: {str(e)}"
)
return data
try:
return await self._check_user_message(
data, user_api_key_dict, GuardrailEventHooks.pre_call
)
except NomaBlockedMessage:
# Blocked requests were already logged in _process_user_message_check with "blocked" status
raise
except Exception as e:
# Log technical failures
from datetime import datetime
start_time = datetime.now()
self.add_standard_logging_guardrail_information_to_request_data(
guardrail_provider="noma",
guardrail_json_response=str(e),
request_data=data,
guardrail_status="guardrail_failed_to_respond",
start_time=start_time.timestamp(),
end_time=start_time.timestamp(),
duration=0.0,
event_type=GuardrailEventHooks.pre_call,
)
verbose_proxy_logger.error(f"Noma pre-call hook failed: {str(e)}")
if self.block_failures:
raise
return data
async def async_moderation_hook(
self,
data: dict,
user_api_key_dict: UserAPIKeyAuth,
call_type: CallTypesLiteral,
) -> Union[Exception, str, dict, None]:
event_type: GuardrailEventHooks = GuardrailEventHooks.during_call
if call_type == CallTypes.call_mcp_tool.value:
event_type = GuardrailEventHooks.pre_mcp_call
if self.should_run_guardrail(data=data, event_type=event_type) is not True:
return data
# In monitor mode, run Noma check in background and return immediately
if self.monitor_mode:
try:
self._create_background_noma_check(
self._check_user_message_background(data, user_api_key_dict)
)
except Exception as e:
verbose_proxy_logger.error(
f"Failed to start background Noma moderation check: {str(e)}"
)
return data
try:
return await self._check_user_message(
data, user_api_key_dict, GuardrailEventHooks.during_call
)
except NomaBlockedMessage:
# Blocked requests were already logged in _process_user_message_check with "blocked" status
raise
except Exception as e:
# Log technical failures
from datetime import datetime
start_time = datetime.now()
self.add_standard_logging_guardrail_information_to_request_data(
guardrail_provider="noma",
guardrail_json_response=str(e),
request_data=data,
guardrail_status="guardrail_failed_to_respond",
start_time=start_time.timestamp(),
end_time=start_time.timestamp(),
duration=0.0,
event_type=GuardrailEventHooks.during_call,
)
verbose_proxy_logger.error(f"Noma moderation hook failed: {str(e)}")
if self.block_failures:
raise
return data
async def async_post_call_success_hook(
self,
data: dict,
user_api_key_dict: UserAPIKeyAuth,
response: LLMResponse,
):
event_type: GuardrailEventHooks = GuardrailEventHooks.post_call
if self.should_run_guardrail(data=data, event_type=event_type) is not True:
return response
# In monitor mode, run Noma check in background and return immediately
if self.monitor_mode:
try:
self._create_background_noma_check(
self._check_llm_response_background(
data, response, user_api_key_dict
)
)
except Exception as e:
verbose_proxy_logger.error(
f"Failed to start background Noma post-call check: {str(e)}"
)
return response
try:
return await self._check_llm_response(
data, response, user_api_key_dict, GuardrailEventHooks.post_call
)
except NomaBlockedMessage:
# Blocked requests were already logged in _process_llm_response_check with "blocked" status
raise
except Exception as e:
# Log technical failures
from datetime import datetime
start_time = datetime.now()
self.add_standard_logging_guardrail_information_to_request_data(
guardrail_provider="noma",
guardrail_json_response=str(e),
request_data=data,
guardrail_status="guardrail_failed_to_respond",
start_time=start_time.timestamp(),
end_time=start_time.timestamp(),
duration=0.0,
event_type=GuardrailEventHooks.post_call,
)
verbose_proxy_logger.error(f"Noma post-call hook failed: {str(e)}")
if self.block_failures:
raise
return response
async def _check_user_message(
self,
request_data: dict,
user_auth: UserAPIKeyAuth,
event_type: Optional[GuardrailEventHooks] = None,
) -> Union[Exception, str, dict, None]:
"""Check user message for policy violations"""
user_message = await self._process_user_message_check(
request_data, user_auth, event_type
)
if not user_message:
return request_data
return request_data
async def _check_llm_response(
self,
request_data: dict,
response: LLMResponse,
user_auth: UserAPIKeyAuth,
event_type: Optional[GuardrailEventHooks] = None,
) -> Any:
"""Check LLM response for policy violations"""
content = await self._process_llm_response_check(
request_data, response, user_auth, event_type
)
if not content:
return response
return response
    async def _call_noma_api(
        self,
        payload: dict,
        llm_request_id: Optional[str],
        request_data: dict,
        user_auth: UserAPIKeyAuth,
        extra_data: dict,
    ) -> dict:
        """POST *payload* to the Noma AI-DR scan endpoint and return parsed JSON.

        Args:
            payload: Scan body (the "input" items).
            llm_request_id: Provider response id, for response-side scans.
            request_data: The proxy request; supplies call id and metadata.
            user_auth: Authenticated key info used for caller identity.
            extra_data: Per-request guardrail params (may carry application_id).

        Raises:
            An HTTP status error via raise_for_status on non-2xx responses.
        """
        call_id = request_data.get("litellm_call_id")
        headers = {
            **({"Authorization": f"Bearer {self.api_key}"} if self.api_key else {}),
            **({"X-Noma-Request-ID": call_id} if call_id else {}),
        }
        endpoint = urljoin(
            self.api_base or "https://api.noma.security/", NomaGuardrail._AIDR_ENDPOINT
        )
        response = await self.async_handler.post(
            endpoint,
            headers=headers,
            json={
                **payload,
                # Caller context: application, identity and correlation ids.
                "x-noma-context": {
                    # Resolution order: per-request param > x-noma-application-id
                    # header > configured id > key alias > "litellm" default.
                    "applicationId": extra_data.get("application_id")
                    or request_data.get("metadata", {})
                    .get("headers", {})
                    .get("x-noma-application-id")
                    or self.application_id
                    or user_auth.key_alias
                    or self.default_application_id,
                    "ipAddress": request_data.get("metadata", {}).get(
                        "requester_ip_address", None
                    ),
                    # Prefer email over opaque user id when available.
                    "userId": (
                        user_auth.user_email
                        if user_auth.user_email
                        else user_auth.user_id
                    ),
                    "sessionId": call_id,
                    "requestId": llm_request_id,
                },
            },
        )
        response.raise_for_status()
        return response.json()
    async def _check_verdict(
        self,
        type: MessageRole,
        message: str,
        response_json: dict,
    ) -> None:
        """
        Check the aggregatedScanResult from the Noma API and raise an exception if needed.
        aggregatedScanResult=True means unsafe (block), False means safe (allow)

        Raises:
            NomaBlockedMessage: when the scan marks the message unsafe.
        """
        # aggregatedScanResult=True means blocked, False means allowed
        aggregated_scan_result = response_json.get("aggregatedScanResult", False)
        if aggregated_scan_result:  # True = unsafe, block it
            msg = f"Noma guardrail blocked {type} message: {message}"
            # NOTE(review): monitor-mode callers normally return before
            # reaching this method; the level split is kept as a safeguard.
            if self.monitor_mode:
                verbose_proxy_logger.warning(msg)
            else:
                verbose_proxy_logger.debug(msg)
            original_response = response_json.get("scanResult", {})
            # Use the full response as the original response for error details
            raise NomaBlockedMessage(original_response)
        else:  # False = safe, allow it
            msg = f"Noma guardrail allowed {type} message: {message}"
            if self.monitor_mode:
                verbose_proxy_logger.info(msg)
            else:
                verbose_proxy_logger.debug(msg)
    @staticmethod
    def get_config_model() -> Optional[Type["GuardrailConfigModel"]]:
        """Return the config model class describing this guardrail's settings.

        Imported locally — presumably to avoid an import cycle at module load.
        """
        from litellm.types.proxy.guardrails.guardrail_hooks.noma import (
            NomaGuardrailConfigModel,
        )

        return NomaGuardrailConfigModel
    async def async_post_call_streaming_iterator_hook(
        self,
        user_api_key_dict: UserAPIKeyAuth,
        response: Any,
        request_data: dict,
    ) -> AsyncGenerator[ModelResponseStream, None]:
        """Process streaming response chunks with Noma guardrail.

        Buffers the whole stream, assembles it into a single response, runs
        the post-call check on it, then re-emits either a mock stream built
        from the checked (possibly anonymized) response, or the original
        chunks when the check fails and block_failures is off.
        """
        all_chunks: List[ModelResponseStream] = []
        async for chunk in response:
            all_chunks.append(chunk)
        if not all_chunks:
            return

        assembled_model_response: Optional[
            Union[ModelResponse, TextCompletionResponse]
        ] = stream_chunk_builder(chunks=all_chunks)
        if isinstance(assembled_model_response, ModelResponse):
            try:
                processed_response = await self._check_llm_response(
                    request_data,
                    assembled_model_response,
                    user_api_key_dict,
                    GuardrailEventHooks.post_call,
                )
            except NomaBlockedMessage:
                raise
            except Exception as e:
                # Technical failure: honor block_failures, else fall back to
                # replaying the original, unchecked chunks.
                if self.block_failures:
                    raise
                verbose_proxy_logger.error(
                    f"Noma streaming post-call hook failed: {str(e)}"
                )
                for chunk in all_chunks:
                    yield chunk
                return
            # Re-stream the checked (possibly rewritten) response as chunks.
            mock_response = MockResponseIterator(model_response=processed_response)
            async for chunk in mock_response:
                yield chunk
            return
        # Non-ModelResponse assembly: pass the original chunks through untouched.
        for chunk in all_chunks:
            yield chunk

View File

@@ -0,0 +1,329 @@
# +-------------------------------------------------------------+
#
# Noma Security V2 Guardrail Integration for LiteLLM
#
# +-------------------------------------------------------------+
import enum
import json
import os
from copy import deepcopy
from datetime import datetime
from typing import TYPE_CHECKING, Any, Literal, Optional, Type, cast
from urllib.parse import urlparse
from litellm._logging import verbose_proxy_logger
from litellm.integrations.custom_guardrail import (
CustomGuardrail,
log_guardrail_information,
)
from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
from litellm.litellm_core_utils.safe_json_loads import safe_json_loads
from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client,
httpxSpecialProvider,
)
from litellm.proxy.guardrails.guardrail_hooks.noma.noma import NomaBlockedMessage
from litellm.types.guardrails import GuardrailEventHooks
from litellm.types.utils import GenericGuardrailAPIInputs, GuardrailStatus
if TYPE_CHECKING:
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.types.proxy.guardrails.guardrail_hooks.base import GuardrailConfigModel
# Default SaaS endpoint, used when neither api_base nor NOMA_API_BASE is set.
_DEFAULT_API_BASE = "https://api.noma.security/"
# Scan endpoint path appended to api_base.
_AIDR_SCAN_ENDPOINT = "/litellm/guardrail"
# Input fields the scan response may rewrite when the verdict is
# GUARDRAIL_INTERVENED (see _apply_action).
_INTERVENED_INPUT_FIELDS = ("texts", "images", "tools", "tool_calls")
# Hostname of the SaaS endpoint; requests to it require an API key.
_DEFAULT_API_BASE_HOSTNAME = urlparse(_DEFAULT_API_BASE).hostname


class _Action(str, enum.Enum):
    # Verdicts carried in the scan response's "action" field.
    BLOCKED = "BLOCKED"  # reject the request/response outright
    NONE = "NONE"  # no intervention required
    GUARDRAIL_INTERVENED = "GUARDRAIL_INTERVENED"  # inputs were rewritten
class NomaV2Guardrail(CustomGuardrail):
    """Noma Security v2 guardrail: scans requests/responses via the Noma API."""

    def __init__(
        self,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        application_id: Optional[str] = None,
        monitor_mode: Optional[bool] = None,
        block_failures: Optional[bool] = None,
        **kwargs: Any,
    ) -> None:
        """Configure the v2 guardrail.

        Args:
            api_key: Noma API key; falls back to env NOMA_API_KEY. Required
                when api_base targets the Noma SaaS host.
            api_base: API base URL; falls back to env NOMA_API_BASE, then the
                SaaS default. Trailing slashes are stripped.
            application_id: Application id; falls back to env NOMA_APPLICATION_ID.
            monitor_mode: Log-only mode; falls back to env NOMA_MONITOR_MODE
                (default False).
            block_failures: Abort on technical failures; falls back to env
                NOMA_BLOCK_FAILURES (default True).
            **kwargs: Forwarded to CustomGuardrail; supported_event_hooks is
                defaulted when absent.

        Raises:
            ValueError: when targeting the SaaS endpoint without an api_key.
        """
        self.async_handler = get_async_httpx_client(
            llm_provider=httpxSpecialProvider.GuardrailCallback
        )
        self.api_key = api_key or os.environ.get("NOMA_API_KEY")
        self.api_base = (
            api_base or os.environ.get("NOMA_API_BASE") or _DEFAULT_API_BASE
        ).rstrip("/")
        self.application_id = application_id or os.environ.get("NOMA_APPLICATION_ID")
        # Env-var fallbacks follow the "true"/"false" string convention.
        if monitor_mode is None:
            self.monitor_mode = (
                os.environ.get("NOMA_MONITOR_MODE", "false").lower() == "true"
            )
        else:
            self.monitor_mode = monitor_mode
        if block_failures is None:
            self.block_failures = (
                os.environ.get("NOMA_BLOCK_FAILURES", "true").lower() == "true"
            )
        else:
            self.block_failures = block_failures
        # The SaaS endpoint always needs credentials; fail fast at startup.
        if self._requires_api_key(api_base=self.api_base) and not self.api_key:
            raise ValueError(
                "Noma v2 guardrail requires api_key when using Noma SaaS endpoint"
            )
        if "supported_event_hooks" not in kwargs:
            kwargs["supported_event_hooks"] = [
                GuardrailEventHooks.pre_call,
                GuardrailEventHooks.during_call,
                GuardrailEventHooks.post_call,
                GuardrailEventHooks.pre_mcp_call,
                GuardrailEventHooks.during_mcp_call,
            ]
        super().__init__(**kwargs)
    @staticmethod
    def get_config_model() -> Optional[Type["GuardrailConfigModel"]]:
        """Return the config model class describing this guardrail's settings.

        Imported locally — presumably to avoid an import cycle at module load.
        """
        from litellm.types.proxy.guardrails.guardrail_hooks.noma import (
            NomaV2GuardrailConfigModel,
        )

        return NomaV2GuardrailConfigModel
def _get_authorization_header(self) -> str:
if not self.api_key:
return ""
return f"Bearer {self.api_key}"
@staticmethod
def _requires_api_key(api_base: str) -> bool:
parsed = urlparse(api_base)
return parsed.hostname == _DEFAULT_API_BASE_HOSTNAME
@staticmethod
def _get_non_empty_str(value: Any) -> Optional[str]:
if not isinstance(value, str):
return None
stripped = value.strip()
return stripped or None
def _resolve_action_from_response(
self,
response_json: dict,
) -> _Action:
action = response_json.get("action")
if isinstance(action, str):
try:
return _Action(action)
except ValueError:
pass
raise ValueError("Noma v2 response missing valid action")
def _build_scan_payload(
self,
inputs: GenericGuardrailAPIInputs,
request_data: dict,
input_type: Literal["request", "response"],
logging_obj: Optional["LiteLLMLoggingObj"],
application_id: Optional[str],
) -> dict:
payload_request_data = deepcopy(request_data)
if logging_obj is not None:
payload_request_data["litellm_logging_obj"] = getattr(
logging_obj, "model_call_details", None
)
payload: dict[str, Any] = {
"inputs": inputs,
"request_data": payload_request_data,
"input_type": input_type,
"monitor_mode": self.monitor_mode,
}
if application_id:
payload["application_id"] = application_id
return payload
@staticmethod
def _sanitize_payload_for_transport(payload: dict) -> dict:
def _default(obj: Any) -> Any:
if hasattr(obj, "model_dump"):
try:
return obj.model_dump()
except Exception:
pass
return str(obj)
try:
json_str = json.dumps(payload, default=_default)
except (ValueError, TypeError):
json_str = safe_dumps(payload)
safe_payload = safe_json_loads(json_str, default={})
if safe_payload == {} and payload:
verbose_proxy_logger.warning(
"Noma v2 guardrail: payload serialization failed, falling back to empty payload"
)
if isinstance(safe_payload, dict):
return safe_payload
verbose_proxy_logger.warning(
"Noma v2 guardrail: payload sanitization produced non-dict output (type=%s), falling back to empty payload",
type(safe_payload).__name__,
)
return {}
    async def _call_noma_scan(
        self,
        payload: dict,
    ) -> dict:
        """POST the sanitized payload to the v2 scan endpoint; return parsed JSON.

        Raises:
            An HTTP status error via raise_for_status on non-2xx responses.
        """
        headers: dict[str, str] = {"Content-Type": "application/json"}
        authorization_header = self._get_authorization_header()
        if authorization_header:
            headers["Authorization"] = authorization_header
        endpoint = f"{self.api_base}{_AIDR_SCAN_ENDPOINT}"
        # Strip anything that cannot survive JSON transport before sending.
        sanitized_payload = self._sanitize_payload_for_transport(payload)
        response = await self.async_handler.post(
            url=endpoint,
            headers=headers,
            json=sanitized_payload,
        )
        verbose_proxy_logger.debug(
            "Noma v2 AIDR response: status_code=%s body=%s",
            response.status_code,
            response.text,
        )
        response.raise_for_status()
        response_json = response.json()
        verbose_proxy_logger.debug(
            "Noma v2 AIDR response parsed: %s",
            json.dumps(response_json, default=str),
        )
        return response_json
def _add_guardrail_observability(
self,
request_data: dict,
start_time: datetime,
guardrail_status: GuardrailStatus,
guardrail_json_response: Any,
) -> None:
end_time = datetime.now()
duration = (end_time - start_time).total_seconds()
self.add_standard_logging_guardrail_information_to_request_data(
guardrail_provider="noma_v2",
guardrail_json_response=guardrail_json_response,
request_data=request_data,
guardrail_status=guardrail_status,
start_time=start_time.timestamp(),
end_time=end_time.timestamp(),
duration=duration,
)
def _apply_action(
self,
inputs: GenericGuardrailAPIInputs,
response_json: dict,
action: _Action,
) -> GenericGuardrailAPIInputs:
if action == _Action.BLOCKED:
raise NomaBlockedMessage(response_json)
if action == _Action.GUARDRAIL_INTERVENED:
updated_inputs = cast(GenericGuardrailAPIInputs, dict(inputs))
for field in _INTERVENED_INPUT_FIELDS:
value = response_json.get(field)
if isinstance(value, list):
updated_inputs[field] = value # type: ignore[literal-required]
return updated_inputs
return inputs
    @log_guardrail_information
    async def apply_guardrail(
        self,
        inputs: GenericGuardrailAPIInputs,
        request_data: dict,
        input_type: Literal["request", "response"],
        logging_obj: Optional["LiteLLMLoggingObj"] = None,
    ) -> GenericGuardrailAPIInputs:
        """Scan *inputs* with the Noma v2 API and enforce the verdict.

        Returns the (possibly rewritten) inputs. Raises NomaBlockedMessage on
        a BLOCKED verdict; on technical failure re-raises only when
        block_failures is set, otherwise passes the inputs through unchanged.
        Telemetry is recorded on every path via the finally block.
        """
        start_time = datetime.now()
        guardrail_status: GuardrailStatus = "success"
        guardrail_json_response: Any = {}
        dynamic_params = self.get_guardrail_dynamic_request_body_params(request_data)
        if not isinstance(dynamic_params, dict):
            dynamic_params = {}
        response_json: Optional[dict] = None
        # Per-request dynamic params can override configured application context.
        application_id = self._get_non_empty_str(dynamic_params.get("application_id"))
        if application_id is None:
            application_id = self._get_non_empty_str(self.application_id)
        try:
            payload = self._build_scan_payload(
                inputs=inputs,
                request_data=request_data,
                input_type=input_type,
                logging_obj=logging_obj,
                application_id=application_id,
            )
            response_json = await self._call_noma_scan(payload=payload)
            # Monitor mode never intervenes, regardless of the API verdict.
            if self.monitor_mode:
                action = _Action.NONE
            else:
                action = self._resolve_action_from_response(response_json=response_json)
            guardrail_json_response = response_json
            verbose_proxy_logger.debug(
                "Noma v2 guardrail decision: input_type=%s action=%s",
                input_type,
                action.value,
            )
            processed_inputs = self._apply_action(
                inputs=inputs,
                response_json=response_json,
                action=action,
            )
            guardrail_status = (
                "success" if action == _Action.NONE else "guardrail_intervened"
            )
            return processed_inputs
        except NomaBlockedMessage as e:
            guardrail_status = "guardrail_intervened"
            # Prefer the raw API response for telemetry; fall back to the
            # exception detail when the block happened before parsing.
            guardrail_json_response = (
                response_json
                if isinstance(response_json, dict)
                else getattr(e, "detail", {"error": "blocked"})
            )
            raise
        except Exception as e:
            guardrail_status = "guardrail_failed_to_respond"
            guardrail_json_response = str(e)
            verbose_proxy_logger.error("Noma v2 guardrail failed: %s", str(e))
            if self.block_failures:
                raise
            return inputs
        finally:
            self._add_guardrail_observability(
                request_data=request_data,
                start_time=start_time,
                guardrail_status=guardrail_status,
                guardrail_json_response=guardrail_json_response,
            )