chore: initial snapshot for gitea/github upload

2026-03-26 16:04:46 +08:00
commit a699a1ac98
3497 changed files with 1586237 additions and 0 deletions
--- a/llm-gateway-competitors/litellm-wheel-src/litellm/integrations/websearch_interception/handler.py
+++ b/llm-gateway-competitors/litellm-wheel-src/litellm/integrations/websearch_interception/handler.py
@@ -0,0 +1,957 @@
+"""
+WebSearch Interception Handler
+
+CustomLogger that intercepts WebSearch tool calls for models that don't
+natively support web search (e.g., Bedrock/Claude) and executes them
+server-side using litellm router's search tools.
+"""
+
+import asyncio
+import math
+from typing import Any, Dict, List, Optional, Tuple, Union, cast
+
+import litellm
+from litellm._logging import verbose_logger
+from litellm.anthropic_interface import messages as anthropic_messages
+from litellm.constants import LITELLM_WEB_SEARCH_TOOL_NAME
+from litellm.integrations.custom_logger import CustomLogger
+from litellm.integrations.websearch_interception.tools import (
+    get_litellm_web_search_tool,
+    get_litellm_web_search_tool_openai,
+    is_web_search_tool,
+    is_web_search_tool_chat_completion,
+)
+from litellm.integrations.websearch_interception.transformation import (
+    WebSearchTransformation,
+)
+from litellm.types.integrations.websearch_interception import (
+    WebSearchInterceptionConfig,
+)
+from litellm.types.utils import LlmProviders
+
+
+class WebSearchInterceptionLogger(CustomLogger):
+    """
+    CustomLogger that intercepts WebSearch tool calls for models that don't
+    natively support web search.
+
+    Implements agentic loop:
+    1. Detects WebSearch tool_use in model response
+    2. Executes litellm.asearch() for each query using router's search tools
+    3. Makes follow-up request with search results
+    4. Returns final response
+    """
+
+    def __init__(
+        self,
+        enabled_providers: Optional[List[Union[LlmProviders, str]]] = None,
+        search_tool_name: Optional[str] = None,
+    ):
+        """
+        Args:
+            enabled_providers: List of LLM providers to enable interception for.
+                              Use LlmProviders enum values (e.g., [LlmProviders.BEDROCK])
+                              If None or empty list, enables for ALL providers.
+                              Default: None (all providers enabled)
+            search_tool_name: Name of search tool configured in router's search_tools.
+                             If None, will attempt to use first available search tool.
+        """
+        super().__init__()
+        # Convert enum values to strings for comparison
+        if enabled_providers is None:
+            self.enabled_providers = [LlmProviders.BEDROCK.value]
+        else:
+            self.enabled_providers = [
+                p.value if isinstance(p, LlmProviders) else p for p in enabled_providers
+            ]
+        self.search_tool_name = search_tool_name
+        self._request_has_websearch = False  # Track if current request has web search
+
+    async def async_pre_call_deployment_hook(
+        self, kwargs: Dict[str, Any], call_type: Optional[Any]
+    ) -> Optional[dict]:
+        """
+        Pre-call hook to convert native Anthropic web_search tools to regular tools.
+
+        This prevents Bedrock from trying to execute web search server-side (which fails).
+        Instead, we convert it to a regular tool so the model returns tool_use blocks
+        that we can intercept and execute ourselves.
+        """
+        # Check if this is for an enabled provider
+        # Try top-level kwargs first, then nested litellm_params, then derive from model name
+        custom_llm_provider = kwargs.get("custom_llm_provider", "") or kwargs.get(
+            "litellm_params", {}
+        ).get("custom_llm_provider", "")
+        if not custom_llm_provider:
+            try:
+                _, custom_llm_provider, _, _ = litellm.get_llm_provider(
+                    model=kwargs.get("model", "")
+                )
+            except Exception:
+                custom_llm_provider = ""
+        if custom_llm_provider not in self.enabled_providers:
+            return None
+
+        # Check if request has tools with native web_search
+        tools = kwargs.get("tools")
+        if not tools:
+            return None
+
+        # Check if any tool is a web search tool (native or already LiteLLM standard)
+        has_websearch = any(is_web_search_tool(t) for t in tools)
+
+        if not has_websearch:
+            return None
+
+        verbose_logger.debug(
+            "WebSearchInterception: Converting native web_search tools to LiteLLM standard"
+        )
+
+        # Convert native/custom web_search tools to LiteLLM standard
+        converted_tools = []
+        for tool in tools:
+            if is_web_search_tool(tool):
+                # Convert to LiteLLM standard web search tool
+                converted_tool = get_litellm_web_search_tool_openai()
+                converted_tools.append(converted_tool)
+                verbose_logger.debug(
+                    f"WebSearchInterception: Converted {tool.get('name', 'unknown')} "
+                    f"(type={tool.get('type', 'none')}) to {LITELLM_WEB_SEARCH_TOOL_NAME}"
+                )
+            else:
+                # Keep other tools as-is
+                converted_tools.append(tool)
+
+        # Update tools in-place and return full kwargs
+        kwargs["tools"] = converted_tools
+        return kwargs
+
+    @classmethod
+    def from_config_yaml(
+        cls, config: WebSearchInterceptionConfig
+    ) -> "WebSearchInterceptionLogger":
+        """
+        Initialize WebSearchInterceptionLogger from proxy config.yaml parameters.
+
+        Args:
+            config: Configuration dictionary from litellm_settings.websearch_interception_params
+
+        Returns:
+            Configured WebSearchInterceptionLogger instance
+
+        Example:
+            From proxy_config.yaml:
+                litellm_settings:
+                  websearch_interception_params:
+                    enabled_providers: ["bedrock"]
+                    search_tool_name: "my-perplexity-search"
+
+            Usage:
+                config = litellm_settings.get("websearch_interception_params", {})
+                logger = WebSearchInterceptionLogger.from_config_yaml(config)
+        """
+        # Extract parameters from config
+        enabled_providers_str = config.get("enabled_providers", None)
+        search_tool_name = config.get("search_tool_name", None)
+
+        # Convert string provider names to LlmProviders enum values
+        enabled_providers: Optional[List[Union[LlmProviders, str]]] = None
+        if enabled_providers_str is not None:
+            enabled_providers = []
+            for provider in enabled_providers_str:
+                try:
+                    # Try to convert string to LlmProviders enum
+                    provider_enum = LlmProviders(provider)
+                    enabled_providers.append(provider_enum)
+                except ValueError:
+                    # If conversion fails, keep as string
+                    enabled_providers.append(provider)
+
+        return cls(
+            enabled_providers=enabled_providers,
+            search_tool_name=search_tool_name,
+        )
+
+    async def async_pre_request_hook(
+        self, model: str, messages: List[Dict], kwargs: Dict
+    ) -> Optional[Dict]:
+        """
+        Pre-request hook to convert native web search tools to LiteLLM standard.
+
+        This hook is called before the API request is made, allowing us to:
+        1. Detect native web search tools (web_search_20250305, etc.)
+        2. Convert them to LiteLLM standard format (litellm_web_search)
+        3. Convert stream=True to stream=False for interception
+
+        This prevents providers like Bedrock from trying to execute web search
+        natively (which fails), and ensures our agentic loop can intercept tool_use.
+
+        Returns:
+            Modified kwargs dict with converted tools, or None if no modifications needed
+        """
+        # Check if this request is for an enabled provider
+        custom_llm_provider = kwargs.get("litellm_params", {}).get(
+            "custom_llm_provider", ""
+        )
+
+        verbose_logger.debug(
+            f"WebSearchInterception: Pre-request hook called"
+            f" - custom_llm_provider={custom_llm_provider}"
+            f" - enabled_providers={self.enabled_providers or 'ALL'}"
+        )
+
+        if (
+            self.enabled_providers is not None
+            and custom_llm_provider not in self.enabled_providers
+        ):
+            verbose_logger.debug(
+                f"WebSearchInterception: Skipping - provider {custom_llm_provider} not in {self.enabled_providers}"
+            )
+            return None
+
+        # Check if request has tools
+        tools = kwargs.get("tools")
+        if not tools:
+            return None
+
+        # Check if any tool is a web search tool
+        has_websearch = any(is_web_search_tool(t) for t in tools)
+        if not has_websearch:
+            return None
+
+        verbose_logger.debug(
+            f"WebSearchInterception: Pre-request hook triggered for provider={custom_llm_provider}"
+        )
+
+        # Convert native web search tools to LiteLLM standard
+        converted_tools = []
+        for tool in tools:
+            if is_web_search_tool(tool):
+                standard_tool = get_litellm_web_search_tool()
+                converted_tools.append(standard_tool)
+                verbose_logger.debug(
+                    f"WebSearchInterception: Converted {tool.get('name', 'unknown')} "
+                    f"(type={tool.get('type', 'none')}) to {LITELLM_WEB_SEARCH_TOOL_NAME}"
+                )
+            else:
+                converted_tools.append(tool)
+
+        # Update kwargs with converted tools
+        kwargs["tools"] = converted_tools
+        verbose_logger.debug(
+            f"WebSearchInterception: Tools after conversion: {[t.get('name') for t in converted_tools]}"
+        )
+
+        # Convert stream=True to stream=False for WebSearch interception
+        if kwargs.get("stream"):
+            verbose_logger.debug(
+                "WebSearchInterception: Converting stream=True to stream=False"
+            )
+            kwargs["stream"] = False
+            kwargs["_websearch_interception_converted_stream"] = True
+
+        return kwargs
+
+    async def async_should_run_agentic_loop(
+        self,
+        response: Any,
+        model: str,
+        messages: List[Dict],
+        tools: Optional[List[Dict]],
+        stream: bool,
+        custom_llm_provider: str,
+        kwargs: Dict,
+    ) -> Tuple[bool, Dict]:
+        """
+        Check if WebSearch tool interception is needed for Anthropic Messages API.
+
+        This is the legacy method for Anthropic-style responses.
+        For chat completions, use async_should_run_chat_completion_agentic_loop instead.
+        """
+
+        verbose_logger.debug(
+            f"WebSearchInterception: Hook called! provider={custom_llm_provider}, stream={stream}"
+        )
+        verbose_logger.debug(f"WebSearchInterception: Response type: {type(response)}")
+
+        # Check if provider should be intercepted
+        # Note: custom_llm_provider is already normalized by get_llm_provider()
+        # (e.g., "bedrock/invoke/..." -> "bedrock")
+        if (
+            self.enabled_providers is not None
+            and custom_llm_provider not in self.enabled_providers
+        ):
+            verbose_logger.debug(
+                f"WebSearchInterception: Skipping provider {custom_llm_provider} (not in enabled list: {self.enabled_providers})"
+            )
+            return False, {}
+
+        # Check if tools include any web search tool (LiteLLM standard or native)
+        has_websearch_tool = any(is_web_search_tool(t) for t in (tools or []))
+        if not has_websearch_tool:
+            verbose_logger.debug("WebSearchInterception: No web search tool in request")
+            return False, {}
+
+        # Detect WebSearch tool_use in response (Anthropic format)
+        should_intercept, tool_calls = WebSearchTransformation.transform_request(
+            response=response,
+            stream=stream,
+            response_format="anthropic",
+        )
+
+        if not should_intercept:
+            verbose_logger.debug(
+                "WebSearchInterception: No WebSearch tool_use detected in response"
+            )
+            return False, {}
+
+        verbose_logger.debug(
+            f"WebSearchInterception: Detected {len(tool_calls)} WebSearch tool call(s), executing agentic loop"
+        )
+
+        # Extract thinking blocks from response content.
+        # When extended thinking is enabled, the model response includes
+        # thinking/redacted_thinking blocks that must be preserved and
+        # prepended to the follow-up assistant message.
+        thinking_blocks: List[Dict] = []
+        if isinstance(response, dict):
+            content = response.get("content", [])
+        else:
+            content = getattr(response, "content", []) or []
+
+        for block in content:
+            if isinstance(block, dict):
+                block_type = block.get("type")
+            else:
+                block_type = getattr(block, "type", None)
+
+            if block_type in ("thinking", "redacted_thinking"):
+                if isinstance(block, dict):
+                    thinking_blocks.append(block)
+                else:
+                    # Convert object to dict using getattr, matching the
+                    # pattern in _detect_from_non_streaming_response
+                    thinking_block_dict: Dict = {"type": block_type}
+                    if block_type == "thinking":
+                        thinking_block_dict["thinking"] = getattr(block, "thinking", "")
+                        thinking_block_dict["signature"] = getattr(
+                            block, "signature", ""
+                        )
+                    else:  # redacted_thinking
+                        thinking_block_dict["data"] = getattr(block, "data", "")
+                    thinking_blocks.append(thinking_block_dict)
+
+        if thinking_blocks:
+            verbose_logger.debug(
+                f"WebSearchInterception: Extracted {len(thinking_blocks)} thinking block(s) from response"
+            )
+
+        # Return tools dict with tool calls and thinking blocks
+        tools_dict = {
+            "tool_calls": tool_calls,
+            "tool_type": "websearch",
+            "provider": custom_llm_provider,
+            "response_format": "anthropic",
+            "thinking_blocks": thinking_blocks,
+        }
+        return True, tools_dict
+
+    async def async_should_run_chat_completion_agentic_loop(
+        self,
+        response: Any,
+        model: str,
+        messages: List[Dict],
+        tools: Optional[List[Dict]],
+        stream: bool,
+        custom_llm_provider: str,
+        kwargs: Dict,
+    ) -> Tuple[bool, Dict]:
+        """
+        Check if WebSearch tool interception is needed for Chat Completions API.
+
+        Similar to async_should_run_agentic_loop but for OpenAI-style chat completions.
+        """
+
+        verbose_logger.debug(
+            f"WebSearchInterception: Chat completion hook called! provider={custom_llm_provider}, stream={stream}"
+        )
+        verbose_logger.debug(f"WebSearchInterception: Response type: {type(response)}")
+
+        # Check if provider should be intercepted
+        if (
+            self.enabled_providers is not None
+            and custom_llm_provider not in self.enabled_providers
+        ):
+            verbose_logger.debug(
+                f"WebSearchInterception: Skipping provider {custom_llm_provider} (not in enabled list: {self.enabled_providers})"
+            )
+            return False, {}
+
+        # Check if tools include any web search tool (strict check for chat completions)
+        has_websearch_tool = any(
+            is_web_search_tool_chat_completion(t) for t in (tools or [])
+        )
+        if not has_websearch_tool:
+            verbose_logger.debug(
+                "WebSearchInterception: No litellm_web_search tool in request"
+            )
+            return False, {}
+
+        # Detect WebSearch tool_calls in response (OpenAI format)
+        should_intercept, tool_calls = WebSearchTransformation.transform_request(
+            response=response,
+            stream=stream,
+            response_format="openai",
+        )
+
+        if not should_intercept:
+            verbose_logger.debug(
+                "WebSearchInterception: No WebSearch tool_calls detected in response"
+            )
+            return False, {}
+
+        verbose_logger.debug(
+            f"WebSearchInterception: Detected {len(tool_calls)} WebSearch tool call(s), executing agentic loop"
+        )
+
+        # Return tools dict with tool calls
+        tools_dict = {
+            "tool_calls": tool_calls,
+            "tool_type": "websearch",
+            "provider": custom_llm_provider,
+            "response_format": "openai",
+        }
+        return True, tools_dict
+
+    async def async_run_agentic_loop(
+        self,
+        tools: Dict,
+        model: str,
+        messages: List[Dict],
+        response: Any,
+        anthropic_messages_provider_config: Any,
+        anthropic_messages_optional_request_params: Dict,
+        logging_obj: Any,
+        stream: bool,
+        kwargs: Dict,
+    ) -> Any:
+        """
+        Execute agentic loop with WebSearch execution for Anthropic Messages API.
+
+        This is the legacy method for Anthropic-style responses.
+        """
+
+        tool_calls = tools["tool_calls"]
+        thinking_blocks = tools.get("thinking_blocks", [])
+
+        verbose_logger.debug(
+            f"WebSearchInterception: Executing agentic loop for {len(tool_calls)} search(es)"
+        )
+
+        return await self._execute_agentic_loop(
+            model=model,
+            messages=messages,
+            tool_calls=tool_calls,
+            thinking_blocks=thinking_blocks,
+            anthropic_messages_optional_request_params=anthropic_messages_optional_request_params,
+            logging_obj=logging_obj,
+            stream=stream,
+            kwargs=kwargs,
+        )
+
+    async def async_run_chat_completion_agentic_loop(
+        self,
+        tools: Dict,
+        model: str,
+        messages: List[Dict],
+        response: Any,
+        optional_params: Dict,
+        logging_obj: Any,
+        stream: bool,
+        kwargs: Dict,
+    ) -> Any:
+        """
+        Execute agentic loop with WebSearch execution for Chat Completions API.
+
+        Similar to async_run_agentic_loop but for OpenAI-style chat completions.
+        """
+
+        tool_calls = tools["tool_calls"]
+        response_format = tools.get("response_format", "openai")
+
+        verbose_logger.debug(
+            f"WebSearchInterception: Executing chat completion agentic loop for {len(tool_calls)} search(es)"
+        )
+
+        return await self._execute_chat_completion_agentic_loop(
+            model=model,
+            messages=messages,
+            tool_calls=tool_calls,
+            optional_params=optional_params,
+            logging_obj=logging_obj,
+            stream=stream,
+            kwargs=kwargs,
+            response_format=response_format,
+        )
+
+    @staticmethod
+    def _resolve_max_tokens(
+        optional_params: Dict,
+        kwargs: Dict,
+    ) -> int:
+        """Extract max_tokens and validate against thinking.budget_tokens.
+
+        Anthropic API requires ``max_tokens > thinking.budget_tokens``.
+        If the constraint is violated, auto-adjust to ``budget_tokens + 1024``.
+        """
+        max_tokens: int = optional_params.get(
+            "max_tokens",
+            kwargs.get("max_tokens", 1024),
+        )
+        thinking_param = optional_params.get("thinking")
+        if thinking_param and isinstance(thinking_param, dict):
+            budget_tokens = thinking_param.get("budget_tokens")
+            if (
+                budget_tokens is not None
+                and isinstance(budget_tokens, (int, float))
+                and math.isfinite(budget_tokens)
+                and budget_tokens > 0
+            ):
+                if max_tokens <= budget_tokens:
+                    adjusted = math.ceil(budget_tokens) + 1024
+                    verbose_logger.debug(
+                        "WebSearchInterception: max_tokens=%s <= thinking.budget_tokens=%s, "
+                        "adjusting to %s to satisfy Anthropic API constraint",
+                        max_tokens,
+                        budget_tokens,
+                        adjusted,
+                    )
+                    max_tokens = adjusted
+        return max_tokens
+
+    @staticmethod
+    def _prepare_followup_kwargs(kwargs: Dict) -> Dict:
+        """Build kwargs for the follow-up call, excluding internal keys.
+
+        ``litellm_logging_obj`` MUST be excluded so the follow-up call creates
+        its own ``Logging`` instance via ``function_setup``.  Reusing the
+        initial call's logging object triggers the dedup flag
+        (``has_logged_async_success``) which silently prevents the initial
+        call's spend from being recorded — the root cause of the
+        SpendLog / AWS billing mismatch.
+        """
+        _internal_keys = {"litellm_logging_obj"}
+        return {
+            k: v
+            for k, v in kwargs.items()
+            if not k.startswith("_websearch_interception") and k not in _internal_keys
+        }
+
+    async def _execute_agentic_loop(
+        self,
+        model: str,
+        messages: List[Dict],
+        tool_calls: List[Dict],
+        thinking_blocks: List[Dict],
+        anthropic_messages_optional_request_params: Dict,
+        logging_obj: Any,
+        stream: bool,
+        kwargs: Dict,
+    ) -> Any:
+        """Execute litellm.search() and make follow-up request"""
+
+        # Extract search queries from tool_use blocks
+        search_tasks = []
+        for tool_call in tool_calls:
+            query = tool_call["input"].get("query")
+            if query:
+                verbose_logger.debug(
+                    f"WebSearchInterception: Queuing search for query='{query}'"
+                )
+                search_tasks.append(self._execute_search(query))
+            else:
+                verbose_logger.debug(
+                    f"WebSearchInterception: Tool call {tool_call['id']} has no query"
+                )
+                # Add empty result for tools without query
+                search_tasks.append(self._create_empty_search_result())
+
+        # Execute searches in parallel
+        verbose_logger.debug(
+            f"WebSearchInterception: Executing {len(search_tasks)} search(es) in parallel"
+        )
+        search_results = await asyncio.gather(*search_tasks, return_exceptions=True)
+
+        # Handle any exceptions in search results
+        final_search_results: List[str] = []
+        for i, result in enumerate(search_results):
+            if isinstance(result, Exception):
+                verbose_logger.error(
+                    f"WebSearchInterception: Search {i} failed with error: {str(result)}"
+                )
+                final_search_results.append(f"Search failed: {str(result)}")
+            elif isinstance(result, str):
+                # Explicitly cast to str for type checker
+                final_search_results.append(cast(str, result))
+            else:
+                # Should never happen, but handle for type safety
+                verbose_logger.debug(
+                    f"WebSearchInterception: Unexpected result type {type(result)} at index {i}"
+                )
+                final_search_results.append(str(result))
+
+        # Build assistant and user messages using transformation
+        assistant_message, user_message = WebSearchTransformation.transform_response(
+            tool_calls=tool_calls,
+            search_results=final_search_results,
+            thinking_blocks=thinking_blocks,
+        )
+
+        # Make follow-up request with search results
+        # Type cast: user_message is a Dict for Anthropic format (default response_format)
+        follow_up_messages = messages + [assistant_message, cast(Dict, user_message)]
+
+        verbose_logger.debug(
+            "WebSearchInterception: Making follow-up request with search results"
+        )
+        verbose_logger.debug(
+            f"WebSearchInterception: Follow-up messages count: {len(follow_up_messages)}"
+        )
+        verbose_logger.debug(
+            f"WebSearchInterception: Last message (tool_result): {user_message}"
+        )
+
+        # Correlation context for structured logging
+        _call_id = getattr(logging_obj, "litellm_call_id", None) or kwargs.get(
+            "litellm_call_id", "unknown"
+        )
+
+        full_model_name = model  # safe default before try block
+
+        # Use anthropic_messages.acreate for follow-up request
+        try:
+            max_tokens = self._resolve_max_tokens(
+                anthropic_messages_optional_request_params, kwargs
+            )
+
+            verbose_logger.debug(
+                f"WebSearchInterception: Using max_tokens={max_tokens} for follow-up request"
+            )
+
+            # Create a copy of optional params without max_tokens (since we pass it explicitly)
+            optional_params_without_max_tokens = {
+                k: v
+                for k, v in anthropic_messages_optional_request_params.items()
+                if k != "max_tokens"
+            }
+
+            kwargs_for_followup = self._prepare_followup_kwargs(kwargs)
+
+            # Get model from logging_obj.model_call_details["agentic_loop_params"]
+            # This preserves the full model name with provider prefix (e.g., "bedrock/invoke/...")
+            if logging_obj is not None:
+                agentic_params = logging_obj.model_call_details.get(
+                    "agentic_loop_params", {}
+                )
+                full_model_name = agentic_params.get("model", model)
+            verbose_logger.debug(
+                f"WebSearchInterception: Using model name: {full_model_name}"
+            )
+
+            final_response = await anthropic_messages.acreate(
+                max_tokens=max_tokens,
+                messages=follow_up_messages,
+                model=full_model_name,
+                **optional_params_without_max_tokens,
+                **kwargs_for_followup,
+            )
+            verbose_logger.debug(
+                f"WebSearchInterception: Follow-up request completed, response type: {type(final_response)}"
+            )
+            verbose_logger.debug(
+                f"WebSearchInterception: Final response: {final_response}"
+            )
+            return final_response
+        except Exception as e:
+            verbose_logger.exception(
+                "WebSearchInterception: Follow-up request failed "
+                "[call_id=%s model=%s messages=%d searches=%d]: %s",
+                _call_id,
+                full_model_name,
+                len(follow_up_messages),
+                len(final_search_results),
+                str(e),
+            )
+            raise
+
+    async def _execute_search(self, query: str) -> str:
+        """Execute a single web search using router's search tools"""
+        try:
+            # Import router from proxy_server
+            try:
+                from litellm.proxy.proxy_server import llm_router
+            except ImportError:
+                verbose_logger.debug(
+                    "WebSearchInterception: Could not import llm_router from proxy_server, "
+                    "falling back to direct litellm.asearch() with perplexity"
+                )
+                llm_router = None
+
+            # Determine search provider from router's search_tools
+            search_provider: Optional[str] = None
+            if llm_router is not None and hasattr(llm_router, "search_tools"):
+                if self.search_tool_name:
+                    # Find specific search tool by name
+                    matching_tools = [
+                        tool
+                        for tool in llm_router.search_tools
+                        if tool.get("search_tool_name") == self.search_tool_name
+                    ]
+                    if matching_tools:
+                        search_tool = matching_tools[0]
+                        search_provider = search_tool.get("litellm_params", {}).get(
+                            "search_provider"
+                        )
+                        verbose_logger.debug(
+                            f"WebSearchInterception: Found search tool '{self.search_tool_name}' "
+                            f"with provider '{search_provider}'"
+                        )
+                    else:
+                        verbose_logger.debug(
+                            f"WebSearchInterception: Search tool '{self.search_tool_name}' not found in router, "
+                            "falling back to first available or perplexity"
+                        )
+
+                # If no specific tool or not found, use first available
+                if not search_provider and llm_router.search_tools:
+                    first_tool = llm_router.search_tools[0]
+                    search_provider = first_tool.get("litellm_params", {}).get(
+                        "search_provider"
+                    )
+                    verbose_logger.debug(
+                        f"WebSearchInterception: Using first available search tool with provider '{search_provider}'"
+                    )
+
+            # Fallback to perplexity if no router or no search tools configured
+            if not search_provider:
+                search_provider = "perplexity"
+                verbose_logger.debug(
+                    "WebSearchInterception: No search tools configured in router, "
+                    f"using default provider '{search_provider}'"
+                )
+
+            verbose_logger.debug(
+                f"WebSearchInterception: Executing search for '{query}' using provider '{search_provider}'"
+            )
+            result = await litellm.asearch(query=query, search_provider=search_provider)
+
+            # Format using transformation function
+            search_result_text = WebSearchTransformation.format_search_response(result)
+
+            verbose_logger.debug(
+                f"WebSearchInterception: Search completed for '{query}', got {len(search_result_text)} chars"
+            )
+            return search_result_text
+        except Exception as e:
+            verbose_logger.error(
+                f"WebSearchInterception: Search failed for '{query}': {str(e)}"
+            )
+            raise
+
+    async def _execute_chat_completion_agentic_loop(  # noqa: PLR0915
+        self,
+        model: str,
+        messages: List[Dict],
+        tool_calls: List[Dict],
+        optional_params: Dict,
+        logging_obj: Any,
+        stream: bool,
+        kwargs: Dict,
+        response_format: str = "openai",
+    ) -> Any:
+        """Execute litellm.search() and make follow-up chat completion request"""
+
+        # Extract search queries from tool_calls
+        search_tasks = []
+        for tool_call in tool_calls:
+            # Handle both Anthropic-style input and OpenAI-style function.arguments
+            query = None
+            if "input" in tool_call and isinstance(tool_call["input"], dict):
+                query = tool_call["input"].get("query")
+            elif "function" in tool_call:
+                func = tool_call["function"]
+                if isinstance(func, dict):
+                    args = func.get("arguments", {})
+                    if isinstance(args, dict):
+                        query = args.get("query")
+
+            if query:
+                verbose_logger.debug(
+                    f"WebSearchInterception: Queuing search for query='{query}'"
+                )
+                search_tasks.append(self._execute_search(query))
+            else:
+                verbose_logger.debug(
+                    f"WebSearchInterception: Tool call {tool_call.get('id')} has no query"
+                )
+                # Add empty result for tools without query
+                search_tasks.append(self._create_empty_search_result())
+
+        # Execute searches in parallel
+        verbose_logger.debug(
+            f"WebSearchInterception: Executing {len(search_tasks)} search(es) in parallel"
+        )
+        search_results = await asyncio.gather(*search_tasks, return_exceptions=True)
+
+        # Handle any exceptions in search results
+        final_search_results: List[str] = []
+        for i, result in enumerate(search_results):
+            if isinstance(result, Exception):
+                verbose_logger.error(
+                    f"WebSearchInterception: Search {i} failed with error: {str(result)}"
+                )
+                final_search_results.append(f"Search failed: {str(result)}")
+            elif isinstance(result, str):
+                final_search_results.append(cast(str, result))
+            else:
+                verbose_logger.debug(
+                    f"WebSearchInterception: Unexpected result type {type(result)} at index {i}"
+                )
+                final_search_results.append(str(result))
+
+        # Build assistant and tool messages using transformation
+        (
+            assistant_message,
+            tool_messages_or_user,
+        ) = WebSearchTransformation.transform_response(
+            tool_calls=tool_calls,
+            search_results=final_search_results,
+            response_format=response_format,
+        )
+
+        # Make follow-up request with search results
+        # For OpenAI format, tool_messages_or_user is a list of tool messages
+        if response_format == "openai":
+            follow_up_messages = (
+                messages + [assistant_message] + cast(List[Dict], tool_messages_or_user)
+            )
+        else:
+            # For Anthropic format (shouldn't happen in this method, but handle it)
+            follow_up_messages = messages + [
+                assistant_message,
+                cast(Dict, tool_messages_or_user),
+            ]
+
+        verbose_logger.debug(
+            "WebSearchInterception: Making follow-up chat completion request with search results"
+        )
+        verbose_logger.debug(
+            f"WebSearchInterception: Follow-up messages count: {len(follow_up_messages)}"
+        )
+
+        # Use litellm.acompletion for follow-up request
+        try:
+            # Remove internal parameters that shouldn't be passed to follow-up request
+            internal_params = {
+                "_websearch_interception",
+                "acompletion",
+                "litellm_logging_obj",
+                "custom_llm_provider",
+                "model_alias_map",
+                "stream_response",
+                "custom_prompt_dict",
+            }
+            kwargs_for_followup = {
+                k: v
+                for k, v in kwargs.items()
+                if not k.startswith("_websearch_interception")
+                and k not in internal_params
+            }
+
+            # Get full model name from kwargs
+            full_model_name = model
+            if "custom_llm_provider" in kwargs:
+                custom_llm_provider = kwargs["custom_llm_provider"]
+                # Reconstruct full model name with provider prefix if needed
+                if not model.startswith(custom_llm_provider):
+                    # Check if model already has a provider prefix
+                    if "/" not in model:
+                        full_model_name = f"{custom_llm_provider}/{model}"
+
+            verbose_logger.debug(
+                f"WebSearchInterception: Using model name: {full_model_name}"
+            )
+
+            # Prepare tools for follow-up request (same as original)
+            tools_param = optional_params.get("tools")
+
+            # Remove tools and extra_body from optional_params to avoid issues
+            # extra_body often contains internal LiteLLM params that shouldn't be forwarded
+            optional_params_clean = {
+                k: v
+                for k, v in optional_params.items()
+                if k
+                not in {
+                    "tools",
+                    "extra_body",
+                    "model_alias_map",
+                    "stream_response",
+                    "custom_prompt_dict",
+                }
+            }
+
+            final_response = await litellm.acompletion(
+                model=full_model_name,
+                messages=follow_up_messages,
+                tools=tools_param,
+                **optional_params_clean,
+                **kwargs_for_followup,
+            )
+
+            verbose_logger.debug(
+                f"WebSearchInterception: Follow-up request completed, response type: {type(final_response)}"
+            )
+            return final_response
+        except Exception as e:
+            verbose_logger.exception(
+                f"WebSearchInterception: Follow-up request failed: {str(e)}"
+            )
+            raise
+
+    async def _create_empty_search_result(self) -> str:
+        """Create an empty search result for tool calls without queries"""
+        return "No search query provided"
+
+    @staticmethod
+    def initialize_from_proxy_config(
+        litellm_settings: Dict[str, Any],
+        callback_specific_params: Dict[str, Any],
+    ) -> "WebSearchInterceptionLogger":
+        """
+        Static method to initialize WebSearchInterceptionLogger from proxy config.
+
+        Used in callback_utils.py to simplify initialization logic.
+
+        Args:
+            litellm_settings: Dictionary containing litellm_settings from proxy_config.yaml
+            callback_specific_params: Dictionary containing callback-specific parameters
+
+        Returns:
+            Configured WebSearchInterceptionLogger instance
+
+        Example:
+            From callback_utils.py:
+                websearch_obj = WebSearchInterceptionLogger.initialize_from_proxy_config(
+                    litellm_settings=litellm_settings,
+                    callback_specific_params=callback_specific_params
+                )
+        """
+        # Get websearch_interception_params from litellm_settings or callback_specific_params
+        websearch_params: WebSearchInterceptionConfig = {}
+        if "websearch_interception_params" in litellm_settings:
+            websearch_params = litellm_settings["websearch_interception_params"]
+        elif "websearch_interception" in callback_specific_params:
+            websearch_params = callback_specific_params["websearch_interception"]
+
+        # Use classmethod to initialize from config
+        return WebSearchInterceptionLogger.from_config_yaml(websearch_params)