"""
WebSearch Interception Handler
CustomLogger that intercepts WebSearch tool calls for models that don't
natively support web search (e.g., Bedrock/Claude) and executes them
server-side using litellm router's search tools.
"""
import asyncio
import json
import math
from typing import Any, Dict, List, Optional, Tuple, Union, cast
import litellm
from litellm._logging import verbose_logger
from litellm.anthropic_interface import messages as anthropic_messages
from litellm.constants import LITELLM_WEB_SEARCH_TOOL_NAME
from litellm.integrations.custom_logger import CustomLogger
from litellm.integrations.websearch_interception.tools import (
get_litellm_web_search_tool,
get_litellm_web_search_tool_openai,
is_web_search_tool,
is_web_search_tool_chat_completion,
)
from litellm.integrations.websearch_interception.transformation import (
WebSearchTransformation,
)
from litellm.types.integrations.websearch_interception import (
WebSearchInterceptionConfig,
)
from litellm.types.utils import LlmProviders
class WebSearchInterceptionLogger(CustomLogger):
"""
CustomLogger that intercepts WebSearch tool calls for models that don't
natively support web search.
    Implements an agentic loop:
1. Detects WebSearch tool_use in model response
2. Executes litellm.asearch() for each query using router's search tools
3. Makes follow-up request with search results
4. Returns final response
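
    Example (illustrative sketch; assumes the standard ``litellm.callbacks``
    registration pattern for CustomLogger instances)::

        import litellm

        litellm.callbacks = [
            WebSearchInterceptionLogger(enabled_providers=["bedrock"])
        ]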
"""
def __init__(
self,
enabled_providers: Optional[List[Union[LlmProviders, str]]] = None,
search_tool_name: Optional[str] = None,
):
"""
Args:
enabled_providers: List of LLM providers to enable interception for.
Use LlmProviders enum values (e.g., [LlmProviders.BEDROCK])
                If None, defaults to Bedrock only (i.e., ["bedrock"]).
                An empty list disables interception for all providers.
search_tool_name: Name of search tool configured in router's search_tools.
If None, will attempt to use first available search tool.
"""
super().__init__()
# Convert enum values to strings for comparison
if enabled_providers is None:
self.enabled_providers = [LlmProviders.BEDROCK.value]
else:
self.enabled_providers = [
p.value if isinstance(p, LlmProviders) else p for p in enabled_providers
]
self.search_tool_name = search_tool_name
self._request_has_websearch = False # Track if current request has web search
async def async_pre_call_deployment_hook(
self, kwargs: Dict[str, Any], call_type: Optional[Any]
) -> Optional[dict]:
"""
Pre-call hook to convert native Anthropic web_search tools to regular tools.
This prevents Bedrock from trying to execute web search server-side (which fails).
Instead, we convert it to a regular tool so the model returns tool_use blocks
that we can intercept and execute ourselves.
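
        Illustrative before/after shapes (hedged; the exact dicts come from the
        websearch_interception.tools helpers)::

            {"type": "web_search_20250305", "name": "web_search"}        # native
            {"type": "function",
             "function": {"name": LITELLM_WEB_SEARCH_TOOL_NAME, ...}}    # converted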
"""
# Check if this is for an enabled provider
# Try top-level kwargs first, then nested litellm_params, then derive from model name
custom_llm_provider = kwargs.get("custom_llm_provider", "") or kwargs.get(
"litellm_params", {}
).get("custom_llm_provider", "")
if not custom_llm_provider:
try:
_, custom_llm_provider, _, _ = litellm.get_llm_provider(
model=kwargs.get("model", "")
)
except Exception:
custom_llm_provider = ""
if custom_llm_provider not in self.enabled_providers:
return None
# Check if request has tools with native web_search
tools = kwargs.get("tools")
if not tools:
return None
# Check if any tool is a web search tool (native or already LiteLLM standard)
has_websearch = any(is_web_search_tool(t) for t in tools)
if not has_websearch:
return None
verbose_logger.debug(
"WebSearchInterception: Converting native web_search tools to LiteLLM standard"
)
# Convert native/custom web_search tools to LiteLLM standard
converted_tools = []
for tool in tools:
if is_web_search_tool(tool):
# Convert to LiteLLM standard web search tool
converted_tool = get_litellm_web_search_tool_openai()
converted_tools.append(converted_tool)
verbose_logger.debug(
f"WebSearchInterception: Converted {tool.get('name', 'unknown')} "
f"(type={tool.get('type', 'none')}) to {LITELLM_WEB_SEARCH_TOOL_NAME}"
)
else:
# Keep other tools as-is
converted_tools.append(tool)
# Update tools in-place and return full kwargs
kwargs["tools"] = converted_tools
return kwargs
@classmethod
def from_config_yaml(
cls, config: WebSearchInterceptionConfig
) -> "WebSearchInterceptionLogger":
"""
Initialize WebSearchInterceptionLogger from proxy config.yaml parameters.
Args:
config: Configuration dictionary from litellm_settings.websearch_interception_params
Returns:
Configured WebSearchInterceptionLogger instance
Example:
From proxy_config.yaml:
litellm_settings:
websearch_interception_params:
enabled_providers: ["bedrock"]
search_tool_name: "my-perplexity-search"
Usage:
config = litellm_settings.get("websearch_interception_params", {})
logger = WebSearchInterceptionLogger.from_config_yaml(config)
"""
# Extract parameters from config
enabled_providers_str = config.get("enabled_providers", None)
search_tool_name = config.get("search_tool_name", None)
# Convert string provider names to LlmProviders enum values
enabled_providers: Optional[List[Union[LlmProviders, str]]] = None
if enabled_providers_str is not None:
enabled_providers = []
for provider in enabled_providers_str:
try:
# Try to convert string to LlmProviders enum
provider_enum = LlmProviders(provider)
enabled_providers.append(provider_enum)
except ValueError:
# If conversion fails, keep as string
enabled_providers.append(provider)
return cls(
enabled_providers=enabled_providers,
search_tool_name=search_tool_name,
)
async def async_pre_request_hook(
self, model: str, messages: List[Dict], kwargs: Dict
) -> Optional[Dict]:
"""
Pre-request hook to convert native web search tools to LiteLLM standard.
This hook is called before the API request is made, allowing us to:
1. Detect native web search tools (web_search_20250305, etc.)
2. Convert them to LiteLLM standard format (litellm_web_search)
3. Convert stream=True to stream=False for interception
This prevents providers like Bedrock from trying to execute web search
natively (which fails), and ensures our agentic loop can intercept tool_use.
Returns:
Modified kwargs dict with converted tools, or None if no modifications needed
"""
# Check if this request is for an enabled provider
custom_llm_provider = kwargs.get("litellm_params", {}).get(
"custom_llm_provider", ""
)
verbose_logger.debug(
f"WebSearchInterception: Pre-request hook called"
f" - custom_llm_provider={custom_llm_provider}"
f" - enabled_providers={self.enabled_providers or 'ALL'}"
)
if (
self.enabled_providers is not None
and custom_llm_provider not in self.enabled_providers
):
verbose_logger.debug(
f"WebSearchInterception: Skipping - provider {custom_llm_provider} not in {self.enabled_providers}"
)
return None
# Check if request has tools
tools = kwargs.get("tools")
if not tools:
return None
# Check if any tool is a web search tool
has_websearch = any(is_web_search_tool(t) for t in tools)
if not has_websearch:
return None
verbose_logger.debug(
f"WebSearchInterception: Pre-request hook triggered for provider={custom_llm_provider}"
)
# Convert native web search tools to LiteLLM standard
converted_tools = []
for tool in tools:
if is_web_search_tool(tool):
standard_tool = get_litellm_web_search_tool()
converted_tools.append(standard_tool)
verbose_logger.debug(
f"WebSearchInterception: Converted {tool.get('name', 'unknown')} "
f"(type={tool.get('type', 'none')}) to {LITELLM_WEB_SEARCH_TOOL_NAME}"
)
else:
converted_tools.append(tool)
# Update kwargs with converted tools
kwargs["tools"] = converted_tools
verbose_logger.debug(
f"WebSearchInterception: Tools after conversion: {[t.get('name') for t in converted_tools]}"
)
# Convert stream=True to stream=False for WebSearch interception
if kwargs.get("stream"):
verbose_logger.debug(
"WebSearchInterception: Converting stream=True to stream=False"
)
kwargs["stream"] = False
kwargs["_websearch_interception_converted_stream"] = True
return kwargs
async def async_should_run_agentic_loop(
self,
response: Any,
model: str,
messages: List[Dict],
tools: Optional[List[Dict]],
stream: bool,
custom_llm_provider: str,
kwargs: Dict,
) -> Tuple[bool, Dict]:
"""
Check if WebSearch tool interception is needed for Anthropic Messages API.
This is the legacy method for Anthropic-style responses.
For chat completions, use async_should_run_chat_completion_agentic_loop instead.
"""
verbose_logger.debug(
f"WebSearchInterception: Hook called! provider={custom_llm_provider}, stream={stream}"
)
verbose_logger.debug(f"WebSearchInterception: Response type: {type(response)}")
# Check if provider should be intercepted
# Note: custom_llm_provider is already normalized by get_llm_provider()
# (e.g., "bedrock/invoke/..." -> "bedrock")
if (
self.enabled_providers is not None
and custom_llm_provider not in self.enabled_providers
):
verbose_logger.debug(
f"WebSearchInterception: Skipping provider {custom_llm_provider} (not in enabled list: {self.enabled_providers})"
)
return False, {}
# Check if tools include any web search tool (LiteLLM standard or native)
has_websearch_tool = any(is_web_search_tool(t) for t in (tools or []))
if not has_websearch_tool:
verbose_logger.debug("WebSearchInterception: No web search tool in request")
return False, {}
# Detect WebSearch tool_use in response (Anthropic format)
should_intercept, tool_calls = WebSearchTransformation.transform_request(
response=response,
stream=stream,
response_format="anthropic",
)
if not should_intercept:
verbose_logger.debug(
"WebSearchInterception: No WebSearch tool_use detected in response"
)
return False, {}
verbose_logger.debug(
f"WebSearchInterception: Detected {len(tool_calls)} WebSearch tool call(s), executing agentic loop"
)
# Extract thinking blocks from response content.
# When extended thinking is enabled, the model response includes
# thinking/redacted_thinking blocks that must be preserved and
# prepended to the follow-up assistant message.
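        # Expected block shapes (per Anthropic's extended-thinking format):
        #   {"type": "thinking", "thinking": "...", "signature": "..."}
        #   {"type": "redacted_thinking", "data": "..."}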
thinking_blocks: List[Dict] = []
if isinstance(response, dict):
content = response.get("content", [])
else:
content = getattr(response, "content", []) or []
for block in content:
if isinstance(block, dict):
block_type = block.get("type")
else:
block_type = getattr(block, "type", None)
if block_type in ("thinking", "redacted_thinking"):
if isinstance(block, dict):
thinking_blocks.append(block)
else:
# Convert object to dict using getattr, matching the
# pattern in _detect_from_non_streaming_response
thinking_block_dict: Dict = {"type": block_type}
if block_type == "thinking":
thinking_block_dict["thinking"] = getattr(block, "thinking", "")
thinking_block_dict["signature"] = getattr(
block, "signature", ""
)
else: # redacted_thinking
thinking_block_dict["data"] = getattr(block, "data", "")
thinking_blocks.append(thinking_block_dict)
if thinking_blocks:
verbose_logger.debug(
f"WebSearchInterception: Extracted {len(thinking_blocks)} thinking block(s) from response"
)
# Return tools dict with tool calls and thinking blocks
tools_dict = {
"tool_calls": tool_calls,
"tool_type": "websearch",
"provider": custom_llm_provider,
"response_format": "anthropic",
"thinking_blocks": thinking_blocks,
}
return True, tools_dict
async def async_should_run_chat_completion_agentic_loop(
self,
response: Any,
model: str,
messages: List[Dict],
tools: Optional[List[Dict]],
stream: bool,
custom_llm_provider: str,
kwargs: Dict,
) -> Tuple[bool, Dict]:
"""
Check if WebSearch tool interception is needed for Chat Completions API.
Similar to async_should_run_agentic_loop but for OpenAI-style chat completions.
"""
verbose_logger.debug(
f"WebSearchInterception: Chat completion hook called! provider={custom_llm_provider}, stream={stream}"
)
verbose_logger.debug(f"WebSearchInterception: Response type: {type(response)}")
# Check if provider should be intercepted
if (
self.enabled_providers is not None
and custom_llm_provider not in self.enabled_providers
):
verbose_logger.debug(
f"WebSearchInterception: Skipping provider {custom_llm_provider} (not in enabled list: {self.enabled_providers})"
)
return False, {}
# Check if tools include any web search tool (strict check for chat completions)
has_websearch_tool = any(
is_web_search_tool_chat_completion(t) for t in (tools or [])
)
if not has_websearch_tool:
verbose_logger.debug(
"WebSearchInterception: No litellm_web_search tool in request"
)
return False, {}
# Detect WebSearch tool_calls in response (OpenAI format)
should_intercept, tool_calls = WebSearchTransformation.transform_request(
response=response,
stream=stream,
response_format="openai",
)
if not should_intercept:
verbose_logger.debug(
"WebSearchInterception: No WebSearch tool_calls detected in response"
)
return False, {}
verbose_logger.debug(
f"WebSearchInterception: Detected {len(tool_calls)} WebSearch tool call(s), executing agentic loop"
)
# Return tools dict with tool calls
tools_dict = {
"tool_calls": tool_calls,
"tool_type": "websearch",
"provider": custom_llm_provider,
"response_format": "openai",
}
return True, tools_dict
async def async_run_agentic_loop(
self,
tools: Dict,
model: str,
messages: List[Dict],
response: Any,
anthropic_messages_provider_config: Any,
anthropic_messages_optional_request_params: Dict,
logging_obj: Any,
stream: bool,
kwargs: Dict,
) -> Any:
"""
Execute agentic loop with WebSearch execution for Anthropic Messages API.
This is the legacy method for Anthropic-style responses.
"""
tool_calls = tools["tool_calls"]
thinking_blocks = tools.get("thinking_blocks", [])
verbose_logger.debug(
f"WebSearchInterception: Executing agentic loop for {len(tool_calls)} search(es)"
)
return await self._execute_agentic_loop(
model=model,
messages=messages,
tool_calls=tool_calls,
thinking_blocks=thinking_blocks,
anthropic_messages_optional_request_params=anthropic_messages_optional_request_params,
logging_obj=logging_obj,
stream=stream,
kwargs=kwargs,
)
async def async_run_chat_completion_agentic_loop(
self,
tools: Dict,
model: str,
messages: List[Dict],
response: Any,
optional_params: Dict,
logging_obj: Any,
stream: bool,
kwargs: Dict,
) -> Any:
"""
Execute agentic loop with WebSearch execution for Chat Completions API.
Similar to async_run_agentic_loop but for OpenAI-style chat completions.
"""
tool_calls = tools["tool_calls"]
response_format = tools.get("response_format", "openai")
verbose_logger.debug(
f"WebSearchInterception: Executing chat completion agentic loop for {len(tool_calls)} search(es)"
)
return await self._execute_chat_completion_agentic_loop(
model=model,
messages=messages,
tool_calls=tool_calls,
optional_params=optional_params,
logging_obj=logging_obj,
stream=stream,
kwargs=kwargs,
response_format=response_format,
)
@staticmethod
def _resolve_max_tokens(
optional_params: Dict,
kwargs: Dict,
) -> int:
"""Extract max_tokens and validate against thinking.budget_tokens.
Anthropic API requires ``max_tokens > thinking.budget_tokens``.
If the constraint is violated, auto-adjust to ``budget_tokens + 1024``.
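        For example, ``max_tokens=1024`` with ``thinking={"budget_tokens": 2000}``
        is adjusted to ``2000 + 1024 = 3024``.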
"""
max_tokens: int = optional_params.get(
"max_tokens",
kwargs.get("max_tokens", 1024),
)
thinking_param = optional_params.get("thinking")
if thinking_param and isinstance(thinking_param, dict):
budget_tokens = thinking_param.get("budget_tokens")
if (
budget_tokens is not None
and isinstance(budget_tokens, (int, float))
and math.isfinite(budget_tokens)
and budget_tokens > 0
):
if max_tokens <= budget_tokens:
adjusted = math.ceil(budget_tokens) + 1024
verbose_logger.debug(
"WebSearchInterception: max_tokens=%s <= thinking.budget_tokens=%s, "
"adjusting to %s to satisfy Anthropic API constraint",
max_tokens,
budget_tokens,
adjusted,
)
max_tokens = adjusted
return max_tokens
@staticmethod
def _prepare_followup_kwargs(kwargs: Dict) -> Dict:
"""Build kwargs for the follow-up call, excluding internal keys.
``litellm_logging_obj`` MUST be excluded so the follow-up call creates
its own ``Logging`` instance via ``function_setup``. Reusing the
initial call's logging object triggers the dedup flag
(``has_logged_async_success``) which silently prevents the initial
call's spend from being recorded — the root cause of the
SpendLog / AWS billing mismatch.
"""
_internal_keys = {"litellm_logging_obj"}
return {
k: v
for k, v in kwargs.items()
if not k.startswith("_websearch_interception") and k not in _internal_keys
}
async def _execute_agentic_loop(
self,
model: str,
messages: List[Dict],
tool_calls: List[Dict],
thinking_blocks: List[Dict],
anthropic_messages_optional_request_params: Dict,
logging_obj: Any,
stream: bool,
kwargs: Dict,
) -> Any:
"""Execute litellm.search() and make follow-up request"""
# Extract search queries from tool_use blocks
search_tasks = []
for tool_call in tool_calls:
query = tool_call["input"].get("query")
if query:
verbose_logger.debug(
f"WebSearchInterception: Queuing search for query='{query}'"
)
search_tasks.append(self._execute_search(query))
else:
verbose_logger.debug(
f"WebSearchInterception: Tool call {tool_call['id']} has no query"
)
# Add empty result for tools without query
search_tasks.append(self._create_empty_search_result())
# Execute searches in parallel
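        # (asyncio.gather preserves input order, so results stay aligned
        # with tool_calls)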
verbose_logger.debug(
f"WebSearchInterception: Executing {len(search_tasks)} search(es) in parallel"
)
search_results = await asyncio.gather(*search_tasks, return_exceptions=True)
# Handle any exceptions in search results
final_search_results: List[str] = []
for i, result in enumerate(search_results):
if isinstance(result, Exception):
verbose_logger.error(
f"WebSearchInterception: Search {i} failed with error: {str(result)}"
)
final_search_results.append(f"Search failed: {str(result)}")
elif isinstance(result, str):
# Explicitly cast to str for type checker
final_search_results.append(cast(str, result))
else:
# Should never happen, but handle for type safety
verbose_logger.debug(
f"WebSearchInterception: Unexpected result type {type(result)} at index {i}"
)
final_search_results.append(str(result))
# Build assistant and user messages using transformation
assistant_message, user_message = WebSearchTransformation.transform_response(
tool_calls=tool_calls,
search_results=final_search_results,
thinking_blocks=thinking_blocks,
)
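        # Anthropic tool-use protocol: the assistant turn replays any thinking
        # blocks plus the tool_use blocks; the following user turn carries the
        # matching tool_result blocks.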
# Make follow-up request with search results
# Type cast: user_message is a Dict for Anthropic format (default response_format)
follow_up_messages = messages + [assistant_message, cast(Dict, user_message)]
verbose_logger.debug(
"WebSearchInterception: Making follow-up request with search results"
)
verbose_logger.debug(
f"WebSearchInterception: Follow-up messages count: {len(follow_up_messages)}"
)
verbose_logger.debug(
f"WebSearchInterception: Last message (tool_result): {user_message}"
)
# Correlation context for structured logging
_call_id = getattr(logging_obj, "litellm_call_id", None) or kwargs.get(
"litellm_call_id", "unknown"
)
full_model_name = model # safe default before try block
# Use anthropic_messages.acreate for follow-up request
try:
max_tokens = self._resolve_max_tokens(
anthropic_messages_optional_request_params, kwargs
)
verbose_logger.debug(
f"WebSearchInterception: Using max_tokens={max_tokens} for follow-up request"
)
# Create a copy of optional params without max_tokens (since we pass it explicitly)
optional_params_without_max_tokens = {
k: v
for k, v in anthropic_messages_optional_request_params.items()
if k != "max_tokens"
}
kwargs_for_followup = self._prepare_followup_kwargs(kwargs)
# Get model from logging_obj.model_call_details["agentic_loop_params"]
# This preserves the full model name with provider prefix (e.g., "bedrock/invoke/...")
if logging_obj is not None:
agentic_params = logging_obj.model_call_details.get(
"agentic_loop_params", {}
)
full_model_name = agentic_params.get("model", model)
verbose_logger.debug(
f"WebSearchInterception: Using model name: {full_model_name}"
)
final_response = await anthropic_messages.acreate(
max_tokens=max_tokens,
messages=follow_up_messages,
model=full_model_name,
**optional_params_without_max_tokens,
**kwargs_for_followup,
)
verbose_logger.debug(
f"WebSearchInterception: Follow-up request completed, response type: {type(final_response)}"
)
verbose_logger.debug(
f"WebSearchInterception: Final response: {final_response}"
)
return final_response
except Exception as e:
verbose_logger.exception(
"WebSearchInterception: Follow-up request failed "
"[call_id=%s model=%s messages=%d searches=%d]: %s",
_call_id,
full_model_name,
len(follow_up_messages),
len(final_search_results),
str(e),
)
raise
async def _execute_search(self, query: str) -> str:
"""Execute a single web search using router's search tools"""
try:
# Import router from proxy_server
try:
from litellm.proxy.proxy_server import llm_router
except ImportError:
verbose_logger.debug(
"WebSearchInterception: Could not import llm_router from proxy_server, "
"falling back to direct litellm.asearch() with perplexity"
)
llm_router = None
# Determine search provider from router's search_tools
search_provider: Optional[str] = None
if llm_router is not None and hasattr(llm_router, "search_tools"):
if self.search_tool_name:
# Find specific search tool by name
matching_tools = [
tool
for tool in llm_router.search_tools
if tool.get("search_tool_name") == self.search_tool_name
]
if matching_tools:
search_tool = matching_tools[0]
search_provider = search_tool.get("litellm_params", {}).get(
"search_provider"
)
verbose_logger.debug(
f"WebSearchInterception: Found search tool '{self.search_tool_name}' "
f"with provider '{search_provider}'"
)
else:
verbose_logger.debug(
f"WebSearchInterception: Search tool '{self.search_tool_name}' not found in router, "
"falling back to first available or perplexity"
)
# If no specific tool or not found, use first available
if not search_provider and llm_router.search_tools:
first_tool = llm_router.search_tools[0]
search_provider = first_tool.get("litellm_params", {}).get(
"search_provider"
)
verbose_logger.debug(
f"WebSearchInterception: Using first available search tool with provider '{search_provider}'"
)
# Fallback to perplexity if no router or no search tools configured
if not search_provider:
search_provider = "perplexity"
verbose_logger.debug(
"WebSearchInterception: No search tools configured in router, "
f"using default provider '{search_provider}'"
)
verbose_logger.debug(
f"WebSearchInterception: Executing search for '{query}' using provider '{search_provider}'"
)
result = await litellm.asearch(query=query, search_provider=search_provider)
# Format using transformation function
search_result_text = WebSearchTransformation.format_search_response(result)
verbose_logger.debug(
f"WebSearchInterception: Search completed for '{query}', got {len(search_result_text)} chars"
)
return search_result_text
except Exception as e:
verbose_logger.error(
f"WebSearchInterception: Search failed for '{query}': {str(e)}"
)
raise
async def _execute_chat_completion_agentic_loop( # noqa: PLR0915
self,
model: str,
messages: List[Dict],
tool_calls: List[Dict],
optional_params: Dict,
logging_obj: Any,
stream: bool,
kwargs: Dict,
response_format: str = "openai",
) -> Any:
"""Execute litellm.search() and make follow-up chat completion request"""
# Extract search queries from tool_calls
search_tasks = []
for tool_call in tool_calls:
# Handle both Anthropic-style input and OpenAI-style function.arguments
query = None
if "input" in tool_call and isinstance(tool_call["input"], dict):
query = tool_call["input"].get("query")
elif "function" in tool_call:
func = tool_call["function"]
if isinstance(func, dict):
args = func.get("arguments", {})
if isinstance(args, dict):
query = args.get("query")
if query:
verbose_logger.debug(
f"WebSearchInterception: Queuing search for query='{query}'"
)
search_tasks.append(self._execute_search(query))
else:
verbose_logger.debug(
f"WebSearchInterception: Tool call {tool_call.get('id')} has no query"
)
# Add empty result for tools without query
search_tasks.append(self._create_empty_search_result())
# Execute searches in parallel
verbose_logger.debug(
f"WebSearchInterception: Executing {len(search_tasks)} search(es) in parallel"
)
search_results = await asyncio.gather(*search_tasks, return_exceptions=True)
# Handle any exceptions in search results
final_search_results: List[str] = []
for i, result in enumerate(search_results):
if isinstance(result, Exception):
verbose_logger.error(
f"WebSearchInterception: Search {i} failed with error: {str(result)}"
)
final_search_results.append(f"Search failed: {str(result)}")
elif isinstance(result, str):
final_search_results.append(cast(str, result))
else:
verbose_logger.debug(
f"WebSearchInterception: Unexpected result type {type(result)} at index {i}"
)
final_search_results.append(str(result))
# Build assistant and tool messages using transformation
(
assistant_message,
tool_messages_or_user,
) = WebSearchTransformation.transform_response(
tool_calls=tool_calls,
search_results=final_search_results,
response_format=response_format,
)
# Make follow-up request with search results
# For OpenAI format, tool_messages_or_user is a list of tool messages
if response_format == "openai":
follow_up_messages = (
messages + [assistant_message] + cast(List[Dict], tool_messages_or_user)
)
else:
# For Anthropic format (shouldn't happen in this method, but handle it)
follow_up_messages = messages + [
assistant_message,
cast(Dict, tool_messages_or_user),
]
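        # OpenAI tool-calling protocol: the assistant message carries the
        # tool_calls, followed by one "tool" role message per call with the
        # matching tool_call_id.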
verbose_logger.debug(
"WebSearchInterception: Making follow-up chat completion request with search results"
)
verbose_logger.debug(
f"WebSearchInterception: Follow-up messages count: {len(follow_up_messages)}"
)
# Use litellm.acompletion for follow-up request
try:
# Remove internal parameters that shouldn't be passed to follow-up request
internal_params = {
"_websearch_interception",
"acompletion",
"litellm_logging_obj",
"custom_llm_provider",
"model_alias_map",
"stream_response",
"custom_prompt_dict",
}
kwargs_for_followup = {
k: v
for k, v in kwargs.items()
if not k.startswith("_websearch_interception")
and k not in internal_params
}
# Get full model name from kwargs
full_model_name = model
if "custom_llm_provider" in kwargs:
custom_llm_provider = kwargs["custom_llm_provider"]
# Reconstruct full model name with provider prefix if needed
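            # e.g., model="anthropic.claude-3-sonnet" with provider="bedrock"
            # becomes "bedrock/anthropic.claude-3-sonnet" (illustrative ids)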
if not model.startswith(custom_llm_provider):
# Check if model already has a provider prefix
if "/" not in model:
full_model_name = f"{custom_llm_provider}/{model}"
verbose_logger.debug(
f"WebSearchInterception: Using model name: {full_model_name}"
)
# Prepare tools for follow-up request (same as original)
tools_param = optional_params.get("tools")
# Remove tools and extra_body from optional_params to avoid issues
# extra_body often contains internal LiteLLM params that shouldn't be forwarded
optional_params_clean = {
k: v
for k, v in optional_params.items()
if k
not in {
"tools",
"extra_body",
"model_alias_map",
"stream_response",
"custom_prompt_dict",
}
}
final_response = await litellm.acompletion(
model=full_model_name,
messages=follow_up_messages,
tools=tools_param,
**optional_params_clean,
**kwargs_for_followup,
)
verbose_logger.debug(
f"WebSearchInterception: Follow-up request completed, response type: {type(final_response)}"
)
return final_response
except Exception as e:
verbose_logger.exception(
f"WebSearchInterception: Follow-up request failed: {str(e)}"
)
raise
async def _create_empty_search_result(self) -> str:
"""Create an empty search result for tool calls without queries"""
return "No search query provided"
@staticmethod
def initialize_from_proxy_config(
litellm_settings: Dict[str, Any],
callback_specific_params: Dict[str, Any],
) -> "WebSearchInterceptionLogger":
"""
Static method to initialize WebSearchInterceptionLogger from proxy config.
Used in callback_utils.py to simplify initialization logic.
Args:
litellm_settings: Dictionary containing litellm_settings from proxy_config.yaml
callback_specific_params: Dictionary containing callback-specific parameters
Returns:
Configured WebSearchInterceptionLogger instance
Example:
From callback_utils.py:
websearch_obj = WebSearchInterceptionLogger.initialize_from_proxy_config(
litellm_settings=litellm_settings,
callback_specific_params=callback_specific_params
)
"""
# Get websearch_interception_params from litellm_settings or callback_specific_params
websearch_params: WebSearchInterceptionConfig = {}
if "websearch_interception_params" in litellm_settings:
websearch_params = litellm_settings["websearch_interception_params"]
elif "websearch_interception" in callback_specific_params:
websearch_params = callback_specific_params["websearch_interception"]
# Use classmethod to initialize from config
return WebSearchInterceptionLogger.from_config_yaml(websearch_params)