361 lines
14 KiB
Python
361 lines
14 KiB
Python
import os
|
|
from typing import TYPE_CHECKING, Any, Optional, Union
|
|
|
|
from litellm._logging import verbose_logger
|
|
from litellm.integrations.arize import _utils
|
|
from litellm.integrations.arize._utils import ArizeOTELAttributes
|
|
from litellm.types.integrations.arize_phoenix import ArizePhoenixConfig
|
|
|
|
# Type aliases used in annotations below. Under a static type checker the real
# OpenTelemetry / litellm types are imported; at runtime every alias collapses
# to ``Any`` so that only ``litellm.integrations.opentelemetry`` is imported
# eagerly by this block.
if TYPE_CHECKING:
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.trace import Span as _Span
    from opentelemetry.trace import SpanKind

    from litellm.integrations.opentelemetry import OpenTelemetry as _OpenTelemetry
    from litellm.integrations.opentelemetry import (
        OpenTelemetryConfig as _OpenTelemetryConfig,
    )
    from litellm.types.integrations.arize import Protocol as _Protocol

    Protocol = _Protocol
    OpenTelemetryConfig = _OpenTelemetryConfig
    # ``Any`` is part of the union so span-like objects also type-check.
    Span = Union[_Span, Any]
    OpenTelemetry = _OpenTelemetry
else:
    Protocol = Any
    OpenTelemetryConfig = Any
    Span = Any
    TracerProvider = Any
    SpanKind = Any
    # Import OpenTelemetry at runtime: it is needed as an actual base class,
    # not just as an annotation.
    # NOTE(review): if this import fails, ``OpenTelemetry`` becomes ``None``,
    # and ``None`` cannot be used as a base class — a ``class X(OpenTelemetry)``
    # statement would then raise ``TypeError`` at import time. Confirm that is
    # the intended failure mode.
    try:
        from litellm.integrations.opentelemetry import OpenTelemetry
    except ImportError:
        OpenTelemetry = None  # type: ignore
|
|
|
|
|
|
# Traces URL on otlp.arize.com (presumably the hosted OTLP/HTTP collector).
# NOTE(review): not referenced by the code visible in this module — confirm it
# is still imported elsewhere before removing.
ARIZE_HOSTED_PHOENIX_ENDPOINT = "https://otlp.arize.com/v1/traces"
|
|
|
|
|
|
class ArizePhoenixLogger(OpenTelemetry):  # type: ignore
    """
    Arize Phoenix logger that sends traces to a Phoenix endpoint.

    Creates its own dedicated TracerProvider so it can coexist with the
    generic ``otel`` callback (or any other OTEL-based integration) without
    fighting over the global ``opentelemetry.trace`` TracerProvider singleton.
    """

    def _init_tracing(self, tracer_provider):
        """
        Override to always create a *private* TracerProvider for Arize Phoenix.

        The base ``OpenTelemetry._init_tracing`` falls back to the global
        TracerProvider when one already exists.  That causes whichever
        integration initialises second to silently reuse the first one's
        exporter, so spans only reach one destination.

        By creating our own provider we guarantee Arize Phoenix always gets
        its own exporter pipeline, regardless of initialisation order.

        Args:
            tracer_provider: Optional pre-built provider. When not ``None`` it
                is used as-is and no dedicated provider is created.
        """
        from opentelemetry.sdk.trace import TracerProvider
        from opentelemetry.trace import SpanKind

        if tracer_provider is not None:
            # Explicitly supplied (e.g. in tests) — honour it.
            self.tracer = tracer_provider.get_tracer("litellm")
            self.span_kind = SpanKind
            return

        # Always create a dedicated provider — never touch the global one.
        provider = TracerProvider(resource=self._get_litellm_resource(self.config))
        provider.add_span_processor(self._get_span_processor())
        self.tracer = provider.get_tracer("litellm")
        self.span_kind = SpanKind
        verbose_logger.debug(
            "ArizePhoenixLogger: Created dedicated TracerProvider "
            "(endpoint=%s, exporter=%s)",
            self.config.endpoint,
            self.config.exporter,
        )

    def _init_otel_logger_on_litellm_proxy(self):
        """
        Override: Arize Phoenix should NOT overwrite the proxy's
        ``open_telemetry_logger``.  That attribute is reserved for the
        primary ``otel`` callback which handles proxy-level parent spans.

        Intentionally a no-op.
        """

    def set_attributes(self, span: Span, kwargs, response_obj: Optional[Any]):
        """Instance-level hook that delegates to ``set_arize_phoenix_attributes``."""
        ArizePhoenixLogger.set_arize_phoenix_attributes(span, kwargs, response_obj)

    @staticmethod
    def set_arize_phoenix_attributes(span: Span, kwargs, response_obj):
        """
        Set the Arize attributes and the Phoenix project name on ``span``.

        The project name is written to ``openinference.project.name``. A
        per-request name found in the request metadata wins; otherwise the
        ``project_name`` from ``get_arize_phoenix_config()`` is used when it
        is non-empty.
        """
        from litellm.integrations.opentelemetry_utils.base_otel_llm_obs_attributes import (
            safe_set_attribute,
        )

        _utils.set_attributes(span, kwargs, response_obj, ArizeOTELAttributes)

        # Dynamic project name: check metadata first, then fall back to env var config
        dynamic_project_name = ArizePhoenixLogger._get_dynamic_project_name(kwargs)
        if dynamic_project_name:
            safe_set_attribute(span, "openinference.project.name", dynamic_project_name)
            return

        # Fall back to static config from env var
        config = ArizePhoenixLogger.get_arize_phoenix_config()
        if config.project_name:
            safe_set_attribute(span, "openinference.project.name", config.project_name)

    @staticmethod
    def _get_dynamic_project_name(kwargs) -> Optional[str]:
        """
        Retrieve dynamic Phoenix project name from request metadata.

        Users can set `metadata.phoenix_project_name` in their request to route
        traces to different Phoenix projects dynamically.

        Looks in ``kwargs["standard_logging_object"]["metadata"]`` first and
        then in ``kwargs["litellm_params"]["metadata"]`` (SDK usage). Anything
        that is not a dict along the way is skipped.

        Returns:
            The first truthy ``phoenix_project_name`` converted to ``str``, or
            ``None`` when neither location provides one.
        """
        for source_key in ("standard_logging_object", "litellm_params"):
            container = kwargs.get(source_key)
            if not isinstance(container, dict):
                continue
            metadata = container.get("metadata")
            if not isinstance(metadata, dict):
                continue
            project_name = metadata.get("phoenix_project_name")
            if project_name:
                return str(project_name)
        return None

    def _get_phoenix_context(self, kwargs):
        """
        Build a trace context for Phoenix's dedicated TracerProvider.

        The base ``_get_span_context`` returns parent spans from the global
        TracerProvider (the ``otel`` callback).  Those spans live on a
        *different* TracerProvider, so they won't appear in Phoenix — using
        them as parents just creates broken links.

        Instead we:
        1. Honour an incoming ``traceparent`` HTTP header (distributed tracing).
        2. In proxy mode, create our *own* parent span on Phoenix's tracer
           so the hierarchy is visible end-to-end inside Phoenix.
        3. In SDK (non-proxy) mode, return the traceparent context (or
           ``None``) and no parent span, so the request span is a root span
           unless a ``traceparent`` was supplied.

        Returns:
            A ``(context, parent_span)`` tuple. ``parent_span`` is ``None``
            outside proxy mode; when it is not ``None`` the caller is
            responsible for ending it.
        """
        from opentelemetry import trace

        litellm_params = kwargs.get("litellm_params", {}) or {}
        proxy_server_request = litellm_params.get("proxy_server_request", {}) or {}
        headers = proxy_server_request.get("headers", {}) or {}

        # Propagate distributed trace context if the caller sent a traceparent
        traceparent_ctx = (
            self.get_traceparent_from_header(headers=headers)
            if headers.get("traceparent")
            else None
        )

        # A non-empty ``proxy_server_request`` is what marks proxy mode here.
        if not proxy_server_request:
            # SDK mode — no parent span needed
            return traceparent_ctx, None

        # Create a parent span on Phoenix's own tracer so both parent
        # and child are exported to Phoenix.
        # Fall back to ``api_call_start_time`` also when ``start_time`` is
        # present but ``None`` (a plain ``dict.get`` default would not).
        start_time_val = kwargs.get("start_time")
        if start_time_val is None:
            start_time_val = kwargs.get("api_call_start_time")

        parent_span = self.tracer.start_span(
            name="litellm_proxy_request",
            start_time=(
                self._to_ns(start_time_val) if start_time_val is not None else None
            ),
            context=traceparent_ctx,
            kind=self.span_kind.SERVER,
        )
        ctx = trace.set_span_in_context(parent_span)
        return ctx, parent_span

    def _handle_success(self, kwargs, response_obj, start_time, end_time):
        """
        Override to always create spans on ArizePhoenixLogger's dedicated TracerProvider.

        The base class's ``_get_span_context`` would find the parent span created by
        the ``otel`` callback on the *global* TracerProvider.  That span is invisible
        in Phoenix (different exporter pipeline), so we ignore it and build our own
        hierarchy via ``_get_phoenix_context``.

        Every span started here is ended in a ``finally`` block, so a failure
        while annotating does not leave spans un-ended (and therefore never
        exported). Any exception still propagates to the caller.
        """
        from opentelemetry.trace import Status, StatusCode

        verbose_logger.debug(
            "ArizePhoenixLogger: Logging kwargs: %s, OTEL config settings=%s",
            kwargs,
            self.config,
        )

        # Convert once, before any span exists, so a bad ``end_time`` cannot
        # strand an already-started span.
        end_ns = self._to_ns(end_time)
        ctx, parent_span = self._get_phoenix_context(kwargs)

        try:
            # Create litellm_request span (child of our parent when in proxy mode)
            span = self.tracer.start_span(
                name=self._get_span_name(kwargs),
                start_time=self._to_ns(start_time),
                context=ctx,
            )
            try:
                span.set_status(Status(StatusCode.OK))
                self.set_attributes(span, kwargs, response_obj)

                # Raw-request sub-span (if enabled) — must be created before
                # ending the parent span so the hierarchy is valid.
                self._maybe_log_raw_request(
                    kwargs, response_obj, start_time, end_time, span
                )
            finally:
                span.end(end_time=end_ns)

            # Guardrail span
            self._create_guardrail_span(kwargs=kwargs, context=ctx)

            # Annotate our proxy parent span (closed in the outer ``finally``)
            if parent_span is not None:
                parent_span.set_status(Status(StatusCode.OK))
                self.set_attributes(parent_span, kwargs, response_obj)
        finally:
            if parent_span is not None:
                parent_span.end(end_time=end_ns)

        # Metrics & cost recording
        self._record_metrics(kwargs, response_obj, start_time, end_time)

        # Semantic logs
        if self.config.enable_events:
            self._emit_semantic_logs(kwargs, response_obj, span)

    def _handle_failure(self, kwargs, response_obj, start_time, end_time):
        """
        Override to always create failure spans on ArizePhoenixLogger's dedicated
        TracerProvider.  Mirrors ``_handle_success`` but sets ERROR status and
        records the exception on both the request span and the proxy parent
        span; no raw-request span, metrics or semantic logs are emitted.

        Every span started here is ended in a ``finally`` block so failure
        traces are not lost if annotating a span raises.
        """
        from opentelemetry.trace import Status, StatusCode

        verbose_logger.debug(
            "ArizePhoenixLogger: Failure - Logging kwargs: %s, OTEL config settings=%s",
            kwargs,
            self.config,
        )

        # Convert once, before any span exists, so a bad ``end_time`` cannot
        # strand an already-started span.
        end_ns = self._to_ns(end_time)
        ctx, parent_span = self._get_phoenix_context(kwargs)

        try:
            # Create litellm_request span (child of our parent when in proxy mode)
            span = self.tracer.start_span(
                name=self._get_span_name(kwargs),
                start_time=self._to_ns(start_time),
                context=ctx,
            )
            try:
                span.set_status(Status(StatusCode.ERROR))
                self.set_attributes(span, kwargs, response_obj)
                self._record_exception_on_span(span=span, kwargs=kwargs)
            finally:
                span.end(end_time=end_ns)

            # Guardrail span
            self._create_guardrail_span(kwargs=kwargs, context=ctx)

            # Annotate our proxy parent span (closed in the outer ``finally``)
            if parent_span is not None:
                parent_span.set_status(Status(StatusCode.ERROR))
                self.set_attributes(parent_span, kwargs, response_obj)
                self._record_exception_on_span(span=parent_span, kwargs=kwargs)
        finally:
            if parent_span is not None:
                parent_span.end(end_time=end_ns)

    @staticmethod
    def get_arize_phoenix_config() -> ArizePhoenixConfig:
        """
        Retrieves the Arize Phoenix configuration based on environment variables.

        Environment variables read:
            PHOENIX_API_KEY: sent as ``Authorization=Bearer <key>`` when set.
            PHOENIX_COLLECTOR_HTTP_ENDPOINT: preferred collector endpoint.
            PHOENIX_COLLECTOR_ENDPOINT: used when the HTTP variable is empty.
            PHOENIX_PROJECT_NAME: project name, ``"default"`` when unset.

        Endpoint resolution:
            * no endpoint -> ``http://localhost:6006/v1/traces`` over HTTP;
            * ``grpc://...`` or a ``:4317`` URL without ``/v1/traces`` -> used
              unchanged over gRPC;
            * Phoenix Cloud URLs and URLs already containing ``/v1/traces`` ->
              used unchanged over HTTP;
            * any other URL -> ``/v1/traces`` is appended (only ``/traces``
              when the URL already ends in ``/v1``), ignoring trailing slashes.

        Returns:
            ArizePhoenixConfig: A Pydantic model containing Arize Phoenix configuration.

        Raises:
            ValueError: If the endpoint is Phoenix Cloud and no API key is set.
        """
        api_key = os.environ.get("PHOENIX_API_KEY", None)

        # The HTTP-specific variable wins; the generic one is the fallback.
        collector_endpoint = os.environ.get(
            "PHOENIX_COLLECTOR_HTTP_ENDPOINT", None
        ) or os.environ.get("PHOENIX_COLLECTOR_ENDPOINT", None)

        protocol: Protocol = "otlp_http"

        if not collector_endpoint:
            # If no endpoint specified, self hosted phoenix
            endpoint = "http://localhost:6006/v1/traces"
            verbose_logger.debug(
                f"No PHOENIX_COLLECTOR_ENDPOINT found, using default local Phoenix endpoint: {endpoint}"
            )
        elif collector_endpoint.startswith("grpc://") or (
            ":4317" in collector_endpoint and "/v1/traces" not in collector_endpoint
        ):
            endpoint = collector_endpoint
            protocol = "otlp_grpc"
        elif (
            # Phoenix Cloud endpoints (app.phoenix.arize.com) include the space in the URL
            "app.phoenix.arize.com" in collector_endpoint
            or "/v1/traces" in collector_endpoint
        ):
            endpoint = collector_endpoint
        else:
            # For other HTTP endpoints, ensure they have the correct path.
            # Strip trailing slashes first so ".../v1/" becomes ".../v1/traces"
            # rather than ".../v1/v1/traces".
            base = collector_endpoint.rstrip("/")
            if base.endswith("/v1"):
                endpoint = f"{base}/traces"
            else:
                endpoint = f"{base}/v1/traces"

        otlp_auth_headers = None
        if api_key is not None:
            otlp_auth_headers = f"Authorization=Bearer {api_key}"
        elif "app.phoenix.arize.com" in endpoint:
            # Phoenix Cloud requires an API key
            raise ValueError(
                "PHOENIX_API_KEY must be set when using Phoenix Cloud (app.phoenix.arize.com)."
            )

        project_name = os.environ.get("PHOENIX_PROJECT_NAME", "default")

        return ArizePhoenixConfig(
            otlp_auth_headers=otlp_auth_headers,
            protocol=protocol,
            endpoint=endpoint,
            project_name=project_name,
        )

    # Cannot suppress additional proxy server spans; the previous methods that
    # attempted to were removed.

    async def async_health_check(self):
        """
        Report whether Phoenix credentials are configured.

        Only the environment-derived configuration is inspected; no request
        is sent to the Phoenix endpoint.

        Returns:
            ``{"status": "healthy", "message": ...}`` when auth headers are
            configured, otherwise ``{"status": "unhealthy",
            "error_message": ...}``. An invalid configuration (``ValueError``
            from ``get_arize_phoenix_config``) is reported as unhealthy
            instead of being raised.
        """
        try:
            config = self.get_arize_phoenix_config()
        except ValueError as exc:
            return {
                "status": "unhealthy",
                "error_message": str(exc),
            }

        if not config.otlp_auth_headers:
            return {
                "status": "unhealthy",
                "error_message": "PHOENIX_API_KEY environment variable not set",
            }

        return {
            "status": "healthy",
            "message": "Arize-Phoenix credentials are configured properly",
        }
|