chore: initial snapshot for gitea/github upload
This commit is contained in:
@@ -0,0 +1,124 @@
|
||||
"""Core export engine for Focus integrations (heavy dependencies)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import polars as pl
|
||||
|
||||
from litellm._logging import verbose_logger
|
||||
|
||||
from .database import FocusLiteLLMDatabase
|
||||
from .destinations import FocusDestinationFactory, FocusTimeWindow
|
||||
from .serializers import FocusParquetSerializer, FocusSerializer
|
||||
from .transformer import FocusTransformer
|
||||
|
||||
|
||||
class FocusExportEngine:
    """Fetch usage data, normalize it, serialize it, and deliver it.

    The engine wires together four collaborators created in ``__init__``:
    a destination (from ``FocusDestinationFactory``), a serializer, a
    ``FocusTransformer`` and a ``FocusLiteLLMDatabase``.
    """

    # Maximum number of rows returned per sample by the dry-run export.
    _DRY_RUN_SAMPLE_ROWS = 50

    def __init__(
        self,
        *,
        provider: str,
        export_format: str,
        prefix: str,
        destination_config: Optional[dict[str, Any]] = None,
    ) -> None:
        """Build the engine and its collaborators.

        Args:
            provider: Passed to ``FocusDestinationFactory.create`` as
                ``provider``.
            export_format: Serialization format; only ``"parquet"`` is
                accepted at the moment.
            prefix: Passed to ``FocusDestinationFactory.create`` as
                ``prefix``.
            destination_config: Optional configuration forwarded to the
                destination factory as ``config``.

        Raises:
            NotImplementedError: If ``export_format`` is not ``"parquet"``.
        """
        self.provider = provider
        self.export_format = export_format
        self.prefix = prefix
        # The destination is created before the serializer, so a factory
        # failure surfaces before the export-format check.
        self._destination = FocusDestinationFactory.create(
            provider=self.provider,
            prefix=self.prefix,
            config=destination_config,
        )
        self._serializer = self._init_serializer()
        self._transformer = FocusTransformer()
        self._database = FocusLiteLLMDatabase()

    def _init_serializer(self) -> FocusSerializer:
        """Return the serializer matching ``self.export_format``.

        Raises:
            NotImplementedError: For any format other than ``"parquet"``.
        """
        if self.export_format != "parquet":
            raise NotImplementedError("Only parquet export supported currently")
        return FocusParquetSerializer()

    async def dry_run_export_usage_data(self, limit: Optional[int]) -> Dict[str, Any]:
        """Fetch and normalize usage data without delivering anything.

        Args:
            limit: Forwarded to the database's ``get_usage_data`` as ``limit``.

        Returns:
            A dict with three keys: ``"usage_data"`` (sample of the fetched
            rows as dicts), ``"normalized_data"`` (sample of the transformed
            rows as dicts) and ``"summary"`` (aggregates computed over the
            full normalized frame). Each sample holds at most
            ``_DRY_RUN_SAMPLE_ROWS`` rows.
        """
        data = await self._database.get_usage_data(limit=limit)
        normalized = self._transformer.transform(data)

        # ``head(n)`` already returns at most ``n`` rows, so no explicit
        # clamp against the frame length is needed.
        sample_rows = self._DRY_RUN_SAMPLE_ROWS
        usage_sample = data.head(sample_rows).to_dicts()
        normalized_sample = normalized.head(sample_rows).to_dicts()

        summary = {
            "total_records": len(normalized),
            "total_spend": self._sum_column(normalized, "spend"),
            "total_tokens": self._sum_column(normalized, "total_tokens"),
            "unique_teams": self._count_unique(normalized, "team_id"),
            "unique_models": self._count_unique(normalized, "model"),
        }

        return {
            "usage_data": usage_sample,
            "normalized_data": normalized_sample,
            "summary": summary,
        }

    async def export_window(
        self,
        *,
        window: FocusTimeWindow,
        limit: Optional[int],
    ) -> None:
        """Export the usage data that falls inside ``window``.

        Fetches rows between ``window.start_time`` and ``window.end_time``,
        transforms them, then serializes and delivers the result. Returns
        early (with a debug log) when either the fetched or the normalized
        frame is empty.

        Args:
            window: Time window whose ``start_time``/``end_time`` bound the
                database query; also forwarded to the destination.
            limit: Forwarded to the database's ``get_usage_data`` as ``limit``.
        """
        data = await self._database.get_usage_data(
            limit=limit,
            start_time_utc=window.start_time,
            end_time_utc=window.end_time,
        )
        if data.is_empty():
            verbose_logger.debug("Focus export: no usage data for window %s", window)
            return

        normalized = self._transformer.transform(data)
        if normalized.is_empty():
            verbose_logger.debug(
                "Focus export: normalized data empty for window %s", window
            )
            return

        await self._serialize_and_upload(normalized, window)

    async def _serialize_and_upload(
        self, frame: pl.DataFrame, window: FocusTimeWindow
    ) -> None:
        """Serialize ``frame`` and hand the payload to the destination.

        Nothing is delivered when the serializer returns a falsy payload.
        """
        payload = self._serializer.serialize(frame)
        if not payload:
            verbose_logger.debug("Focus export: serializer returned empty payload")
            return
        await self._destination.deliver(
            content=payload,
            time_window=window,
            filename=self._build_filename(),
        )

    def _build_filename(self) -> str:
        """Return ``usage.<extension>`` using the serializer's extension.

        Raises:
            ValueError: If the serializer's ``extension`` is falsy.
        """
        if not self._serializer.extension:
            raise ValueError("Serializer must declare a file extension")
        return f"usage.{self._serializer.extension}"

    @staticmethod
    def _sum_column(frame: pl.DataFrame, column: str) -> float:
        """Return the sum of ``column`` as a float.

        Returns ``0.0`` when the frame is empty, the column is absent, or
        the aggregation yields ``None``.
        """
        if frame.is_empty() or column not in frame.columns:
            return 0.0
        value = frame.select(pl.col(column).sum().alias("sum")).row(0)[0]
        if value is None:
            return 0.0
        return float(value)

    @staticmethod
    def _count_unique(frame: pl.DataFrame, column: str) -> int:
        """Return the number of unique values in ``column``.

        Returns ``0`` when the frame is empty, the column is absent, or the
        aggregation yields ``None``.
        """
        if frame.is_empty() or column not in frame.columns:
            return 0
        # NOTE(review): n_unique may count null as a distinct value — confirm
        # whether rows without a team/model should contribute to the count.
        value = frame.select(pl.col(column).n_unique().alias("unique")).row(0)[0]
        if value is None:
            return 0
        return int(value)
|
||||
Reference in New Issue
Block a user