chore: initial public snapshot for github upload

This commit is contained in:
Your Name
2026-03-26 20:06:14 +08:00
commit 0e5ecd930e
3497 changed files with 1586236 additions and 0 deletions

View File

@@ -0,0 +1,124 @@
"""Core export engine for Focus integrations (heavy dependencies)."""
from __future__ import annotations
from typing import Any, Dict, Optional
import polars as pl
from litellm._logging import verbose_logger
from .database import FocusLiteLLMDatabase
from .destinations import FocusDestinationFactory, FocusTimeWindow
from .serializers import FocusParquetSerializer, FocusSerializer
from .transformer import FocusTransformer
class FocusExportEngine:
    """Engine that fetches, normalizes, and uploads Focus exports.

    The engine wires together four collaborators created in ``__init__``:
    a destination built by ``FocusDestinationFactory``, a serializer, a
    ``FocusTransformer`` and a ``FocusLiteLLMDatabase``.
    """

    # Maximum number of rows returned in each dry-run sample.
    _DRY_RUN_SAMPLE_ROWS = 50

    def __init__(
        self,
        *,
        provider: str,
        export_format: str,
        prefix: str,
        destination_config: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Build the engine and its collaborators.

        Args:
            provider: Forwarded to ``FocusDestinationFactory.create``.
            export_format: Serialization format; only ``"parquet"`` is
                accepted (see ``_init_serializer``).
            prefix: Forwarded to ``FocusDestinationFactory.create``.
            destination_config: Optional settings forwarded to the
                destination factory as ``config``.

        Raises:
            NotImplementedError: If ``export_format`` is not ``"parquet"``.
        """
        self.provider = provider
        self.export_format = export_format
        self.prefix = prefix
        self._destination = FocusDestinationFactory.create(
            provider=self.provider,
            prefix=self.prefix,
            config=destination_config,
        )
        self._serializer = self._init_serializer()
        self._transformer = FocusTransformer()
        self._database = FocusLiteLLMDatabase()

    def _init_serializer(self) -> FocusSerializer:
        """Return the serializer matching ``self.export_format``.

        Raises:
            NotImplementedError: For any format other than ``"parquet"``.
        """
        if self.export_format != "parquet":
            raise NotImplementedError("Only parquet export supported currently")
        return FocusParquetSerializer()

    async def dry_run_export_usage_data(self, limit: Optional[int]) -> Dict[str, Any]:
        """Fetch and transform usage data without delivering it anywhere.

        Args:
            limit: Forwarded to the database query as ``limit``.

        Returns:
            A dict with three keys: ``usage_data`` (a sample of the fetched
            rows as dicts), ``normalized_data`` (a sample of the transformed
            rows as dicts) and ``summary`` (aggregates computed over the
            whole transformed frame). A summary column that is absent from
            the transformed frame contributes ``0`` / ``0.0``.
        """
        data = await self._database.get_usage_data(limit=limit)
        normalized = self._transformer.transform(data)

        # ``head(n)`` already clamps to the frame height, so an explicit
        # ``min(n, len(frame))`` is unnecessary.
        sample_rows = self._DRY_RUN_SAMPLE_ROWS
        return {
            "usage_data": data.head(sample_rows).to_dicts(),
            "normalized_data": normalized.head(sample_rows).to_dicts(),
            "summary": {
                "total_records": len(normalized),
                "total_spend": self._sum_column(normalized, "spend"),
                "total_tokens": self._sum_column(normalized, "total_tokens"),
                "unique_teams": self._count_unique(normalized, "team_id"),
                "unique_models": self._count_unique(normalized, "model"),
            },
        }

    async def export_window(
        self,
        *,
        window: FocusTimeWindow,
        limit: Optional[int],
    ) -> None:
        """Fetch, transform and deliver the usage data for one time window.

        Nothing is delivered (only a debug message is logged) when either
        the fetched frame or the transformed frame is empty.

        Args:
            window: Supplies ``start_time`` / ``end_time`` for the query and
                is passed on to the destination.
            limit: Forwarded to the database query as ``limit``.
        """
        data = await self._database.get_usage_data(
            limit=limit,
            start_time_utc=window.start_time,
            end_time_utc=window.end_time,
        )
        if data.is_empty():
            verbose_logger.debug("Focus export: no usage data for window %s", window)
            return

        normalized = self._transformer.transform(data)
        if normalized.is_empty():
            verbose_logger.debug(
                "Focus export: normalized data empty for window %s", window
            )
            return

        await self._serialize_and_upload(normalized, window)

    async def _serialize_and_upload(
        self, frame: pl.DataFrame, window: FocusTimeWindow
    ) -> None:
        """Serialize ``frame`` and hand the payload to the destination.

        A falsy payload is logged at debug level and skipped.
        """
        payload = self._serializer.serialize(frame)
        if not payload:
            verbose_logger.debug("Focus export: serializer returned empty payload")
            return

        await self._destination.deliver(
            content=payload,
            time_window=window,
            filename=self._build_filename(),
        )

    def _build_filename(self) -> str:
        """Return ``usage.<extension>`` using the serializer's extension.

        Raises:
            ValueError: If the serializer's ``extension`` is falsy.
        """
        extension = self._serializer.extension
        if not extension:
            raise ValueError("Serializer must declare a file extension")
        return f"usage.{extension}"

    @staticmethod
    def _sum_column(frame: pl.DataFrame, column: str) -> float:
        """Return the sum of ``column`` as a float.

        Returns ``0.0`` when the frame is empty, the column is missing, or
        the aggregate is ``None``.
        """
        if frame.is_empty() or column not in frame.columns:
            return 0.0
        value = frame.select(pl.col(column).sum().alias("sum")).row(0)[0]
        return 0.0 if value is None else float(value)

    @staticmethod
    def _count_unique(frame: pl.DataFrame, column: str) -> int:
        """Return the number of unique values in ``column``.

        Returns ``0`` when the frame is empty, the column is missing, or
        the aggregate is ``None``.
        """
        if frame.is_empty() or column not in frame.columns:
            return 0
        value = frame.select(pl.col(column).n_unique().alias("unique")).row(0)[0]
        return 0 if value is None else int(value)