chore: initial snapshot for gitea/github upload
This commit is contained in:
@@ -0,0 +1,422 @@
|
||||
import os
|
||||
from datetime import datetime
|
||||
from typing import TYPE_CHECKING, Any, List, Optional, cast
|
||||
|
||||
import litellm
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.constants import CLOUDZERO_EXPORT_INTERVAL_MINUTES
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||
else:
|
||||
AsyncIOScheduler = Any
|
||||
|
||||
|
||||
class CloudZeroLogger(CustomLogger):
|
||||
"""
|
||||
CloudZero Logger for exporting LiteLLM usage data to CloudZero AnyCost API.
|
||||
|
||||
Environment Variables:
|
||||
CLOUDZERO_API_KEY: CloudZero API key for authentication
|
||||
CLOUDZERO_CONNECTION_ID: CloudZero connection ID for data submission
|
||||
CLOUDZERO_TIMEZONE: Timezone for date handling (default: UTC)
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_key: Optional[str] = None,
|
||||
connection_id: Optional[str] = None,
|
||||
timezone: Optional[str] = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""Initialize CloudZero logger with configuration from parameters or environment variables."""
|
||||
super().__init__(**kwargs)
|
||||
|
||||
# Get configuration from parameters first, fall back to environment variables
|
||||
self.api_key = api_key or os.getenv("CLOUDZERO_API_KEY")
|
||||
self.connection_id = connection_id or os.getenv("CLOUDZERO_CONNECTION_ID")
|
||||
self.timezone = timezone or os.getenv("CLOUDZERO_TIMEZONE", "UTC")
|
||||
verbose_logger.debug(
|
||||
f"CloudZero Logger initialized with connection ID: {self.connection_id}, timezone: {self.timezone}"
|
||||
)
|
||||
|
||||
    async def initialize_cloudzero_export_job(self):
        """
        Handler for initializing the CloudZero export job.

        Runs when the CloudZero logger starts up (and on each scheduled tick).

        - If a redis cache is available, the pod lock manager acquires a
          cross-pod lock before exporting, so only one pod exports at a time.
        - If the lock cannot be acquired, this tick is skipped — presumably
          another pod is already exporting.
        - If redis cache is not available, the data is exported directly.
        """
        from litellm.constants import (
            CLOUDZERO_EXPORT_USAGE_DATA_JOB_NAME,
        )
        from litellm.proxy.proxy_server import proxy_logging_obj

        pod_lock_manager = proxy_logging_obj.db_spend_update_writer.pod_lock_manager

        # if using redis, ensure only one pod exports the data at a time
        if pod_lock_manager and pod_lock_manager.redis_cache:
            if await pod_lock_manager.acquire_lock(
                cronjob_id=CLOUDZERO_EXPORT_USAGE_DATA_JOB_NAME
            ):
                try:
                    await self._hourly_usage_data_export()
                finally:
                    # Always release the lock, even if the export raised.
                    await pod_lock_manager.release_lock(
                        cronjob_id=CLOUDZERO_EXPORT_USAGE_DATA_JOB_NAME
                    )
        else:
            # if not using redis, export the data directly
            await self._hourly_usage_data_export()
|
||||
|
||||
    async def _hourly_usage_data_export(self):
        """
        Export recent usage data to CloudZero.

        End time: now (UTC).
        Start time: 2 * CLOUDZERO_EXPORT_INTERVAL_MINUTES ago — note this is
        twice the export interval, NOT exactly one hour. The doubled window
        deliberately overlaps consecutive runs so spend is not lost if a run
        is skipped (e.g. a pod restart in an ephemeral environment).

        NOTE(review): the overlap relies on the "replace_hourly" operation
        being idempotent for re-submitted hours — confirm against the
        CloudZero AnyCost API semantics.
        """
        from datetime import timedelta, timezone

        from litellm.constants import CLOUDZERO_MAX_FETCHED_DATA_RECORDS

        current_time_utc = datetime.now(timezone.utc)
        # Mitigates the possibility of missing spend if an hour is skipped due to a restart in an ephemeral environment
        one_hour_ago_utc = current_time_utc - timedelta(
            minutes=CLOUDZERO_EXPORT_INTERVAL_MINUTES * 2
        )
        await self.export_usage_data(
            limit=CLOUDZERO_MAX_FETCHED_DATA_RECORDS,
            operation="replace_hourly",
            start_time_utc=one_hour_ago_utc,
            end_time_utc=current_time_utc,
        )
|
||||
|
||||
    async def export_usage_data(
        self,
        limit: Optional[int] = None,
        operation: str = "replace_hourly",
        start_time_utc: Optional[datetime] = None,
        end_time_utc: Optional[datetime] = None,
    ):
        """
        Export usage data to CloudZero.

        - Reads data from the DB
        - Transforms the data to the CloudZero CBF format
        - Sends the data to CloudZero

        Args:
            limit: Optional limit on number of records to export
            operation: CloudZero operation type ("replace_hourly" or "sum")
            start_time_utc: Optional inclusive lower bound for the usage window
            end_time_utc: Optional upper bound for the usage window

        Raises:
            ValueError: if the API key or connection ID is not configured.
            Exception: any error from the DB read, transform, or transmit
                steps is logged and re-raised.
        """
        from litellm.integrations.cloudzero.cz_stream_api import CloudZeroStreamer
        from litellm.integrations.cloudzero.database import LiteLLMDatabase
        from litellm.integrations.cloudzero.transform import CBFTransformer

        try:
            verbose_logger.debug("CloudZero Logger: Starting usage data export")

            # Validate required configuration
            if not self.api_key or not self.connection_id:
                raise ValueError(
                    "CloudZero configuration missing. Please set CLOUDZERO_API_KEY and CLOUDZERO_CONNECTION_ID environment variables."
                )

            # Initialize database connection and load data
            database = LiteLLMDatabase()
            verbose_logger.debug("CloudZero Logger: Loading usage data from database")
            data = await database.get_usage_data(
                limit=limit, start_time_utc=start_time_utc, end_time_utc=end_time_utc
            )

            if data.is_empty():
                verbose_logger.debug("CloudZero Logger: No usage data found to export")
                return

            verbose_logger.debug(f"CloudZero Logger: Processing {len(data)} records")

            # Transform data to CloudZero CBF format
            transformer = CBFTransformer()
            cbf_data = transformer.transform(data)

            if cbf_data.is_empty():
                verbose_logger.warning(
                    "CloudZero Logger: No valid data after transformation"
                )
                return

            # Send data to CloudZero
            streamer = CloudZeroStreamer(
                api_key=self.api_key,
                connection_id=self.connection_id,
                user_timezone=self.timezone,
            )

            verbose_logger.debug(
                f"CloudZero Logger: Transmitting {len(cbf_data)} records to CloudZero"
            )
            # NOTE(review): send_batched is a synchronous call inside an async
            # method — it will block the event loop while transmitting.
            streamer.send_batched(cbf_data, operation=operation)

            verbose_logger.debug(
                f"CloudZero Logger: Successfully exported {len(cbf_data)} records to CloudZero"
            )

        except Exception as e:
            verbose_logger.error(
                f"CloudZero Logger: Error exporting usage data: {str(e)}"
            )
            raise
|
||||
|
||||
async def dry_run_export_usage_data(self, limit: Optional[int] = 10000):
|
||||
"""
|
||||
Returns the data that would be exported to CloudZero without actually sending it.
|
||||
|
||||
Args:
|
||||
limit: Limit number of records to display (default: 10000)
|
||||
|
||||
Returns:
|
||||
dict: Contains usage_data, cbf_data, and summary statistics
|
||||
"""
|
||||
from litellm.integrations.cloudzero.database import LiteLLMDatabase
|
||||
from litellm.integrations.cloudzero.transform import CBFTransformer
|
||||
|
||||
try:
|
||||
verbose_logger.debug("CloudZero Logger: Starting dry run export")
|
||||
|
||||
# Initialize database connection and load data
|
||||
database = LiteLLMDatabase()
|
||||
verbose_logger.debug("CloudZero Logger: Loading usage data for dry run")
|
||||
data = await database.get_usage_data(limit=limit)
|
||||
|
||||
if data.is_empty():
|
||||
verbose_logger.warning("CloudZero Dry Run: No usage data found")
|
||||
return {
|
||||
"usage_data": [],
|
||||
"cbf_data": [],
|
||||
"summary": {
|
||||
"total_records": 0,
|
||||
"total_cost": 0,
|
||||
"total_tokens": 0,
|
||||
"unique_accounts": 0,
|
||||
"unique_services": 0,
|
||||
},
|
||||
}
|
||||
|
||||
verbose_logger.debug(
|
||||
f"CloudZero Dry Run: Processing {len(data)} records..."
|
||||
)
|
||||
|
||||
# Convert usage data to dict format for response
|
||||
usage_data_sample = data.head(50).to_dicts() # Return first 50 rows
|
||||
|
||||
# Transform data to CloudZero CBF format
|
||||
transformer = CBFTransformer()
|
||||
cbf_data = transformer.transform(data)
|
||||
|
||||
if cbf_data.is_empty():
|
||||
verbose_logger.warning(
|
||||
"CloudZero Dry Run: No valid data after transformation"
|
||||
)
|
||||
return {
|
||||
"usage_data": usage_data_sample,
|
||||
"cbf_data": [],
|
||||
"summary": {
|
||||
"total_records": len(usage_data_sample),
|
||||
"total_cost": sum(
|
||||
row.get("spend", 0) for row in usage_data_sample
|
||||
),
|
||||
"total_tokens": sum(
|
||||
row.get("prompt_tokens", 0)
|
||||
+ row.get("completion_tokens", 0)
|
||||
for row in usage_data_sample
|
||||
),
|
||||
"unique_accounts": 0,
|
||||
"unique_services": 0,
|
||||
},
|
||||
}
|
||||
|
||||
# Convert CBF data to dict format for response
|
||||
cbf_data_dict = cbf_data.to_dicts()
|
||||
|
||||
# Calculate summary statistics
|
||||
total_cost = sum(record.get("cost/cost", 0) for record in cbf_data_dict)
|
||||
unique_accounts = len(
|
||||
set(
|
||||
record.get("resource/account", "")
|
||||
for record in cbf_data_dict
|
||||
if record.get("resource/account")
|
||||
)
|
||||
)
|
||||
unique_services = len(
|
||||
set(
|
||||
record.get("resource/service", "")
|
||||
for record in cbf_data_dict
|
||||
if record.get("resource/service")
|
||||
)
|
||||
)
|
||||
total_tokens = sum(
|
||||
record.get("usage/amount", 0) for record in cbf_data_dict
|
||||
)
|
||||
|
||||
verbose_logger.debug(
|
||||
f"CloudZero Logger: Dry run completed for {len(cbf_data)} records"
|
||||
)
|
||||
|
||||
return {
|
||||
"usage_data": usage_data_sample,
|
||||
"cbf_data": cbf_data_dict,
|
||||
"summary": {
|
||||
"total_records": len(cbf_data_dict),
|
||||
"total_cost": total_cost,
|
||||
"total_tokens": total_tokens,
|
||||
"unique_accounts": unique_accounts,
|
||||
"unique_services": unique_services,
|
||||
},
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
verbose_logger.error(f"CloudZero Logger: Error in dry run export: {str(e)}")
|
||||
verbose_logger.error(f"CloudZero Dry Run Error: {str(e)}")
|
||||
raise
|
||||
|
||||
def _display_cbf_data_on_screen(self, cbf_data):
|
||||
"""Display CBF transformed data in a formatted table on screen."""
|
||||
from rich.box import SIMPLE
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
|
||||
console = Console()
|
||||
|
||||
if cbf_data.is_empty():
|
||||
console.print("[yellow]No CBF data to display[/yellow]")
|
||||
return
|
||||
|
||||
console.print(
|
||||
f"\n[bold green]💰 CloudZero CBF Transformed Data ({len(cbf_data)} records)[/bold green]"
|
||||
)
|
||||
|
||||
# Convert to dicts for easier processing
|
||||
records = cbf_data.to_dicts()
|
||||
|
||||
# Create main CBF table
|
||||
cbf_table = Table(
|
||||
show_header=True, header_style="bold cyan", box=SIMPLE, padding=(0, 1)
|
||||
)
|
||||
cbf_table.add_column("time/usage_start", style="blue", no_wrap=False)
|
||||
cbf_table.add_column("cost/cost", style="green", justify="right", no_wrap=False)
|
||||
cbf_table.add_column(
|
||||
"entity_type", style="magenta", justify="right", no_wrap=False
|
||||
)
|
||||
cbf_table.add_column(
|
||||
"entity_id", style="magenta", justify="right", no_wrap=False
|
||||
)
|
||||
cbf_table.add_column("team_id", style="cyan", no_wrap=False)
|
||||
cbf_table.add_column("team_alias", style="cyan", no_wrap=False)
|
||||
cbf_table.add_column("user_email", style="cyan", no_wrap=False)
|
||||
cbf_table.add_column("api_key_alias", style="yellow", no_wrap=False)
|
||||
cbf_table.add_column(
|
||||
"usage/amount", style="yellow", justify="right", no_wrap=False
|
||||
)
|
||||
cbf_table.add_column("resource/id", style="magenta", no_wrap=False)
|
||||
cbf_table.add_column("resource/service", style="cyan", no_wrap=False)
|
||||
cbf_table.add_column("resource/account", style="white", no_wrap=False)
|
||||
cbf_table.add_column("resource/region", style="dim", no_wrap=False)
|
||||
|
||||
for record in records:
|
||||
# Use proper CBF field names
|
||||
time_usage_start = str(record.get("time/usage_start", "N/A"))
|
||||
cost_cost = str(record.get("cost/cost", 0))
|
||||
usage_amount = str(record.get("usage/amount", 0))
|
||||
resource_id = str(record.get("resource/id", "N/A"))
|
||||
resource_service = str(record.get("resource/service", "N/A"))
|
||||
resource_account = str(record.get("resource/account", "N/A"))
|
||||
resource_region = str(record.get("resource/region", "N/A"))
|
||||
entity_type = str(record.get("entity_type", "N/A"))
|
||||
entity_id = str(record.get("entity_id", "N/A"))
|
||||
team_id = str(record.get("resource/tag:team_id", "N/A"))
|
||||
team_alias = str(record.get("resource/tag:team_alias", "N/A"))
|
||||
user_email = str(record.get("resource/tag:user_email", "N/A"))
|
||||
api_key_alias = str(record.get("resource/tag:api_key_alias", "N/A"))
|
||||
|
||||
cbf_table.add_row(
|
||||
time_usage_start,
|
||||
cost_cost,
|
||||
entity_type,
|
||||
entity_id,
|
||||
team_id,
|
||||
team_alias,
|
||||
user_email,
|
||||
api_key_alias,
|
||||
usage_amount,
|
||||
resource_id,
|
||||
resource_service,
|
||||
resource_account,
|
||||
resource_region,
|
||||
)
|
||||
|
||||
console.print(cbf_table)
|
||||
|
||||
# Show summary statistics
|
||||
total_cost = sum(record.get("cost/cost", 0) for record in records)
|
||||
unique_accounts = len(
|
||||
set(
|
||||
record.get("resource/account", "")
|
||||
for record in records
|
||||
if record.get("resource/account")
|
||||
)
|
||||
)
|
||||
unique_services = len(
|
||||
set(
|
||||
record.get("resource/service", "")
|
||||
for record in records
|
||||
if record.get("resource/service")
|
||||
)
|
||||
)
|
||||
|
||||
# Count total tokens from usage metrics
|
||||
total_tokens = sum(record.get("usage/amount", 0) for record in records)
|
||||
|
||||
console.print("\n[bold blue]📊 CBF Summary[/bold blue]")
|
||||
console.print(f" Records: {len(records):,}")
|
||||
console.print(f" Total Cost: ${total_cost:.2f}")
|
||||
console.print(f" Total Tokens: {total_tokens:,}")
|
||||
console.print(f" Unique Accounts: {unique_accounts}")
|
||||
console.print(f" Unique Services: {unique_services}")
|
||||
|
||||
console.print(
|
||||
"\n[dim]💡 This is the CloudZero CBF format ready for AnyCost ingestion[/dim]"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
async def init_cloudzero_background_job(scheduler: AsyncIOScheduler):
|
||||
"""
|
||||
Initialize the CloudZero background job.
|
||||
|
||||
Starts the background job that exports the usage data to CloudZero every hour.
|
||||
"""
|
||||
from litellm.constants import CLOUDZERO_EXPORT_INTERVAL_MINUTES
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
|
||||
prometheus_loggers: List[
|
||||
CustomLogger
|
||||
] = litellm.logging_callback_manager.get_custom_loggers_for_type(
|
||||
callback_type=CloudZeroLogger
|
||||
)
|
||||
# we need to get the initialized prometheus logger instance(s) and call logger.initialize_remaining_budget_metrics() on them
|
||||
verbose_logger.debug("found %s cloudzero loggers", len(prometheus_loggers))
|
||||
if len(prometheus_loggers) > 0:
|
||||
cloudzero_logger = cast(CloudZeroLogger, prometheus_loggers[0])
|
||||
verbose_logger.debug(
|
||||
"Initializing remaining budget metrics as a cron job executing every %s minutes"
|
||||
% CLOUDZERO_EXPORT_INTERVAL_MINUTES
|
||||
)
|
||||
scheduler.add_job(
|
||||
cloudzero_logger.initialize_cloudzero_export_job,
|
||||
"interval",
|
||||
minutes=CLOUDZERO_EXPORT_INTERVAL_MINUTES,
|
||||
)
|
||||
@@ -0,0 +1,161 @@
|
||||
# Copyright 2025 CloudZero
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# CHANGELOG: 2025-01-19 - Initial CZRN module for CloudZero Resource Names (erik.peterson)
|
||||
|
||||
"""CloudZero Resource Names (CZRN) generation and validation for LiteLLM resources."""
|
||||
|
||||
import re
|
||||
from enum import Enum
|
||||
from typing import Any, cast
|
||||
|
||||
import litellm
|
||||
|
||||
|
||||
class CZEntityType(str, Enum):
    """Entity types that can own LiteLLM spend when building CZRNs."""

    TEAM = "team"
|
||||
|
||||
|
||||
class CZRNGenerator:
    """Generate and validate CloudZero Resource Names (CZRNs) for LiteLLM resources."""

    # czrn:<service-type>:<provider>:<region>:<owner-account-id>:<resource-type>:<cloud-local-id>
    CZRN_REGEX = re.compile(
        r"^czrn:([a-z0-9-]+):([a-zA-Z0-9-]+):([a-z0-9-]+):([a-z0-9-]+):([a-z0-9-]+):(.+)$"
    )

    def __init__(self):
        """Initialize CZRN generator."""
        pass

    def create_from_litellm_data(self, row: dict[str, Any]) -> str:
        """Create a CZRN from LiteLLM daily spend data.

        CZRN format: czrn:<service-type>:<provider>:<region>:<owner-account-id>:<resource-type>:<cloud-local-id>

        For LiteLLM resources, we map:
        - service-type: 'litellm' (the service managing the LLM calls)
        - provider: The custom_llm_provider (e.g., 'openai', 'anthropic', 'azure')
        - region: 'cross-region' (LiteLLM operates across regions)
        - owner-account-id: The team_id or user_id (entity_id)
        - resource-type: 'llm-usage' (represents LLM usage/inference)
        - cloud-local-id: model
        """
        service_type = "litellm"
        provider = self._normalize_provider(row.get("custom_llm_provider", "unknown"))
        region = "cross-region"

        # Use the actual entity_id (team_id or user_id) as the owner account
        team_id = row.get("team_id", "unknown")
        owner_account_id = self._normalize_component(team_id)

        resource_type = "llm-usage"

        # Create a unique identifier with just the model (entity info already in owner_account_id)
        model = row.get("model", "unknown")

        cloud_local_id = model

        return self.create_from_components(
            service_type=service_type,
            provider=provider,
            region=region,
            owner_account_id=owner_account_id,
            resource_type=resource_type,
            cloud_local_id=cloud_local_id,
        )

    def create_from_components(
        self,
        service_type: str,
        provider: str,
        region: str,
        owner_account_id: str,
        resource_type: str,
        cloud_local_id: str,
    ) -> str:
        """Create a CZRN from individual components.

        Raises:
            ValueError: if the assembled CZRN does not match CZRN_REGEX.
        """
        # Normalize components to ensure they meet CZRN requirements
        service_type = self._normalize_component(service_type, allow_uppercase=True)
        provider = self._normalize_component(provider)
        region = self._normalize_component(region)
        owner_account_id = self._normalize_component(owner_account_id)
        resource_type = self._normalize_component(resource_type)
        # cloud_local_id can contain pipes and other characters, so don't normalize it

        czrn = f"czrn:{service_type}:{provider}:{region}:{owner_account_id}:{resource_type}:{cloud_local_id}"

        if not self.is_valid(czrn):
            raise ValueError(f"Generated CZRN is invalid: {czrn}")

        return czrn

    def is_valid(self, czrn: str) -> bool:
        """Validate a CZRN string against the standard format."""
        return bool(self.CZRN_REGEX.match(czrn))

    def extract_components(self, czrn: str) -> tuple[str, str, str, str, str, str]:
        """Extract all components from a CZRN.

        Returns: (service_type, provider, region, owner_account_id, resource_type, cloud_local_id)

        Raises:
            ValueError: if *czrn* does not match the CZRN format.
        """
        match = self.CZRN_REGEX.match(czrn)
        if not match:
            raise ValueError(f"Invalid CZRN format: {czrn}")

        return cast(tuple[str, str, str, str, str, str], match.groups())

    def _normalize_provider(self, provider: str) -> str:
        """Normalize a LiteLLM provider name to its CZRN provider component.

        Known providers map to canonical CZRN names (e.g. bedrock -> aws,
        vertex_ai -> gcp); unknown providers are lowercased with underscores
        replaced by hyphens.
        """
        # Map common provider names to CZRN standards
        provider_map = {
            litellm.LlmProviders.AZURE.value: "azure",
            litellm.LlmProviders.AZURE_AI.value: "azure",
            litellm.LlmProviders.ANTHROPIC.value: "anthropic",
            litellm.LlmProviders.BEDROCK.value: "aws",
            litellm.LlmProviders.VERTEX_AI.value: "gcp",
            litellm.LlmProviders.GEMINI.value: "google",
            litellm.LlmProviders.COHERE.value: "cohere",
            litellm.LlmProviders.HUGGINGFACE.value: "huggingface",
            litellm.LlmProviders.REPLICATE.value: "replicate",
            litellm.LlmProviders.TOGETHER_AI.value: "together-ai",
        }

        lowered = provider.lower()

        # Fix: look the provider up BEFORE replacing underscores. Map keys
        # such as 'azure_ai' and 'vertex_ai' contain underscores, so the old
        # hyphenate-first order could never match them (e.g. 'vertex_ai'
        # produced 'vertex-ai' instead of 'gcp').
        if lowered in provider_map:
            return provider_map[lowered]

        # Unknown provider: fall back to a CZRN-safe hyphenated form.
        return lowered.replace("_", "-")

    def _normalize_component(
        self, component: str, allow_uppercase: bool = False
    ) -> str:
        """Normalize a CZRN component to meet format requirements."""
        if not component:
            return "unknown"

        # Convert to lowercase unless uppercase is allowed
        if not allow_uppercase:
            component = component.lower()

        # Replace invalid characters with hyphens
        component = re.sub(r"[^a-zA-Z0-9-]", "-", component)

        # Remove consecutive hyphens
        component = re.sub(r"-+", "-", component)

        # Remove leading/trailing hyphens
        component = component.strip("-")

        return component or "unknown"
|
||||
@@ -0,0 +1,278 @@
|
||||
# Copyright 2025 CloudZero
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# CHANGELOG: 2025-01-19 - Added pathlib for filesystem operations (erik.peterson)
|
||||
# CHANGELOG: 2025-01-19 - Migrated from pandas to polars and requests to httpx (erik.peterson)
|
||||
# CHANGELOG: 2025-01-19 - Initial output module for CSV and CloudZero API (erik.peterson)
|
||||
|
||||
"""Output modules for writing CBF data to various destinations."""
|
||||
|
||||
import zoneinfo
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import httpx
|
||||
import polars as pl
|
||||
from rich.console import Console
|
||||
|
||||
|
||||
class CloudZeroStreamer:
|
||||
"""Stream CBF data to CloudZero AnyCost API with proper batching and timezone handling."""
|
||||
|
||||
def __init__(
|
||||
self, api_key: str, connection_id: str, user_timezone: Optional[str] = None
|
||||
):
|
||||
"""Initialize CloudZero streamer with credentials."""
|
||||
self.api_key = api_key
|
||||
self.connection_id = connection_id
|
||||
self.base_url = "https://api.cloudzero.com"
|
||||
self.console = Console()
|
||||
|
||||
# Set timezone - default to UTC
|
||||
self.user_timezone: Union[zoneinfo.ZoneInfo, timezone]
|
||||
if user_timezone:
|
||||
try:
|
||||
self.user_timezone = zoneinfo.ZoneInfo(user_timezone)
|
||||
except zoneinfo.ZoneInfoNotFoundError:
|
||||
self.console.print(
|
||||
f"[yellow]Warning: Unknown timezone '{user_timezone}', using UTC[/yellow]"
|
||||
)
|
||||
self.user_timezone = timezone.utc
|
||||
else:
|
||||
self.user_timezone = timezone.utc
|
||||
|
||||
    def send_batched(
        self, data: pl.DataFrame, operation: str = "replace_hourly"
    ) -> None:
        """Send CBF data in daily batches to CloudZero AnyCost API.

        Rows are grouped by their (UTC) usage date and each day is submitted
        as one API call with the given operation ("replace_hourly" or "sum").

        NOTE(review): this performs blocking HTTP I/O — callers on an asyncio
        event loop will be blocked for the duration of the upload.
        """
        if data.is_empty():
            self.console.print("[yellow]No data to send to CloudZero[/yellow]")
            return

        # Group data by date and send each day as a batch
        daily_batches = self._group_by_date(data)

        if not daily_batches:
            self.console.print("[yellow]No valid daily batches to send[/yellow]")
            return

        self.console.print(
            f"[blue]Sending {len(daily_batches)} daily batch(es) with operation '{operation}'[/blue]"
        )

        for batch_date, batch_data in daily_batches.items():
            self._send_daily_batch(batch_date, batch_data, operation)
|
||||
|
||||
    def _group_by_date(self, data: pl.DataFrame) -> dict[str, pl.DataFrame]:
        """Group rows into per-day DataFrames keyed by 'YYYY-MM-DD' (UTC).

        Rows with a missing 'time/usage_start' are silently skipped; rows
        whose timestamp cannot be parsed are skipped with a console warning.
        Returns an empty dict if the required column is absent entirely.
        """
        daily_batches: dict[str, list[dict[str, Any]]] = {}

        # Ensure we have the required columns
        if "time/usage_start" not in data.columns:
            self.console.print(
                "[red]Error: Missing 'time/usage_start' column for date grouping[/red]"
            )
            return {}

        # Declared outside the loop so the except-clause can report the
        # offending value.
        timestamp_str: Optional[str] = None
        for row in data.iter_rows(named=True):
            try:
                # Parse the timestamp and convert to UTC
                timestamp_str = row.get("time/usage_start")
                if not timestamp_str:
                    continue

                # Parse timestamp and handle timezone conversion
                dt = self._parse_and_convert_timestamp(timestamp_str)
                batch_date = dt.strftime("%Y-%m-%d")

                if batch_date not in daily_batches:
                    daily_batches[batch_date] = []

                daily_batches[batch_date].append(row)

            except Exception as e:
                self.console.print(
                    f"[yellow]Warning: Could not process timestamp '{timestamp_str}': {e}[/yellow]"
                )
                continue

        # Convert lists back to DataFrames
        return {
            date_key: pl.DataFrame(records)
            for date_key, records in daily_batches.items()
            if records
        }
|
||||
|
||||
def _parse_and_convert_timestamp(self, timestamp_str: str) -> datetime:
|
||||
"""Parse timestamp string and convert to UTC."""
|
||||
# Try to parse the timestamp string
|
||||
try:
|
||||
# Handle various ISO 8601 formats
|
||||
if timestamp_str.endswith("Z"):
|
||||
dt = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
|
||||
elif "+" in timestamp_str or timestamp_str.endswith(
|
||||
(
|
||||
"-00:00",
|
||||
"-01:00",
|
||||
"-02:00",
|
||||
"-03:00",
|
||||
"-04:00",
|
||||
"-05:00",
|
||||
"-06:00",
|
||||
"-07:00",
|
||||
"-08:00",
|
||||
"-09:00",
|
||||
"-10:00",
|
||||
"-11:00",
|
||||
"-12:00",
|
||||
"+01:00",
|
||||
"+02:00",
|
||||
"+03:00",
|
||||
"+04:00",
|
||||
"+05:00",
|
||||
"+06:00",
|
||||
"+07:00",
|
||||
"+08:00",
|
||||
"+09:00",
|
||||
"+10:00",
|
||||
"+11:00",
|
||||
"+12:00",
|
||||
)
|
||||
):
|
||||
dt = datetime.fromisoformat(timestamp_str)
|
||||
else:
|
||||
# Assume user timezone if no timezone info
|
||||
dt = datetime.fromisoformat(timestamp_str)
|
||||
if dt.tzinfo is None:
|
||||
dt = dt.replace(tzinfo=self.user_timezone)
|
||||
|
||||
# Convert to UTC
|
||||
return dt.astimezone(timezone.utc)
|
||||
|
||||
except ValueError as e:
|
||||
raise ValueError(f"Could not parse timestamp '{timestamp_str}': {e}")
|
||||
|
||||
    def _send_daily_batch(
        self, batch_date: str, batch_data: pl.DataFrame, operation: str
    ) -> None:
        """Send a single daily batch to the CloudZero AnyCost API.

        Args:
            batch_date: Day being submitted, formatted 'YYYY-MM-DD'.
            batch_data: CBF rows for that day; empty frames are a no-op.
            operation: AnyCost operation name forwarded in the payload.

        Raises:
            httpx.RequestError: on network failure (after logging).
            httpx.HTTPStatusError: on a non-2xx response (after logging).
        """
        if batch_data.is_empty():
            return

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        # Use the correct API endpoint format from documentation
        url = f"{self.base_url}/v2/connections/billing/anycost/{self.connection_id}/billing_drops"

        # Prepare the batch payload according to AnyCost API format
        payload = self._prepare_batch_payload(batch_date, batch_data, operation)

        try:
            # Fresh client per batch; raise_for_status surfaces HTTP errors.
            with httpx.Client(timeout=30.0) as client:
                self.console.print(
                    f"[blue]Sending batch for {batch_date} ({len(batch_data)} records)[/blue]"
                )

                response = client.post(url, headers=headers, json=payload)
                response.raise_for_status()

                self.console.print(
                    f"[green]✓ Successfully sent batch for {batch_date} ({len(batch_data)} records)[/green]"
                )

        except httpx.RequestError as e:
            self.console.print(
                f"[red]✗ Network error sending batch for {batch_date}: {e}[/red]"
            )
            raise
        except httpx.HTTPStatusError as e:
            self.console.print(
                f"[red]✗ HTTP error sending batch for {batch_date}: {e.response.status_code} {e.response.text}[/red]"
            )
            raise
|
||||
|
||||
    def _prepare_batch_payload(
        self, batch_date: str, batch_data: pl.DataFrame, operation: str
    ) -> dict[str, Any]:
        """Build the AnyCost API payload for one daily batch.

        The payload carries a 'month' ('YYYY-MM', derived from batch_date),
        the 'operation', and the converted CBF rows under 'data'. Rows that
        fail conversion are dropped by _convert_cbf_to_api_format.
        """
        # Convert batch_date to month for the API (YYYY-MM format)
        try:
            date_obj = datetime.strptime(batch_date, "%Y-%m-%d")
            month_str = date_obj.strftime("%Y-%m")
        except ValueError:
            # Fallback to current month
            month_str = datetime.now().strftime("%Y-%m")

        # Convert DataFrame rows to API format
        data_records = []
        for row in batch_data.iter_rows(named=True):
            record = self._convert_cbf_to_api_format(row)
            if record:
                data_records.append(record)

        payload = {"month": month_str, "operation": operation, "data": data_records}

        return payload
|
||||
|
||||
def _convert_cbf_to_api_format(
|
||||
self, row: dict[str, Any]
|
||||
) -> Optional[dict[str, Any]]:
|
||||
"""Convert CBF row to CloudZero API format - keeping CBF field names as CloudZero expects them."""
|
||||
try:
|
||||
# CloudZero expects CBF format field names directly, not converted names
|
||||
api_record = {}
|
||||
|
||||
# Copy all CBF fields, converting numeric values to strings as required by CloudZero
|
||||
for key, value in row.items():
|
||||
if value is not None:
|
||||
# CloudZero requires numeric values to be strings, but NOT in scientific notation
|
||||
if isinstance(value, (int, float)):
|
||||
# Format floats to avoid scientific notation
|
||||
if isinstance(value, float):
|
||||
# Use a reasonable precision that avoids scientific notation
|
||||
api_record[key] = f"{value:.10f}".rstrip("0").rstrip(".")
|
||||
else:
|
||||
api_record[key] = str(value)
|
||||
else:
|
||||
api_record[key] = value
|
||||
|
||||
# Ensure timestamp is in UTC format
|
||||
if "time/usage_start" in api_record:
|
||||
api_record["time/usage_start"] = self._ensure_utc_timestamp(
|
||||
api_record["time/usage_start"]
|
||||
)
|
||||
|
||||
return api_record
|
||||
|
||||
except Exception as e:
|
||||
self.console.print(
|
||||
f"[yellow]Warning: Could not convert record to API format: {e}[/yellow]"
|
||||
)
|
||||
return None
|
||||
|
||||
def _ensure_utc_timestamp(self, timestamp_str: str) -> str:
|
||||
"""Ensure timestamp is in UTC format for API."""
|
||||
if not timestamp_str:
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
try:
|
||||
dt = self._parse_and_convert_timestamp(timestamp_str)
|
||||
return dt.isoformat().replace("+00:00", "Z")
|
||||
except Exception:
|
||||
# Fallback to current time in UTC
|
||||
return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
|
||||
@@ -0,0 +1,101 @@
|
||||
# Copyright 2025 CloudZero
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# CHANGELOG: 2025-01-19 - Refactored to use daily spend tables for proper CBF mapping (erik.peterson)
|
||||
# CHANGELOG: 2025-01-19 - Migrated from pandas to polars for database operations (erik.peterson)
|
||||
# CHANGELOG: 2025-01-19 - Initial database module for LiteLLM data extraction (erik.peterson)
|
||||
|
||||
"""Database connection and data extraction for LiteLLM."""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any, Optional, List
|
||||
|
||||
import polars as pl
|
||||
|
||||
|
||||
class LiteLLMDatabase:
    """Handle LiteLLM PostgreSQL database connections and queries."""

    def _ensure_prisma_client(self):
        """Return the proxy's prisma client, raising if no database is connected."""
        # Imported lazily to avoid a circular import with the proxy server.
        from litellm.proxy.proxy_server import prisma_client

        if prisma_client is None:
            raise Exception(
                "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )
        return prisma_client

    async def get_usage_data(
        self,
        limit: Optional[int] = None,
        start_time_utc: Optional[datetime] = None,
        end_time_utc: Optional[datetime] = None,
    ) -> pl.DataFrame:
        """Retrieve usage data from the LiteLLM daily user spend table.

        Args:
            limit: Optional maximum number of rows to return.
            start_time_utc: Only include rows with ``updated_at`` at or after
                this instant. ``None`` disables the lower bound.
            end_time_utc: Only include rows with ``updated_at`` at or before
                this instant. ``None`` disables the upper bound.

        Returns:
            A polars DataFrame of daily spend rows joined with key/team/user
            metadata, newest first.

        Raises:
            ValueError: If ``limit`` cannot be coerced to an integer.
            Exception: If the database is not connected or the query fails.
        """
        client = self._ensure_prisma_client()

        # Query to get user spend data with team information. Use parameter
        # binding to avoid SQL injection from user-supplied timestamps or limits.
        query = """
            SELECT
                dus.id,
                dus.date,
                dus.user_id,
                dus.api_key,
                dus.model,
                dus.model_group,
                dus.custom_llm_provider,
                dus.prompt_tokens,
                dus.completion_tokens,
                dus.spend,
                dus.api_requests,
                dus.successful_requests,
                dus.failed_requests,
                dus.cache_creation_input_tokens,
                dus.cache_read_input_tokens,
                dus.created_at,
                dus.updated_at,
                vt.team_id,
                vt.key_alias as api_key_alias,
                tt.team_alias,
                ut.user_email as user_email
            FROM "LiteLLM_DailyUserSpend" dus
            LEFT JOIN "LiteLLM_VerificationToken" vt ON dus.api_key = vt.token
            LEFT JOIN "LiteLLM_TeamTable" tt ON vt.team_id = tt.team_id
            LEFT JOIN "LiteLLM_UserTable" ut ON dus.user_id = ut.user_id
            WHERE ($1::timestamptz IS NULL OR dus.updated_at >= $1::timestamptz)
              AND ($2::timestamptz IS NULL OR dus.updated_at <= $2::timestamptz)
            ORDER BY dus.date DESC, dus.created_at DESC
        """

        params: List[Any] = [
            start_time_utc,
            end_time_utc,
        ]

        if limit is not None:
            try:
                params.append(int(limit))
            except (TypeError, ValueError):
                raise ValueError("limit must be an integer")
            query += " LIMIT $3"

        try:
            db_response = await client.db.query_raw(query, *params)
            # Convert the response to polars DataFrame with full schema inference.
            # This prevents schema mismatch errors when data types vary across rows.
            return pl.DataFrame(db_response, infer_schema_length=None)
        except Exception as e:
            # Chain the cause so the original DB error is preserved for debugging.
            raise Exception(f"Error retrieving usage data: {str(e)}") from e
|
||||
@@ -0,0 +1,223 @@
|
||||
# Copyright 2025 CloudZero
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# CHANGELOG: 2025-01-19 - Updated CBF transformation for daily spend tables and proper CloudZero mapping (erik.peterson)
|
||||
# CHANGELOG: 2025-01-19 - Migrated from pandas to polars for data transformation (erik.peterson)
|
||||
# CHANGELOG: 2025-01-19 - Initial CBF transformation module (erik.peterson)
|
||||
|
||||
"""Transform LiteLLM data to CloudZero AnyCost CBF format."""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any, Optional
|
||||
|
||||
import polars as pl
|
||||
|
||||
from ...types.integrations.cloudzero import CBFRecord
|
||||
from .cz_resource_names import CZEntityType, CZRNGenerator
|
||||
|
||||
|
||||
class CBFTransformer:
    """Transform LiteLLM usage data to CloudZero Billing Format (CBF)."""

    def __init__(self):
        """Initialize transformer with CZRN generator."""
        self.czrn_generator = CZRNGenerator()

    def transform(self, data: pl.DataFrame) -> pl.DataFrame:
        """Transform LiteLLM data to CBF format, dropping records with zero successful_requests or invalid CZRNs."""
        if data.is_empty():
            return pl.DataFrame()

        # Filter out records with zero successful_requests first
        original_count = len(data)
        if "successful_requests" in data.columns:
            filtered_data = data.filter(pl.col("successful_requests") > 0)
            zero_requests_dropped = original_count - len(filtered_data)
        else:
            filtered_data = data
            zero_requests_dropped = 0

        cbf_data = []
        czrn_dropped_count = 0
        filtered_count = len(filtered_data)

        for row in filtered_data.iter_rows(named=True):
            try:
                cbf_record = self._create_cbf_record(row)
                # Only include the record if CZRN generation was successful
                cbf_data.append(cbf_record)
            except Exception:
                # Skip records that fail CZRN generation
                czrn_dropped_count += 1
                continue

        # Print summary of dropped records if any
        from rich.console import Console

        console = Console()

        if zero_requests_dropped > 0:
            console.print(
                f"[yellow]⚠️ Dropped {zero_requests_dropped:,} of {original_count:,} records with zero successful_requests[/yellow]"
            )

        if czrn_dropped_count > 0:
            console.print(
                f"[yellow]⚠️ Dropped {czrn_dropped_count:,} of {filtered_count:,} filtered records due to invalid CZRNs[/yellow]"
            )

        if len(cbf_data) > 0:
            console.print(
                f"[green]✓ Successfully transformed {len(cbf_data):,} records[/green]"
            )

        return pl.DataFrame(cbf_data)

    def _create_cbf_record(self, row: dict[str, Any]) -> CBFRecord:
        """Create a single CBF record from a LiteLLM daily spend row.

        Raises on CZRN generation failure; ``transform`` drops such rows.
        """
        # Parse date (daily spend tables use date strings like '2025-04-19')
        usage_date = self._parse_date(row.get("date"))

        # Numeric columns can come back NULL from the DB (key present, value
        # None, so .get() defaults never apply); coerce None to 0 so int()
        # cannot raise TypeError.
        prompt_tokens = int(row.get("prompt_tokens") or 0)
        completion_tokens = int(row.get("completion_tokens") or 0)
        total_tokens = prompt_tokens + completion_tokens

        # Create CloudZero Resource Name (CZRN) as resource_id
        resource_id = self.czrn_generator.create_from_litellm_data(row)

        # Build dimensions for CloudZero. Guard string columns with `or ""`
        # so NULLs never serialize as the literal string "None".
        model = str(row.get("model") or "")
        api_key_hash = str(row.get("api_key") or "")[
            :8
        ]  # First 8 chars for identification

        # Handle team information with fallbacks
        team_id = row.get("team_id")
        team_alias = row.get("team_alias")
        user_email = row.get("user_email")

        # Use team_alias if available, otherwise team_id, otherwise fallback to 'unknown'
        entity_id = (
            str(team_alias) if team_alias else (str(team_id) if team_id else "unknown")
        )

        # Get alias fields if they exist
        api_key_alias = row.get("api_key_alias")
        organization_alias = row.get("organization_alias")
        project_alias = row.get("project_alias")
        user_alias = row.get("user_alias")

        dimensions = {
            "entity_type": CZEntityType.TEAM.value,
            "entity_id": entity_id,
            "team_alias": str(team_alias) if team_alias else "unknown",
            "model": model,
            "model_group": str(row.get("model_group") or ""),
            "provider": str(row.get("custom_llm_provider") or ""),
            "api_key_prefix": api_key_hash,
            "api_key_alias": str(row.get("api_key_alias") or ""),
            "user_email": str(user_email) if user_email else "",
            "api_requests": str(row.get("api_requests") or 0),
            "successful_requests": str(row.get("successful_requests") or 0),
            "failed_requests": str(row.get("failed_requests") or 0),
            "cache_creation_tokens": str(row.get("cache_creation_input_tokens") or 0),
            "cache_read_tokens": str(row.get("cache_read_input_tokens") or 0),
            "organization_alias": str(organization_alias) if organization_alias else "",
            "project_alias": str(project_alias) if project_alias else "",
            "user_alias": str(user_alias) if user_alias else "",
        }

        # Extract CZRN components to populate corresponding CBF columns
        czrn_components = self.czrn_generator.extract_components(resource_id)
        (
            service_type,
            provider,
            region,
            owner_account_id,
            resource_type,
            cloud_local_id,
        ) = czrn_components

        # Build resource/account as concat of api_key_alias and api_key_prefix
        resource_account = (
            f"{api_key_alias}|{api_key_hash}" if api_key_alias else api_key_hash
        )

        # CloudZero CBF format with proper column names
        cbf_record = {
            # Required CBF fields
            "time/usage_start": usage_date.isoformat()
            if usage_date
            else None,  # Required: ISO-formatted UTC datetime
            # `spend` can be NULL in the DB; coerce to 0.0 so float() cannot raise
            "cost/cost": float(row.get("spend") or 0.0),  # Required: billed cost
            "resource/id": resource_id,  # CZRN (CloudZero Resource Name)
            # Usage metrics for token consumption
            "usage/amount": total_tokens,  # Numeric value of tokens consumed
            "usage/units": "tokens",  # Description of token units
            # CBF fields - updated per LIT-1907
            "resource/service": str(row.get("model_group") or ""),  # Send model_group
            "resource/account": resource_account,  # Send api_key_alias|api_key_prefix
            "resource/region": region,  # Maps to CZRN region (cross-region)
            "resource/usage_family": str(
                row.get("custom_llm_provider") or ""
            ),  # Send provider
            # Action field
            "action/operation": str(team_id) if team_id else "",  # Send team_id
            # Line item details
            "lineitem/type": "Usage",  # Standard usage line item
        }

        # Add CZRN components that don't have direct CBF column mappings as resource tags
        cbf_record["resource/tag:provider"] = provider  # CZRN provider component
        cbf_record[
            "resource/tag:model"
        ] = cloud_local_id  # CZRN cloud-local-id component (model)

        # Add resource tags for all dimensions (using resource/tag:<key> format)
        for key, value in dimensions.items():
            if (
                value and value != "N/A" and value != "unknown"
            ):  # Only add meaningful tags
                cbf_record[f"resource/tag:{key}"] = str(value)

        # Add token breakdown as resource tags for analysis (excluding total_tokens per LIT-1907)
        if prompt_tokens > 0:
            cbf_record["resource/tag:prompt_tokens"] = str(prompt_tokens)
        if completion_tokens > 0:
            cbf_record["resource/tag:completion_tokens"] = str(completion_tokens)

        return CBFRecord(cbf_record)

    def _parse_date(self, date_str) -> Optional[datetime]:
        """Parse a date value from the daily spend tables (e.g. '2025-04-19').

        Returns ``None`` for unparseable or non-date input instead of raising.
        NOTE(review): the parsed datetime is timezone-naive — confirm callers
        treat it as UTC.
        """
        if date_str is None:
            return None

        if isinstance(date_str, datetime):
            return date_str

        if isinstance(date_str, str):
            try:
                # Parse date string and set to midnight UTC for daily aggregation
                return pl.Series([date_str]).str.to_datetime("%Y-%m-%d").item()
            except Exception:
                try:
                    # Fallback: try ISO format parsing
                    return pl.Series([date_str]).str.to_datetime().item()
                except Exception:
                    return None

        return None
|
||||
# (removed: stray web-UI text captured during scrape)