chore: initial public snapshot for github upload

This commit is contained in:
Your Name
2026-03-26 20:06:14 +08:00
commit 0e5ecd930e
3497 changed files with 1586236 additions and 0 deletions

View File

@@ -0,0 +1,278 @@
# Copyright 2025 CloudZero
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# CHANGELOG: 2025-01-19 - Added pathlib for filesystem operations (erik.peterson)
# CHANGELOG: 2025-01-19 - Migrated from pandas to polars and requests to httpx (erik.peterson)
# CHANGELOG: 2025-01-19 - Initial output module for CSV and CloudZero API (erik.peterson)
"""Output modules for writing CBF data to various destinations."""
import zoneinfo
from datetime import datetime, timezone
from typing import Any, Optional, Union
import httpx
import polars as pl
from rich.console import Console
class CloudZeroStreamer:
"""Stream CBF data to CloudZero AnyCost API with proper batching and timezone handling."""
def __init__(
self, api_key: str, connection_id: str, user_timezone: Optional[str] = None
):
"""Initialize CloudZero streamer with credentials."""
self.api_key = api_key
self.connection_id = connection_id
self.base_url = "https://api.cloudzero.com"
self.console = Console()
# Set timezone - default to UTC
self.user_timezone: Union[zoneinfo.ZoneInfo, timezone]
if user_timezone:
try:
self.user_timezone = zoneinfo.ZoneInfo(user_timezone)
except zoneinfo.ZoneInfoNotFoundError:
self.console.print(
f"[yellow]Warning: Unknown timezone '{user_timezone}', using UTC[/yellow]"
)
self.user_timezone = timezone.utc
else:
self.user_timezone = timezone.utc
def send_batched(
self, data: pl.DataFrame, operation: str = "replace_hourly"
) -> None:
"""Send CBF data in daily batches to CloudZero AnyCost API."""
if data.is_empty():
self.console.print("[yellow]No data to send to CloudZero[/yellow]")
return
# Group data by date and send each day as a batch
daily_batches = self._group_by_date(data)
if not daily_batches:
self.console.print("[yellow]No valid daily batches to send[/yellow]")
return
self.console.print(
f"[blue]Sending {len(daily_batches)} daily batch(es) with operation '{operation}'[/blue]"
)
for batch_date, batch_data in daily_batches.items():
self._send_daily_batch(batch_date, batch_data, operation)
def _group_by_date(self, data: pl.DataFrame) -> dict[str, pl.DataFrame]:
"""Group data by date, converting to UTC and validating dates."""
daily_batches: dict[str, list[dict[str, Any]]] = {}
# Ensure we have the required columns
if "time/usage_start" not in data.columns:
self.console.print(
"[red]Error: Missing 'time/usage_start' column for date grouping[/red]"
)
return {}
timestamp_str: Optional[str] = None
for row in data.iter_rows(named=True):
try:
# Parse the timestamp and convert to UTC
timestamp_str = row.get("time/usage_start")
if not timestamp_str:
continue
# Parse timestamp and handle timezone conversion
dt = self._parse_and_convert_timestamp(timestamp_str)
batch_date = dt.strftime("%Y-%m-%d")
if batch_date not in daily_batches:
daily_batches[batch_date] = []
daily_batches[batch_date].append(row)
except Exception as e:
self.console.print(
f"[yellow]Warning: Could not process timestamp '{timestamp_str}': {e}[/yellow]"
)
continue
# Convert lists back to DataFrames
return {
date_key: pl.DataFrame(records)
for date_key, records in daily_batches.items()
if records
}
def _parse_and_convert_timestamp(self, timestamp_str: str) -> datetime:
"""Parse timestamp string and convert to UTC."""
# Try to parse the timestamp string
try:
# Handle various ISO 8601 formats
if timestamp_str.endswith("Z"):
dt = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
elif "+" in timestamp_str or timestamp_str.endswith(
(
"-00:00",
"-01:00",
"-02:00",
"-03:00",
"-04:00",
"-05:00",
"-06:00",
"-07:00",
"-08:00",
"-09:00",
"-10:00",
"-11:00",
"-12:00",
"+01:00",
"+02:00",
"+03:00",
"+04:00",
"+05:00",
"+06:00",
"+07:00",
"+08:00",
"+09:00",
"+10:00",
"+11:00",
"+12:00",
)
):
dt = datetime.fromisoformat(timestamp_str)
else:
# Assume user timezone if no timezone info
dt = datetime.fromisoformat(timestamp_str)
if dt.tzinfo is None:
dt = dt.replace(tzinfo=self.user_timezone)
# Convert to UTC
return dt.astimezone(timezone.utc)
except ValueError as e:
raise ValueError(f"Could not parse timestamp '{timestamp_str}': {e}")
def _send_daily_batch(
self, batch_date: str, batch_data: pl.DataFrame, operation: str
) -> None:
"""Send a single daily batch to CloudZero API."""
if batch_data.is_empty():
return
headers = {
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json",
}
# Use the correct API endpoint format from documentation
url = f"{self.base_url}/v2/connections/billing/anycost/{self.connection_id}/billing_drops"
# Prepare the batch payload according to AnyCost API format
payload = self._prepare_batch_payload(batch_date, batch_data, operation)
try:
with httpx.Client(timeout=30.0) as client:
self.console.print(
f"[blue]Sending batch for {batch_date} ({len(batch_data)} records)[/blue]"
)
response = client.post(url, headers=headers, json=payload)
response.raise_for_status()
self.console.print(
f"[green]✓ Successfully sent batch for {batch_date} ({len(batch_data)} records)[/green]"
)
except httpx.RequestError as e:
self.console.print(
f"[red]✗ Network error sending batch for {batch_date}: {e}[/red]"
)
raise
except httpx.HTTPStatusError as e:
self.console.print(
f"[red]✗ HTTP error sending batch for {batch_date}: {e.response.status_code} {e.response.text}[/red]"
)
raise
def _prepare_batch_payload(
self, batch_date: str, batch_data: pl.DataFrame, operation: str
) -> dict[str, Any]:
"""Prepare batch payload according to CloudZero AnyCost API format."""
# Convert batch_date to month for the API (YYYY-MM format)
try:
date_obj = datetime.strptime(batch_date, "%Y-%m-%d")
month_str = date_obj.strftime("%Y-%m")
except ValueError:
# Fallback to current month
month_str = datetime.now().strftime("%Y-%m")
# Convert DataFrame rows to API format
data_records = []
for row in batch_data.iter_rows(named=True):
record = self._convert_cbf_to_api_format(row)
if record:
data_records.append(record)
payload = {"month": month_str, "operation": operation, "data": data_records}
return payload
def _convert_cbf_to_api_format(
self, row: dict[str, Any]
) -> Optional[dict[str, Any]]:
"""Convert CBF row to CloudZero API format - keeping CBF field names as CloudZero expects them."""
try:
# CloudZero expects CBF format field names directly, not converted names
api_record = {}
# Copy all CBF fields, converting numeric values to strings as required by CloudZero
for key, value in row.items():
if value is not None:
# CloudZero requires numeric values to be strings, but NOT in scientific notation
if isinstance(value, (int, float)):
# Format floats to avoid scientific notation
if isinstance(value, float):
# Use a reasonable precision that avoids scientific notation
api_record[key] = f"{value:.10f}".rstrip("0").rstrip(".")
else:
api_record[key] = str(value)
else:
api_record[key] = value
# Ensure timestamp is in UTC format
if "time/usage_start" in api_record:
api_record["time/usage_start"] = self._ensure_utc_timestamp(
api_record["time/usage_start"]
)
return api_record
except Exception as e:
self.console.print(
f"[yellow]Warning: Could not convert record to API format: {e}[/yellow]"
)
return None
def _ensure_utc_timestamp(self, timestamp_str: str) -> str:
"""Ensure timestamp is in UTC format for API."""
if not timestamp_str:
return datetime.now(timezone.utc).isoformat()
try:
dt = self._parse_and_convert_timestamp(timestamp_str)
return dt.isoformat().replace("+00:00", "Z")
except Exception:
# Fallback to current time in UTC
return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")