chore: initial public snapshot for github upload

This commit is contained in:
Your Name
2026-03-26 20:06:14 +08:00
commit 0e5ecd930e
3497 changed files with 1586236 additions and 0 deletions

View File

@@ -0,0 +1,394 @@
# LiteLLM Proxy Client
A Python client library for interacting with the LiteLLM proxy server. This client provides a clean, typed interface for managing models, keys, credentials, and making chat completions.
## Installation
```bash
pip install litellm
```
## Quick Start
```python
from litellm.proxy.client import Client
# Initialize the client
client = Client(
base_url="http://localhost:4000", # Your LiteLLM proxy server URL
api_key="sk-api-key" # Optional: API key for authentication
)
# Make a chat completion request
response = client.chat.completions.create(
model="gpt-3.5-turbo",
messages=[
{"role": "user", "content": "Hello, how are you?"}
]
)
print(response.choices[0].message.content)
```
## Features
The client is organized into several resource clients for different functionality:
- `chat`: Chat completions
- `models`: Model management
- `model_groups`: Model group management
- `keys`: API key management
- `credentials`: Credential management
- `users`: User management
## Chat Completions
Make chat completion requests to your LiteLLM proxy:
```python
# Basic chat completion
response = client.chat.completions.create(
model="gpt-4",
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What's the capital of France?"}
]
)
# Stream responses
for chunk in client.chat.completions.create(
model="gpt-4",
messages=[{"role": "user", "content": "Tell me a story"}],
stream=True
):
print(chunk.choices[0].delta.content or "", end="")
```
## Model Management
Manage available models on your proxy:
```python
# List available models
models = client.models.list()
# Add a new model
client.models.add(
model_name="gpt-4",
litellm_params={
"api_key": "your-openai-key",
"api_base": "https://api.openai.com/v1"
}
)
# Delete a model
client.models.delete(model_name="gpt-4")
```
## API Key Management
Manage virtual API keys:
```python
# Generate a new API key
key = client.keys.generate(
models=["gpt-4", "gpt-3.5-turbo"],
aliases={"gpt4": "gpt-4"},
duration="24h",
key_alias="my-key",
team_id="team123"
)
# List all keys
keys = client.keys.list(
page=1,
size=10,
return_full_object=True
)
# Delete keys
client.keys.delete(
keys=["sk-key1", "sk-key2"],
key_aliases=["alias1", "alias2"]
)
```
## Credential Management
Manage model credentials:
```python
# Create new credentials
client.credentials.create(
credential_name="azure1",
credential_info={"api_type": "azure"},
credential_values={
"api_key": "your-azure-key",
"api_base": "https://example.azure.openai.com"
}
)
# List all credentials
credentials = client.credentials.list()
# Get a specific credential
credential = client.credentials.get(credential_name="azure1")
# Delete credentials
client.credentials.delete(credential_name="azure1")
```
## Model Groups
Manage model groups for load balancing and fallbacks:
```python
# Create a model group
client.model_groups.create(
name="gpt4-group",
models=[
{"model_name": "gpt-4", "litellm_params": {"api_key": "key1"}},
{"model_name": "gpt-4-backup", "litellm_params": {"api_key": "key2"}}
]
)
# List model groups
groups = client.model_groups.list()
# Delete a model group
client.model_groups.delete(name="gpt4-group")
```
## Users Management
Manage users on your proxy:
```python
from litellm.proxy.client import UsersManagementClient
users = UsersManagementClient(base_url="http://localhost:4000", api_key="sk-test")
# List users
user_list = users.list_users()
# Get user info
user_info = users.get_user(user_id="u1")
# Create a new user
created = users.create_user({
"user_email": "a@b.com",
"user_role": "internal_user",
"user_alias": "Alice",
"teams": ["team1"],
"max_budget": 100.0
})
# Delete users
users.delete_user(["u1", "u2"])
```
## Low-Level HTTP Client
The client provides access to a low-level HTTP client for making direct requests
to the LiteLLM proxy server. This is useful when you need more control or when
working with endpoints that don't yet have a high-level interface.
```python
# Access the HTTP client
client = Client(
base_url="http://localhost:4000",
api_key="sk-api-key"
)
# Make a custom request
response = client.http.request(
method="POST",
uri="/health/test_connection",
json={
"litellm_params": {
"model": "gpt-4",
"api_key": "your-api-key",
"api_base": "https://api.openai.com/v1"
},
"mode": "chat"
}
)
# The response is automatically parsed from JSON
print(response)
```
### HTTP Client Features
- Automatic URL handling (handles trailing/leading slashes)
- Built-in authentication (adds Bearer token if `api_key` is provided)
- JSON request/response handling
- Configurable timeout (default: 30 seconds)
- Comprehensive error handling
- Support for custom headers and request parameters
### HTTP Client `request` method parameters
- `method`: HTTP method (GET, POST, PUT, DELETE, etc.)
- `uri`: URI path (will be appended to base_url)
- `data`: (optional) Data to send in the request body
- `json`: (optional) JSON data to send in the request body
- `headers`: (optional) Custom HTTP headers
- Additional keyword arguments are passed to the underlying requests library
## Error Handling
The client provides clear error handling with custom exceptions:
```python
from litellm.proxy.client.exceptions import UnauthorizedError
try:
response = client.chat.completions.create(
model="gpt-4",
messages=[{"role": "user", "content": "Hello"}]
)
except UnauthorizedError as e:
print("Authentication failed:", e)
except Exception as e:
print("Request failed:", e)
```
## Advanced Usage
### Request Customization
All methods support returning the raw request object for inspection or modification:
```python
# Get the prepared request without sending it
request = client.models.list(return_request=True)
print(request.method) # GET
print(request.url) # http://localhost:4000/models
print(request.headers) # {'Content-Type': 'application/json', ...}
```
### Pagination
Methods that return lists support pagination:
```python
# Get the first page of keys
page1 = client.keys.list(page=1, size=10)
# Get the second page
page2 = client.keys.list(page=2, size=10)
```
### Filtering
Many list methods support filtering:
```python
# Filter keys by user and team
keys = client.keys.list(
user_id="user123",
team_id="team456",
include_team_keys=True
)
```
## Contributing
Contributions are welcome! Please check out our [contributing guidelines](../../CONTRIBUTING.md) for details.
## License
This project is licensed under the MIT License - see the [LICENSE](../../LICENSE) file for details.
## CLI Authentication Flow
The LiteLLM CLI supports SSO authentication through a polling-based approach that works with any OAuth-compatible SSO provider.
### How CLI Authentication Works
```mermaid
sequenceDiagram
participant CLI as CLI
participant Browser as Browser
participant Proxy as LiteLLM Proxy
participant SSO as SSO Provider
CLI->>CLI: Generate key ID (sk-uuid)
CLI->>Browser: Open /sso/key/generate?source=litellm-cli&key=sk-uuid
Browser->>Proxy: GET /sso/key/generate?source=litellm-cli&key=sk-uuid
Proxy->>Proxy: Set cli_state = litellm-session-token:sk-uuid
Proxy->>SSO: Redirect with state=litellm-session-token:sk-uuid
SSO->>Browser: Show login page
Browser->>SSO: User authenticates
SSO->>Proxy: Redirect to /sso/callback?state=litellm-session-token:sk-uuid
Proxy->>Proxy: Check if state starts with "litellm-session-token:"
Proxy->>Proxy: Generate API key with ID=sk-uuid
Proxy->>Browser: Show success page
CLI->>Proxy: Poll /sso/cli/poll/sk-uuid
Proxy->>CLI: Return {"status": "ready", "key": "sk-uuid"}
CLI->>CLI: Save key to ~/.litellm/token.json
```
### Authentication Commands
The CLI provides three authentication commands:
- **`litellm-proxy login`** - Start SSO authentication flow
- **`litellm-proxy logout`** - Clear stored authentication token
- **`litellm-proxy whoami`** - Show current authentication status
### Authentication Flow Steps
1. **Generate Session ID**: CLI generates a unique key ID (`sk-{uuid}`)
2. **Open Browser**: CLI opens browser to `/sso/key/generate` with CLI source and key parameters
3. **SSO Redirect**: Proxy sets the formatted state (`litellm-session-token:sk-uuid`) as OAuth state parameter and redirects to SSO provider
4. **User Authentication**: User completes SSO authentication in browser
5. **Callback Processing**: SSO provider redirects back to proxy with state parameter
6. **Key Generation**: Proxy detects CLI login (state starts with "litellm-session-token:") and generates API key with pre-specified ID
7. **Polling**: CLI polls `/sso/cli/poll/{key_id}` endpoint until key is ready
8. **Token Storage**: CLI saves the authentication token to `~/.litellm/token.json`
### Benefits of This Approach
- **No Local Server**: No need to run a local callback server
- **Standard OAuth**: Uses OAuth 2.0 state parameter correctly
- **Remote Compatible**: Works with remote proxy servers
- **Secure**: Uses UUID session identifiers
- **Simple Setup**: No additional OAuth redirect URL configuration needed
### Token Storage
Authentication tokens are stored in `~/.litellm/token.json` with restricted file permissions (600). The stored token includes:
```json
{
"key": "sk-...",
"user_id": "cli-user",
"user_email": "user@example.com",
"user_role": "cli",
"auth_header_name": "Authorization",
"timestamp": 1234567890
}
```
### Usage
Once authenticated, the CLI will automatically use the stored token for all requests. You no longer need to specify `--api-key` for subsequent commands.
```bash
# Login
litellm-proxy login
# Use CLI without specifying API key
litellm-proxy models list
# Check authentication status
litellm-proxy whoami
# Logout
litellm-proxy logout
```

View File

@@ -0,0 +1,17 @@
"""Public interface of the LiteLLM proxy client package.

Re-exports the high-level client classes and the shared exception type so
callers can import them directly from ``litellm.proxy.client``.
"""
from .client import Client
from .chat import ChatClient
from .models import ModelsManagementClient
from .model_groups import ModelGroupsManagementClient
from .exceptions import UnauthorizedError
from .users import UsersManagementClient
from .health import HealthManagementClient

# Explicit public API; keep in sync with the imports above.
__all__ = [
    "Client",
    "ChatClient",
    "ModelsManagementClient",
    "ModelGroupsManagementClient",
    "UsersManagementClient",
    "UnauthorizedError",
    "HealthManagementClient",
]

View File

@@ -0,0 +1,185 @@
import json
from typing import Any, Dict, Iterator, List, Optional, Union
import requests
from .exceptions import UnauthorizedError
class ChatClient:
    """Client for the LiteLLM proxy ``/chat/completions`` endpoint."""

    def __init__(self, base_url: str, api_key: Optional[str] = None):
        """
        Initialize the ChatClient.

        Args:
            base_url (str): The base URL of the LiteLLM proxy server (e.g., "http://localhost:4000")
            api_key (Optional[str]): API key for authentication. If provided, it will be sent as a Bearer token.
        """
        self._base_url = base_url.rstrip("/")  # Remove trailing slash if present
        self._api_key = api_key

    def _get_headers(self) -> Dict[str, str]:
        """
        Get the headers for API requests, including authorization if api_key is set.

        Returns:
            Dict[str, str]: Headers to use for API requests
        """
        headers = {"Content-Type": "application/json"}
        if self._api_key:
            headers["Authorization"] = f"Bearer {self._api_key}"
        return headers

    def _build_payload(
        self,
        model: str,
        messages: List[Dict[str, str]],
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        n: Optional[int] = None,
        max_tokens: Optional[int] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        user: Optional[str] = None,
        stream: bool = False,
    ) -> Dict[str, Any]:
        """
        Assemble the JSON request body, including only the options that were set.

        This is shared by ``completions`` and ``completions_stream`` so the
        optional-parameter handling stays in one place.

        Returns:
            Dict[str, Any]: The request body to send to /chat/completions
        """
        payload: Dict[str, Any] = {"model": model, "messages": messages}
        if stream:
            payload["stream"] = True
        optional = {
            "temperature": temperature,
            "top_p": top_p,
            "n": n,
            "max_tokens": max_tokens,
            "presence_penalty": presence_penalty,
            "frequency_penalty": frequency_penalty,
            "user": user,
        }
        # None means "not provided" — omit so the proxy applies its defaults.
        payload.update({k: v for k, v in optional.items() if v is not None})
        return payload

    def completions(
        self,
        model: str,
        messages: List[Dict[str, str]],
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        n: Optional[int] = None,
        max_tokens: Optional[int] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        user: Optional[str] = None,
        return_request: bool = False,
    ) -> Union[Dict[str, Any], requests.Request]:
        """
        Create a chat completion.

        Args:
            model (str): The model to use for completion
            messages (List[Dict[str, str]]): The messages to generate a completion for
            temperature (Optional[float]): Sampling temperature between 0 and 2
            top_p (Optional[float]): Nucleus sampling parameter between 0 and 1
            n (Optional[int]): Number of completions to generate
            max_tokens (Optional[int]): Maximum number of tokens to generate
            presence_penalty (Optional[float]): Presence penalty between -2.0 and 2.0
            frequency_penalty (Optional[float]): Frequency penalty between -2.0 and 2.0
            user (Optional[str]): Unique identifier for the end user
            return_request (bool): If True, returns the prepared request object instead of executing it

        Returns:
            Union[Dict[str, Any], requests.Request]: Either the completion response from the server or
                a prepared request object if return_request is True

        Raises:
            UnauthorizedError: If the request fails with a 401 status code
            requests.exceptions.RequestException: If the request fails with any other error
        """
        url = f"{self._base_url}/chat/completions"
        data = self._build_payload(
            model,
            messages,
            temperature=temperature,
            top_p=top_p,
            n=n,
            max_tokens=max_tokens,
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
            user=user,
        )
        request = requests.Request("POST", url, headers=self._get_headers(), json=data)
        if return_request:
            return request
        # Session is closed via the context manager (the original leaked it).
        with requests.Session() as session:
            try:
                response = session.send(request.prepare())
                response.raise_for_status()
                return response.json()
            except requests.exceptions.HTTPError as e:
                if e.response is not None and e.response.status_code == 401:
                    raise UnauthorizedError(e)
                raise

    def completions_stream(
        self,
        model: str,
        messages: List[Dict[str, str]],
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        n: Optional[int] = None,
        max_tokens: Optional[int] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        user: Optional[str] = None,
    ) -> Iterator[Dict[str, Any]]:
        """
        Create a streaming chat completion.

        Args:
            model (str): The model to use for completion
            messages (List[Dict[str, str]]): The messages to generate a completion for
            temperature (Optional[float]): Sampling temperature between 0 and 2
            top_p (Optional[float]): Nucleus sampling parameter between 0 and 1
            n (Optional[int]): Number of completions to generate
            max_tokens (Optional[int]): Maximum number of tokens to generate
            presence_penalty (Optional[float]): Presence penalty between -2.0 and 2.0
            frequency_penalty (Optional[float]): Frequency penalty between -2.0 and 2.0
            user (Optional[str]): Unique identifier for the end user

        Yields:
            Dict[str, Any]: Streaming response chunks from the server

        Raises:
            UnauthorizedError: If the request fails with a 401 status code
            requests.exceptions.RequestException: If the request fails with any other error
        """
        url = f"{self._base_url}/chat/completions"
        data = self._build_payload(
            model,
            messages,
            temperature=temperature,
            top_p=top_p,
            n=n,
            max_tokens=max_tokens,
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
            user=user,
            stream=True,
        )
        # Both the session and the streaming response are closed via context
        # managers, even if the consumer abandons the generator early.
        with requests.Session() as session:
            try:
                with session.post(
                    url, headers=self._get_headers(), json=data, stream=True
                ) as response:
                    response.raise_for_status()
                    # Parse the SSE stream: each event line looks like "data: {...}".
                    for raw_line in response.iter_lines():
                        if not raw_line:
                            continue
                        line = raw_line.decode("utf-8")
                        if not line.startswith("data: "):
                            continue
                        data_str = line[6:]  # Remove 'data: ' prefix
                        if data_str.strip() == "[DONE]":
                            break
                        try:
                            yield json.loads(data_str)
                        except json.JSONDecodeError:
                            # Skip malformed chunks rather than aborting the stream.
                            continue
            except requests.exceptions.HTTPError as e:
                if e.response is not None and e.response.status_code == 401:
                    raise UnauthorizedError(e)
                raise

View File

@@ -0,0 +1,536 @@
# LiteLLM Proxy CLI
The LiteLLM Proxy CLI is a command-line tool for managing your LiteLLM proxy server. It provides commands for managing models, viewing server status, and interacting with the proxy server.
## Installation
```bash
pip install 'litellm[proxy]'
```
## Configuration
The CLI can be configured using environment variables or command-line options:
- `LITELLM_PROXY_URL`: Base URL of the LiteLLM proxy server (default: http://localhost:4000)
- `LITELLM_PROXY_API_KEY`: API key for authentication
## Global Options
- `--version`, `-v`: Print the LiteLLM Proxy client and server version and exit.
Example:
```bash
litellm-proxy version
# or
litellm-proxy --version
# or
litellm-proxy -v
```
## Commands
### Models Management
The CLI provides several commands for managing models on your LiteLLM proxy server:
#### List Models
View all available models:
```bash
litellm-proxy models list [--format table|json]
```
Options:
- `--format`: Output format (table or json, default: table)
#### Model Information
Get detailed information about all models:
```bash
litellm-proxy models info [options]
```
Options:
- `--format`: Output format (table or json, default: table)
- `--columns`: Comma-separated list of columns to display. Valid columns:
- `public_model`
- `upstream_model`
- `credential_name`
- `created_at`
- `updated_at`
- `id`
- `input_cost`
- `output_cost`
Default columns: `public_model`, `upstream_model`, `updated_at`
#### Add Model
Add a new model to the proxy:
```bash
litellm-proxy models add <model-name> [options]
```
Options:
- `--param`, `-p`: Model parameters in key=value format (can be specified multiple times)
- `--info`, `-i`: Model info in key=value format (can be specified multiple times)
Example:
```bash
litellm-proxy models add gpt-4 -p api_key=sk-123 -p api_base=https://api.openai.com -i description="GPT-4 model"
```
#### Get Model Info
Get information about a specific model:
```bash
litellm-proxy models get [--id MODEL_ID] [--name MODEL_NAME]
```
Options:
- `--id`: ID of the model to retrieve
- `--name`: Name of the model to retrieve
#### Delete Model
Delete a model from the proxy:
```bash
litellm-proxy models delete <model-id>
```
#### Update Model
Update an existing model's configuration:
```bash
litellm-proxy models update <model-id> [options]
```
Options:
- `--param`, `-p`: Model parameters in key=value format (can be specified multiple times)
- `--info`, `-i`: Model info in key=value format (can be specified multiple times)
#### Import Models
Import models from a YAML file:
```bash
litellm-proxy models import models.yaml
```
Options:
- `--dry-run`: Show what would be imported without making any changes.
- `--only-models-matching-regex <regex>`: Only import models where `litellm_params.model` matches the given regex.
- `--only-access-groups-matching-regex <regex>`: Only import models where at least one item in `model_info.access_groups` matches the given regex.
Examples:
1. Import all models from a YAML file:
```bash
litellm-proxy models import models.yaml
```
2. Dry run (show what would be imported):
```bash
litellm-proxy models import models.yaml --dry-run
```
3. Only import models where the model name contains 'gpt':
```bash
litellm-proxy models import models.yaml --only-models-matching-regex gpt
```
4. Only import models with access group containing 'beta':
```bash
litellm-proxy models import models.yaml --only-access-groups-matching-regex beta
```
5. Combine both filters:
```bash
litellm-proxy models import models.yaml --only-models-matching-regex gpt --only-access-groups-matching-regex beta
```
### Credentials Management
The CLI provides commands for managing credentials on your LiteLLM proxy server:
#### List Credentials
View all available credentials:
```bash
litellm-proxy credentials list [--format table|json]
```
Options:
- `--format`: Output format (table or json, default: table)
The table format displays:
- Credential Name
- Custom LLM Provider
#### Create Credential
Create a new credential:
```bash
litellm-proxy credentials create <credential-name> --info <json-string> --values <json-string>
```
Options:
- `--info`: JSON string containing credential info (e.g., custom_llm_provider)
- `--values`: JSON string containing credential values (e.g., api_key)
Example:
```bash
litellm-proxy credentials create azure-cred \
--info '{"custom_llm_provider": "azure"}' \
--values '{"api_key": "sk-123", "api_base": "https://example.azure.openai.com"}'
```
#### Get Credential
Get information about a specific credential:
```bash
litellm-proxy credentials get <credential-name>
```
#### Delete Credential
Delete a credential:
```bash
litellm-proxy credentials delete <credential-name>
```
### Keys Management
The CLI provides commands for managing API keys on your LiteLLM proxy server:
#### List Keys
View all API keys:
```bash
litellm-proxy keys list [--format table|json] [options]
```
Options:
- `--format`: Output format (table or json, default: table)
- `--page`: Page number for pagination
- `--size`: Number of items per page
- `--user-id`: Filter keys by user ID
- `--team-id`: Filter keys by team ID
- `--organization-id`: Filter keys by organization ID
- `--key-hash`: Filter by specific key hash
- `--key-alias`: Filter by key alias
- `--return-full-object`: Return the full key object
- `--include-team-keys`: Include team keys in the response
#### Generate Key
Generate a new API key:
```bash
litellm-proxy keys generate [options]
```
Options:
- `--models`: Comma-separated list of allowed models
- `--aliases`: JSON string of model alias mappings
- `--spend`: Maximum spend limit for this key
- `--duration`: Duration for which the key is valid (e.g. '24h', '7d')
- `--key-alias`: Alias/name for the key
- `--team-id`: Team ID to associate the key with
- `--user-id`: User ID to associate the key with
- `--budget-id`: Budget ID to associate the key with
- `--config`: JSON string of additional configuration parameters
Example:
```bash
litellm-proxy keys generate --models gpt-4,gpt-3.5-turbo --spend 100 --duration 24h --key-alias my-key --team-id team123
```
#### Delete Keys
Delete API keys by key or alias:
```bash
litellm-proxy keys delete [--keys <comma-separated-keys>] [--key-aliases <comma-separated-aliases>]
```
Options:
- `--keys`: Comma-separated list of API keys to delete
- `--key-aliases`: Comma-separated list of key aliases to delete
Example:
```bash
litellm-proxy keys delete --keys sk-key1,sk-key2 --key-aliases alias1,alias2
```
#### Get Key Info
Get information about a specific API key:
```bash
litellm-proxy keys info --key <key-hash>
```
Options:
- `--key`: The key hash to get information about
Example:
```bash
litellm-proxy keys info --key sk-key1
```
### User Management
The CLI provides commands for managing users on your LiteLLM proxy server:
#### List Users
View all users:
```bash
litellm-proxy users list
```
#### Get User Info
Get information about a specific user:
```bash
litellm-proxy users get --id <user-id>
```
#### Create User
Create a new user:
```bash
litellm-proxy users create --email user@example.com --role internal_user --alias "Alice" --team team1 --max-budget 100.0
```
#### Delete User
Delete one or more users by user_id:
```bash
litellm-proxy users delete <user-id-1> <user-id-2>
```
### Chat Commands
The CLI provides commands for interacting with chat models through your LiteLLM proxy server:
#### Chat Completions
Create a chat completion:
```bash
litellm-proxy chat completions <model> [options]
```
Arguments:
- `model`: The model to use (e.g., gpt-4, claude-2)
Options:
- `--message`, `-m`: Messages in 'role:content' format. Can be specified multiple times to create a conversation.
- `--temperature`, `-t`: Sampling temperature between 0 and 2
- `--top-p`: Nucleus sampling parameter between 0 and 1
- `--n`: Number of completions to generate
- `--max-tokens`: Maximum number of tokens to generate
- `--presence-penalty`: Presence penalty between -2.0 and 2.0
- `--frequency-penalty`: Frequency penalty between -2.0 and 2.0
- `--user`: Unique identifier for the end user
Examples:
1. Simple completion:
```bash
litellm-proxy chat completions gpt-4 -m "user:Hello, how are you?"
```
2. Multi-message conversation:
```bash
litellm-proxy chat completions gpt-4 \
-m "system:You are a helpful assistant" \
-m "user:What's the capital of France?" \
-m "assistant:The capital of France is Paris." \
-m "user:What's its population?"
```
3. With generation parameters:
```bash
litellm-proxy chat completions gpt-4 \
-m "user:Write a story" \
--temperature 0.7 \
--max-tokens 500 \
--top-p 0.9
```
### HTTP Commands
The CLI provides commands for making direct HTTP requests to your LiteLLM proxy server:
#### Make HTTP Request
Make an HTTP request to any endpoint:
```bash
litellm-proxy http request <method> <uri> [options]
```
Arguments:
- `method`: HTTP method (GET, POST, PUT, DELETE, etc.)
- `uri`: URI path (will be appended to base_url)
Options:
- `--data`, `-d`: Data to send in the request body (as JSON string)
- `--json`, `-j`: JSON data to send in the request body (as JSON string)
- `--header`, `-H`: HTTP headers in 'key:value' format. Can be specified multiple times.
Examples:
1. List models:
```bash
litellm-proxy http request GET /models
```
2. Create a chat completion:
```bash
litellm-proxy http request POST /chat/completions -j '{"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]}'
```
3. Test connection with custom headers:
```bash
litellm-proxy http request GET /health/test_connection -H "X-Custom-Header:value"
```
## Environment Variables
The CLI respects the following environment variables:
- `LITELLM_PROXY_URL`: Base URL of the proxy server
- `LITELLM_PROXY_API_KEY`: API key for authentication
## Examples
1. List all models in table format:
```bash
litellm-proxy models list
```
2. Add a new model with parameters:
```bash
litellm-proxy models add gpt-4 -p api_key=sk-123 -p max_tokens=2048
```
3. Get model information in JSON format:
```bash
litellm-proxy models info --format json
```
4. Update model parameters:
```bash
litellm-proxy models update model-123 -p temperature=0.7 -i description="Updated model"
```
5. List all credentials in table format:
```bash
litellm-proxy credentials list
```
6. Create a new credential for Azure:
```bash
litellm-proxy credentials create azure-prod \
--info '{"custom_llm_provider": "azure"}' \
--values '{"api_key": "sk-123", "api_base": "https://prod.azure.openai.com"}'
```
7. Make a custom HTTP request:
```bash
litellm-proxy http request POST /chat/completions \
-j '{"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]}' \
-H "X-Custom-Header:value"
```
8. User management:
```bash
# List users
litellm-proxy users list
# Get user info
litellm-proxy users get --id u1
# Create a user
litellm-proxy users create --email a@b.com --role internal_user --alias "Alice" --team team1 --max-budget 100.0
# Delete users
litellm-proxy users delete u1 u2
```
9. Import models from a YAML file (with filters):
```bash
# Only import models where the model name contains 'gpt'
litellm-proxy models import models.yaml --only-models-matching-regex gpt
# Only import models with access group containing 'beta'
litellm-proxy models import models.yaml --only-access-groups-matching-regex beta
# Combine both filters
litellm-proxy models import models.yaml --only-models-matching-regex gpt --only-access-groups-matching-regex beta
```
## Error Handling
The CLI will display appropriate error messages when:
- The proxy server is not accessible
- Authentication fails
- Invalid parameters are provided
- The requested model or credential doesn't exist
- Invalid JSON is provided for credential creation
- Any other operation fails
For detailed debugging, use the `--debug` flag with any command.

View File

@@ -0,0 +1,5 @@
"""CLI package for LiteLLM Proxy Client."""
from .main import cli
__all__ = ["cli"]

View File

@@ -0,0 +1 @@
"""Command groups for the LiteLLM proxy CLI."""

View File

@@ -0,0 +1,623 @@
import json
import os
import sys
import time
import webbrowser
from pathlib import Path
from typing import Any, Dict, List, Optional
import click
import requests
from rich.console import Console
from rich.table import Table
from litellm.constants import CLI_JWT_EXPIRATION_HOURS
# Token storage utilities
def get_token_file_path() -> str:
    """Return the path of the CLI auth token file, creating ``~/.litellm`` if needed."""
    config_dir = Path.home() / ".litellm"
    # Token lives alongside other LiteLLM CLI state under the user's home dir.
    config_dir.mkdir(exist_ok=True)
    return str(config_dir / "token.json")
def save_token(token_data: Dict[str, Any]) -> None:
    """Save token data to file.

    The file is created with 0o600 permissions from the start (via ``os.open``)
    so the token is never world-readable, even briefly — the previous
    open-then-chmod sequence left a window where the file had the default mode.

    Args:
        token_data: JSON-serializable token payload to persist.
    """
    token_file = get_token_file_path()
    # O_TRUNC overwrites any existing token; the 0o600 mode applies on create.
    fd = os.open(token_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as f:
        json.dump(token_data, f, indent=2)
    # Tighten permissions for files that pre-existed with a looser mode.
    os.chmod(token_file, 0o600)
def load_token() -> Optional[Dict[str, Any]]:
    """Read the stored token data, returning None when absent or unreadable."""
    token_path = Path(get_token_file_path())
    if not token_path.exists():
        return None
    try:
        return json.loads(token_path.read_text())
    except (json.JSONDecodeError, IOError):
        # Treat a corrupt or unreadable token file the same as "not logged in".
        return None
def clear_token() -> None:
    """Delete the stored token file; a missing file is not an error."""
    try:
        os.remove(get_token_file_path())
    except FileNotFoundError:
        pass
def get_stored_api_key() -> Optional[str]:
    """Get the stored API key from token file.

    Returns:
        Optional[str]: The stored gateway API key, or None if none is stored.
    """
    # Use the SDK-level utility so the CLI and the SDK share one source of truth
    # for the stored key (imported lazily to keep CLI import time down).
    from litellm.litellm_core_utils.cli_token_utils import get_litellm_gateway_api_key
    return get_litellm_gateway_api_key()
# Team selection utilities
def display_teams_table(teams: List[Dict[str, Any]]) -> None:
    """Render the user's teams as a Rich table, or a notice when there are none."""
    console = Console()
    if not teams:
        console.print("❌ No teams found for your user.")
        return

    table = Table(title="Available Teams")
    table.add_column("Index", style="cyan", no_wrap=True)
    table.add_column("Team Alias", style="magenta")
    table.add_column("Team ID", style="green")
    table.add_column("Models", style="yellow")
    table.add_column("Max Budget", style="blue")

    for position, team in enumerate(teams, start=1):
        models = team.get("models", [])
        # Empty list means the team is not restricted to specific models.
        if not models:
            models_str = "All models"
        elif len(models) > 3:
            models_str = ", ".join(models[:3]) + f" (+{len(models) - 3} more)"
        else:
            models_str = ", ".join(models)
        max_budget = team.get("max_budget")
        budget_str = f"${max_budget}" if max_budget else "Unlimited"
        table.add_row(
            str(position),
            team.get("team_alias") or "N/A",
            team.get("team_id", "N/A"),
            models_str,
            budget_str,
        )
    console.print(table)
def get_key_input():
    """Get a single key input from the user (cross-platform).

    Returns one of the strings "up", "down", "enter", "escape", "quit", or
    None when the key is unrecognized or raw key input is unavailable.
    """
    try:
        if sys.platform == "win32":
            import msvcrt
            key = msvcrt.getch()
            if key == b"\xe0":  # Arrow keys on Windows (two-byte sequence)
                key = msvcrt.getch()
                if key == b"H":  # Up arrow
                    return "up"
                elif key == b"P":  # Down arrow
                    return "down"
            elif key == b"\r":  # Enter key
                return "enter"
            elif key == b"\x1b":  # Escape key
                return "escape"
            elif key == b"q":
                return "quit"
            return None
        else:
            import termios
            import tty
            fd = sys.stdin.fileno()
            # Save terminal settings so raw mode can always be undone below.
            old_settings = termios.tcgetattr(fd)
            try:
                tty.setraw(sys.stdin.fileno())
                key = sys.stdin.read(1)
                if key == "\x1b":  # Escape sequence — read the rest of it
                    key += sys.stdin.read(2)
                    if key == "\x1b[A":  # Up arrow
                        return "up"
                    elif key == "\x1b[B":  # Down arrow
                        return "down"
                    elif key == "\x1b":  # Just escape (no trailing bytes read)
                        return "escape"
                elif key == "\r" or key == "\n":  # Enter key
                    return "enter"
                elif key == "q":
                    return "quit"
                return None
            finally:
                # Restore the terminal even if reading raised.
                termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
    except ImportError:
        # Fallback to simple input if termios/msvcrt not available
        return None
def display_interactive_team_selection(
    teams: List[Dict[str, Any]], selected_index: int = 0
) -> None:
    """Redraw the team list with the entry at ``selected_index`` highlighted."""
    console = Console()
    # Repaint from a clean screen on every keypress.
    console.clear()
    console.print("🎯 Select a Team (Use ↑↓ arrows, Enter to select, 'q' to skip):\n")
    for position, team in enumerate(teams):
        alias = team.get("team_alias") or "N/A"
        identifier = team.get("team_id", "N/A")
        models = team.get("models", [])
        # Empty list means the team is not restricted to specific models.
        if not models:
            summary = "All models"
        elif len(models) > 3:
            summary = ", ".join(models[:3]) + f" (+{len(models) - 3} more)"
        else:
            summary = ", ".join(models)
        budget = team.get("max_budget")
        budget_text = f"${budget}" if budget else "Unlimited"
        if position == selected_index:
            console.print(f"➤ [bold cyan]{alias}[/bold cyan] ({identifier})")
            console.print(f" Models: [yellow]{summary}[/yellow]")
            console.print(f" Budget: [blue]{budget_text}[/blue]\n")
        else:
            console.print(f" [dim]{alias}[/dim] ({identifier})")
            console.print(f" Models: [dim]{summary}[/dim]")
            console.print(f" Budget: [dim]{budget_text}[/dim]\n")
def prompt_team_selection(teams: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    """Let the user pick a team via an arrow-key driven menu.

    Falls back to a numeric prompt when stdin is not a TTY or raw key input
    is unavailable. Returns the chosen team dict, or None when the selection
    is skipped or cancelled.
    """
    if not teams:
        return None
    cursor = 0
    try:
        if not sys.stdin.isatty():
            # Non-interactive environment: use the numeric prompt instead.
            return prompt_team_selection_fallback(teams)
        while True:
            display_interactive_team_selection(teams, cursor)
            pressed = get_key_input()
            if pressed == "up":
                cursor = (cursor - 1) % len(teams)
            elif pressed == "down":
                cursor = (cursor + 1) % len(teams)
            elif pressed == "enter":
                chosen = teams[cursor]
                # Clear the menu before announcing the choice.
                console = Console()
                console.clear()
                click.echo(
                    f"✅ Selected team: {chosen.get('team_alias', 'N/A')} ({chosen.get('team_id')})"
                )
                return chosen
            elif pressed in ("quit", "escape"):
                console = Console()
                console.clear()
                click.echo(" Team selection skipped.")
                return None
            elif pressed is None:
                # Raw key input unsupported on this terminal; degrade gracefully.
                return prompt_team_selection_fallback(teams)
    except KeyboardInterrupt:
        console = Console()
        console.clear()
        click.echo("\n❌ Team selection cancelled.")
        return None
    except Exception:
        # Any terminal hiccup: fall back to the numeric prompt.
        return prompt_team_selection_fallback(teams)
def prompt_team_selection_fallback(
    teams: List[Dict[str, Any]]
) -> Optional[Dict[str, Any]]:
    """Numeric team picker used when arrow-key navigation is unavailable."""
    if not teams:
        return None
    while True:
        try:
            raw = click.prompt(
                "\nSelect a team by entering the index number (or 'skip' to continue without a team)",
                type=str,
            ).strip()
            if raw.lower() == "skip":
                return None
            position = int(raw) - 1
            if not (0 <= position < len(teams)):
                click.echo(
                    f"❌ Invalid selection. Please enter a number between 1 and {len(teams)}"
                )
                continue
            chosen = teams[position]
            click.echo(
                f"\n✅ Selected team: {chosen.get('team_alias', 'N/A')} ({chosen.get('team_id')})"
            )
            return chosen
        except ValueError:
            # Non-numeric input that isn't "skip".
            click.echo("❌ Invalid input. Please enter a number or 'skip'")
        except KeyboardInterrupt:
            click.echo("\n❌ Team selection cancelled.")
            return None
# Polling-based authentication - no local server needed
def _poll_for_ready_data(
    url: str,
    *,
    total_timeout: int = 300,
    poll_interval: int = 2,
    request_timeout: int = 10,
    pending_message: Optional[str] = None,
    pending_log_every: int = 10,
    other_status_message: Optional[str] = None,
    other_status_log_every: int = 10,
    http_error_log_every: int = 10,
    connection_error_log_every: int = 10,
) -> Optional[Dict[str, Any]]:
    """Poll *url* until it reports status "ready" or the timeout elapses.

    Progress and error messages are throttled by the ``*_log_every``
    counters (0 disables that message). Returns the JSON payload once the
    server answers ``{"status": "ready", ...}``, or None on timeout.
    """
    max_attempts = total_timeout // poll_interval
    for attempt in range(max_attempts):
        try:
            response = requests.get(url, timeout=request_timeout)
            if response.status_code != 200:
                if http_error_log_every > 0 and attempt % http_error_log_every == 0:
                    click.echo(f"Polling error: HTTP {response.status_code}")
            else:
                payload = response.json()
                state = payload.get("status")
                if state == "ready":
                    return payload
                if state == "pending":
                    if (
                        pending_message
                        and pending_log_every > 0
                        and attempt % pending_log_every == 0
                    ):
                        click.echo(pending_message)
                elif (
                    other_status_message
                    and other_status_log_every > 0
                    and attempt % other_status_log_every == 0
                ):
                    click.echo(other_status_message)
        except requests.RequestException as exc:
            # Network blips are expected while the user completes the flow.
            if (
                connection_error_log_every > 0
                and attempt % connection_error_log_every == 0
            ):
                click.echo(f"Connection error (will retry): {exc}")
        time.sleep(poll_interval)
    return None
def _normalize_teams(teams, team_details):
"""If team_details are a
Args:
teams (_type_): _description_
team_details (_type_): _description_
Returns:
_type_: _description_
"""
if isinstance(team_details, list) and team_details:
return [
{
"team_id": i.get("team_id") or i.get("id"),
"team_alias": i.get("team_alias"),
}
for i in team_details
if isinstance(i, dict) and (i.get("team_id") or i.get("id"))
]
if isinstance(teams, list):
return [{"team_id": str(t), "team_alias": None} for t in teams]
return []
def _poll_for_authentication(base_url: str, key_id: str) -> Optional[dict]:
    """
    Poll the server for authentication completion and handle team selection.

    Returns:
        Dictionary with authentication data if successful, None otherwise
    """
    data = _poll_for_ready_data(
        f"{base_url}/sso/cli/poll/{key_id}",
        pending_message="Still waiting for authentication...",
    )
    if not data:
        return None
    if data.get("requires_team_selection"):
        raw_teams = data.get("teams", [])
        normalized_teams: List[Dict[str, Any]] = _normalize_teams(
            raw_teams, data.get("team_details")
        )
        if not normalized_teams:
            click.echo("⚠️ No teams available for selection.")
            return None
        # User has multiple teams - let them select
        team_jwt = _handle_team_selection_during_polling(
            base_url=base_url,
            key_id=key_id,
            teams=normalized_teams,
        )
        if not team_jwt:
            click.echo("❌ Team selection cancelled or JWT generation failed.")
            return None
        return {
            "api_key": team_jwt,
            "user_id": data.get("user_id"),
            "teams": raw_teams,
            "team_id": None,  # Set by server in JWT
        }
    # JWT is ready (single team or team already selected)
    assigned_team = data.get("team_id")
    member_teams = data.get("teams", [])
    if assigned_team and len(member_teams) == 1:
        # Show which team was assigned automatically.
        click.echo(f"\n✅ Automatically assigned to team: {assigned_team}")
    issued_key = data.get("key")
    if not issued_key:
        return None
    return {
        "api_key": issued_key,
        "user_id": data.get("user_id"),
        "teams": member_teams,
        "team_id": assigned_team,
    }
def _handle_team_selection_during_polling(
    base_url: str, key_id: str, teams: List[Dict[str, Any]]
) -> Optional[str]:
    """
    Handle team selection and re-poll with selected team_id.

    Args:
        base_url: Base URL of the LiteLLM proxy server.
        key_id: Pre-generated CLI session key ID being polled.
        teams: Normalized team dicts, each with "team_id" and "team_alias"
            (as produced by _normalize_teams) — NOT plain ID strings.

    Returns:
        The JWT token with the selected team, or None if selection was skipped
        or the follow-up poll timed out.
    """
    if not teams:
        click.echo(
            " No teams found. You can create or join teams using the web interface."
        )
        return None
    click.echo("\n" + "=" * 60)
    click.echo("📋 Select a team for your CLI session...")
    team_id = _render_and_prompt_for_team_selection(teams)
    if not team_id:
        click.echo(" No team selected.")
        return None
    click.echo(f"\n🔄 Generating JWT for team: {team_id}")
    # Re-poll with the chosen team so the server mints a team-scoped JWT.
    poll_url = f"{base_url}/sso/cli/poll/{key_id}?team_id={team_id}"
    data = _poll_for_ready_data(
        poll_url,
        pending_message="Still waiting for team authentication...",
        other_status_message="Waiting for team authentication to complete...",
        http_error_log_every=10,
    )
    if not data:
        return None
    jwt_token = data.get("key")
    if jwt_token:
        click.echo(f"✅ Successfully generated JWT for team: {team_id}")
        return jwt_token
    return None
def _render_and_prompt_for_team_selection(teams: List[Dict[str, Any]]) -> Optional[str]:
    """Render teams table and prompt user for a team selection.

    Returns the selected team_id as a string, or None if selection was
    cancelled or skipped without any teams available.
    """
    # Show aliases where available while keeping the underlying IDs visible.
    console = Console()
    table = Table(title="Available Teams")
    table.add_column("Index", style="cyan", no_wrap=True)
    table.add_column("Team Name", style="magenta")
    table.add_column("Team ID", style="green")
    for position, entry in enumerate(teams, start=1):
        identifier = str(entry.get("team_id"))
        table.add_row(str(position), entry.get("team_alias") or identifier, identifier)
    console.print(table)
    # Simple numeric selection loop.
    while True:
        try:
            answer = click.prompt(
                "\nSelect a team by entering the index number (or 'skip' to use first team)",
                type=str,
            ).strip()
            if answer.lower() == "skip":
                # Skipping defaults to the first team rather than no team.
                return str(teams[0].get("team_id")) if teams else None
            chosen_index = int(answer) - 1
            if not (0 <= chosen_index < len(teams)):
                click.echo(
                    f"❌ Invalid selection. Please enter a number between 1 and {len(teams)}"
                )
                continue
            entry = teams[chosen_index]
            identifier = str(entry.get("team_id"))
            label = entry.get("team_alias") or identifier
            click.echo(f"\n✅ Selected team: {label} ({identifier})")
            return identifier
        except ValueError:
            click.echo("❌ Invalid input. Please enter a number or 'skip'")
        except KeyboardInterrupt:
            click.echo("\n❌ Team selection cancelled.")
            return None
@click.command(name="login")
@click.pass_context
def login(ctx: click.Context):
    """Login to LiteLLM proxy using SSO authentication"""
    # Function-local imports defer loading until the login command actually runs.
    from litellm._uuid import uuid
    from litellm.constants import LITELLM_CLI_SOURCE_IDENTIFIER
    from litellm.proxy.client.cli.interface import show_commands
    base_url = ctx.obj["base_url"]
    # Check if we have an existing key to regenerate
    existing_key = get_stored_api_key()
    # Generate unique key ID for this login session
    key_id = f"sk-{str(uuid.uuid4())}"
    try:
        # Construct SSO login URL with CLI source and pre-generated key
        sso_url = f"{base_url}/sso/key/generate?source={LITELLM_CLI_SOURCE_IDENTIFIER}&key={key_id}"
        # If we have an existing key, include it as a parameter to the login endpoint
        # The server will encode it in the OAuth state parameter for the SSO flow
        if existing_key:
            sso_url += f"&existing_key={existing_key}"
        click.echo(f"Opening browser to: {sso_url}")
        click.echo("Please complete the SSO authentication in your browser...")
        click.echo(f"Session ID: {key_id}")
        # Open browser; the user finishes SSO there while we poll below.
        webbrowser.open(sso_url)
        # Poll for authentication completion
        click.echo("Waiting for authentication...")
        auth_result = _poll_for_authentication(base_url=base_url, key_id=key_id)
        if auth_result:
            api_key = auth_result["api_key"]
            user_id = auth_result["user_id"]
            # Save token data (simplified for CLI - we just need the key)
            save_token(
                {
                    "key": api_key,
                    "user_id": user_id or "cli-user",
                    "user_email": "unknown",
                    "user_role": "cli",
                    "auth_header_name": "Authorization",
                    "jwt_token": "",
                    "timestamp": time.time(),
                }
            )
            click.echo("\n✅ Login successful!")
            # Only a short prefix of the key is echoed, never the full secret.
            click.echo(f"JWT Token: {api_key[:20]}...")
            click.echo("You can now use the CLI without specifying --api-key")
            # Show available commands after successful login
            click.echo("\n" + "=" * 60)
            show_commands()
            return
        else:
            click.echo("❌ Authentication timed out. Please try again.")
            return
    except KeyboardInterrupt:
        click.echo("\n❌ Authentication cancelled by user.")
        return
    except Exception as e:
        click.echo(f"❌ Authentication failed: {e}")
        return
@click.command(name="logout")
def logout():
    """Logout and clear stored authentication"""
    # Removes the locally stored token; no server-side call is made here.
    clear_token()
    click.echo("✅ Logged out successfully. Authentication token cleared.")
@click.command(name="whoami")
def whoami():
    """Show current authentication status"""
    token_data = load_token()
    if not token_data:
        click.echo("❌ Not authenticated. Run 'litellm-proxy login' to authenticate.")
        return
    click.echo("✅ Authenticated")
    click.echo(f"User Email: {token_data.get('user_email', 'Unknown')}")
    click.echo(f"User ID: {token_data.get('user_id', 'Unknown')}")
    click.echo(f"User Role: {token_data.get('user_role', 'Unknown')}")
    # Check if token is still valid (basic timestamp check)
    # NOTE: this is a local heuristic only — the server remains the source
    # of truth for actual token expiry.
    timestamp = token_data.get("timestamp", 0)
    age_hours = (time.time() - timestamp) / 3600
    click.echo(f"Token age: {age_hours:.1f} hours")
    if age_hours > CLI_JWT_EXPIRATION_HOURS:
        click.echo(
            f"⚠️ Warning: Token is more than {CLI_JWT_EXPIRATION_HOURS} hours old and may have expired."
        )
# Public API consumed by other CLI modules.
__all__ = ["login", "logout", "whoami", "prompt_team_selection"]
# Export individual commands instead of grouping them:
# login, logout, and whoami are registered as top-level commands.

View File

@@ -0,0 +1,406 @@
import json
import sys
from typing import Any, Dict, List, Optional
import click
import requests
from rich.console import Console
from rich.panel import Panel
from rich.prompt import Prompt
from rich.table import Table
from ... import Client
from ...chat import ChatClient
def _get_available_models(ctx: click.Context) -> List[Dict[str, Any]]:
    """Get list of available models from the proxy server"""
    try:
        api = Client(base_url=ctx.obj["base_url"], api_key=ctx.obj["api_key"])
        raw = api.models.list()
    except Exception as e:
        # Best-effort: the caller can still prompt for a model name manually.
        click.echo(f"Warning: Could not fetch models list: {e}", err=True)
        return []
    if not isinstance(raw, list):
        return []
    # Keep only dict entries so downstream .get() calls are safe.
    return [item for item in raw if isinstance(item, dict)]
def _select_model(
    console: Console, available_models: List[Dict[str, Any]]
) -> Optional[str]:
    """Interactive model selection"""
    if not available_models:
        console.print(
            "[yellow]No models available or could not fetch models list.[/yellow]"
        )
        typed = Prompt.ask("Please enter a model name")
        return typed if typed.strip() else None
    # Build the model table (display capped at MAX_MODELS_TO_DISPLAY rows).
    table = Table(title="Available Models")
    table.add_column("Index", style="cyan", no_wrap=True)
    table.add_column("Model ID", style="green")
    table.add_column("Owned By", style="yellow")
    MAX_MODELS_TO_DISPLAY = 200
    shown: List[Dict[str, Any]] = available_models[:MAX_MODELS_TO_DISPLAY]
    for row_index, entry in enumerate(shown):
        table.add_row(
            str(row_index + 1), str(entry.get("id", "")), str(entry.get("owned_by", ""))
        )
    hidden = len(available_models) - MAX_MODELS_TO_DISPLAY
    if hidden > 0:
        console.print(f"\n[dim]... and {hidden} more models[/dim]")
    console.print(table)
    while True:
        try:
            answer = Prompt.ask(
                "\nSelect a model by entering the index number (or type a model name directly)",
                default="1",
            ).strip()
            try:
                chosen = int(answer) - 1
            except ValueError:
                # Not numeric: interpret as a literal model name.
                if answer:
                    return answer
                console.print("[red]Please enter a valid model name or index[/red]")
                continue
            # Indexes beyond the displayed rows are still valid selections.
            if 0 <= chosen < len(available_models):
                return available_models[chosen]["id"]
            console.print(
                f"[red]Invalid index. Please enter a number between 1 and {len(available_models)}[/red]"
            )
        except KeyboardInterrupt:
            console.print("\n[yellow]Model selection cancelled.[/yellow]")
            return None
@click.command()
@click.argument("model", required=False)
@click.option(
    "--temperature",
    "-t",
    type=float,
    default=0.7,
    help="Sampling temperature between 0 and 2 (default: 0.7)",
)
@click.option(
    "--max-tokens",
    type=int,
    help="Maximum number of tokens to generate",
)
@click.option(
    "--system",
    "-s",
    type=str,
    help="System message to set the behavior of the assistant",
)
@click.pass_context
def chat(
    ctx: click.Context,
    model: Optional[str],
    temperature: float,
    max_tokens: Optional[int] = None,
    system: Optional[str] = None,
):
    """Interactive chat with streaming responses
    Examples:
        # Chat with a specific model
        litellm-proxy chat gpt-4
        # Chat without specifying model (will show model selection)
        litellm-proxy chat
        # Chat with custom settings
        litellm-proxy chat gpt-4 --temperature 0.9 --system "You are a helpful coding assistant"
    """
    console = Console()
    # If no model specified, show model selection
    if not model:
        available_models = _get_available_models(ctx)
        model = _select_model(console, available_models)
        if not model:
            console.print("[red]No model selected. Exiting.[/red]")
            return
    client = ChatClient(ctx.obj["base_url"], ctx.obj["api_key"])
    # Initialize conversation history
    messages: List[Dict[str, Any]] = []
    # Add system message if provided
    if system:
        messages.append({"role": "system", "content": system})
    # Display welcome message
    console.print(
        Panel.fit(
            f"[bold blue]LiteLLM Interactive Chat[/bold blue]\n"
            f"Model: [green]{model}[/green]\n"
            f"Temperature: [yellow]{temperature}[/yellow]\n"
            f"Max Tokens: [yellow]{max_tokens or 'unlimited'}[/yellow]\n\n"
            f"Type your messages and press Enter. Type '/quit' or '/exit' to end the session.\n"
            f"Type '/help' for more commands.",
            title="🤖 Chat Session",
        )
    )
    try:
        while True:
            # Get user input
            try:
                user_input = console.input("\n[bold cyan]You:[/bold cyan] ").strip()
            except (EOFError, KeyboardInterrupt):
                console.print("\n[yellow]Chat session ended.[/yellow]")
                break
            # Handle special commands
            should_exit, messages, new_model = _handle_special_commands(
                console, user_input, messages, system, ctx
            )
            if should_exit:
                break
            if new_model:
                model = new_model
            # Check if this was a special command that was handled (not a normal message)
            # NOTE(review): prefix matching means any message starting with e.g.
            # "/q..." or "/save..." is treated as a handled command and skipped —
            # presumably intended; verify against _handle_special_commands.
            if (
                user_input.lower().startswith(
                    (
                        "/quit",
                        "/exit",
                        "/q",
                        "/help",
                        "/clear",
                        "/history",
                        "/save",
                        "/load",
                        "/model",
                    )
                )
                or not user_input
            ):
                continue
            # Add user message to conversation
            messages.append({"role": "user", "content": user_input})
            # Display assistant label
            console.print("\n[bold green]Assistant:[/bold green]")
            # Stream the response
            assistant_content = _stream_response(
                console=console,
                client=client,
                model=model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
            )
            # Add assistant message to conversation history
            if assistant_content:
                messages.append({"role": "assistant", "content": assistant_content})
            else:
                console.print("[red]Error: No content received from the model[/red]")
    except KeyboardInterrupt:
        console.print("\n[yellow]Chat session interrupted.[/yellow]")
def _show_help(console: Console) -> None:
    """Show help for interactive chat commands"""
    # The literal contains Rich markup rendered by Panel; keep it verbatim.
    help_text = """
[bold]Interactive Chat Commands:[/bold]
    [cyan]/help[/cyan]     - Show this help message
    [cyan]/quit[/cyan]     - Exit the chat session (also /exit, /q)
    [cyan]/clear[/cyan]    - Clear conversation history
    [cyan]/history[/cyan]  - Show conversation history
    [cyan]/model[/cyan]    - Switch to a different model
    [cyan]/save <name>[/cyan] - Save conversation to file
    [cyan]/load <name>[/cyan] - Load conversation from file
[bold]Tips:[/bold]
    - Your conversation history is maintained during the session
    - Use Ctrl+C to interrupt at any time
    - Responses are streamed in real-time
    - You can switch models mid-conversation with /model
    """
    console.print(Panel(help_text, title="Help"))
def _show_history(console: Console, messages: List[Dict[str, Any]]):
    """Show conversation history"""
    if not messages:
        console.print("[yellow]No conversation history.[/yellow]")
        return
    console.print(Panel.fit("[bold]Conversation History[/bold]", title="History"))
    for position, entry in enumerate(messages, 1):
        role = entry["role"]
        content = entry["content"]
        if role == "system":
            console.print(
                f"[dim]{position}. [bold magenta]System:[/bold magenta] {content}[/dim]"
            )
        elif role == "user":
            console.print(f"{position}. [bold cyan]You:[/bold cyan] {content}")
        elif role == "assistant":
            # Truncate long assistant replies to keep the listing scannable.
            preview = content[:100] + ("..." if len(content) > 100 else "")
            console.print(f"{position}. [bold green]Assistant:[/bold green] {preview}")
def _save_conversation(console: Console, messages: List[Dict[str, Any]], command: str):
    """Save conversation to a file.

    Parses ``command`` of the form ``/save <filename>``; a ``.json`` suffix
    is appended when missing. Errors are reported on the console rather
    than raised.
    """
    parts = command.split()
    if len(parts) < 2:
        console.print("[red]Usage: /save <filename>[/red]")
        return
    filename = parts[1]
    if not filename.endswith(".json"):
        filename += ".json"
    try:
        with open(filename, "w") as f:
            json.dump(messages, f, indent=2)
        # Fix: the success message previously printed a literal placeholder
        # instead of interpolating the filename.
        console.print(f"[green]Conversation saved to {filename}[/green]")
    except Exception as e:
        console.print(f"[red]Error saving conversation: {e}[/red]")
def _load_conversation(
    console: Console, command: str, system: Optional[str]
) -> List[Dict[str, Any]]:
    """Load conversation from a file.

    Parses ``command`` of the form ``/load <filename>``; a ``.json`` suffix
    is appended when missing. On failure, returns a fresh history containing
    only the system message (when one was configured), otherwise [].
    """
    parts = command.split()
    if len(parts) < 2:
        console.print("[red]Usage: /load <filename>[/red]")
        return []
    filename = parts[1]
    if not filename.endswith(".json"):
        filename += ".json"
    try:
        with open(filename, "r") as f:
            messages = json.load(f)
        # Fix: these messages previously printed a literal placeholder
        # instead of interpolating the filename.
        console.print(f"[green]Conversation loaded from {filename}[/green]")
        return messages
    except FileNotFoundError:
        console.print(f"[red]File not found: {filename}[/red]")
    except Exception as e:
        console.print(f"[red]Error loading conversation: {e}[/red]")
    # Return empty list or just system message if load failed
    if system:
        return [{"role": "system", "content": system}]
    return []
def _handle_special_commands(
    console: Console,
    user_input: str,
    messages: List[Dict[str, Any]],
    system: Optional[str],
    ctx: click.Context,
) -> tuple[bool, List[Dict[str, Any]], Optional[str]]:
    """Handle special chat commands. Returns (should_exit, updated_messages, updated_model)"""
    command = user_input.lower()
    if command in ("/quit", "/exit", "/q"):
        console.print("[yellow]Chat session ended.[/yellow]")
        return True, messages, None
    if command == "/help":
        _show_help(console)
        return False, messages, None
    if command == "/clear":
        # Reset history, re-seeding the system prompt when one is set.
        reset: List[Dict[str, Any]] = []
        if system:
            reset.append({"role": "system", "content": system})
        console.print("[green]Conversation history cleared.[/green]")
        return False, reset, None
    if command == "/history":
        _show_history(console, messages)
        return False, messages, None
    if command.startswith("/save"):
        # Pass the original (un-lowered) input so the filename keeps its case.
        _save_conversation(console, messages, user_input)
        return False, messages, None
    if command.startswith("/load"):
        return False, _load_conversation(console, user_input, system), None
    if command == "/model":
        replacement = _select_model(console, _get_available_models(ctx))
        if replacement:
            console.print(f"[green]Switched to model: {replacement}[/green]")
            return False, messages, replacement
        return False, messages, None
    # Empty input or an ordinary chat message: nothing special to do.
    return False, messages, None
def _stream_response(
    console: Console,
    client: ChatClient,
    model: str,
    messages: List[Dict[str, Any]],
    temperature: float,
    max_tokens: Optional[int],
) -> Optional[str]:
    """Stream the model response and return the complete content"""
    collected: List[str] = []
    try:
        for chunk in client.completions_stream(
            model=model,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
        ):
            choices = chunk["choices"] if "choices" in chunk else []
            if not choices:
                continue
            piece = choices[0].get("delta", {}).get("content", "")
            if piece:
                collected.append(piece)
                # Echo each token as it arrives for a live-typing effect.
                console.print(piece, end="")
                sys.stdout.flush()
        console.print()  # newline after the streamed output
        return "".join(collected) or None
    except requests.exceptions.HTTPError as e:
        console.print(f"\n[red]Error: HTTP {e.response.status_code}[/red]")
        try:
            error_body = e.response.json()
            console.print(
                f"[red]{error_body.get('error', {}).get('message', 'Unknown error')}[/red]"
            )
        except json.JSONDecodeError:
            # Body wasn't JSON; show it raw.
            console.print(f"[red]{e.response.text}[/red]")
        return None
    except Exception as e:
        console.print(f"\n[red]Error: {str(e)}[/red]")
        return None

View File

@@ -0,0 +1,116 @@
import json
from typing import Literal
import click
import rich
import requests
from rich.table import Table
from ...credentials import CredentialsManagementClient
@click.group()
def credentials():
    """Manage credentials for the LiteLLM proxy server"""
    # Container group only; the subcommands below implement the behavior.
    pass
@credentials.command()
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["table", "json"]),
    default="table",
    help="Output format (table or json)",
)
@click.pass_context
def list(ctx: click.Context, output_format: Literal["table", "json"]):
    """List all credentials"""
    client = CredentialsManagementClient(ctx.obj["base_url"], ctx.obj["api_key"])
    response = client.list()
    assert isinstance(response, dict)
    if output_format == "json":
        rich.print_json(data=response)
        return
    # Table format: one row per credential.
    table = Table(title="Credentials")
    table.add_column("Credential Name", style="cyan")
    table.add_column("Custom LLM Provider", style="green")
    for entry in response.get("credentials", []):
        details = entry.get("credential_info", {})
        table.add_row(
            str(entry.get("credential_name", "")),
            str(details.get("custom_llm_provider", "")),
        )
    rich.print(table)
@credentials.command()
@click.option(
    "--info",
    type=str,
    help="JSON string containing credential info",
    required=True,
)
@click.option(
    "--values",
    type=str,
    help="JSON string containing credential values",
    required=True,
)
@click.argument("credential_name")
@click.pass_context
def create(ctx: click.Context, credential_name: str, info: str, values: str):
    """Create a new credential"""
    client = CredentialsManagementClient(ctx.obj["base_url"], ctx.obj["api_key"])
    # Validate the JSON payloads up front so bad input fails fast.
    try:
        parsed_info = json.loads(info)
        parsed_values = json.loads(values)
    except json.JSONDecodeError as e:
        raise click.BadParameter(f"Invalid JSON: {str(e)}")
    try:
        result = client.create(credential_name, parsed_info, parsed_values)
    except requests.exceptions.HTTPError as e:
        # Surface the server's error payload before aborting.
        click.echo(f"Error: HTTP {e.response.status_code}", err=True)
        try:
            rich.print_json(data=e.response.json())
        except json.JSONDecodeError:
            click.echo(e.response.text, err=True)
        raise click.Abort()
    rich.print_json(data=result)
@credentials.command()
@click.argument("credential_name")
@click.pass_context
def delete(ctx: click.Context, credential_name: str):
    """Delete a credential by name"""
    client = CredentialsManagementClient(ctx.obj["base_url"], ctx.obj["api_key"])
    try:
        result = client.delete(credential_name)
    except requests.exceptions.HTTPError as e:
        # Surface the server's error payload before aborting.
        click.echo(f"Error: HTTP {e.response.status_code}", err=True)
        try:
            rich.print_json(data=e.response.json())
        except json.JSONDecodeError:
            click.echo(e.response.text, err=True)
        raise click.Abort()
    rich.print_json(data=result)
@credentials.command()
@click.argument("credential_name")
@click.pass_context
def get(ctx: click.Context, credential_name: str):
    """Get a credential by name"""
    client = CredentialsManagementClient(ctx.obj["base_url"], ctx.obj["api_key"])
    try:
        response = client.get(credential_name)
        rich.print_json(data=response)
    except requests.exceptions.HTTPError as e:
        # Consistency fix: report server errors the same way as the
        # sibling create/delete commands instead of raising a traceback.
        click.echo(f"Error: HTTP {e.response.status_code}", err=True)
        try:
            error_body = e.response.json()
            rich.print_json(data=error_body)
        except json.JSONDecodeError:
            click.echo(e.response.text, err=True)
        raise click.Abort()

View File

@@ -0,0 +1,102 @@
import json as json_lib
from typing import Optional
import click
import rich
import requests
from ...http_client import HTTPClient
@click.group()
def http():
    """Make HTTP requests to the LiteLLM proxy server"""
    # Container group only; the `request` subcommand does the work.
    pass
@http.command()
@click.argument("method")
@click.argument("uri")
@click.option(
    "--data",
    "-d",
    type=str,
    help="Data to send in the request body (as JSON string)",
)
@click.option(
    "--json",
    "-j",
    type=str,
    help="JSON data to send in the request body (as JSON string)",
)
@click.option(
    "--header",
    "-H",
    multiple=True,
    help="HTTP headers in 'key:value' format. Can be specified multiple times.",
)
@click.pass_context
def request(
    ctx: click.Context,
    method: str,
    uri: str,
    data: Optional[str] = None,
    json: Optional[str] = None,
    header: tuple[str, ...] = (),
):
    """Make an HTTP request to the LiteLLM proxy server
    METHOD: HTTP method (GET, POST, PUT, DELETE, etc.)
    URI: URI path (will be appended to base_url)
    Examples:
        litellm http request GET /models
        litellm http request POST /chat/completions -j '{"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]}'
        litellm http request GET /health/test_connection -H "X-Custom-Header:value"
    """
    # Convert repeated -H 'key:value' flags into a header dict.
    headers = {}
    for raw_header in header:
        name, sep, value = raw_header.partition(":")
        if not sep:
            raise click.BadParameter(
                f"Invalid header format: {raw_header}. Expected format: 'key:value'"
            )
        headers[name.strip()] = value.strip()
    # --json must be valid JSON; reject otherwise.
    json_data = None
    if json:
        try:
            json_data = json_lib.loads(json)
        except ValueError as e:
            raise click.BadParameter(f"Invalid JSON format: {e}")
    # --data is parsed as JSON when possible, else sent as a raw string.
    request_data = None
    if data:
        try:
            request_data = json_lib.loads(data)
        except ValueError:
            request_data = data
    client = HTTPClient(ctx.obj["base_url"], ctx.obj["api_key"])
    try:
        result = client.request(
            method=method,
            uri=uri,
            data=request_data,
            json=json_data,
            headers=headers,
        )
    except requests.exceptions.HTTPError as e:
        # Surface the server's error payload before aborting.
        click.echo(f"Error: HTTP {e.response.status_code}", err=True)
        try:
            rich.print_json(data=e.response.json())
        except json_lib.JSONDecodeError:
            click.echo(e.response.text, err=True)
        raise click.Abort()
    rich.print_json(data=result)

View File

@@ -0,0 +1,415 @@
import json
from datetime import datetime
from typing import Literal, Optional, List, Dict, Any
import click
import rich
import requests
from rich.table import Table
from ...keys import KeysManagementClient
@click.group()
def keys():
    """Manage API keys for the LiteLLM proxy server"""
    # Container group only; subcommands implement the behavior.
    pass
@keys.command()
@click.option("--page", type=int, help="Page number for pagination")
@click.option("--size", type=int, help="Number of items per page")
@click.option("--user-id", type=str, help="Filter keys by user ID")
@click.option("--team-id", type=str, help="Filter keys by team ID")
@click.option("--organization-id", type=str, help="Filter keys by organization ID")
@click.option("--key-hash", type=str, help="Filter by specific key hash")
@click.option("--key-alias", type=str, help="Filter by key alias")
# NOTE(review): an is_flag option with default=True cannot be switched off
# from the command line — confirm whether this was intended to be
# toggleable (e.g. --return-full-object/--no-return-full-object).
@click.option(
    "--return-full-object",
    is_flag=True,
    default=True,
    help="Return the full key object",
)
@click.option(
    "--include-team-keys", is_flag=True, help="Include team keys in the response"
)
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["table", "json"]),
    default="table",
    help="Output format (table or json)",
)
@click.pass_context
def list(
    ctx: click.Context,
    page: Optional[int],
    size: Optional[int],
    user_id: Optional[str],
    team_id: Optional[str],
    organization_id: Optional[str],
    key_hash: Optional[str],
    key_alias: Optional[str],
    include_team_keys: bool,
    output_format: Literal["table", "json"],
    return_full_object: bool,
):
    """List all API keys"""
    client = KeysManagementClient(ctx.obj["base_url"], ctx.obj["api_key"])
    # All filters are forwarded to the server; None values mean "no filter".
    response = client.list(
        page=page,
        size=size,
        user_id=user_id,
        team_id=team_id,
        organization_id=organization_id,
        key_hash=key_hash,
        key_alias=key_alias,
        return_full_object=return_full_object,
        include_team_keys=include_team_keys,
    )
    assert isinstance(response, dict)
    if output_format == "json":
        rich.print_json(data=response)
    else:
        # Table format: summary line plus one row per key.
        rich.print(
            f"Showing {len(response.get('keys', []))} keys out of {response.get('total_count', 0)}"
        )
        table = Table(title="API Keys")
        table.add_column("Key Hash", style="cyan")
        table.add_column("Alias", style="green")
        table.add_column("User ID", style="magenta")
        table.add_column("Team ID", style="yellow")
        table.add_column("Spend", style="red")
        for key in response.get("keys", []):
            table.add_row(
                str(key.get("token", "")),
                str(key.get("key_alias", "")),
                str(key.get("user_id", "")),
                str(key.get("team_id", "")),
                str(key.get("spend", "")),
            )
        rich.print(table)
@keys.command()
@click.option("--models", type=str, help="Comma-separated list of allowed models")
@click.option("--aliases", type=str, help="JSON string of model alias mappings")
@click.option("--spend", type=float, help="Maximum spend limit for this key")
@click.option(
    "--duration",
    type=str,
    help="Duration for which the key is valid (e.g. '24h', '7d')",
)
@click.option("--key-alias", type=str, help="Alias/name for the key")
@click.option("--team-id", type=str, help="Team ID to associate the key with")
@click.option("--user-id", type=str, help="User ID to associate the key with")
@click.option("--budget-id", type=str, help="Budget ID to associate the key with")
@click.option(
    "--config", type=str, help="JSON string of additional configuration parameters"
)
@click.pass_context
def generate(
    ctx: click.Context,
    models: Optional[str],
    aliases: Optional[str],
    spend: Optional[float],
    duration: Optional[str],
    key_alias: Optional[str],
    team_id: Optional[str],
    user_id: Optional[str],
    budget_id: Optional[str],
    config: Optional[str],
):
    """Generate a new API key"""
    client = KeysManagementClient(ctx.obj["base_url"], ctx.obj["api_key"])
    # Parse/validate CLI inputs before hitting the server.
    try:
        models_list = [m.strip() for m in models.split(",")] if models else None
        aliases_dict = json.loads(aliases) if aliases else None
        config_dict = json.loads(config) if config else None
    except json.JSONDecodeError as e:
        raise click.BadParameter(f"Invalid JSON: {str(e)}")
    try:
        response = client.generate(
            models=models_list,
            aliases=aliases_dict,
            spend=spend,
            duration=duration,
            key_alias=key_alias,
            team_id=team_id,
            user_id=user_id,
            budget_id=budget_id,
            config=config_dict,
        )
        rich.print_json(data=response)
    except requests.exceptions.HTTPError as e:
        # Surface the server's error payload before aborting.
        click.echo(f"Error: HTTP {e.response.status_code}", err=True)
        try:
            error_body = e.response.json()
            rich.print_json(data=error_body)
        except json.JSONDecodeError:
            click.echo(e.response.text, err=True)
        raise click.Abort()
@keys.command()
@click.option("--keys", type=str, help="Comma-separated list of API keys to delete")
@click.option(
    "--key-aliases", type=str, help="Comma-separated list of key aliases to delete"
)
@click.pass_context
def delete(ctx: click.Context, keys: Optional[str], key_aliases: Optional[str]):
    """Delete API keys by key or alias"""
    client = KeysManagementClient(ctx.obj["base_url"], ctx.obj["api_key"])
    # Split the comma-separated option values into trimmed lists.
    keys_list = [item.strip() for item in keys.split(",")] if keys else None
    aliases_list = (
        [item.strip() for item in key_aliases.split(",")] if key_aliases else None
    )
    try:
        result = client.delete(keys=keys_list, key_aliases=aliases_list)
    except requests.exceptions.HTTPError as e:
        # Surface the server's error payload before aborting.
        click.echo(f"Error: HTTP {e.response.status_code}", err=True)
        try:
            rich.print_json(data=e.response.json())
        except json.JSONDecodeError:
            click.echo(e.response.text, err=True)
        raise click.Abort()
    rich.print_json(data=result)
def _parse_created_since_filter(created_since: Optional[str]) -> Optional[datetime]:
"""Parse and validate the created_since date filter."""
if not created_since:
return None
try:
# Support formats: YYYY-MM-DD_HH:MM or YYYY-MM-DD
if "_" in created_since:
return datetime.strptime(created_since, "%Y-%m-%d_%H:%M")
else:
return datetime.strptime(created_since, "%Y-%m-%d")
except ValueError:
click.echo(
f"Error: Invalid date format '{created_since}'. Use YYYY-MM-DD_HH:MM or YYYY-MM-DD",
err=True,
)
raise click.Abort()
def _fetch_all_keys_with_pagination(
    source_client: KeysManagementClient, source_base_url: str
) -> List[Dict[str, Any]]:
    """Page through the source proxy's key list and return every key record."""
    click.echo(f"Fetching keys from source server: {source_base_url}")
    all_keys: List[Dict[str, Any]] = []
    page_size = 100  # larger pages keep the number of round trips small
    page = 1
    while True:
        response = source_client.list(
            return_full_object=True, page=page, size=page_size
        )
        # list() returns a plain dict unless return_request is requested.
        assert isinstance(response, dict), "Expected dict response from list API"
        batch = response.get("keys", [])
        if not batch:
            break
        all_keys.extend(batch)
        click.echo(f"Fetched page {page}: {len(batch)} keys")
        if len(batch) < page_size:
            # A short page means the server has no further results.
            break
        page += 1
    return all_keys
def _filter_keys_by_created_since(
source_keys: List[Dict[str, Any]],
created_since_dt: Optional[datetime],
created_since: str,
) -> List[Dict[str, Any]]:
"""Filter keys by created_since date if specified."""
if not created_since_dt:
return source_keys
filtered_keys = []
for key in source_keys:
key_created_at = key.get("created_at")
if key_created_at:
# Parse the key's created_at timestamp
if isinstance(key_created_at, str):
if "T" in key_created_at:
key_dt = datetime.fromisoformat(
key_created_at.replace("Z", "+00:00")
)
else:
key_dt = datetime.fromisoformat(key_created_at)
# Convert to naive datetime for comparison (assuming UTC)
if key_dt.tzinfo:
key_dt = key_dt.replace(tzinfo=None)
if key_dt >= created_since_dt:
filtered_keys.append(key)
click.echo(
f"Filtered {len(source_keys)} keys to {len(filtered_keys)} keys created since {created_since}"
)
return filtered_keys
def _display_dry_run_table(source_keys: List[Dict[str, Any]]) -> None:
    """Print a preview table of the keys --dry-run would import."""
    click.echo("\n--- DRY RUN MODE ---")
    table = Table(title="Keys that would be imported")
    table.add_column("Key Alias", style="green")
    table.add_column("User ID", style="magenta")
    table.add_column("Created", style="cyan")
    for key in source_keys:
        created = key.get("created_at", "")
        # Reformat ISO timestamps to minute resolution for readability;
        # anything else is shown as-is.
        if isinstance(created, str) and "T" in created:
            parsed = datetime.fromisoformat(created.replace("Z", "+00:00"))
            created = parsed.strftime("%Y-%m-%d %H:%M")
        table.add_row(
            str(key.get("key_alias", "")), str(key.get("user_id", "")), str(created)
        )
    rich.print(table)
def _prepare_key_import_data(key: Dict[str, Any]) -> Dict[str, Any]:
"""Prepare key data for import by extracting relevant fields."""
import_data = {}
# Copy relevant fields if they exist
for field in [
"models",
"aliases",
"spend",
"key_alias",
"team_id",
"user_id",
"budget_id",
"config",
]:
if key.get(field):
import_data[field] = key[field]
return import_data
def _import_keys_to_destination(
    source_keys: List[Dict[str, Any]], dest_client: KeysManagementClient
) -> tuple[int, int]:
    """Generate each source key on the destination proxy.

    Returns ``(imported_count, failed_count)``; individual failures are
    reported to stderr but do not stop the run.
    """
    imported = 0
    failed = 0
    for key in source_keys:
        try:
            payload = _prepare_key_import_data(key)
            # generate() returns the decoded JSON payload directly.
            result = dest_client.generate(**payload)
            click.echo(f"Generated key: {result}")
            imported += 1
            alias = key.get("key_alias", "N/A")
            click.echo(f"✓ Imported key: {alias}")
        except Exception as exc:
            failed += 1
            alias = key.get("key_alias", "N/A")
            click.echo(f"✗ Failed to import key {alias}: {str(exc)}", err=True)
    return imported, failed
@keys.command(name="import")
@click.option(
    "--source-base-url",
    required=True,
    help="Base URL of the source LiteLLM proxy server to import keys from",
)
@click.option(
    "--source-api-key", help="API key for authentication to the source server"
)
@click.option(
    "--dry-run",
    is_flag=True,
    help="Show what would be imported without actually importing",
)
@click.option(
    "--created-since",
    help="Only import keys created after this date/time (format: YYYY-MM-DD_HH:MM or YYYY-MM-DD)",
)
@click.pass_context
def import_keys(
    ctx: click.Context,
    source_base_url: str,
    source_api_key: Optional[str],
    dry_run: bool,
    created_since: Optional[str],
):
    """Import API keys from another LiteLLM instance"""
    # Validate the date filter first; this aborts on a malformed value
    # before any network traffic happens.
    created_since_dt = _parse_created_since_filter(created_since)
    # Two clients: the source is read from, the destination (the server this
    # CLI is configured against) is written to.
    source_client = KeysManagementClient(source_base_url, source_api_key)
    dest_client = KeysManagementClient(ctx.obj["base_url"], ctx.obj["api_key"])
    try:
        # Get all keys from source instance with pagination
        source_keys = _fetch_all_keys_with_pagination(source_client, source_base_url)
        # Filter keys by created_since if specified
        if created_since:
            source_keys = _filter_keys_by_created_since(
                source_keys, created_since_dt, created_since
            )
        if not source_keys:
            click.echo("No keys found in source instance.")
            return
        click.echo(f"Found {len(source_keys)} keys in source instance.")
        # Dry-run: show the preview table and stop before writing anything.
        if dry_run:
            _display_dry_run_table(source_keys)
            return
        # Import each key
        imported_count, failed_count = _import_keys_to_destination(
            source_keys, dest_client
        )
        # Summary
        click.echo("\nImport completed:")
        click.echo(f"  Successfully imported: {imported_count}")
        click.echo(f"  Failed to import: {failed_count}")
        click.echo(f"  Total keys processed: {len(source_keys)}")
    except requests.exceptions.HTTPError as e:
        click.echo(f"Error: HTTP {e.response.status_code}", err=True)
        # Show the server's error body when JSON; otherwise the raw text.
        try:
            error_body = e.response.json()
            rich.print_json(data=error_body)
        except json.JSONDecodeError:
            click.echo(e.response.text, err=True)
        raise click.Abort()
    except Exception as e:
        click.echo(f"Error: {str(e)}", err=True)
        raise click.Abort()

View File

@@ -0,0 +1,485 @@
# stdlib imports
from datetime import datetime
import re
from typing import Optional, Literal, Any
import yaml
from dataclasses import dataclass
from collections import defaultdict
# third party imports
import click
import rich
# local imports
from ... import Client
@dataclass
class ModelYamlInfo:
    """Flattened view of one entry in a config YAML ``model_list``."""

    # Public model name exposed by the proxy (YAML: model_name).
    model_name: str
    # litellm_params mapping forwarded verbatim when creating the model.
    model_params: dict[str, Any]
    # model_info mapping from the YAML entry (may be empty).
    model_info: dict[str, Any]
    # Upstream model identifier from litellm_params.model, e.g. "openai/gpt-4".
    model_id: str
    # access_groups list from model_info (may be empty).
    access_groups: list[str]
    # Text before the first "/" of model_id, or the whole id when no slash.
    provider: str

    @property
    def access_groups_str(self) -> str:
        """Comma-separated access groups for table display ("" when none)."""
        return ", ".join(self.access_groups) if self.access_groups else ""
def _get_model_info_obj_from_yaml(model: dict[str, Any]) -> ModelYamlInfo:
    """Flatten one YAML model entry into a ModelYamlInfo record.

    Raises KeyError when ``model_name``, ``litellm_params``, or
    ``litellm_params.model`` is missing; entries are pre-screened by
    ``_filter_model`` before reaching this function.
    """
    params: dict[str, Any] = model["litellm_params"]
    info: dict[str, Any] = model.get("model_info", {})
    upstream_id: str = params["model"]
    # Provider is the text before the first "/" ("openai/gpt-4" -> "openai");
    # ids without a slash are treated as their own provider.
    provider = upstream_id.split("/", 1)[0] if "/" in upstream_id else upstream_id
    return ModelYamlInfo(
        model_name=model["model_name"],
        model_params=params,
        model_info=info,
        model_id=upstream_id,
        access_groups=info.get("access_groups", []),
        provider=provider,
    )
def format_iso_datetime_str(iso_datetime_str: Optional[str]) -> str:
    """Render an ISO-8601 timestamp string as ``YYYY-MM-DD HH:MM``.

    Returns "" for falsy input and echoes the raw value back (stringified)
    when it cannot be parsed.
    """
    if not iso_datetime_str:
        return ""
    normalized = iso_datetime_str.replace("Z", "+00:00")
    try:
        parsed = datetime.fromisoformat(normalized)
    except (TypeError, ValueError):
        return str(iso_datetime_str)
    return parsed.strftime("%Y-%m-%d %H:%M")
def format_timestamp(timestamp: Optional[int]) -> str:
    """Render a Unix timestamp as a local-time ``YYYY-MM-DD HH:MM`` string.

    Returns "" for None and the stringified input when conversion fails.
    """
    if timestamp is None:
        return ""
    try:
        return datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d %H:%M")
    except (TypeError, ValueError):
        return str(timestamp)
def format_cost_per_1k_tokens(cost: Optional[float]) -> str:
    """Render a per-token cost as a dollar amount per 1000 tokens.

    Accepts numbers or numeric strings; returns "" for None and the
    stringified input when conversion fails.
    """
    if cost is None:
        return ""
    try:
        per_thousand = float(cost) * 1000
    except (TypeError, ValueError):
        return str(cost)
    return f"${per_thousand:.4f}"
def create_client(ctx: click.Context) -> Client:
    """Build a proxy API Client from the CLI context's base_url and api_key."""
    return Client(base_url=ctx.obj["base_url"], api_key=ctx.obj["api_key"])
@click.group()
def models() -> None:
    """Manage models on your LiteLLM proxy server"""
    # Group container only; subcommands register themselves via @models.command().
    pass
@models.command("list")
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["table", "json"]),
    default="table",
    help="Output format (table or json)",
)
@click.pass_context
def list_models(ctx: click.Context, output_format: Literal["table", "json"]) -> None:
    """List all available models"""
    client = create_client(ctx)
    models_list = client.models.list()
    assert isinstance(models_list, list)
    if output_format == "json":
        rich.print_json(data=models_list)
    else:  # table format
        table = rich.table.Table(title="Available Models")
        # Add columns based on the data structure
        table.add_column("ID", style="cyan")
        table.add_column("Object", style="green")
        table.add_column("Created", style="magenta")
        table.add_column("Owned By", style="yellow")
        # Add rows
        for model in models_list:
            created = model.get("created")
            # Convert string timestamp to integer if needed
            if isinstance(created, str) and created.isdigit():
                created = int(created)
            table.add_row(
                str(model.get("id", "")),
                str(model.get("object", "model")),
                # Integer epoch values and ISO strings are formatted by
                # different helpers; anything unparseable falls through as-is.
                format_timestamp(created)
                if isinstance(created, int)
                else format_iso_datetime_str(created),
                str(model.get("owned_by", "")),
            )
        rich.print(table)
@models.command("add")
@click.argument("model-name")
@click.option(
    "--param",
    "-p",
    multiple=True,
    help="Model parameters in key=value format (can be specified multiple times)",
)
@click.option(
    "--info",
    "-i",
    multiple=True,
    help="Model info in key=value format (can be specified multiple times)",
)
@click.pass_context
def add_model(
    ctx: click.Context, model_name: str, param: tuple[str, ...], info: tuple[str, ...]
) -> None:
    """Add a new model to the proxy"""
    # Each -p/-i value is "key=value"; split on the first "=" only so values
    # may themselves contain "=".
    params_dict = dict(item.split("=", 1) for item in param)
    info_dict = dict(item.split("=", 1) for item in info) if info else None
    response = create_client(ctx).models.new(
        model_name=model_name,
        model_params=params_dict,
        model_info=info_dict,
    )
    rich.print_json(data=response)
@models.command("delete")
@click.argument("model-id")
@click.pass_context
def delete_model(ctx: click.Context, model_id: str) -> None:
    """Delete a model from the proxy"""
    response = create_client(ctx).models.delete(model_id=model_id)
    # Echo the server's confirmation payload.
    rich.print_json(data=response)
@models.command("get")
@click.option("--id", "model_id", help="ID of the model to retrieve")
@click.option("--name", "model_name", help="Name of the model to retrieve")
@click.pass_context
def get_model(
    ctx: click.Context, model_id: Optional[str], model_name: Optional[str]
) -> None:
    """Get information about a specific model"""
    # At least one selector is required; both click options are optional.
    if not (model_id or model_name):
        raise click.UsageError("Either --id or --name must be provided")
    response = create_client(ctx).models.get(model_id=model_id, model_name=model_name)
    rich.print_json(data=response)
@models.command("info")
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["table", "json"]),
    default="table",
    help="Output format (table or json)",
)
@click.option(
    "--columns",
    "columns",
    default="public_model,upstream_model,updated_at",
    help="Comma-separated list of columns to display. Valid columns: public_model, upstream_model, credential_name, created_at, updated_at, id, input_cost, output_cost. Default: public_model,upstream_model,updated_at",
)
@click.pass_context
def get_models_info(
    ctx: click.Context, output_format: Literal["table", "json"], columns: str
) -> None:
    """Get detailed information about all models"""
    client = create_client(ctx)
    models_info = client.models.info()
    assert isinstance(models_info, list)
    if output_format == "json":
        rich.print_json(data=models_info)
    else:  # table format
        table = rich.table.Table(title="Models Information")
        # Define all possible columns with their configurations.
        # Each entry maps a CLI column name to its header/style and a
        # get_value callable that extracts + formats the cell from one
        # model-info dict.
        column_configs: dict[str, dict[str, Any]] = {
            "public_model": {
                "header": "Public Model",
                "style": "cyan",
                "get_value": lambda m: str(m.get("model_name", "")),
            },
            "upstream_model": {
                "header": "Upstream Model",
                "style": "green",
                "get_value": lambda m: str(
                    m.get("litellm_params", {}).get("model", "")
                ),
            },
            "credential_name": {
                "header": "Credential Name",
                "style": "yellow",
                "get_value": lambda m: str(
                    m.get("litellm_params", {}).get("litellm_credential_name", "")
                ),
            },
            "created_at": {
                "header": "Created At",
                "style": "magenta",
                "get_value": lambda m: format_iso_datetime_str(
                    m.get("model_info", {}).get("created_at")
                ),
            },
            "updated_at": {
                "header": "Updated At",
                "style": "magenta",
                "get_value": lambda m: format_iso_datetime_str(
                    m.get("model_info", {}).get("updated_at")
                ),
            },
            "id": {
                "header": "ID",
                "style": "blue",
                "get_value": lambda m: str(m.get("model_info", {}).get("id", "")),
            },
            "input_cost": {
                "header": "Input Cost",
                "style": "green",
                "justify": "right",
                # Costs are stored per token; displayed per 1k tokens.
                "get_value": lambda m: format_cost_per_1k_tokens(
                    m.get("model_info", {}).get("input_cost_per_token")
                ),
            },
            "output_cost": {
                "header": "Output Cost",
                "style": "green",
                "justify": "right",
                "get_value": lambda m: format_cost_per_1k_tokens(
                    m.get("model_info", {}).get("output_cost_per_token")
                ),
            },
        }
        # Add requested columns; unknown names warn but don't abort.
        requested_columns = [col.strip() for col in columns.split(",")]
        for col_name in requested_columns:
            if col_name in column_configs:
                config = column_configs[col_name]
                table.add_column(
                    config["header"],
                    style=config["style"],
                    justify=config.get("justify", "left"),
                )
            else:
                click.echo(f"Warning: Unknown column '{col_name}'", err=True)
        # Add rows with only the requested columns (order follows the
        # requested column order, matching the headers added above).
        for model in models_info:
            row_values = []
            for col_name in requested_columns:
                if col_name in column_configs:
                    row_values.append(column_configs[col_name]["get_value"](model))
            if row_values:
                table.add_row(*row_values)
        rich.print(table)
@models.command("update")
@click.argument("model-id")
@click.option(
    "--param",
    "-p",
    multiple=True,
    help="Model parameters in key=value format (can be specified multiple times)",
)
@click.option(
    "--info",
    "-i",
    multiple=True,
    help="Model info in key=value format (can be specified multiple times)",
)
@click.pass_context
def update_model(
    ctx: click.Context, model_id: str, param: tuple[str, ...], info: tuple[str, ...]
) -> None:
    """Update an existing model's configuration"""
    # Parse repeated -p/-i options, splitting each on the first "=" only.
    params_dict = dict(item.split("=", 1) for item in param)
    info_dict = dict(item.split("=", 1) for item in info) if info else None
    response = create_client(ctx).models.update(
        model_id=model_id,
        model_params=params_dict,
        model_info=info_dict,
    )
    rich.print_json(data=response)
def _filter_model(model, model_regex, access_group_regex):
model_name = model.get("model_name")
model_params = model.get("litellm_params")
model_info = model.get("model_info", {})
if not model_name or not model_params:
return False
model_id = model_params.get("model")
if not model_id or not isinstance(model_id, str):
return False
if model_regex and not model_regex.search(model_id):
return False
access_groups = model_info.get("access_groups", [])
if access_group_regex:
if not isinstance(access_groups, list):
return False
if not any(
isinstance(group, str) and access_group_regex.search(group)
for group in access_groups
):
return False
return True
def _print_models_table(added_models: list[ModelYamlInfo], table_title: str):
    """Render imported models as a rich table; prints nothing for an empty list."""
    if not added_models:
        return
    table = rich.table.Table(title=table_title)
    for header, color in (
        ("Model Name", "cyan"),
        ("Upstream Model", "green"),
        ("Access Groups", "magenta"),
    ):
        table.add_column(header, style=color)
    for entry in added_models:
        table.add_row(entry.model_name, entry.model_id, entry.access_groups_str)
    rich.print(table)
def _print_summary_table(provider_counts):
    """Render per-provider import counts followed by a bold total row."""
    table = rich.table.Table(title="Model Import Summary")
    table.add_column("Provider", style="cyan")
    table.add_column("Count", style="green")
    total = 0
    for provider, count in provider_counts.items():
        table.add_row(str(provider), str(count))
        total += count
    table.add_row("[bold]Total[/bold]", f"[bold]{total}[/bold]")
    rich.print(table)
def get_model_list_from_yaml_file(yaml_file: str) -> list[dict[str, Any]]:
    """Load the ``model_list`` section from a YAML config file.

    Raises click.ClickException when the key is missing/empty or is not a
    list of model definitions.
    """
    with open(yaml_file, "r") as handle:
        parsed = yaml.safe_load(handle)
    if not parsed or "model_list" not in parsed:
        raise click.ClickException(
            "YAML file must contain a 'model_list' key with a list of models."
        )
    entries = parsed["model_list"]
    if not isinstance(entries, list):
        raise click.ClickException("'model_list' must be a list of model definitions.")
    return entries
def _get_filtered_model_list(
    model_list, only_models_matching_regex, only_access_groups_matching_regex
):
    """Apply the optional CLI regex filters and return the surviving models."""
    model_pattern = None
    if only_models_matching_regex:
        model_pattern = re.compile(only_models_matching_regex)
    group_pattern = None
    if only_access_groups_matching_regex:
        group_pattern = re.compile(only_access_groups_matching_regex)
    return [
        entry
        for entry in model_list
        if _filter_model(entry, model_pattern, group_pattern)
    ]
def _import_models_get_table_title(dry_run: bool) -> str:
if dry_run:
return "Models that would be imported if [yellow]--dry-run[/yellow] was not provided"
else:
return "Models Imported"
@models.command("import")
@click.argument(
    "yaml_file", type=click.Path(exists=True, dir_okay=False, readable=True)
)
@click.option(
    "--dry-run",
    is_flag=True,
    help="Show what would be imported without making any changes.",
)
@click.option(
    "--only-models-matching-regex",
    default=None,
    help="Only import models where litellm_params.model matches the given regex.",
)
@click.option(
    "--only-access-groups-matching-regex",
    default=None,
    help="Only import models where at least one item in model_info.access_groups matches the given regex.",
)
@click.pass_context
def import_models(
    ctx: click.Context,
    yaml_file: str,
    dry_run: bool,
    only_models_matching_regex: Optional[str],
    only_access_groups_matching_regex: Optional[str],
) -> None:
    """Import models from a YAML file and add them to the proxy."""
    # Load and filter the YAML model list before touching the proxy.
    model_list = get_model_list_from_yaml_file(yaml_file)
    filtered_model_list = _get_filtered_model_list(
        model_list, only_models_matching_regex, only_access_groups_matching_regex
    )
    # Create the client once up front instead of re-testing dry_run on every
    # loop iteration (the original checked dry_run twice and left `client`
    # conditionally unbound).
    client = None if dry_run else create_client(ctx)
    provider_counts: dict[str, int] = defaultdict(int)
    added_models: list[ModelYamlInfo] = []
    for model in filtered_model_list:
        model_info_obj = _get_model_info_obj_from_yaml(model)
        if client is not None:
            try:
                client.models.new(
                    model_name=model_info_obj.model_name,
                    model_params=model_info_obj.model_params,
                    model_info=model_info_obj.model_info,
                )
            except Exception:
                # Best-effort import: a failed creation is still listed in
                # the summary, matching the historical behavior.
                pass
        added_models.append(model_info_obj)
        provider_counts[model_info_obj.provider] += 1
    _print_models_table(added_models, _import_models_get_table_title(dry_run))
    _print_summary_table(provider_counts)

View File

@@ -0,0 +1,167 @@
"""Team management commands for LiteLLM CLI."""
from typing import Any, Dict, List, Optional
import click
import requests
from rich.console import Console
from rich.table import Table
from litellm.proxy.client import Client
@click.group()
def teams():
    """Manage teams and team assignments"""
    # Group container only; subcommands register via @teams.command().
    pass
def display_teams_table(teams: List[Dict[str, Any]]) -> None:
    """Render a list of team dicts as a numbered rich table.

    Expects each team dict to carry (at least) ``team_alias``, ``team_id``,
    ``models`` and ``max_budget`` keys; missing values degrade to "N/A",
    "All models" or "Unlimited". Prints a not-found message for an empty
    list.
    """
    console = Console()
    if not teams:
        console.print("❌ No teams found for your user.")
        return
    table = Table(title="Available Teams")
    table.add_column("Index", style="cyan", no_wrap=True)
    table.add_column("Team Alias", style="magenta")
    table.add_column("Team ID", style="green")
    table.add_column("Models", style="yellow")
    table.add_column("Max Budget", style="blue")
    table.add_column("Role", style="red")
    for i, team in enumerate(teams):
        team_alias = team.get("team_alias") or "N/A"
        team_id = team.get("team_id", "N/A")
        models = team.get("models", [])
        max_budget = team.get("max_budget")
        # Format models list: show up to three, then a "+N more" suffix.
        if models:
            if len(models) > 3:
                models_str = ", ".join(models[:3]) + f" (+{len(models) - 3} more)"
            else:
                models_str = ", ".join(models)
        else:
            models_str = "All models"
        # Format budget (note: a 0 budget also renders as "Unlimited" here —
        # TODO confirm whether 0 should display as "$0").
        budget_str = f"${max_budget}" if max_budget else "Unlimited"
        # Try to determine role (this might vary based on API response structure)
        role = "Member"  # Default role
        if (
            isinstance(team, dict)
            and "members_with_roles" in team
            and team["members_with_roles"]
        ):
            # NOTE(review): role extraction from members_with_roles is a stub;
            # every row currently shows "Member".
            pass
        table.add_row(str(i + 1), team_alias, team_id, models_str, budget_str, role)
    console.print(table)
@teams.command()
@click.pass_context
def list(ctx: click.Context):
    """List teams that you belong to"""
    # NOTE: the function name shadows the builtin ``list`` but also defines
    # the CLI command name ("teams list"); renaming it would change the CLI.
    client = Client(ctx.obj["base_url"], ctx.obj["api_key"])
    try:
        # Use list() for simpler response structure (returns array directly)
        teams = client.teams.list()
        display_teams_table(teams)
    except requests.exceptions.HTTPError as e:
        click.echo(f"Error: HTTP {e.response.status_code}", err=True)
        # Guard the JSON parse: error bodies are not always JSON, and the
        # unguarded .json() previously replaced this friendly message with a
        # traceback (the keys commands already guard this case).
        try:
            error_body = e.response.json()
        except ValueError:
            click.echo(f"Details: {e.response.text}", err=True)
        else:
            click.echo(f"Details: {error_body.get('detail', 'Unknown error')}", err=True)
        raise click.Abort()
    except Exception as e:
        click.echo(f"Error: {str(e)}", err=True)
        raise click.Abort()
@teams.command()
@click.pass_context
def available(ctx: click.Context):
    """List teams that are available to join"""
    client = Client(ctx.obj["base_url"], ctx.obj["api_key"])
    try:
        teams = client.teams.get_available()
        if teams:
            console = Console()
            console.print("\n🎯 Available Teams to Join:")
            display_teams_table(teams)
        else:
            click.echo(" No available teams to join.")
    except requests.exceptions.HTTPError as e:
        click.echo(f"Error: HTTP {e.response.status_code}", err=True)
        # Guard the JSON parse so a non-JSON error body is shown as raw text
        # instead of raising a secondary exception.
        try:
            error_body = e.response.json()
        except ValueError:
            click.echo(f"Details: {e.response.text}", err=True)
        else:
            click.echo(f"Details: {error_body.get('detail', 'Unknown error')}", err=True)
        # Abort so the process exits non-zero on HTTP errors, matching the
        # sibling commands (the original fell through and exited 0 here).
        raise click.Abort()
    except Exception as e:
        click.echo(f"Error: {str(e)}", err=True)
        raise click.Abort()
@teams.command()
@click.option("--team-id", type=str, help="Team ID to assign the key to")
@click.pass_context
def assign_key(ctx: click.Context, team_id: Optional[str]):
    """Assign your current CLI key to a team"""
    client = Client(ctx.obj["base_url"], ctx.obj["api_key"])
    api_key = ctx.obj["api_key"]
    if not api_key:
        click.echo("❌ No API key found. Please login first using 'litellm login'")
        raise click.Abort()
    try:
        # If no team_id provided, show teams and let user select
        # interactively from the user's own team list.
        if not team_id:
            teams = client.teams.list()
            if not teams:
                click.echo("❌ No teams found for your user.")
                return
            # Use interactive selection from auth module
            from .auth import prompt_team_selection

            selected_team = prompt_team_selection(teams)
            if selected_team:
                team_id = selected_team.get("team_id")
            else:
                click.echo("❌ Operation cancelled.")
                return
        # Update the key with the selected team
        if team_id:
            click.echo(f"\n🔄 Assigning your key to team: {team_id}")
            client.keys.update(key=api_key, team_id=team_id)
            click.echo(f"✅ Successfully assigned key to team: {team_id}")
            # Show team details if available — re-fetch the team list and
            # report which models the key can now reach.
            teams = client.teams.list()
            for team in teams:
                if team.get("team_id") == team_id:
                    models = team.get("models", [])
                    if models:
                        click.echo(f"🎯 You can now access models: {', '.join(models)}")
                    else:
                        click.echo("🎯 You can now access all available models")
                    break
    except requests.exceptions.HTTPError as e:
        click.echo(f"Error: HTTP {e.response.status_code}", err=True)
        # NOTE(review): .json() is unguarded here — a non-JSON error body
        # would raise instead of printing details; confirm and guard like
        # the keys commands do.
        error_body = e.response.json()
        click.echo(f"Details: {error_body.get('detail', 'Unknown error')}", err=True)
        raise click.Abort()
    except Exception as e:
        click.echo(f"Error: {str(e)}", err=True)
        raise click.Abort()

View File

@@ -0,0 +1,91 @@
import click
import rich
from ... import UsersManagementClient
@click.group()
def users():
    """Manage users on your LiteLLM proxy server"""
    # Group container only; subcommands register via @users.command().
    pass
@users.command("list")
@click.pass_context
def list_users(ctx: click.Context):
    """List all users"""
    client = UsersManagementClient(
        base_url=ctx.obj["base_url"], api_key=ctx.obj["api_key"]
    )
    response = client.list_users()
    # The endpoint may return either a bare list or a {"users": [...]} wrapper.
    if isinstance(response, dict) and "users" in response:
        user_records = response["users"]
    else:
        user_records = response
    if not user_records:
        click.echo("No users found.")
        return
    from rich.console import Console
    from rich.table import Table

    table = Table(title="Users")
    for header, color in (
        ("User ID", "cyan"),
        ("Email", "green"),
        ("Role", "magenta"),
        ("Teams", "yellow"),
    ):
        table.add_column(header, style=color)
    for record in user_records:
        table.add_row(
            str(record.get("user_id", "")),
            str(record.get("user_email", "")),
            str(record.get("user_role", "")),
            ", ".join(record.get("teams", []) or []),
        )
    Console().print(table)
@users.command("get")
@click.option("--id", "user_id", help="ID of the user to retrieve")
@click.pass_context
def get_user(ctx: click.Context, user_id: str):
    """Get information about a specific user"""
    # --id is declared optional by click; validate explicitly so a missing id
    # yields a usage error instead of an API call with user_id=None (the
    # sibling `models get` command validates its selectors the same way).
    if not user_id:
        raise click.UsageError("--id must be provided")
    client = UsersManagementClient(
        base_url=ctx.obj["base_url"], api_key=ctx.obj["api_key"]
    )
    result = client.get_user(user_id=user_id)
    rich.print_json(data=result)
@users.command("create")
@click.option("--email", required=True, help="User email")
@click.option("--role", default="internal_user", help="User role")
@click.option("--alias", default=None, help="User alias")
@click.option("--team", multiple=True, help="Team IDs (can specify multiple)")
@click.option("--max-budget", type=float, default=None, help="Max budget for user")
@click.pass_context
def create_user(ctx: click.Context, email, role, alias, team, max_budget):
    """Create a new user"""
    client = UsersManagementClient(
        base_url=ctx.obj["base_url"], api_key=ctx.obj["api_key"]
    )
    # Build the payload, adding optional fields only when they were supplied.
    payload = {"user_email": email, "user_role": role}
    if alias:
        payload["user_alias"] = alias
    if team:
        payload["teams"] = list(team)
    if max_budget is not None:
        payload["max_budget"] = max_budget
    rich.print_json(data=client.create_user(payload))
@users.command("delete")
@click.argument("user_ids", nargs=-1)
@click.pass_context
def delete_user(ctx: click.Context, user_ids):
    """Delete one or more users by user_id"""
    client = UsersManagementClient(
        base_url=ctx.obj["base_url"], api_key=ctx.obj["api_key"]
    )
    # user_ids arrives as a tuple from click; the API expects a list.
    response = client.delete_user(list(user_ids))
    rich.print_json(data=response)

View File

@@ -0,0 +1,207 @@
# stdlib imports
import os
import sys
from typing import TYPE_CHECKING
# third party imports
import click
from litellm._logging import verbose_logger
if TYPE_CHECKING:
pass
def styled_prompt():
    """Draw a blue box and read one line of user input from inside it.

    Renders the full box first (to reserve terminal space), then uses ANSI
    cursor-movement escapes to place the cursor on the box's middle line for
    input. Returns the stripped input string; re-raises
    KeyboardInterrupt/EOFError after restoring the cursor position.
    """
    # Get terminal height to ensure we have enough space
    try:
        terminal_height = os.get_terminal_size().lines
        # Ensure we have at least 5 lines of space (for the box + some buffer)
        if terminal_height < 10:
            # If terminal is too small, just add some newlines to push content up
            click.echo("\n" * 3)
    except Exception as e:
        # Fallback if we can't get terminal size
        verbose_logger.debug(f"Error getting terminal size: {e}")
        click.echo("\n" * 3)
    # Unicode box drawing characters
    top_left = "╭"
    top_right = "╮"
    bottom_left = "╰"
    bottom_right = "╯"
    horizontal = "─"
    vertical = "│"
    # Create the box with increased width
    width = 80
    top_line = top_left + horizontal * (width - 2) + top_right
    bottom_line = bottom_left + horizontal * (width - 2) + bottom_right
    # Create styled elements
    left_border = click.style(vertical, fg="blue", bold=True)
    right_border = click.style(vertical, fg="blue", bold=True)
    prompt_text = click.style("> ", fg="cyan", bold=True)
    # Display the complete box structure first to reserve space
    click.echo(click.style(top_line, fg="blue", bold=True))
    # Create empty space in the box for input
    empty_space = " " * (width - 4)
    click.echo(f"{left_border} {empty_space} {right_border}")
    # Display bottom border to complete the box
    click.echo(click.style(bottom_line, fg="blue", bold=True))
    # Now move cursor up to the input line and get input
    click.echo("\033[2A", nl=False)  # Move cursor up 2 lines
    click.echo(
        f"\r{left_border} {prompt_text}", nl=False
    )  # Position at start of input line
    try:
        # Get user input
        user_input = input().strip()
        # Move cursor down to after the box
        click.echo("\033[1B")  # Move cursor down 1 line
        click.echo("")  # Add some space after
    except (KeyboardInterrupt, EOFError):
        # Move cursor down and add space
        click.echo("\033[1B")
        click.echo("")
        raise
    return user_input
def show_commands():
    """Print the interactive shell's command menu."""
    menu = (
        ("login", "Authenticate with the LiteLLM proxy server"),
        ("logout", "Clear stored authentication"),
        ("whoami", "Show current authentication status"),
        ("models", "Manage and view model configurations"),
        ("credentials", "Manage API credentials"),
        ("chat", "Interactive streaming chat with models"),
        ("http", "Make HTTP requests to the proxy"),
        ("keys", "Manage API keys"),
        ("teams", "Manage teams and team assignments"),
        ("users", "Manage users"),
        ("version", "Show version information"),
        ("help", "Show this help message"),
        ("quit", "Exit the interactive session"),
    )
    click.echo("Available commands:")
    for name, description in menu:
        click.echo(f"  {name:<20} {description}")
    click.echo()
def setup_shell(ctx: click.Context):
    """Set up the interactive shell with banner and initial info."""
    # Imported lazily; the banner module lives in the proxy package.
    from litellm.proxy.common_utils.banner import show_banner

    show_banner()
    # Show server connection info
    base_url = ctx.obj.get("base_url")
    click.secho(f"Connected to LiteLLM server: {base_url}\n", fg="green")
    show_commands()
def handle_special_commands(user_input: str) -> bool:
    """Handle shell builtins (exit/quit, help, clear); True when handled."""
    command = user_input.lower()
    if command in ("exit", "quit"):
        click.echo("Goodbye!")
        return True
    if command == "help":
        click.echo("")  # blank line before the menu
        show_commands()
        return True
    if command == "clear":
        click.clear()
        from litellm.proxy.common_utils.banner import show_banner

        show_banner()
        show_commands()
        return True
    return False
def execute_command(user_input: str, ctx: click.Context):
    """Parse a shell line and dispatch it to the matching CLI command.

    Unknown commands print a hint; command errors are reported without
    killing the interactive shell.
    """
    # Parse command and arguments
    parts = user_input.split()
    command = parts[0]
    args = parts[1:]
    # Import cli here to avoid circular import
    from . import main

    cli = main.cli
    # Check if command exists
    if command not in cli.commands:
        click.echo(f"Unknown command: {command}")
        click.echo("Type 'help' to see available commands.")
        return
    saved_argv = sys.argv
    try:
        # Mimic a real invocation for any code that inspects sys.argv, but
        # restore it afterwards so the mutation doesn't leak into later
        # commands in the same shell session (the original never restored it).
        sys.argv = ["litellm-proxy", command] + args
        cmd = cli.commands[command]
        # Run within the parent context so shared state is inherited;
        # standalone_mode=False keeps click from calling sys.exit() itself.
        with ctx.scope():
            cmd.main(args, parent=ctx, standalone_mode=False)
    except click.ClickException as e:
        e.show()
    except click.Abort:
        click.echo("Command aborted.")
    except SystemExit:
        # Prevent the interactive shell from exiting on command errors
        pass
    except Exception as e:
        click.echo(f"Error executing command: {e}")
    finally:
        sys.argv = saved_argv
def interactive_shell(ctx: click.Context):
    """Run the interactive REPL loop until the user exits."""
    setup_shell(ctx)
    while True:
        try:
            # Extra spacing so the styled prompt sits clear of previous output.
            click.echo("\n")
            line = styled_prompt()
            if not line:
                continue
            handled = handle_special_commands(line)
            if handled:
                # exit/quit end the session; other special commands just loop.
                if line.lower() in ("exit", "quit"):
                    break
                continue
            execute_command(line, ctx)
        except (KeyboardInterrupt, EOFError):
            click.echo("\nGoodbye!")
            break
        except Exception as err:
            click.echo(f"Error: {err}")

View File

@@ -0,0 +1,115 @@
# stdlib imports
from typing import Optional
# third party imports
import click
from litellm._version import version as litellm_version
from litellm.proxy.client.health import HealthManagementClient
from .commands.auth import get_stored_api_key, login, logout, whoami
from .commands.chat import chat
from .commands.credentials import credentials
from .commands.http import http
from .commands.keys import keys
# local imports
from .commands.models import models
from .commands.teams import teams
from .commands.users import users
from .interface import interactive_shell
def print_version(base_url: str, api_key: Optional[str]):
    """Echo the CLI version and, when a server URL is known, the server version."""
    click.echo(f"LiteLLM Proxy CLI Version: {litellm_version}")
    if not base_url:
        return
    click.echo(f"LiteLLM Proxy Server URL: {base_url}")
    try:
        client = HealthManagementClient(base_url=base_url, api_key=api_key)
        server_version = client.get_server_version()
        message = (
            f"LiteLLM Proxy Server Version: {server_version}"
            if server_version
            else "LiteLLM Proxy Server Version: (unavailable)"
        )
        click.echo(message)
    except Exception as e:
        click.echo(f"Could not retrieve server version: {e}")
# Top-level command group. `invoke_without_command=True` lets running the bare
# executable (no subcommand) fall through to the interactive shell below.
@click.group(invoke_without_command=True)
@click.option(
    "--version",
    "-v",
    is_flag=True,
    is_eager=True,
    expose_value=False,
    help="Show the LiteLLM Proxy CLI and server version and exit.",
    # Eager callback: prints version info and exits before normal processing.
    # NOTE(review): because this option is eager, --base-url may not have been
    # parsed yet when the callback fires — hence the hard-coded localhost
    # fallback below; confirm this is the intended behavior.
    callback=lambda ctx, param, value: (
        print_version(
            ctx.params.get("base_url") or "http://localhost:4000",
            ctx.params.get("api_key"),
        )
        or ctx.exit()
    )
    if value and not ctx.resilient_parsing
    else None,
)
@click.option(
    "--base-url",
    envvar="LITELLM_PROXY_URL",
    show_envvar=True,
    default="http://localhost:4000",
    help="Base URL of the LiteLLM proxy server",
)
@click.option(
    "--api-key",
    envvar="LITELLM_PROXY_API_KEY",
    show_envvar=True,
    help="API key for authentication",
)
@click.pass_context
def cli(ctx: click.Context, base_url: str, api_key: Optional[str]) -> None:
    """LiteLLM Proxy CLI - Manage your LiteLLM proxy server"""
    ctx.ensure_object(dict)
    # If no API key provided via flag or environment variable, try to load from saved token
    if api_key is None:
        api_key = get_stored_api_key()
    # Stash connection settings where every subcommand can reach them.
    ctx.obj["base_url"] = base_url
    ctx.obj["api_key"] = api_key
    # If no subcommand was invoked, start interactive mode
    if ctx.invoked_subcommand is None:
        interactive_shell(ctx)
@cli.command()
@click.pass_context
def version(ctx: click.Context):
    """Show the LiteLLM Proxy CLI and server version."""
    # Delegate to print_version using the connection settings the group
    # callback stored on ctx.obj.
    print_version(ctx.obj.get("base_url"), ctx.obj.get("api_key"))
# Register all subcommands on the top-level CLI group. The authentication
# commands (login/logout/whoami) are exposed as top-level commands; the rest
# are command groups (models, credentials, chat, http, keys, teams, users).
for _command in (
    login,
    logout,
    whoami,
    models,
    credentials,
    chat,
    http,
    keys,
    teams,
    users,
):
    cli.add_command(_command)

if __name__ == "__main__":
    cli()

View File

@@ -0,0 +1,50 @@
from typing import Optional
from litellm.litellm_core_utils.cli_token_utils import get_litellm_gateway_api_key
from .chat import ChatClient
from .credentials import CredentialsManagementClient
from .http_client import HTTPClient
from .keys import KeysManagementClient
from .model_groups import ModelGroupsManagementClient
from .models import ModelsManagementClient
from .teams import TeamsManagementClient
class Client:
    """Main client for interacting with the LiteLLM proxy API."""

    def __init__(
        self,
        base_url: str,
        api_key: Optional[str] = None,
        timeout: int = 30,
    ):
        """
        Initialize the LiteLLM proxy client.

        Args:
            base_url (str): The base URL of the LiteLLM proxy server (e.g., "http://localhost:4000")
            api_key (Optional[str]): API key for authentication. If provided, it will be sent as a Bearer token.
                A gateway key stored by the CLI login flow (if any) takes precedence.
            timeout: Request timeout in seconds (default: 30)
        """
        self._base_url = base_url.rstrip("/")  # Remove trailing slash if present
        # Prefer a stored gateway key (from the CLI login flow) over the explicit argument.
        self._api_key = get_litellm_gateway_api_key() or api_key

        # Initialize resource clients.
        # BUGFIX: the generic HTTP client previously received the raw
        # base_url/api_key, so the normalized URL and the resolved key were not
        # applied to it — now every sub-client gets the same values.
        self.http = HTTPClient(
            base_url=self._base_url, api_key=self._api_key, timeout=timeout
        )
        self.models = ModelsManagementClient(
            base_url=self._base_url, api_key=self._api_key
        )
        self.model_groups = ModelGroupsManagementClient(
            base_url=self._base_url, api_key=self._api_key
        )
        self.chat = ChatClient(base_url=self._base_url, api_key=self._api_key)
        self.keys = KeysManagementClient(base_url=self._base_url, api_key=self._api_key)
        self.credentials = CredentialsManagementClient(
            base_url=self._base_url, api_key=self._api_key
        )
        self.teams = TeamsManagementClient(
            base_url=self._base_url, api_key=self._api_key
        )

View File

@@ -0,0 +1,185 @@
import requests
from typing import Dict, Any, Optional, Union
from .exceptions import UnauthorizedError
class CredentialsManagementClient:
    """Client for the /credentials management endpoints of the LiteLLM proxy."""

    def __init__(self, base_url: str, api_key: Optional[str] = None):
        """
        Initialize the CredentialsManagementClient.

        Args:
            base_url (str): The base URL of the LiteLLM proxy server (e.g., "http://localhost:8000")
            api_key (Optional[str]): API key for authentication. If provided, it will be sent as a Bearer token.
        """
        self._base_url = base_url.rstrip("/")  # Remove trailing slash if present
        self._api_key = api_key

    def _get_headers(self) -> Dict[str, str]:
        """
        Get the headers for API requests, including authorization if api_key is set.

        Returns:
            Dict[str, str]: Headers to use for API requests
        """
        headers = {"Content-Type": "application/json"}
        if self._api_key:
            headers["Authorization"] = f"Bearer {self._api_key}"
        return headers

    def _send(self, request: requests.Request) -> Dict[str, Any]:
        """
        Send a prepared request and return the parsed JSON body.

        Centralizes the send/raise/translate logic shared by every endpoint
        method in this class.

        Raises:
            UnauthorizedError: If the server responds with a 401 status code
            requests.exceptions.RequestException: For any other HTTP failure
        """
        session = requests.Session()
        try:
            response = session.send(request.prepare())
            response.raise_for_status()
            return response.json()
        except requests.exceptions.HTTPError as e:
            if e.response.status_code == 401:
                raise UnauthorizedError(e)
            raise

    def list(
        self,
        return_request: bool = False,
    ) -> Union[Dict[str, Any], requests.Request]:
        """
        List all credentials.

        Args:
            return_request (bool): If True, returns the request object instead of executing it

        Returns:
            Union[Dict[str, Any], requests.Request]: Either the response from the server or
            a request object if return_request is True

        Raises:
            UnauthorizedError: If the request fails with a 401 status code
            requests.exceptions.RequestException: If the request fails with any other error
        """
        url = f"{self._base_url}/credentials"
        request = requests.Request("GET", url, headers=self._get_headers())
        if return_request:
            return request
        return self._send(request)

    def create(
        self,
        credential_name: str,
        credential_info: Dict[str, Any],
        credential_values: Dict[str, Any],
        return_request: bool = False,
    ) -> Union[Dict[str, Any], requests.Request]:
        """
        Create a new credential.

        Args:
            credential_name (str): Name of the credential
            credential_info (Dict[str, Any]): Additional information about the credential
            credential_values (Dict[str, Any]): Values for the credential
            return_request (bool): If True, returns the request object instead of executing it

        Returns:
            Union[Dict[str, Any], requests.Request]: Either the response from the server or
            a request object if return_request is True

        Raises:
            UnauthorizedError: If the request fails with a 401 status code
            requests.exceptions.RequestException: If the request fails with any other error
        """
        url = f"{self._base_url}/credentials"
        data = {
            "credential_name": credential_name,
            "credential_info": credential_info,
            "credential_values": credential_values,
        }
        request = requests.Request("POST", url, headers=self._get_headers(), json=data)
        if return_request:
            return request
        return self._send(request)

    def delete(
        self,
        credential_name: str,
        return_request: bool = False,
    ) -> Union[Dict[str, Any], requests.Request]:
        """
        Delete a credential by name.

        Args:
            credential_name (str): Name of the credential to delete
            return_request (bool): If True, returns the request object instead of executing it

        Returns:
            Union[Dict[str, Any], requests.Request]: Either the response from the server or
            a request object if return_request is True

        Raises:
            UnauthorizedError: If the request fails with a 401 status code
            requests.exceptions.RequestException: If the request fails with any other error
        """
        url = f"{self._base_url}/credentials/{credential_name}"
        request = requests.Request("DELETE", url, headers=self._get_headers())
        if return_request:
            return request
        return self._send(request)

    def get(
        self,
        credential_name: str,
        return_request: bool = False,
    ) -> Union[Dict[str, Any], requests.Request]:
        """
        Get a credential by name.

        Args:
            credential_name (str): Name of the credential to retrieve
            return_request (bool): If True, returns the request object instead of executing it

        Returns:
            Union[Dict[str, Any], requests.Request]: Either the response from the server or
            a request object if return_request is True

        Raises:
            UnauthorizedError: If the request fails with a 401 status code
            requests.exceptions.RequestException: If the request fails with any other error
        """
        url = f"{self._base_url}/credentials/by_name/{credential_name}"
        request = requests.Request("GET", url, headers=self._get_headers())
        if return_request:
            return request
        return self._send(request)

View File

@@ -0,0 +1,19 @@
from typing import Union
import requests
class UnauthorizedError(Exception):
    """Raised when the API responds with HTTP 401 Unauthorized."""

    def __init__(self, orig_exception: Union[requests.exceptions.HTTPError, str]):
        """Wrap the underlying HTTP error (or message) for later inspection."""
        # Keep the original exception so callers can reach the raw response.
        self.orig_exception = orig_exception
        super().__init__(str(orig_exception))
class NotFoundError(Exception):
    """Raised when the API returns HTTP 404 or otherwise reports a missing resource."""

    def __init__(self, orig_exception: Union[requests.exceptions.HTTPError, str]):
        """Wrap the underlying HTTP error (or message) for later inspection."""
        # Keep the original exception so callers can reach the raw response.
        self.orig_exception = orig_exception
        super().__init__(str(orig_exception))

View File

@@ -0,0 +1,42 @@
from typing import Optional, Dict, Any
from .http_client import HTTPClient
class HealthManagementClient:
    """
    Client for the health endpoints of the LiteLLM proxy server.
    """

    def __init__(self, base_url: str, api_key: Optional[str] = None, timeout: int = 30):
        """
        Initialize the HealthManagementClient.

        Args:
            base_url (str): The base URL of the LiteLLM proxy server (e.g., "http://localhost:4000")
            api_key (Optional[str]): API key for authentication, sent as a Bearer token when set.
            timeout (int): Request timeout in seconds (default: 30)
        """
        # All health calls go through the generic HTTP client.
        self._http = HTTPClient(base_url=base_url, api_key=api_key, timeout=timeout)

    def get_readiness(self) -> Dict[str, Any]:
        """
        Fetch the readiness payload from /health/readiness.

        Returns:
            Dict[str, Any]: Readiness status and details reported by the server.

        Raises:
            requests.exceptions.RequestException: If the request fails
            ValueError: If the response is not valid JSON
        """
        return self._http.request("GET", "/health/readiness")

    def get_server_version(self) -> Optional[str]:
        """
        Read the server's LiteLLM version out of the readiness payload.

        Returns:
            Optional[str]: The server version, or None when the field is absent.
        """
        payload = self.get_readiness()
        return payload.get("litellm_version")

View File

@@ -0,0 +1,95 @@
"""HTTP client for making requests to the LiteLLM proxy server."""
from typing import Any, Dict, Optional, Union
import requests
class HTTPClient:
    """Thin wrapper around ``requests`` for talking to the LiteLLM proxy server."""

    def __init__(self, base_url: str, api_key: Optional[str] = None, timeout: int = 30):
        """Create the HTTP client.

        Args:
            base_url: Base URL of the LiteLLM proxy server
            api_key: Optional API key for authentication; sent as a Bearer token when set
            timeout: Request timeout in seconds (default: 30)
        """
        self._base_url = base_url.rstrip("/")
        self._api_key = api_key
        self._timeout = timeout

    def request(
        self,
        method: str,
        uri: str,
        *,
        data: Optional[Union[Dict[str, Any], list, bytes]] = None,
        json: Optional[Union[Dict[str, Any], list]] = None,
        headers: Optional[Dict[str, str]] = None,
        **kwargs: Any,
    ) -> Any:
        """Issue a generic request against the proxy and return the parsed JSON body.

        Use this for endpoints that have no dedicated client or method.

        Args:
            method: HTTP method (GET, POST, PUT, DELETE, etc.)
            uri: URI path appended to the base URL (e.g., "/credentials")
            data: (optional) Body payload as a dict, list of tuples, bytes, or
                file-like object.
            json: (optional) JSON-serializable payload for the request body.
            headers: (optional) Extra HTTP headers. The Authorization header is
                always (re)set from the stored API key when one is configured.
            **kwargs: Forwarded verbatim to ``requests.request``.

        Returns:
            Parsed JSON response from the server

        Raises:
            requests.exceptions.RequestException: If the request fails
            ValueError: If the response is not valid JSON

        Example:
            >>> client.http.request("POST", "/health/test_connection", json={
                    "litellm_params": {"model": "gpt-4", "custom_llm_provider": "azure_ai"},
                    "mode": "chat",
                })
        """
        url = f"{self._base_url}/{uri.lstrip('/')}"

        # Copy caller headers so the original dict is never mutated.
        request_headers: Dict[str, str] = dict(headers) if headers else {}
        if self._api_key:
            request_headers["Authorization"] = f"Bearer {self._api_key}"

        response = requests.request(
            method=method,
            url=url,
            data=data,
            json=json,
            headers=request_headers,
            timeout=self._timeout,
            **kwargs,
        )
        response.raise_for_status()
        return response.json()

View File

@@ -0,0 +1,319 @@
from typing import Any, Dict, List, Optional, Union
import requests
from .exceptions import UnauthorizedError
class KeysManagementClient:
    """Client for the /key management endpoints of the LiteLLM proxy."""

    def __init__(self, base_url: str, api_key: Optional[str] = None):
        """
        Initialize the KeysManagementClient.

        Args:
            base_url (str): The base URL of the LiteLLM proxy server (e.g., "http://localhost:8000")
            api_key (Optional[str]): API key for authentication. If provided, it will be sent as a Bearer token.
        """
        self._base_url = base_url.rstrip("/")  # Remove trailing slash if present
        self._api_key = api_key

    def _get_headers(self) -> Dict[str, str]:
        """
        Get the headers for API requests, including authorization if api_key is set.

        Returns:
            Dict[str, str]: Headers to use for API requests
        """
        headers = {"Content-Type": "application/json"}
        if self._api_key:
            headers["Authorization"] = f"Bearer {self._api_key}"
        return headers

    def _send(self, request: requests.Request) -> Dict[str, Any]:
        """
        Send a prepared request and return the parsed JSON body.

        Centralizes the send/raise/translate logic shared by every endpoint
        method in this class.

        Raises:
            UnauthorizedError: If the server responds with a 401 status code
            requests.exceptions.RequestException: For any other HTTP failure
        """
        session = requests.Session()
        try:
            response = session.send(request.prepare())
            response.raise_for_status()
            return response.json()
        except requests.exceptions.HTTPError as e:
            if e.response.status_code == 401:
                raise UnauthorizedError(e)
            raise

    def list(
        self,
        page: Optional[int] = None,
        size: Optional[int] = None,
        user_id: Optional[str] = None,
        team_id: Optional[str] = None,
        organization_id: Optional[str] = None,
        key_hash: Optional[str] = None,
        key_alias: Optional[str] = None,
        return_full_object: Optional[bool] = None,
        include_team_keys: Optional[bool] = None,
        return_request: bool = False,
    ) -> Union[Dict[str, Any], requests.Request]:
        """
        List all API keys with optional filtering and pagination.

        Args:
            page (Optional[int]): Page number for pagination
            size (Optional[int]): Number of items per page
            user_id (Optional[str]): Filter keys by user ID
            team_id (Optional[str]): Filter keys by team ID
            organization_id (Optional[str]): Filter keys by organization ID
            key_hash (Optional[str]): Filter by specific key hash
            key_alias (Optional[str]): Filter by key alias
            return_full_object (Optional[bool]): Whether to return the full key object
            include_team_keys (Optional[bool]): Whether to include team keys in the response
            return_request (bool): If True, returns the request object instead of executing it

        Returns:
            Union[Dict[str, Any], requests.Request]: Either the response from the server or
            a request object if return_request is True. The response contains a list
            of API keys with their configurations.

        Raises:
            UnauthorizedError: If the request fails with a 401 status code
            requests.exceptions.RequestException: If the request fails with any other error
        """
        url = f"{self._base_url}/key/list"
        # Only include filters the caller actually provided.
        candidates = {
            "page": page,
            "size": size,
            "user_id": user_id,
            "team_id": team_id,
            "organization_id": organization_id,
            "key_hash": key_hash,
            "key_alias": key_alias,
        }
        params: Dict[str, Any] = {k: v for k, v in candidates.items() if v is not None}
        # Booleans are serialized as lowercase strings ("true"/"false").
        if return_full_object is not None:
            params["return_full_object"] = str(return_full_object).lower()
        if include_team_keys is not None:
            params["include_team_keys"] = str(include_team_keys).lower()
        request = requests.Request(
            "GET", url, headers=self._get_headers(), params=params
        )
        if return_request:
            return request
        return self._send(request)

    def generate(
        self,
        models: Optional[List[str]] = None,
        aliases: Optional[Dict[str, str]] = None,
        spend: Optional[float] = None,
        duration: Optional[str] = None,
        key_alias: Optional[str] = None,
        team_id: Optional[str] = None,
        user_id: Optional[str] = None,
        budget_id: Optional[str] = None,
        config: Optional[Dict[str, Any]] = None,
        return_request: bool = False,
    ) -> Union[Dict[str, Any], requests.Request]:
        """
        Generate an API key based on the provided data.

        Docs: https://docs.litellm.ai/docs/proxy/virtual_keys

        Args:
            models (Optional[List[str]]): List of allowed models for this key
            aliases (Optional[Dict[str, str]]): Model alias mappings
            spend (Optional[float]): Maximum spend limit for this key
            duration (Optional[str]): Duration for which the key is valid (e.g. "24h", "7d")
            key_alias (Optional[str]): Alias/name for the key for easier identification
            team_id (Optional[str]): Team ID to associate the key with
            user_id (Optional[str]): User ID to associate the key with
            budget_id (Optional[str]): Budget ID to associate the key with
            config (Optional[Dict[str, Any]]): Additional configuration parameters
            return_request (bool): If True, returns the request object instead of executing it

        Returns:
            Union[Dict[str, Any], requests.Request]: Either the response from the server or
            a request object if return_request is True

        Raises:
            UnauthorizedError: If the request fails with a 401 status code
            requests.exceptions.RequestException: If the request fails with any other error
        """
        url = f"{self._base_url}/key/generate"
        # Only include fields the caller actually provided.
        candidates = {
            "models": models,
            "aliases": aliases,
            "spend": spend,
            "duration": duration,
            "key_alias": key_alias,
            "team_id": team_id,
            "user_id": user_id,
            "budget_id": budget_id,
            "config": config,
        }
        data: Dict[str, Any] = {k: v for k, v in candidates.items() if v is not None}
        request = requests.Request("POST", url, headers=self._get_headers(), json=data)
        if return_request:
            return request
        return self._send(request)

    def delete(
        self,
        keys: Optional[List[str]] = None,
        key_aliases: Optional[List[str]] = None,
        return_request: bool = False,
    ) -> Union[Dict[str, Any], requests.Request]:
        """
        Delete existing keys.

        Args:
            keys (Optional[List[str]]): List of API keys to delete
            key_aliases (Optional[List[str]]): List of key aliases to delete
            return_request (bool): If True, returns the request object instead of executing it

        Returns:
            Union[Dict[str, Any], requests.Request]: Either the response from the server or
            a request object if return_request is True

        Raises:
            UnauthorizedError: If the request fails with a 401 status code
            requests.exceptions.RequestException: If the request fails with any other error
        """
        url = f"{self._base_url}/key/delete"
        # Both fields are always sent (null when absent), matching the
        # server-side contract used by this endpoint.
        data = {
            "keys": keys,
            "key_aliases": key_aliases,
        }
        request = requests.Request("POST", url, headers=self._get_headers(), json=data)
        if return_request:
            return request
        return self._send(request)

    def update(
        self,
        key: str,
        models: Optional[List[str]] = None,
        aliases: Optional[Dict[str, str]] = None,
        spend: Optional[float] = None,
        duration: Optional[str] = None,
        key_alias: Optional[str] = None,
        team_id: Optional[str] = None,
        user_id: Optional[str] = None,
        return_request: bool = False,
    ) -> Union[Dict[str, Any], requests.Request]:
        """
        Update an existing API key's parameters.

        Args:
            key (str): The API key (or key hash) to update
            models (Optional[List[str]]): New list of allowed models for this key
            aliases (Optional[Dict[str, str]]): New model alias mappings
            spend (Optional[float]): New maximum spend limit for this key
            duration (Optional[str]): New validity duration (e.g. "24h", "7d")
            key_alias (Optional[str]): New alias/name for the key
            team_id (Optional[str]): New team ID to associate the key with
            user_id (Optional[str]): New user ID to associate the key with
            return_request (bool): If True, returns the request object instead of executing it

        Returns:
            Union[Dict[str, Any], requests.Request]: Either the response from the server or
            a request object if return_request is True

        Raises:
            UnauthorizedError: If the request fails with a 401 status code
            requests.exceptions.RequestException: If the request fails with any other error
        """
        url = f"{self._base_url}/key/update"
        data: Dict[str, Any] = {"key": key}
        candidates = {
            "models": models,
            "aliases": aliases,
            "spend": spend,
            "duration": duration,
            "key_alias": key_alias,
            "team_id": team_id,
            "user_id": user_id,
        }
        data.update({k: v for k, v in candidates.items() if v is not None})
        request = requests.Request("POST", url, headers=self._get_headers(), json=data)
        if return_request:
            return request
        # Failures now raise UnauthorizedError / HTTPError like every other
        # method here (previously this wrapped everything in a generic Exception).
        return self._send(request)

    def info(
        self, key: str, return_request: bool = False
    ) -> Union[Dict[str, Any], requests.Request]:
        """
        Get information about an API key.

        Args:
            key (str): The key hash to get information about
            return_request (bool): If True, returns the request object instead of executing it

        Returns:
            Union[Dict[str, Any], requests.Request]: Either the response from the server or a request object if return_request is True

        Raises:
            UnauthorizedError: If the request fails with a 401 status code
            requests.exceptions.RequestException: If the request fails with any other error
        """
        url = f"{self._base_url}/key/info"
        # Pass the key via params so it is URL-encoded correctly.
        request = requests.Request(
            "GET", url, headers=self._get_headers(), params={"key": key}
        )
        if return_request:
            return request
        return self._send(request)

View File

@@ -0,0 +1,62 @@
import requests
from typing import List, Dict, Any, Optional, Union
from .exceptions import UnauthorizedError
class ModelGroupsManagementClient:
    """Client for the /model_group endpoints of the LiteLLM proxy."""

    def __init__(self, base_url: str, api_key: Optional[str] = None):
        """
        Initialize the ModelGroupsManagementClient.

        Args:
            base_url (str): The base URL of the LiteLLM proxy server (e.g., "http://localhost:8000")
            api_key (Optional[str]): API key for authentication, sent as a Bearer token when set.
        """
        # Normalize the base URL so endpoint paths can always be appended with "/".
        self._base_url = base_url.rstrip("/")
        self._api_key = api_key

    def _get_headers(self) -> Dict[str, str]:
        """
        Build request headers, attaching the Bearer token when an API key is set.

        Returns:
            Dict[str, str]: Headers to use for API requests
        """
        if not self._api_key:
            return {}
        return {"Authorization": f"Bearer {self._api_key}"}

    def info(
        self, return_request: bool = False
    ) -> Union[List[Dict[str, Any]], requests.Request]:
        """
        Fetch detailed information about all model groups (GET /model_group/info).

        Args:
            return_request (bool): If True, returns the request object instead of executing it

        Returns:
            Union[List[Dict[str, Any]], requests.Request]: A list of model group
            information dictionaries, or the request object if return_request is True

        Raises:
            UnauthorizedError: If the request fails with a 401 status code
            requests.exceptions.RequestException: If the request fails with any other error
        """
        request = requests.Request(
            "GET", f"{self._base_url}/model_group/info", headers=self._get_headers()
        )
        if return_request:
            return request
        try:
            response = requests.Session().send(request.prepare())
            response.raise_for_status()
        except requests.exceptions.HTTPError as e:
            if e.response.status_code == 401:
                raise UnauthorizedError(e)
            raise
        # The proxy wraps the list in a {"data": [...]} envelope.
        return response.json()["data"]

View File

@@ -0,0 +1,298 @@
import requests
from typing import List, Dict, Any, Optional, Union
from .exceptions import UnauthorizedError, NotFoundError
class ModelsManagementClient:
def __init__(self, base_url: str, api_key: Optional[str] = None):
"""
Initialize the ModelsManagementClient.
Args:
base_url (str): The base URL of the LiteLLM proxy server (e.g., "http://localhost:8000")
api_key (Optional[str]): API key for authentication. If provided, it will be sent as a Bearer token.
"""
self._base_url = base_url.rstrip("/") # Remove trailing slash if present
self._api_key = api_key
def _get_headers(self) -> Dict[str, str]:
"""
Get the headers for API requests, including authorization if api_key is set.
Returns:
Dict[str, str]: Headers to use for API requests
"""
headers = {}
if self._api_key:
headers["Authorization"] = f"Bearer {self._api_key}"
return headers
def list(
self, return_request: bool = False
) -> Union[List[Dict[str, Any]], requests.Request]:
"""
Get the list of models supported by the server.
Args:
return_request (bool): If True, returns the prepared request object instead of executing it.
Useful for inspection or modification before sending.
Returns:
Union[List[Dict[str, Any]], requests.Request]: Either a list of model information dictionaries
or a prepared request object if return_request is True.
Raises:
UnauthorizedError: If the request fails with a 401 status code
requests.exceptions.RequestException: If the request fails with any other error
"""
url = f"{self._base_url}/models"
request = requests.Request("GET", url, headers=self._get_headers())
if return_request:
return request
# Prepare and send the request
session = requests.Session()
try:
response = session.send(request.prepare())
response.raise_for_status()
return response.json()["data"]
except requests.exceptions.HTTPError as e:
if e.response.status_code == 401:
raise UnauthorizedError(e)
raise
def new(
self,
model_name: str,
model_params: Dict[str, Any],
model_info: Optional[Dict[str, Any]] = None,
return_request: bool = False,
) -> Union[Dict[str, Any], requests.Request]:
"""
Add a new model to the proxy.
Args:
model_name (str): Name of the model to add
model_params (Dict[str, Any]): Parameters for the model (e.g., model type, api_base, api_key)
model_info (Optional[Dict[str, Any]]): Additional information about the model
return_request (bool): If True, returns the prepared request object instead of executing it
Returns:
Union[Dict[str, Any], requests.Request]: Either the response from the server or
a prepared request object if return_request is True
Raises:
UnauthorizedError: If the request fails with a 401 status code
requests.exceptions.RequestException: If the request fails with any other error
"""
url = f"{self._base_url}/model/new"
data = {
"model_name": model_name,
"litellm_params": model_params,
}
if model_info:
data["model_info"] = model_info
request = requests.Request("POST", url, headers=self._get_headers(), json=data)
if return_request:
return request
# Prepare and send the request
session = requests.Session()
try:
response = session.send(request.prepare())
response.raise_for_status()
return response.json()
except requests.exceptions.HTTPError as e:
if e.response.status_code == 401:
raise UnauthorizedError(e)
raise
def delete(
self, model_id: str, return_request: bool = False
) -> Union[Dict[str, Any], requests.Request]:
"""
Delete a model from the proxy.
Args:
model_id (str): ID of the model to delete (e.g., "2f23364f-4579-4d79-a43a-2d48dd551c2e")
return_request (bool): If True, returns the prepared request object instead of executing it
Returns:
Union[Dict[str, Any], requests.Request]: Either the response from the server or
a prepared request object if return_request is True
Raises:
UnauthorizedError: If the request fails with a 401 status code
NotFoundError: If the request fails with a 404 status code or indicates the model was not found
requests.exceptions.RequestException: If the request fails with any other error
"""
url = f"{self._base_url}/model/delete"
data = {"id": model_id}
request = requests.Request("POST", url, headers=self._get_headers(), json=data)
if return_request:
return request
# Prepare and send the request
session = requests.Session()
try:
response = session.send(request.prepare())
response.raise_for_status()
return response.json()
except requests.exceptions.HTTPError as e:
if e.response.status_code == 401:
raise UnauthorizedError(e)
if e.response.status_code == 404 or "not found" in e.response.text.lower():
raise NotFoundError(e)
raise
def get(
self,
model_id: Optional[str] = None,
model_name: Optional[str] = None,
return_request: bool = False,
) -> Union[Dict[str, Any], requests.Request]:
"""
Get information about a specific model by its ID or name.
Args:
model_id (Optional[str]): ID of the model to retrieve
model_name (Optional[str]): Name of the model to retrieve
return_request (bool): If True, returns the prepared request object instead of executing it
Returns:
Union[Dict[str, Any], requests.Request]: Either the model information from the server or
a prepared request object if return_request is True
Raises:
ValueError: If neither model_id nor model_name is provided, or if both are provided
UnauthorizedError: If the request fails with a 401 status code
NotFoundError: If the model is not found
requests.exceptions.RequestException: If the request fails with any other error
"""
if (model_id is None and model_name is None) or (
model_id is not None and model_name is not None
):
raise ValueError("Exactly one of model_id or model_name must be provided")
# If return_request is True, delegate to info
if return_request:
result = self.info(return_request=True)
assert isinstance(result, requests.Request)
return result
# Get all models and filter
models = self.info()
assert isinstance(models, List)
# Find the matching model
for model in models:
if (model_id and model.get("model_info", {}).get("id") == model_id) or (
model_name and model.get("model_name") == model_name
):
return model
# If we get here, no model was found
if model_id:
msg = f"Model with id={model_id} not found"
elif model_name:
msg = f"Model with model_name={model_name} not found"
else:
msg = "Unknown error trying to find model"
raise NotFoundError(
requests.exceptions.HTTPError(
msg,
response=requests.Response(), # Empty response since we didn't make a direct request
)
)
def info(
    self, return_request: bool = False
) -> Union[List[Dict[str, Any]], requests.Request]:
    """Get detailed information about all models from the server.

    Args:
        return_request: If True, return the prepared ``requests.Request``
            instead of sending it.

    Returns:
        A list of model-info dicts (the ``data`` field of the JSON response),
        or the unprepared ``requests.Request`` when ``return_request`` is True.

    Raises:
        UnauthorizedError: If the request fails with a 401 status code.
        requests.exceptions.RequestException: If the request fails with any
            other error.
    """
    url = f"{self._base_url}/v1/model/info"
    request = requests.Request("GET", url, headers=self._get_headers())
    if return_request:
        return request

    # Use the session as a context manager so its connection pool is
    # released even when the request raises (previously it was leaked).
    with requests.Session() as session:
        try:
            response = session.send(request.prepare())
            response.raise_for_status()
            return response.json()["data"]
        except requests.exceptions.HTTPError as e:
            if e.response.status_code == 401:
                raise UnauthorizedError(e)
            raise
def update(
    self,
    model_id: str,
    model_params: Dict[str, Any],
    model_info: Optional[Dict[str, Any]] = None,
    return_request: bool = False,
) -> Union[Dict[str, Any], requests.Request]:
    """Update an existing model's configuration.

    Args:
        model_id: ID of the model to update.
        model_params: New parameters for the model (e.g., model type,
            api_base, api_key); sent as ``litellm_params``.
        model_info: Additional information about the model; only included
            in the payload when truthy.
        return_request: If True, return the prepared request object instead
            of executing it.

    Returns:
        The JSON response from the server, or a ``requests.Request`` when
        ``return_request`` is True.

    Raises:
        UnauthorizedError: If the request fails with a 401 status code.
        NotFoundError: If the model is not found (404 or a "not found" body).
        requests.exceptions.RequestException: If the request fails with any
            other error.
    """
    url = f"{self._base_url}/model/update"
    data: Dict[str, Any] = {
        "id": model_id,
        "litellm_params": model_params,
    }
    if model_info:
        data["model_info"] = model_info
    request = requests.Request("POST", url, headers=self._get_headers(), json=data)
    if return_request:
        return request

    # Use the session as a context manager so its connection pool is
    # released even when the request raises (previously it was leaked).
    with requests.Session() as session:
        try:
            response = session.send(request.prepare())
            response.raise_for_status()
            return response.json()
        except requests.exceptions.HTTPError as e:
            if e.response.status_code == 401:
                raise UnauthorizedError(e)
            if e.response.status_code == 404 or "not found" in e.response.text.lower():
                raise NotFoundError(e)
            raise

View File

@@ -0,0 +1,146 @@
"""Teams management client for LiteLLM proxy."""
from typing import Any, Dict, List, Optional, Union
import requests
from .exceptions import UnauthorizedError
class TeamsManagementClient:
"""Client for managing teams in LiteLLM proxy."""
def __init__(self, base_url: str, api_key: Optional[str] = None):
"""
Initialize the TeamsManagementClient.
Args:
base_url (str): The base URL of the LiteLLM proxy server (e.g., "http://localhost:4000")
api_key (Optional[str]): API key for authentication. If provided, it will be sent as a Bearer token.
"""
self._base_url = base_url.rstrip("/") # Remove trailing slash if present
self._api_key = api_key
def _get_headers(self) -> Dict[str, str]:
"""
Get the headers for API requests, including authorization if api_key is set.
Returns:
Dict[str, str]: Headers to use for API requests
"""
headers = {"Content-Type": "application/json"}
if self._api_key:
headers["Authorization"] = f"Bearer {self._api_key}"
return headers
def list(
self,
user_id: Optional[str] = None,
organization_id: Optional[str] = None,
) -> List[Dict[str, Any]]:
"""
List teams that the user belongs to.
Args:
user_id (Optional[str]): Only return teams which this user belongs to
organization_id (Optional[str]): Only return teams which belong to this organization
Returns:
List[Dict[str, Any]]: List of team objects
Raises:
requests.exceptions.HTTPError: If the request fails
UnauthorizedError: If authentication fails
"""
url = f"{self._base_url}/team/list"
params = {}
if user_id:
params["user_id"] = user_id
if organization_id:
params["organization_id"] = organization_id
response = requests.get(url, headers=self._get_headers(), params=params)
if response.status_code == 401:
raise UnauthorizedError("Authentication failed. Check your API key.")
response.raise_for_status()
return response.json()
def list_v2(
self,
user_id: Optional[str] = None,
organization_id: Optional[str] = None,
team_id: Optional[str] = None,
team_alias: Optional[str] = None,
page: int = 1,
page_size: int = 10,
sort_by: Optional[str] = None,
sort_order: str = "asc",
) -> Dict[str, Any]:
"""
Get a paginated list of teams with filtering and sorting options.
Args:
user_id (Optional[str]): Only return teams which this user belongs to
organization_id (Optional[str]): Only return teams which belong to this organization
team_id (Optional[str]): Filter teams by exact team_id match
team_alias (Optional[str]): Filter teams by partial team_alias match
page (int): Page number for pagination
page_size (int): Number of teams per page
sort_by (Optional[str]): Column to sort by (e.g. 'team_id', 'team_alias', 'created_at')
sort_order (str): Sort order ('asc' or 'desc')
Returns:
Dict[str, Any]: Paginated response containing teams and pagination info
Raises:
requests.exceptions.HTTPError: If the request fails
UnauthorizedError: If authentication fails
"""
url = f"{self._base_url}/v2/team/list"
params: Dict[str, Union[str, int]] = {
"page": page,
"page_size": page_size,
"sort_order": sort_order,
}
if user_id:
params["user_id"] = user_id
if organization_id:
params["organization_id"] = organization_id
if team_id:
params["team_id"] = team_id
if team_alias:
params["team_alias"] = team_alias
if sort_by:
params["sort_by"] = sort_by
response = requests.get(url, headers=self._get_headers(), params=params)
if response.status_code == 401:
raise UnauthorizedError("Authentication failed. Check your API key.")
response.raise_for_status()
return response.json()
def get_available(self) -> List[Dict[str, Any]]:
"""
Get list of available teams that the user can join.
Returns:
List[Dict[str, Any]]: List of available team objects
Raises:
requests.exceptions.HTTPError: If the request fails
UnauthorizedError: If authentication fails
"""
url = f"{self._base_url}/team/available"
response = requests.get(url, headers=self._get_headers())
if response.status_code == 401:
raise UnauthorizedError("Authentication failed. Check your API key.")
response.raise_for_status()
return response.json()

View File

@@ -0,0 +1,58 @@
import requests
from typing import List, Dict, Any, Optional
from .exceptions import UnauthorizedError, NotFoundError
class UsersManagementClient:
    """Client for the user-management endpoints of a LiteLLM proxy server."""

    def __init__(self, base_url: str, api_key: Optional[str] = None):
        """Initialize the UsersManagementClient.

        Args:
            base_url (str): Base URL of the LiteLLM proxy server
                (e.g., "http://localhost:4000"); a trailing slash is stripped.
            api_key (Optional[str]): API key for authentication. If provided,
                it is sent as a Bearer token on every request.
        """
        self.base_url = base_url.rstrip("/")
        self.api_key = api_key

    def _get_headers(self) -> Dict[str, str]:
        """Build request headers, adding Authorization when an API key is set.

        Returns:
            Dict[str, str]: Headers to use for API requests.
        """
        headers = {"Content-Type": "application/json"}
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"
        return headers

    def list_users(
        self, params: Optional[Dict[str, Any]] = None
    ) -> List[Dict[str, Any]]:
        """List users (GET /user/list).

        Args:
            params (Optional[Dict[str, Any]]): Optional query parameters
                forwarded verbatim to the endpoint.

        Returns:
            List[Dict[str, Any]]: The user list.

        Raises:
            UnauthorizedError: If the request fails with a 401 status code.
            requests.exceptions.HTTPError: If the request fails with any other error.
        """
        url = f"{self.base_url}/user/list"
        response = requests.get(url, headers=self._get_headers(), params=params)
        if response.status_code == 401:
            raise UnauthorizedError(response.text)
        response.raise_for_status()
        # Parse the body once (previously json() was called twice). Some
        # server versions wrap the list in a "users" key while others return
        # the list directly; .get() on a bare list would raise AttributeError.
        body = response.json()
        if isinstance(body, dict):
            return body.get("users", body)
        return body

    def get_user(self, user_id: Optional[str] = None) -> Dict[str, Any]:
        """Get user info (GET /user/info).

        Args:
            user_id (Optional[str]): ID of the user to look up. When omitted,
                the endpoint is called without a user_id parameter.

        Returns:
            Dict[str, Any]: The user info payload.

        Raises:
            UnauthorizedError: If the request fails with a 401 status code.
            NotFoundError: If the request fails with a 404 status code.
            requests.exceptions.HTTPError: If the request fails with any other error.
        """
        url = f"{self.base_url}/user/info"
        params = {"user_id": user_id} if user_id else {}
        response = requests.get(url, headers=self._get_headers(), params=params)
        if response.status_code == 401:
            raise UnauthorizedError(response.text)
        if response.status_code == 404:
            raise NotFoundError(response.text)
        response.raise_for_status()
        return response.json()

    def create_user(self, user_data: Dict[str, Any]) -> Dict[str, Any]:
        """Create a new user (POST /user/new).

        Args:
            user_data (Dict[str, Any]): JSON body describing the new user.

        Returns:
            Dict[str, Any]: The created user's data as returned by the server.

        Raises:
            UnauthorizedError: If the request fails with a 401 status code.
            requests.exceptions.HTTPError: If the request fails with any other error.
        """
        url = f"{self.base_url}/user/new"
        response = requests.post(url, headers=self._get_headers(), json=user_data)
        if response.status_code == 401:
            raise UnauthorizedError(response.text)
        response.raise_for_status()
        return response.json()

    def delete_user(self, user_ids: List[str]) -> Dict[str, Any]:
        """Delete users (POST /user/delete).

        Args:
            user_ids (List[str]): IDs of the users to delete.

        Returns:
            Dict[str, Any]: The server's deletion response.

        Raises:
            UnauthorizedError: If the request fails with a 401 status code.
            requests.exceptions.HTTPError: If the request fails with any other error.
        """
        url = f"{self.base_url}/user/delete"
        response = requests.post(
            url, headers=self._get_headers(), json={"user_ids": user_ids}
        )
        if response.status_code == 401:
            raise UnauthorizedError(response.text)
        response.raise_for_status()
        return response.json()