# LiteLLM configuration: model deployments plus assistant, router and callback settings.
---
# Model deployments. Each entry pairs a `model_name` with the `litellm_params`
# used for that deployment. Values of the form `os.environ/<NAME>` name an
# environment variable rather than holding the secret itself.
model_list:
  # NOTE(review): the name says claude-3-5-sonnet but the target model is
  # claude-3-haiku-20240307 — confirm this mapping is intentional.
  - model_name: claude-3-5-sonnet
    litellm_params:
      model: claude-3-haiku-20240307
  # - model_name: gemini-1.5-flash-gemini
  #   litellm_params:
  #     model: vertex_ai_beta/gemini-1.5-flash
  #     api_base: https://gateway.ai.cloudflare.com/v1/fa4cdcab1f32b95ca3b53fd36043d691/test/google-vertex-ai/v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.5-flash
  - model_name: gpt-4o
    litellm_params:
      api_base: http://0.0.0.0:8080
      api_key: ''
      model: gpt-4o
      rpm: 800
      # NOTE(review): 300 per input token looks like a test value — confirm.
      input_cost_per_token: 300
  - model_name: llama3-70b-8192
    litellm_params:
      model: groq/llama3-70b-8192
  - model_name: fake-openai-endpoint
    litellm_params:
      model: predibase/llama-3-8b-instruct
      api_key: os.environ/PREDIBASE_API_KEY
      tenant_id: os.environ/PREDIBASE_TENANT_ID
      max_new_tokens: 256
  # - litellm_params:
  #     api_base: https://my-endpoint-europe-berri-992.openai.azure.com/
  #     api_key: os.environ/AZURE_EUROPE_API_KEY
  #     model: azure/gpt-35-turbo
  #     rpm: 10
  #   model_name: gpt-3.5-turbo-fake-model
  # NOTE(review): two entries below share the name gpt-3.5-turbo and differ
  # only in `drop_params` and a trailing slash on `api_base` — presumably a
  # deliberate multi-deployment group; confirm.
  - model_name: gpt-3.5-turbo
    litellm_params:
      api_base: https://openai-gpt-4-test-v-1.openai.azure.com
      api_key: os.environ/AZURE_API_KEY
      # Quoted so no loader can mistake the date-like prefix for a timestamp.
      api_version: '2024-02-15-preview'
      model: azure/chatgpt-v-2
      tpm: 100
  - model_name: bedrock-anthropic-claude-3
    litellm_params:
      model: anthropic.claude-3-sonnet-20240229-v1:0
  - model_name: anthropic-claude-3
    litellm_params:
      model: claude-3-haiku-20240307
  - model_name: gpt-3.5-turbo
    litellm_params:
      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
      api_key: os.environ/AZURE_API_KEY
      api_version: '2024-02-15-preview'
      model: azure/chatgpt-v-2
      drop_params: true
      tpm: 100
  - model_name: tts
    litellm_params:
      model: openai/tts-1
  - model_name: gpt-4-turbo-preview
    litellm_params:
      api_base: https://openai-france-1234.openai.azure.com
      api_key: os.environ/AZURE_FRANCE_API_KEY
      api_version: '2024-02-15-preview'
      model: azure/gpt-turbo
  - model_name: text-embedding
    litellm_params:
      model: textembedding-gecko-multilingual@001
      vertex_project: my-project-9d5c
      vertex_location: us-central1
  - model_name: lbl/command-r-plus
    litellm_params:
      model: openai/lbl/command-r-plus
      api_key: os.environ/VLLM_API_KEY
      api_base: http://vllm-command:8000/v1
      rpm: 1000
      input_cost_per_token: 0
      output_cost_per_token: 0
    model_info:
      max_input_tokens: 80920

# Disabled example: dynamic rate limiter with optional per-team Langfuse logging.
# litellm_settings:
#   callbacks: ["dynamic_rate_limiter"]
#   # success_callback: ["langfuse"]
#   # failure_callback: ["langfuse"]
#   # default_team_settings:
#   #   - team_id: proj1
#   #     success_callback: ["langfuse"]
#   #     langfuse_public_key: os.environ/LANGFUSE_PUBLIC_KEY
#   #     langfuse_secret: os.environ/LANGFUSE_SECRET
#   #     langfuse_host: https://us.cloud.langfuse.com
#   #   - team_id: proj2
#   #     success_callback: ["langfuse"]
#   #     langfuse_public_key: os.environ/LANGFUSE_PUBLIC_KEY
#   #     langfuse_secret: os.environ/LANGFUSE_SECRET
#   #     langfuse_host: https://us.cloud.langfuse.com

# Assistant settings: provider `openai`, with the API key named by the
# OPENAI_API_KEY environment-variable reference.
assistant_settings:
  custom_llm_provider: openai
  litellm_params:
    api_key: os.environ/OPENAI_API_KEY

# Router settings: turn on the router's pre-call checks.
router_settings:
  enable_pre_call_checks: true

# Library-level settings: register the `s3` callback.
litellm_settings:
  callbacks: ["s3"]

# Disabled example: JWT auth, reading the team id from the `client_id` claim.
# general_settings:
#   # alerting: ["slack"]
#   enable_jwt_auth: True
#   litellm_jwtauth:
#     team_id_jwt_field: "client_id"