chore: initial public snapshot for github upload
This commit is contained in:
@@ -0,0 +1,347 @@
|
||||
"""
|
||||
Google AI Studio /batchEmbedContents Embeddings Endpoint
|
||||
"""
|
||||
|
||||
import json
|
||||
from typing import Any, Dict, Literal, Optional, Union
|
||||
|
||||
import httpx
|
||||
|
||||
import litellm
|
||||
from litellm.llms.custom_httpx.http_handler import (
|
||||
AsyncHTTPHandler,
|
||||
HTTPHandler,
|
||||
get_async_httpx_client,
|
||||
)
|
||||
from litellm.types.llms.openai import EmbeddingInput
|
||||
from litellm.types.llms.vertex_ai import (
|
||||
VertexAIBatchEmbeddingsRequestBody,
|
||||
VertexAIBatchEmbeddingsResponseObject,
|
||||
)
|
||||
from litellm.types.utils import EmbeddingResponse
|
||||
|
||||
from ..gemini.vertex_and_google_ai_studio_gemini import VertexLLM
|
||||
from .batch_embed_content_transformation import (
|
||||
_is_file_reference,
|
||||
_is_multimodal_input,
|
||||
process_embed_content_response,
|
||||
process_response,
|
||||
transform_openai_input_gemini_content,
|
||||
transform_openai_input_gemini_embed_content,
|
||||
)
|
||||
|
||||
|
||||
class GoogleBatchEmbeddings(VertexLLM):
    """Embedding calls for the ``gemini`` (Google AI Studio) and ``vertex_ai`` providers.

    Text-only ``gemini`` input is sent in ``batch_embedding`` mode. Multimodal
    input, and every ``vertex_ai`` call, is sent in ``embedding`` mode using
    the embed-content request shape.
    """

    @staticmethod
    def _embedding_file_references(input: EmbeddingInput) -> list:
        """Return the distinct ``files/...`` references in ``input``, in first-seen order."""
        input_list = [input] if isinstance(input, str) else input
        # dict.fromkeys de-duplicates while keeping order, so a file that is
        # referenced several times is only looked up once.
        return list(
            dict.fromkeys(
                element
                for element in input_list
                if isinstance(element, str) and _is_file_reference(element)
            )
        )

    @staticmethod
    def _embedding_file_metadata(element: str, response: Any) -> Dict[str, str]:
        """Validate a file-lookup response and extract ``mime_type`` and ``uri``.

        Raises:
            Exception: If the lookup did not return HTTP 200.
        """
        if response.status_code != 200:
            raise Exception(
                f"Error fetching file {element}: {response.status_code} {response.text}"
            )

        file_data = response.json()
        return {
            "mime_type": file_data.get("mimeType", ""),
            # Fall back to the reference itself when the response has no "uri".
            "uri": file_data.get("uri", element),
        }

    @staticmethod
    def _embedding_sync_client_params(timeout: Any) -> Dict[str, Any]:
        """Build the keyword arguments for a new sync ``HTTPHandler``.

        ``None`` selects a 600s timeout with a 5s connect timeout, a number is
        wrapped in ``httpx.Timeout``, and an ``httpx.Timeout`` instance is
        passed through unchanged. Any other value yields no ``timeout``
        argument, leaving the handler's own default in effect.
        """
        if timeout is None:
            return {"timeout": httpx.Timeout(timeout=600.0, connect=5.0)}
        if isinstance(timeout, httpx.Timeout):
            return {"timeout": timeout}
        if isinstance(timeout, (int, float)):
            return {"timeout": httpx.Timeout(timeout)}
        return {}

    @staticmethod
    def _embedding_request_body(
        input: EmbeddingInput,
        model: str,
        optional_params: dict,
        use_embed_content: bool,
        resolved_files: Dict[str, Dict[str, str]],
    ) -> Any:
        """Build the request body for the selected request shape."""
        if use_embed_content:
            return transform_openai_input_gemini_embed_content(
                input=input,
                model=model,
                optional_params=optional_params,
                resolved_files=resolved_files,
            )
        return transform_openai_input_gemini_content(
            input=input, model=model, optional_params=optional_params
        )

    @staticmethod
    def _embedding_response_from_http(
        response: Any,
        use_embed_content: bool,
        input: EmbeddingInput,
        model: str,
        model_response: EmbeddingResponse,
    ) -> EmbeddingResponse:
        """Check the HTTP status and populate ``model_response`` from the JSON body.

        Raises:
            Exception: If the response status code is not 200.
        """
        if response.status_code != 200:
            raise Exception(f"Error: {response.status_code} {response.text}")

        _json_response = response.json()

        if use_embed_content:
            return process_embed_content_response(
                input=input,
                model_response=model_response,
                model=model,
                response_json=_json_response,
            )

        _predictions = VertexAIBatchEmbeddingsResponseObject(**_json_response)  # type: ignore
        return process_response(
            model=model,
            model_response=model_response,
            _predictions=_predictions,
            input=input,
        )

    def _resolve_file_references(
        self,
        input: EmbeddingInput,
        api_key: str,
        sync_handler: HTTPHandler,
    ) -> Dict[str, Dict[str, str]]:
        """
        Resolve Gemini file references (files/...) to get mime_type and uri.

        Args:
            input: EmbeddingInput that may contain file references
            api_key: Gemini API key, sent as the ``x-goog-api-key`` header
            sync_handler: HTTP client used for the lookups

        Returns:
            Dict mapping file name to {mime_type, uri}

        Raises:
            Exception: If a lookup does not return HTTP 200.
        """
        resolved_files: Dict[str, Dict[str, str]] = {}

        for element in self._embedding_file_references(input):
            url = f"https://generativelanguage.googleapis.com/v1beta/{element}"
            headers = {"x-goog-api-key": api_key}
            response = sync_handler.get(url=url, headers=headers)
            resolved_files[element] = self._embedding_file_metadata(element, response)

        return resolved_files

    async def _async_resolve_file_references(
        self,
        input: EmbeddingInput,
        api_key: str,
        async_handler: AsyncHTTPHandler,
    ) -> Dict[str, Dict[str, str]]:
        """
        Async version of _resolve_file_references.

        Args:
            input: EmbeddingInput that may contain file references
            api_key: Gemini API key, sent as the ``x-goog-api-key`` header
            async_handler: Async HTTP client used for the lookups

        Returns:
            Dict mapping file name to {mime_type, uri}

        Raises:
            Exception: If a lookup does not return HTTP 200.
        """
        resolved_files: Dict[str, Dict[str, str]] = {}

        for element in self._embedding_file_references(input):
            url = f"https://generativelanguage.googleapis.com/v1beta/{element}"
            headers = {"x-goog-api-key": api_key}
            response = await async_handler.get(url=url, headers=headers)
            resolved_files[element] = self._embedding_file_metadata(element, response)

        return resolved_files

    def batch_embeddings(
        self,
        model: str,
        input: EmbeddingInput,
        print_verbose,
        model_response: EmbeddingResponse,
        custom_llm_provider: Literal["gemini", "vertex_ai"],
        optional_params: dict,
        logging_obj: Any,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        encoding=None,
        vertex_project=None,
        vertex_location=None,
        vertex_credentials=None,
        aembedding: Optional[bool] = False,
        timeout=300,
        client=None,
        extra_headers: Optional[dict] = None,
    ) -> EmbeddingResponse:
        """
        Embed ``input`` and return the populated ``model_response``.

        Args:
            model: Model name, forwarded to URL construction and the transformations
            input: Text, data URIs, ``gs://`` URLs or ``files/...`` references
            print_verbose: Not used by this method
            model_response: EmbeddingResponse to populate
            custom_llm_provider: "gemini" or "vertex_ai"; "vertex_ai" always
                uses the embed-content request shape
            optional_params: Extra request parameters; ``None`` is treated as ``{}``
            logging_obj: Object whose ``pre_call`` is invoked before the request
            api_key: Gemini API key; also required for resolving ``files/...``
                references
            api_base: Optional API base override, forwarded to URL construction
            encoding: Not used by this method
            vertex_project / vertex_location / vertex_credentials: Forwarded
                to token and URL resolution
            aembedding: When True, the coroutine from ``async_batch_embeddings``
                is returned and must be awaited by the caller
            timeout: Seconds, an ``httpx.Timeout``, or None
            client: Optional pre-built sync HTTP client (sync path only)
            extra_headers: Headers merged last, so they override the defaults

        Returns:
            EmbeddingResponse (or a coroutine resolving to one when
            ``aembedding`` is True)

        Raises:
            Exception: If the endpoint or a file lookup does not return HTTP 200.
        """
        _auth_header, vertex_project = self._ensure_access_token(
            credentials=vertex_credentials,
            project_id=vertex_project,
            custom_llm_provider=custom_llm_provider,
        )

        optional_params = optional_params or {}

        is_multimodal = _is_multimodal_input(input)
        use_embed_content = is_multimodal or (custom_llm_provider == "vertex_ai")
        mode: Literal["embedding", "batch_embedding"]
        if use_embed_content:
            mode = "embedding"
        else:
            mode = "batch_embedding"

        auth_header, url = self._get_token_and_url(
            model=model,
            auth_header=_auth_header,
            gemini_api_key=api_key,
            vertex_project=vertex_project,
            vertex_location=vertex_location,
            vertex_credentials=vertex_credentials,
            stream=None,
            custom_llm_provider=custom_llm_provider,
            api_base=api_base,
            should_use_v1beta1_features=False,
            mode=mode,
        )

        headers = {
            "Content-Type": "application/json; charset=utf-8",
        }
        if auth_header is not None:
            if isinstance(auth_header, dict):
                headers.update(auth_header)
            else:
                headers["Authorization"] = f"Bearer {auth_header}"
        if extra_headers is not None:
            headers.update(extra_headers)

        if aembedding is True:
            # The async path creates its own client, so no sync handler is
            # built before delegating.
            return self.async_batch_embeddings(  # type: ignore
                model=model,
                api_base=api_base,
                url=url,
                data=None,
                model_response=model_response,
                timeout=timeout,
                headers=headers,
                input=input,
                use_embed_content=use_embed_content,
                api_key=api_key,
                optional_params=optional_params,
                logging_obj=logging_obj,
            )

        if client is None:
            sync_handler: HTTPHandler = HTTPHandler(
                **self._embedding_sync_client_params(timeout)
            )  # type: ignore
        else:
            sync_handler = client  # type: ignore

        ### TRANSFORMATION (sync path) ###
        resolved_files: Dict[str, Dict[str, str]] = {}
        if use_embed_content and api_key:
            resolved_files = self._resolve_file_references(
                input=input, api_key=api_key, sync_handler=sync_handler
            )
        request_data: Any = self._embedding_request_body(
            input=input,
            model=model,
            optional_params=optional_params,
            use_embed_content=use_embed_content,
            resolved_files=resolved_files,
        )

        ## LOGGING
        logging_obj.pre_call(
            input=input,
            api_key="",
            additional_args={
                "complete_input_dict": request_data,
                "api_base": url,
                "headers": headers,
            },
        )

        response = sync_handler.post(
            url=url,
            headers=headers,
            data=json.dumps(request_data),
        )

        return self._embedding_response_from_http(
            response=response,
            use_embed_content=use_embed_content,
            input=input,
            model=model,
            model_response=model_response,
        )

    async def async_batch_embeddings(
        self,
        model: str,
        api_base: Optional[str],
        url: str,
        data: Optional[Union[VertexAIBatchEmbeddingsRequestBody, dict]],
        model_response: EmbeddingResponse,
        input: EmbeddingInput,
        timeout: Optional[Union[float, httpx.Timeout]],
        headers: Optional[dict] = None,
        client: Optional[AsyncHTTPHandler] = None,
        use_embed_content: bool = False,
        api_key: Optional[str] = None,
        optional_params: Optional[dict] = None,
        logging_obj: Optional[Any] = None,
    ) -> EmbeddingResponse:
        """
        Async counterpart of the sync request path in ``batch_embeddings``.

        Args:
            model: Model name forwarded to the transformations
            api_base: Not used by this method
            url: Fully resolved endpoint URL
            data: Ignored; the request body is always rebuilt from ``input``
            model_response: EmbeddingResponse to populate
            input: Text, data URIs, ``gs://`` URLs or ``files/...`` references
            timeout: Passed as-is to the async client factory when no
                ``client`` is supplied
            headers: Request headers; defaults to an empty dict
            client: Optional pre-built async HTTP client
            use_embed_content: Selects the embed-content request/response shape
            api_key: Gemini API key used to resolve ``files/...`` references
            optional_params: Extra request parameters; ``None`` is treated as ``{}``
            logging_obj: When provided, its ``pre_call`` is invoked before the
                request

        Returns:
            The populated EmbeddingResponse

        Raises:
            Exception: If the endpoint or a file lookup does not return HTTP 200.
        """
        # A fresh dict per call avoids sharing one mutable default between calls.
        headers = {} if headers is None else headers
        optional_params = optional_params or {}

        if client is None:
            async_handler: AsyncHTTPHandler = get_async_httpx_client(
                llm_provider=litellm.LlmProviders.VERTEX_AI,
                params={"timeout": timeout},
            )
        else:
            async_handler = client  # type: ignore

        ### TRANSFORMATION (async path) ###
        resolved_files: Dict[str, Dict[str, str]] = {}
        if use_embed_content and api_key:
            resolved_files = await self._async_resolve_file_references(
                input=input, api_key=api_key, async_handler=async_handler
            )
        data = self._embedding_request_body(
            input=input,
            model=model,
            optional_params=optional_params,
            use_embed_content=use_embed_content,
            resolved_files=resolved_files,
        )

        ## LOGGING
        if logging_obj is not None:
            logging_obj.pre_call(
                input=input,
                api_key="",
                additional_args={
                    "complete_input_dict": data,
                    "api_base": url,
                    "headers": headers,
                },
            )

        response = await async_handler.post(
            url=url,
            headers=headers,
            data=json.dumps(data),
        )

        return self._embedding_response_from_http(
            response=response,
            use_embed_content=use_embed_content,
            input=input,
            model=model,
            model_response=model_response,
        )
|
||||
@@ -0,0 +1,308 @@
|
||||
"""
|
||||
Transformation logic from OpenAI /v1/embeddings format to Google AI Studio /batchEmbedContents format.
|
||||
|
||||
Why a separate file? To make it easy to see how the transformation works.
|
||||
"""
|
||||
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
from litellm.types.llms.openai import EmbeddingInput
|
||||
from litellm.types.llms.vertex_ai import (
|
||||
BlobType,
|
||||
ContentType,
|
||||
EmbedContentRequest,
|
||||
FileDataType,
|
||||
PartType,
|
||||
VertexAIBatchEmbeddingsRequestBody,
|
||||
VertexAIBatchEmbeddingsResponseObject,
|
||||
)
|
||||
from litellm.types.utils import Embedding, EmbeddingResponse, Usage
|
||||
from litellm.utils import get_formatted_prompt, token_counter
|
||||
|
||||
# Media types accepted for inline (data URL) embedding parts; _parse_data_url
# rejects any data URL whose media type is not in this set.
SUPPORTED_EMBEDDING_MIME_TYPES = {
    # images
    "image/jpeg",
    "image/png",
    # audio
    "audio/wav",
    "audio/mpeg",
    # video
    "video/quicktime",
    "video/mp4",
    # documents
    "application/pdf",
}
|
||||
|
||||
|
||||
def _is_file_reference(s: str) -> bool:
|
||||
"""Check if string is a Gemini file reference (files/...)."""
|
||||
return isinstance(s, str) and s.startswith("files/")
|
||||
|
||||
|
||||
def _is_gcs_url(s: str) -> bool:
|
||||
"""Check if string is a GCS URL (gs://...)."""
|
||||
return isinstance(s, str) and s.startswith("gs://")
|
||||
|
||||
|
||||
def _infer_mime_type_from_gcs_url(gcs_url: str) -> str:
|
||||
"""
|
||||
Infer MIME type from GCS URL file extension.
|
||||
|
||||
Args:
|
||||
gcs_url: GCS URL like gs://bucket/path/to/file.png
|
||||
|
||||
Returns:
|
||||
str: Inferred MIME type
|
||||
|
||||
Raises:
|
||||
ValueError: If file extension is not supported
|
||||
"""
|
||||
extension_to_mime = {
|
||||
".png": "image/png",
|
||||
".jpg": "image/jpeg",
|
||||
".jpeg": "image/jpeg",
|
||||
".mp3": "audio/mpeg",
|
||||
".wav": "audio/wav",
|
||||
".mp4": "video/mp4",
|
||||
".mov": "video/quicktime",
|
||||
".pdf": "application/pdf",
|
||||
}
|
||||
|
||||
gcs_url_lower = gcs_url.lower()
|
||||
for ext, mime_type in extension_to_mime.items():
|
||||
if gcs_url_lower.endswith(ext):
|
||||
return mime_type
|
||||
|
||||
raise ValueError(
|
||||
f"Unable to infer MIME type from GCS URL: {gcs_url}. "
|
||||
f"Supported extensions: {', '.join(extension_to_mime.keys())}"
|
||||
)
|
||||
|
||||
|
||||
def _parse_data_url(data_url: str) -> Tuple[str, str]:
|
||||
"""
|
||||
Parse a data URL to extract the media type and base64 data.
|
||||
|
||||
Args:
|
||||
data_url: Data URL in format: data:image/jpeg;base64,/9j/4AAQ...
|
||||
|
||||
Returns:
|
||||
tuple: (media_type, base64_data)
|
||||
media_type: e.g., "image/jpeg", "video/mp4", "audio/mpeg"
|
||||
base64_data: The base64-encoded data without the prefix
|
||||
|
||||
Raises:
|
||||
ValueError: If data URL format is invalid or MIME type is unsupported
|
||||
"""
|
||||
if not data_url.startswith("data:"):
|
||||
raise ValueError(f"Invalid data URL format: {data_url[:50]}...")
|
||||
|
||||
if "," not in data_url:
|
||||
raise ValueError(f"Invalid data URL format (missing comma): {data_url[:50]}...")
|
||||
|
||||
metadata, base64_data = data_url.split(",", 1)
|
||||
|
||||
metadata = metadata[5:]
|
||||
|
||||
if ";" in metadata:
|
||||
media_type = metadata.split(";")[0]
|
||||
else:
|
||||
media_type = metadata
|
||||
|
||||
if media_type not in SUPPORTED_EMBEDDING_MIME_TYPES:
|
||||
raise ValueError(
|
||||
f"Unsupported MIME type for embedding: {media_type}. "
|
||||
f"Supported types: {', '.join(sorted(SUPPORTED_EMBEDDING_MIME_TYPES))}"
|
||||
)
|
||||
|
||||
return media_type, base64_data
|
||||
|
||||
|
||||
def _is_multimodal_input(input: EmbeddingInput) -> bool:
    """
    Report whether any element of the input is non-text content.

    An element counts as multimodal when it is a string that is a base64
    data URI, a ``files/...`` reference, or a ``gs://`` URL. Non-string
    elements are ignored.

    Args:
        input: EmbeddingInput (a single string or a list)

    Returns:
        bool: True if at least one element is multimodal, else False
    """
    candidates = [input] if isinstance(input, str) else input

    return any(
        isinstance(item, str)
        and (
            (item.startswith("data:") and ";base64," in item)
            or _is_file_reference(item)
            or _is_gcs_url(item)
        )
        for item in candidates
    )
|
||||
|
||||
|
||||
def transform_openai_input_gemini_content(
    input: EmbeddingInput, model: str, optional_params: dict
) -> VertexAIBatchEmbeddingsRequestBody:
    """
    Build a batch request body with one text-only request per input element.

    A single string is treated as a one-element input. Every request carries
    the model as ``models/<model>`` plus a copy of ``optional_params`` in
    which ``dimensions`` is renamed to ``outputDimensionality``.
    """
    extra_fields = optional_params.copy()
    if "dimensions" in extra_fields:
        extra_fields["outputDimensionality"] = extra_fields.pop("dimensions")

    qualified_model = "models/{}".format(model)
    texts = [input] if isinstance(input, str) else input

    requests: List[EmbedContentRequest] = [
        EmbedContentRequest(
            model=qualified_model,
            content=ContentType(parts=[PartType(text=text)]),
            **extra_fields,
        )
        for text in texts
    ]

    return VertexAIBatchEmbeddingsRequestBody(requests=requests)
|
||||
|
||||
|
||||
def transform_openai_input_gemini_embed_content(
    input: EmbeddingInput,
    model: str,
    optional_params: dict,
    resolved_files: Optional[Dict[str, Dict[str, str]]] = None,
) -> dict:
    """
    Build a single embed-content request body whose parts mirror the input.

    Each input string becomes one part: base64 data URIs become inline data,
    ``gs://`` URLs and resolved ``files/...`` references become file data,
    and anything else becomes a text part.

    Args:
        input: EmbeddingInput (str or List[str]) with text, data URIs, GCS
            URLs, or file references
        model: Model name (not placed in the request body by this function)
        optional_params: Additional parameters, copied into the body with
            ``dimensions`` renamed to ``outputDimensionality``
        resolved_files: Dict mapping file names (files/abc) to {mime_type, uri}

    Returns:
        dict: Request body with ``content.parts`` plus the extra parameters

    Raises:
        ValueError: If an element is not a string, or a file reference is
            missing from ``resolved_files``
    """
    known_files = resolved_files or {}

    extra_fields = optional_params.copy()
    if "dimensions" in extra_fields:
        extra_fields["outputDimensionality"] = extra_fields.pop("dimensions")

    elements = [input] if isinstance(input, str) else input
    parts: List[PartType] = []

    for item in elements:
        if not isinstance(item, str):
            raise ValueError(f"Unsupported input type: {type(item)}")

        # Inline base64 payload.
        if item.startswith("data:") and ";base64," in item:
            media_type, payload = _parse_data_url(item)
            inline_blob: BlobType = {"mime_type": media_type, "data": payload}
            parts.append(PartType(inline_data=inline_blob))
            continue

        # GCS object: the MIME type comes from the file extension.
        if _is_gcs_url(item):
            gcs_mime_type = _infer_mime_type_from_gcs_url(item)
            gcs_file: FileDataType = {
                "mime_type": gcs_mime_type,
                "file_uri": item,
            }
            parts.append(PartType(file_data=gcs_file))
            continue

        # Uploaded file: metadata must have been resolved by the caller.
        if _is_file_reference(item):
            if item not in known_files:
                raise ValueError(f"File reference {item} not resolved")
            metadata = known_files[item]
            uploaded_file: FileDataType = {
                "mime_type": metadata["mime_type"],
                "file_uri": metadata["uri"],
            }
            parts.append(PartType(file_data=uploaded_file))
            continue

        # Plain text.
        parts.append(PartType(text=item))

    request_body: dict = {"content": ContentType(parts=parts)}
    request_body.update(extra_fields)
    return request_body
|
||||
|
||||
|
||||
def process_embed_content_response(
    input: EmbeddingInput,
    model_response: EmbeddingResponse,
    model: str,
    response_json: dict,
) -> EmbeddingResponse:
    """
    Fill ``model_response`` from an embed-content response (one embedding).

    Args:
        input: Original input, used only for the token count
        model_response: EmbeddingResponse to populate
        model: Model name
        response_json: Raw JSON response from the endpoint

    Returns:
        The same EmbeddingResponse, holding a single embedding at index 0.
        Usage reports 0 prompt tokens for multimodal input and a
        ``token_counter`` count otherwise.

    Raises:
        ValueError: If ``response_json`` has no "embedding" key
    """
    if "embedding" in response_json:
        vector = response_json["embedding"]["values"]
    else:
        raise ValueError(
            f"embedContent response missing 'embedding' field: {response_json}"
        )

    model_response.data = [Embedding(embedding=vector, index=0, object="embedding")]
    model_response.model = model

    # Multimodal input is reported as 0 tokens; only text-only input is counted.
    prompt_tokens = (
        0
        if _is_multimodal_input(input)
        else token_counter(
            model=model,
            text=get_formatted_prompt(data={"input": input}, call_type="embedding"),
        )
    )
    model_response.usage = Usage(
        prompt_tokens=prompt_tokens, total_tokens=prompt_tokens
    )

    return model_response
|
||||
|
||||
|
||||
def process_response(
    input: EmbeddingInput,
    model_response: EmbeddingResponse,
    model: str,
    _predictions: VertexAIBatchEmbeddingsResponseObject,
) -> EmbeddingResponse:
    """
    Fill ``model_response`` from a batch embeddings response.

    Args:
        input: Original input, used for the token count
        model_response: EmbeddingResponse to populate
        model: Model name
        _predictions: Parsed batch response holding an "embeddings" list

    Returns:
        The same EmbeddingResponse with one Embedding per returned vector,
        each carrying its position in the response as ``index``, plus usage
        computed with ``token_counter`` over the formatted input.
    """
    # Each embedding gets its position in the response as its index (previously
    # every entry was 0), so callers can tell the entries of a list input apart.
    openai_embeddings: List[Embedding] = [
        Embedding(
            embedding=embedding["values"],
            index=position,
            object="embedding",
        )
        for position, embedding in enumerate(_predictions["embeddings"])
    ]

    model_response.data = openai_embeddings
    model_response.model = model

    input_text = get_formatted_prompt(data={"input": input}, call_type="embedding")
    prompt_tokens = token_counter(model=model, text=input_text)
    model_response.usage = Usage(
        prompt_tokens=prompt_tokens, total_tokens=prompt_tokens
    )

    return model_response
|
||||
Reference in New Issue
Block a user