75 lines
3.0 KiB
Python
75 lines
3.0 KiB
Python
from typing import Dict
|
|
|
|
import openai
|
|
|
|
|
|
class LLMClient:
    """
    Minimalist client for interacting with Clovis LLM via OpenAI SDK.

    Attributes:
        api_key (str): API key for Clovis.
        base_url (str): Base URL for Clovis LLM gateway.
        model (str): Model name to use. Defaults to 'ClovisLLM'.
    """

    def __init__(self, api_key: str, base_url: str, model: str = "ClovisLLM") -> None:
        """
        Initialize the client and build the underlying OpenAI SDK client.

        Args:
            api_key (str): API key for Clovis. Must be non-empty.
            base_url (str): Base URL for the Clovis LLM gateway. Must be non-empty.
            model (str): Model name to use. Defaults to 'ClovisLLM'.

        Raises:
            ValueError: If ``api_key`` or ``base_url`` is empty/falsy.
        """
        if not api_key:
            raise ValueError("API key is required for LLMClient.")
        if not base_url:
            raise ValueError("Base URL is required for LLMClient.")

        self.api_key = api_key
        self.base_url = base_url
        self.model = model
        # One SDK client per LLMClient instance, reused across generate() calls.
        self.client = openai.OpenAI(api_key=self.api_key, base_url=self.base_url)

    def generate(self, system_prompt: str, user_prompt: str, context: str) -> Dict[str, object]:
        """
        Generate a response from the LLM given a system prompt, user prompt, and context.

        Args:
            system_prompt (str): Instructions for the assistant.
            user_prompt (str): The user's query.
            context (str): Concatenated chunks from RAG search.

        Returns:
            Dict[str, object]: Contains:
                - "answer" (str): Text generated by the LLM. Always a string:
                  "" when the response shape is unexpected or the SDK returns
                  a ``None`` content.
                - "usage" (int): Total tokens used in the completion; 0 when
                  usage info is missing.

        Raises:
            Exception: SDK/network errors from the completion call propagate
                unchanged to the caller (C1 minimal — no retry or wrapping).
        """
        # Explicit CONTEXT / QUESTION separation so the model can distinguish
        # retrieved chunks from the actual query.
        user_message_content = f"CONTEXT:\n{context}\n\nQUESTION:\n{user_prompt}"

        # Exceptions deliberately propagate (the original
        # `except Exception as e: raise e` wrapper was a no-op and is removed).
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_message_content},
            ],
            temperature=0.7,
            max_tokens=2000,
            top_p=1.0,
            n=1,
            user="obsidian_rag",
        )

        # Extract text and usage, falling back to empty values when the
        # response structure is unexpected. IndexError covers an empty
        # `choices` list; `or ""` covers a None message content so the
        # "answer" key is always a str as documented.
        try:
            answer_text = response.choices[0].message.content or ""
            total_tokens = response.usage.total_tokens
        except (AttributeError, IndexError):
            answer_text = ""
            total_tokens = 0

        return {"answer": answer_text, "usage": total_tokens}
|