from typing import Dict

import openai


class LLMClient:
    """Minimalist client for interacting with Clovis LLM via the OpenAI SDK.

    Attributes:
        api_key (str): API key for Clovis.
        base_url (str): Base URL for the Clovis LLM gateway.
        model (str): Model name to use. Defaults to 'ClovisLLM'.
    """

    def __init__(self, api_key: str, base_url: str, model: str = "ClovisLLM") -> None:
        """Validate credentials and build the underlying OpenAI client.

        Args:
            api_key (str): API key for Clovis. Must be non-empty.
            base_url (str): Base URL for the Clovis LLM gateway. Must be non-empty.
            model (str): Model name to use. Defaults to 'ClovisLLM'.

        Raises:
            ValueError: If ``api_key`` or ``base_url`` is empty/falsy.
        """
        if not api_key:
            raise ValueError("API key is required for LLMClient.")
        if not base_url:
            raise ValueError("Base URL is required for LLMClient.")
        self.api_key = api_key
        self.base_url = base_url
        self.model = model
        self.client = openai.OpenAI(api_key=self.api_key, base_url=self.base_url)

    def generate(self, system_prompt: str, user_prompt: str, context: str) -> Dict[str, object]:
        """Generate a response from the LLM given a system prompt, user prompt, and context.

        Args:
            system_prompt (str): Instructions for the assistant.
            user_prompt (str): The user's query.
            context (str): Concatenated chunks from RAG search.

        Returns:
            Dict[str, object]: Contains:
                - "answer" (str): Text generated by the LLM ("" on an
                  unexpected/empty response).
                - "usage" (int): Total tokens used in the completion
                  (0 when usage info is missing).

        Raises:
            openai.OpenAIError: Propagated unchanged from the SDK call.
        """
        # Construct the user message with explicit CONTEXT / QUESTION separation
        # so the model can distinguish retrieved material from the actual query.
        user_message_content = f"CONTEXT:\n{context}\n\nQUESTION:\n{user_prompt}"

        # No try/except wrapper here: the previous `except Exception as e: raise e`
        # added nothing and only reset the traceback's reporting line. SDK
        # exceptions propagate to the caller unchanged.
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_message_content},
            ],
            temperature=0.7,
            max_tokens=2000,
            top_p=1.0,
            n=1,
            user="obsidian_rag",
        )

        # Extract text and usage defensively: gateways occasionally return an
        # empty `choices` list (IndexError) or omit `usage` (AttributeError).
        try:
            # message.content may legally be None (e.g. tool-call responses);
            # coerce to "" so callers always receive a str.
            answer_text = response.choices[0].message.content or ""
            total_tokens = response.usage.total_tokens
        except (AttributeError, IndexError):
            # Fallback if the response structure is unexpected.
            answer_text = ""
            total_tokens = 0

        return {"answer": answer_text, "usage": total_tokens}