Initial commit
This commit is contained in:
74
obsidian_rag/llm_client.py
Normal file
74
obsidian_rag/llm_client.py
Normal file
@@ -0,0 +1,74 @@
|
||||
from typing import Dict
|
||||
|
||||
import openai
|
||||
|
||||
|
||||
class LLMClient:
    """
    Minimalist client for interacting with Clovis LLM via OpenAI SDK.

    Attributes:
        api_key (str): API key for Clovis.
        base_url (str): Base URL for Clovis LLM gateway.
        model (str): Model name to use. Defaults to 'ClovisLLM'.
        client: Configured ``openai.OpenAI`` instance used for completions.
    """

    def __init__(self, api_key: str, base_url: str, model: str = "ClovisLLM") -> None:
        """
        Validate credentials and build the underlying OpenAI SDK client.

        Args:
            api_key (str): API key for Clovis. Must be non-empty.
            base_url (str): Base URL for the Clovis LLM gateway. Must be non-empty.
            model (str): Model name to use. Defaults to 'ClovisLLM'.

        Raises:
            ValueError: If ``api_key`` or ``base_url`` is empty.
        """
        if not api_key:
            raise ValueError("API key is required for LLMClient.")
        if not base_url:
            raise ValueError("Base URL is required for LLMClient.")

        self.api_key = api_key
        self.base_url = base_url
        self.model = model
        self.client = openai.OpenAI(api_key=self.api_key, base_url=self.base_url)

    def generate(self, system_prompt: str, user_prompt: str, context: str) -> Dict[str, object]:
        """
        Generate a response from the LLM given a system prompt, user prompt, and context.

        Args:
            system_prompt (str): Instructions for the assistant.
            user_prompt (str): The user's query.
            context (str): Concatenated chunks from RAG search.

        Returns:
            Dict[str, object]: Contains:
                - "answer" (str): Text generated by the LLM. Empty string if
                  the response carried no text content.
                - "usage" (int): Total tokens used in the completion. 0 if
                  usage information is unavailable.

        Raises:
            Exception: Errors from the underlying SDK call propagate
                unchanged to the caller (C1 minimal error handling).
        """
        # Construct user message with explicit CONTEXT / QUESTION separation
        user_message_content = f"CONTEXT:\n{context}\n\nQUESTION:\n{user_prompt}"

        # SDK/network errors deliberately propagate (C1 minimal). The former
        # `except Exception as e: raise e` wrapper was a no-op and is removed.
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_message_content},
            ],
            temperature=0.7,
            max_tokens=2000,
            top_p=1.0,
            n=1,
            user="obsidian_rag",
        )

        # Extract text and usage independently, so a missing `usage` field
        # does not discard a perfectly good answer (and vice versa).
        try:
            # `message.content` may be None per the SDK types; normalize to ""
            # so the documented str contract for "answer" always holds.
            answer_text = response.choices[0].message.content or ""
        except (AttributeError, IndexError):
            # Fallback if response structure is unexpected
            answer_text = ""
        try:
            total_tokens = response.usage.total_tokens
        except AttributeError:
            total_tokens = 0

        return {"answer": answer_text, "usage": total_tokens}
|
||||
Reference in New Issue
Block a user