75 lines
3.0 KiB
Python
75 lines
3.0 KiB
Python
from typing import Dict
|
|
|
|
import openai
|
|
|
|
|
|
class LLMClient:
    """
    Minimalist client for interacting with Clovis LLM via OpenAI SDK.

    Attributes:
        api_key (str): API key for Clovis.
        base_url (str): Base URL for Clovis LLM gateway.
        model (str): Model name to use. Defaults to 'ClovisLLM'.
    """

    def __init__(self, api_key: str, base_url: str, model: str = "ClovisLLM") -> None:
        """
        Initialize the client and build the underlying OpenAI SDK client.

        Args:
            api_key (str): API key for Clovis. Must be non-empty.
            base_url (str): Base URL for the Clovis LLM gateway. Must be non-empty.
            model (str): Model name to use. Defaults to 'ClovisLLM'.

        Raises:
            ValueError: If ``api_key`` or ``base_url`` is empty/falsy.
        """
        if not api_key:
            raise ValueError("API key is required for LLMClient.")
        if not base_url:
            raise ValueError("Base URL is required for LLMClient.")

        self.api_key = api_key
        self.base_url = base_url
        self.model = model
        # One SDK client per LLMClient instance, reused across generate() calls.
        self.client = openai.OpenAI(api_key=self.api_key, base_url=self.base_url)

    def generate(self, system_prompt: str, user_prompt: str, context: str) -> Dict[str, object]:
        """
        Generate a response from the LLM given a system prompt, user prompt, and context.

        Args:
            system_prompt (str): Instructions for the assistant.
            user_prompt (str): The user's query.
            context (str): Concatenated chunks from RAG search.

        Returns:
            Dict[str, object]: Contains:
                - "answer" (str): Text generated by the LLM. Always a string:
                  "" when the response shape is unexpected or the SDK returns
                  a ``None`` content.
                - "usage" (int): Total tokens used in the completion; 0 when
                  usage info is missing.

        Raises:
            Exception: SDK/network errors from the completion call propagate
                unchanged to the caller (C1 minimal — no retry or wrapping).
        """
        # Explicit CONTEXT / QUESTION separation so the model can distinguish
        # retrieved chunks from the actual query.
        user_message_content = f"CONTEXT:\n{context}\n\nQUESTION:\n{user_prompt}"

        # Exceptions deliberately propagate (the original
        # `except Exception as e: raise e` wrapper was a no-op and is removed).
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_message_content},
            ],
            temperature=0.7,
            max_tokens=2000,
            top_p=1.0,
            n=1,
            user="obsidian_rag",
        )

        # Extract text and usage, falling back to empty values when the
        # response structure is unexpected. IndexError covers an empty
        # `choices` list; `or ""` covers a None message content so the
        # "answer" key is always a str as documented.
        try:
            answer_text = response.choices[0].message.content or ""
            total_tokens = response.usage.total_tokens
        except (AttributeError, IndexError):
            answer_text = ""
            total_tokens = 0

        return {"answer": answer_text, "usage": total_tokens}
|