Skip to content

LLMs

Nirvana provides a unified interface for LLMs.

LLMArguments

nirvana.executors.llm_backbone.LLMArguments

Bases: BaseModel

Attributes

max_tokens: int = Field(default=512, ge=1, le=16384, description='The maximum number of tokens to generate.') class-attribute instance-attribute
temperature: float = Field(default=0.1, ge=0.0, le=1.0, description='The sampling temperature.') class-attribute instance-attribute
max_timeouts: int = Field(default=3, ge=1, le=10, description='The maximum number of timeouts.') class-attribute instance-attribute

LLMClient

nirvana.executors.llm_backbone.LLMClient

Attributes

default_model: str | None = None class-attribute instance-attribute
client: AsyncOpenAI = None class-attribute instance-attribute
config: LLMArguments = LLMArguments() class-attribute instance-attribute

Functions

configure(model_name: str, api_key: str | Path | None = None, base_url: str | None = None, **kwargs) classmethod

Configure the shared LLM client.

The provider (OpenAI / DeepSeek / Qwen) is inferred from model_name, and appropriate defaults for base_url and api_key are applied. Users can still override both api_key and base_url explicitly.

Source code in nirvana/executors/llm_backbone.py
@classmethod
def configure(
    cls,
    model_name: str,
    api_key: str | Path | None = None,
    base_url: str | None = None,
    **kwargs,
):
    """
    Configure the shared LLM client.

    The provider (OpenAI / DeepSeek / Qwen) is inferred from ``model_name``,
    and appropriate defaults for ``base_url`` and ``api_key`` are applied.
    Users can still override both ``api_key`` and ``base_url`` explicitly.
    """
    # Remember the model so later calls can omit the ``model`` kwarg.
    cls.default_model = model_name

    # Resolve provider-specific defaults; explicit arguments win.
    resolved_key, resolved_url = _get_openai_compatible_provider_info(
        model_name=model_name,
        api_key=api_key,
        base_url=base_url,
    )

    # The client is stored on the class, so every instance shares it.
    cls.client = _create_client(
        api_key=resolved_key, base_url=resolved_url, **kwargs
    )
    return cls()
create_embedding(text: list[str] | str, embed_model: str = 'text-embedding-3-large') async
Source code in nirvana/executors/llm_backbone.py
async def create_embedding(self, text: list[str] | str, embed_model: str = "text-embedding-3-large"):
    """
    Embed ``text`` with ``embed_model`` and return ``(embeddings, cost)``.

    ``embeddings`` is a squeezed ``np.ndarray`` (1-D for a single input,
    2-D for a batch); ``cost`` is the dollar cost computed from
    ``MODEL_PRICING`` (priced per 1K input tokens).
    """
    api_key, base_url = _get_openai_compatible_provider_info(model_name=embed_model)
    # Reuse the already-configured key when the provider lookup returns
    # an empty one.
    api_key = self.client.api_key if api_key == "" else api_key
    # FIX: use a local client instead of overwriting ``self.client``.
    # The original reassigned the shared client, so a later chat call
    # without an explicit ``model`` kwarg would hit the embedding
    # provider's base_url with ``default_model`` — a silent misroute.
    client = _create_client(api_key=api_key, base_url=base_url)
    response = await client.embeddings.create(
        input=text, model=embed_model
    )
    # KeyError here means ``embed_model`` is missing from MODEL_PRICING.
    cost = (response.usage.total_tokens / 1000) * MODEL_PRICING[embed_model]["Input"]
    return np.array([data.embedding for data in response.data]).squeeze(), cost
__call__(messages: str | list[dict[str, str]], parse_tags: bool = False, parse_code: bool = False, **kwargs) -> dict[str, Any] async
Source code in nirvana/executors/llm_backbone.py
async def __call__(self,
    messages: str | list[dict[str, str]],
    parse_tags: bool = False,
    parse_code: bool = False,
    **kwargs,
) -> dict[str, Any]:
    """
    Send ``messages`` to the configured model and return a result dict.

    The dict always contains ``raw_output`` and ``cost``; additionally
    one key per tag (``parse_tags=True``, tags in ``kwargs["tags"]``),
    or ``output`` holding extracted code (``parse_code=True``, language
    in ``kwargs["lang"]``), or ``output`` holding the raw text.

    Raises:
        RuntimeError: if no response was obtained after
            ``config.max_timeouts`` attempts.
    """
    model_name = kwargs.pop("model", None)
    if model_name is not None:
        # Per-call model override: rebuild the shared client for the
        # inferred provider, keeping the existing key when none is found.
        api_key, base_url = _get_openai_compatible_provider_info(model_name=model_name)
        api_key = self.client.api_key if api_key == "" else api_key
        self.client = _create_client(api_key=api_key, base_url=base_url)
    else:
        model_name = self.default_model

    # FIX: the original left ``llm_output``/``token_cost`` unbound when
    # every attempt failed, crashing later with NameError. Initialize
    # them and raise a clear error after the retry budget is spent.
    llm_output: str | None = None
    token_cost: float = 0.0
    attempts = 0  # renamed from ``timeout`` — it counts attempts, not seconds
    success = False
    while not success and attempts < self.config.max_timeouts:
        attempts += 1
        try:
            response = await self.client.responses.create(
                model=model_name,
                input=messages,
                max_output_tokens=self.config.max_tokens,
                temperature=self.config.temperature,
            )
            llm_output = response.output_text
            token_cost = self._compute_usage(response)
            success = True
        except Exception as e:
            logger.error(f"An error occurs when creating a response: {e}")

    if not success:
        raise RuntimeError(
            f"LLM call failed after {attempts} attempt(s) for model {model_name!r}."
        )

    outputs: dict[str, Any] = {"raw_output": llm_output}
    if parse_tags:
        tags: list[str] = kwargs["tags"]
        for tag in tags:
            outputs[tag] = self._extract_xml(llm_output, tag)
    elif parse_code:
        code = self._extract_code(llm_output, lang=kwargs["lang"])
        outputs["output"] = code
    else:
        outputs["output"] = llm_output
    outputs["cost"] = token_cost
    return outputs