my-project-public

#1

import os

#2

from collections.abc import Iterable

#3

from typing import Any, Optional, Union

#4

#5

from langchain.callbacks.manager import CallbackManager

#6

from langchain.callbacks.stdout import StdOutCallbackHandler

#7

from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

#8

#9

try:

#10

from langchain_nvidia_ai_endpoints import ChatNVIDIA

#11

except ImportError:

#12

raise ImportError(

#13

"NVIDIA AI endpoints requires extra dependencies. Install with `pip install langchain-nvidia-ai-endpoints`"

#14

) from None

#15

#16

from embedchain.config import BaseLlmConfig

#17

from embedchain.helpers.json_serializable import register_deserializable

#18

from embedchain.llm.base import BaseLlm

#19

#20

#21

@register_deserializable
class NvidiaLlm(BaseLlm):
    """LLM wrapper that answers prompts through NVIDIA AI endpoints (``ChatNVIDIA``)."""

    def __init__(self, config: Optional[BaseLlmConfig] = None):
        """Initialise the wrapper and verify that an API key is available.

        Args:
            config: LLM configuration forwarded to ``BaseLlm``.

        Raises:
            ValueError: If neither ``config.api_key`` nor the ``NVIDIA_API_KEY``
                environment variable is set.
        """
        super().__init__(config=config)
        if not self.config.api_key and "NVIDIA_API_KEY" not in os.environ:
            raise ValueError("Please set the NVIDIA_API_KEY environment variable or pass it in the config.")

    def get_llm_model_answer(self, prompt) -> Union[str, tuple[str, Optional[dict[str, Any]]]]:
        """Return the model's answer, plus token/cost details when enabled.

        Args:
            prompt: Prompt passed to the model.

        Returns:
            The answer content alone when ``config.token_usage`` is falsy;
            otherwise a ``(answer, token_info)`` tuple where ``token_info``
            holds prompt/completion/total token counts and the total cost.

        Raises:
            ValueError: If token usage is enabled and ``"nvidia/" + model`` is
                missing from ``config.model_pricing_map``.
        """
        if not self.config.token_usage:
            return self._get_answer(prompt, self.config)

        response, token_info = self._get_answer(prompt, self.config)
        model_name = "nvidia/" + self.config.model
        if model_name not in self.config.model_pricing_map:
            raise ValueError(
                f"Model {model_name} not found in `model_prices_and_context_window.json`. "
                "You can disable token usage by setting `token_usage` to False."
            )

        # Look the pricing entry up once; it is used for both cost components.
        pricing = self.config.model_pricing_map[model_name]
        input_tokens = token_info["input_tokens"]
        output_tokens = token_info["output_tokens"]
        total_cost = (
            pricing["input_cost_per_token"] * input_tokens
        ) + pricing["output_cost_per_token"] * output_tokens

        response_token_info = {
            "prompt_tokens": input_tokens,
            "completion_tokens": output_tokens,
            "total_tokens": input_tokens + output_tokens,
            "total_cost": round(total_cost, 10),
            "cost_currency": "USD",
        }
        return response, response_token_info

    @staticmethod
    def _get_answer(prompt: str, config: BaseLlmConfig) -> Union[str, Iterable]:
        """Invoke ``ChatNVIDIA`` with the settings taken from ``config``.

        Args:
            prompt: Prompt passed to ``ChatNVIDIA.invoke``.
            config: Supplies model, API key, system prompt, sampling settings,
                streaming flag, ``model_kwargs`` and the ``token_usage`` flag.

        Returns:
            The response content, or a ``(content, token_usage)`` tuple when
            ``config.token_usage`` is truthy.
        """
        # Streaming output uses the streaming stdout handler; otherwise the plain one.
        callback_manager = [StreamingStdOutCallbackHandler()] if config.stream else [StdOutCallbackHandler()]
        model_kwargs = config.model_kwargs or {}
        labels = model_kwargs.get("labels", None)

        params = {"model": config.model, "nvidia_api_key": config.api_key or os.getenv("NVIDIA_API_KEY")}
        if config.system_prompt:
            params["system_prompt"] = config.system_prompt
        # Compare against None rather than relying on truthiness: an explicit
        # value of 0 is a legitimate setting and must not be silently dropped.
        if config.temperature is not None:
            params["temperature"] = config.temperature
        if config.top_p is not None:
            params["top_p"] = config.top_p
        if labels:
            params["labels"] = labels

        llm = ChatNVIDIA(**params, callback_manager=CallbackManager(callback_manager))
        chat_response = llm.invoke(prompt) if labels is None else llm.invoke(prompt, labels=labels)

        if config.token_usage:
            return chat_response.content, chat_response.response_metadata["token_usage"]
        return chat_response.content

#69

z6Mkq5mY3JWtxoxUobWcfNHm7AkRubgSWEZTkBVqZXJviFZ5/my-project-public