my-project-public

repository

loading code, commits, and activity

repositories

loading repo index

#1	import logging
#2	import os
#3	from collections.abc import Generator
#4	from typing import Any, Optional
#5
#6	from langchain.schema import BaseMessage as LCBaseMessage
#7
#8	from embedchain.config import BaseLlmConfig
#9	from embedchain.config.llm.base import (
#10	DEFAULT_PROMPT,
#11	DEFAULT_PROMPT_WITH_HISTORY_TEMPLATE,
#12	DEFAULT_PROMPT_WITH_MEM0_MEMORY_TEMPLATE,
#13	DOCS_SITE_PROMPT_TEMPLATE,
#14	)
#15	from embedchain.constants import SQLITE_PATH
#16	from embedchain.core.db.database import init_db, setup_engine
#17	from embedchain.helpers.json_serializable import JSONSerializable
#18	from embedchain.memory.base import ChatHistory
#19	from embedchain.memory.message import ChatMessage
#20
#21	logger = logging.getLogger(__name__)
#22
#23
class BaseLlm(JSONSerializable):
    """Base class for LLM integrations.

    Holds the LLM configuration and the chat-history store, builds prompts from a query plus
    retrieved contexts, and forwards them to ``get_llm_model_answer``, which raises
    ``NotImplementedError`` here and is meant to be provided by child classes.
    """

    def __init__(self, config: Optional[BaseLlmConfig] = None):
        """Initialize a base LLM class

        :param config: LLM configuration option class, defaults to None
        :type config: Optional[BaseLlmConfig], optional
        """
        self.config = BaseLlmConfig() if config is None else config

        # Initialize the metadata db for the app here since llmfactory needs it for initialization of
        # the llm memory
        setup_engine(database_uri=os.environ.get("EMBEDCHAIN_DB_URI", f"sqlite:///{SQLITE_PATH}"))
        init_db()

        self.memory = ChatHistory()
        self.is_docs_site_instance = False
        # Stays None until `set_history` / `update_history` is called.
        self.history: Any = None

    def get_llm_model_answer(self, prompt: Optional[str] = None):
        """
        Usually implemented by child class

        The ``prompt`` parameter mirrors how ``get_answer_from_llm`` invokes this method, so that
        calling the un-overridden base implementation raises ``NotImplementedError`` rather than
        a ``TypeError`` about an unexpected argument.

        :param prompt: Prompt to send to the LLM, defaults to None
        :type prompt: Optional[str], optional
        :raises NotImplementedError: Always, in this base class
        """
        raise NotImplementedError

    def set_history(self, history: Any):
        """
        Provide your own history.
        Especially interesting for the query method, which does not internally manage conversation history.

        :param history: History to set
        :type history: Any
        """
        self.history = history

    def update_history(self, app_id: str, session_id: str = "default"):
        """Update class history attribute with history in memory (for chat method)

        :param app_id: App ID whose stored history is loaded
        :type app_id: str
        :param session_id: Session ID whose stored history is loaded, defaults to "default"
        :type session_id: str, optional
        """
        chat_history = self.memory.get(app_id=app_id, session_id=session_id, num_rounds=10)
        self.set_history([str(history) for history in chat_history])

    def add_history(
        self,
        app_id: str,
        question: str,
        answer: str,
        metadata: Optional[dict[str, Any]] = None,
        session_id: str = "default",
    ):
        """Store one question/answer exchange in memory and refresh ``self.history``

        :param app_id: App ID the exchange belongs to
        :type app_id: str
        :param question: User message
        :type question: str
        :param answer: AI message
        :type answer: str
        :param metadata: Metadata attached to both messages, defaults to None
        :type metadata: Optional[dict[str, Any]], optional
        :param session_id: Session ID the exchange belongs to, defaults to "default"
        :type session_id: str, optional
        """
        chat_message = ChatMessage()
        chat_message.add_user_message(question, metadata=metadata)
        chat_message.add_ai_message(answer, metadata=metadata)
        self.memory.add(app_id=app_id, chat_message=chat_message, session_id=session_id)
        self.update_history(app_id=app_id, session_id=session_id)

    def _format_history(self) -> str:
        """Format history to be used in prompt

        :return: Formatted history, or an empty string when no history has been set
        :rtype: str
        """
        # `self.history` starts out as None; joining None would raise TypeError.
        return "\n".join(self.history or [])

    def _format_memories(self, memories: list[dict]) -> str:
        """Format memories to be used in prompt

        :param memories: Memories to format; each entry must have a "text" key
        :type memories: list[dict]
        :return: Formatted memories
        :rtype: str
        """
        return "\n".join(memory["text"] for memory in memories)

    def generate_prompt(self, input_query: str, contexts: list[str], **kwargs: Any) -> str:
        """
        Generates a prompt based on the given query and context, ready to be
        passed to an LLM

        Recognized keyword arguments are ``web_search_result`` (appended to the context when
        truthy) and ``memories`` (used only when the default prompt is swapped for the
        memory-aware template).

        :param input_query: The query to use.
        :type input_query: str
        :param contexts: List of similar documents to the query used as context.
        :type contexts: list[str]
        :return: The prompt
        :rtype: str
        """
        # Plain pipe separator; the previous " \| " literal contained an invalid escape sequence.
        context_string = " | ".join(contexts)
        web_search_result = kwargs.get("web_search_result", "")
        memories = kwargs.get("memories")
        if web_search_result:
            context_string = self._append_search_and_context(context_string, web_search_result)

        if self.config._validate_prompt_history(self.config.prompt):
            return self.config.prompt.substitute(
                context=context_string, query=input_query, history=self._format_history() or "No history"
            )

        if not self.history:
            # basic use case, no history.
            return self.config.prompt.substitute(context=context_string, query=input_query)

        # History is present, but not included in the prompt.
        if self.config.prompt.template != DEFAULT_PROMPT:
            # If we can't swap in the default, we still proceed but tell users that the history is ignored.
            logger.warning(
                "Your bot contains a history, but prompt does not include `$history` key. History is ignored."
            )
            return self.config.prompt.substitute(context=context_string, query=input_query)

        if memories:
            # swap in the template with Mem0 memory template
            return DEFAULT_PROMPT_WITH_MEM0_MEMORY_TEMPLATE.substitute(
                context=context_string,
                query=input_query,
                history=self._format_history(),
                memories=self._format_memories(memories),
            )

        # swap in the template with history
        return DEFAULT_PROMPT_WITH_HISTORY_TEMPLATE.substitute(
            context=context_string, query=input_query, history=self._format_history()
        )

    @staticmethod
    def _append_search_and_context(context: str, web_search_result: str) -> str:
        """Append web search context to existing context

        :param context: Existing context
        :type context: str
        :param web_search_result: Web search result
        :type web_search_result: str
        :return: Concatenated web search result
        :rtype: str
        """
        return f"{context}\nWeb Search Result: {web_search_result}"

    def get_answer_from_llm(self, prompt: str):
        """
        Gets an answer based on the given query and context by passing it
        to an LLM.

        :param prompt: The fully rendered prompt to send to the LLM.
        :type prompt: str
        :return: Whatever ``get_llm_model_answer`` returns for the prompt.
        :rtype: Any
        """
        return self.get_llm_model_answer(prompt)

    @staticmethod
    def access_search_and_get_results(input_query: str):
        """
        Search the internet for additional context

        :param input_query: search query
        :type input_query: str
        :raises ImportError: If the DuckDuckGo search tool cannot be imported
        :return: Search results
        :rtype: Unknown
        """
        try:
            from langchain.tools import DuckDuckGoSearchRun
        except ImportError:
            raise ImportError(
                "Searching requires extra dependencies. Install with `pip install duckduckgo-search==6.1.5`"
            ) from None
        search = DuckDuckGoSearchRun()
        logger.info(f"Access search to get answers for {input_query}")
        return search.run(input_query)

    @staticmethod
    def _stream_response(answer: Any, token_info: Optional[dict[str, Any]] = None) -> Generator[Any, Any, None]:
        """Generator to be used as streaming response

        Each chunk is yielded as it arrives; once the stream is exhausted the full answer
        (and the token info, when given) is logged.

        :param answer: Answer chunk from llm
        :type answer: Any
        :param token_info: Token usage details to log after the stream ends, defaults to None
        :type token_info: Optional[dict[str, Any]], optional
        :yield: Answer chunk from llm
        :rtype: Generator[Any, Any, None]
        """
        chunks = []
        for chunk in answer:
            chunks.append(chunk)
            yield chunk
        streamed_answer = "".join(chunks)
        logger.info(f"Answer: {streamed_answer}")
        if token_info:
            logger.info(f"Token Info: {token_info}")

    def _get_answer_or_stream(self, prompt: str):
        """Send ``prompt`` to the LLM and shape the result according to ``config.token_usage``

        :param prompt: The fully rendered prompt
        :type prompt: str
        :return: For a string answer, ``(answer, token_info)`` when token usage is enabled and the
        bare answer otherwise; for any other answer, a generator wrapping the stream.
        """
        track_tokens = self.config.token_usage
        if track_tokens:
            answer, token_info = self.get_answer_from_llm(prompt)
        else:
            answer, token_info = self.get_answer_from_llm(prompt), None

        if isinstance(answer, str):
            logger.info(f"Answer: {answer}")
            return (answer, token_info) if track_tokens else answer
        # this is a streamed response and needs to be handled differently.
        return self._stream_response(answer, token_info)

    def query(
        self,
        input_query: str,
        contexts: list[str],
        config: Optional[BaseLlmConfig] = None,
        dry_run=False,
        memories: Optional[list[dict]] = None,
    ):
        """
        Queries the vector database based on the given input query.
        Gets relevant doc based on the query and then passes it to an
        LLM as context to get the answer.

        :param input_query: The query to use.
        :type input_query: str
        :param contexts: Embeddings retrieved from the database to be used as context.
        :type contexts: list[str]
        :param config: The `BaseLlmConfig` instance to use as configuration options. This is used for one method call.
        To persistently use a config, declare it during app init., defaults to None
        :type config: Optional[BaseLlmConfig], optional
        :param dry_run: A dry run does everything except send the resulting prompt to
        the LLM. The purpose is to test the prompt, not the response., defaults to False
        :type dry_run: bool, optional
        :param memories: Memories forwarded to ``generate_prompt``, defaults to None
        :type memories: Optional[list[dict]], optional
        :return: The answer to the query or the dry run result. When ``config.query_type`` is
        "Images" the contexts are returned unchanged.
        :rtype: str
        """
        # A config instance passed to this method will only be applied temporarily, for one call.
        # So we will save the previous config and restore it at the end of the execution.
        # For this we use the serializer. The snapshot is taken before the `try` so that a
        # serialization failure is not masked by the restore step.
        prev_config = self.config.serialize() if config else None
        try:
            if config:
                self.config = config

            if config is not None and config.query_type == "Images":
                return contexts

            if self.is_docs_site_instance:
                self.config.prompt = DOCS_SITE_PROMPT_TEMPLATE
                self.config.number_documents = 5
            k = {}
            if self.config.online:
                k["web_search_result"] = self.access_search_and_get_results(input_query)
            k["memories"] = memories
            prompt = self.generate_prompt(input_query, contexts, **k)
            logger.info(f"Prompt: {prompt}")
            if dry_run:
                return prompt

            return self._get_answer_or_stream(prompt)
        finally:
            if config:
                # Restore previous config
                self.config: BaseLlmConfig = BaseLlmConfig.deserialize(prev_config)

    def chat(
        self,
        input_query: str,
        contexts: list[str],
        config: Optional[BaseLlmConfig] = None,
        dry_run=False,
        session_id: Optional[str] = None,
    ):
        """
        Queries the vector database on the given input query.
        Gets relevant doc based on the query and then passes it to an
        LLM as context to get the answer.

        Maintains the whole conversation in memory.

        :param input_query: The query to use.
        :type input_query: str
        :param contexts: Embeddings retrieved from the database to be used as context.
        :type contexts: list[str]
        :param config: The `BaseLlmConfig` instance to use as configuration options. This is used for one method call.
        To persistently use a config, declare it during app init., defaults to None
        :type config: Optional[BaseLlmConfig], optional
        :param dry_run: A dry run does everything except send the resulting prompt to
        the LLM. The purpose is to test the prompt, not the response., defaults to False
        :type dry_run: bool, optional
        :param session_id: Session ID to use for the conversation, defaults to None.
        Accepted for interface compatibility; this method does not read it.
        :type session_id: str, optional
        :return: The answer to the query or the dry run result. Token info accompanies the
        answer only when ``config.token_usage`` is enabled, matching ``query``.
        :rtype: str
        """
        # A config instance passed to this method will only be applied temporarily, for one call.
        # So we will save the previous config and restore it at the end of the execution.
        # For this we use the serializer. The snapshot is taken before the `try` so that a
        # serialization failure is not masked by the restore step.
        prev_config = self.config.serialize() if config else None
        try:
            if config:
                self.config = config

            if self.is_docs_site_instance:
                self.config.prompt = DOCS_SITE_PROMPT_TEMPLATE
                self.config.number_documents = 5
            k = {}
            if self.config.online:
                k["web_search_result"] = self.access_search_and_get_results(input_query)

            prompt = self.generate_prompt(input_query, contexts, **k)
            logger.info(f"Prompt: {prompt}")

            if dry_run:
                return prompt

            return self._get_answer_or_stream(prompt)
        finally:
            if config:
                # Restore previous config
                self.config: BaseLlmConfig = BaseLlmConfig.deserialize(prev_config)

    @staticmethod
    def _get_messages(prompt: str, system_prompt: Optional[str] = None) -> list[LCBaseMessage]:
        """
        Construct a list of langchain messages

        :param prompt: User prompt
        :type prompt: str
        :param system_prompt: System prompt, defaults to None
        :type system_prompt: Optional[str], optional
        :return: List of messages; the system message (if any) comes first
        :rtype: list[BaseMessage]
        """
        from langchain.schema import HumanMessage, SystemMessage

        messages = []
        if system_prompt:
            messages.append(SystemMessage(content=system_prompt))
        messages.append(HumanMessage(content=prompt))
        return messages
#351

z6Mkq5mY3JWtxoxUobWcfNHm7AkRubgSWEZTkBVqZXJviFZ5/my-project-public