repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...2fa351d6docs: add automaton and perps launch sources16d ago| #1 | import logging |
| #2 | import os |
| #3 | from collections.abc import Generator |
| #4 | from typing import Any, Optional |
| #5 | |
| #6 | from langchain.schema import BaseMessage as LCBaseMessage |
| #7 | |
| #8 | from embedchain.config import BaseLlmConfig |
| #9 | from embedchain.config.llm.base import ( |
| #10 | DEFAULT_PROMPT, |
| #11 | DEFAULT_PROMPT_WITH_HISTORY_TEMPLATE, |
| #12 | DEFAULT_PROMPT_WITH_MEM0_MEMORY_TEMPLATE, |
| #13 | DOCS_SITE_PROMPT_TEMPLATE, |
| #14 | ) |
| #15 | from embedchain.constants import SQLITE_PATH |
| #16 | from embedchain.core.db.database import init_db, setup_engine |
| #17 | from embedchain.helpers.json_serializable import JSONSerializable |
| #18 | from embedchain.memory.base import ChatHistory |
| #19 | from embedchain.memory.message import ChatMessage |
| #20 | |
| #21 | logger = logging.getLogger(__name__) |
| #22 | |
| #23 | |
class BaseLlm(JSONSerializable):
    """Base class for LLM wrappers.

    Holds the LLM configuration, the chat memory and the prompt-building logic shared by
    ``query`` and ``chat``. Child classes are expected to override ``get_llm_model_answer``.
    """

    def __init__(self, config: Optional[BaseLlmConfig] = None):
        """Initialize a base LLM class

        :param config: LLM configuration option class, defaults to None
        :type config: Optional[BaseLlmConfig], optional
        """
        if config is None:
            self.config = BaseLlmConfig()
        else:
            self.config = config

        # Initialize the metadata db for the app here since llmfactory needs it for initialization of
        # the llm memory
        setup_engine(database_uri=os.environ.get("EMBEDCHAIN_DB_URI", f"sqlite:///{SQLITE_PATH}"))
        init_db()

        self.memory = ChatHistory()
        self.is_docs_site_instance = False
        self.history: Any = None

    def get_llm_model_answer(self, *args: Any, **kwargs: Any):
        """
        Usually implemented by child class

        The base implementation accepts arbitrary arguments so that the call made by
        ``get_answer_from_llm`` (which passes the prompt) reaches the ``NotImplementedError``
        below instead of failing earlier with a ``TypeError``.

        :raises NotImplementedError: Always, unless overridden by a child class
        """
        raise NotImplementedError

    def set_history(self, history: Any):
        """
        Provide your own history.
        Especially interesting for the query method, which does not internally manage conversation history.

        :param history: History to set
        :type history: Any
        """
        self.history = history

    def update_history(self, app_id: str, session_id: str = "default"):
        """Update class history attribute with history in memory (for chat method)

        :param app_id: App ID passed to the memory lookup
        :type app_id: str
        :param session_id: Session ID passed to the memory lookup, defaults to "default"
        :type session_id: str, optional
        """
        chat_history = self.memory.get(app_id=app_id, session_id=session_id, num_rounds=10)
        # The prompt needs plain strings, so every stored entry is stringified.
        self.set_history([str(history) for history in chat_history])

    def add_history(
        self,
        app_id: str,
        question: str,
        answer: str,
        metadata: Optional[dict[str, Any]] = None,
        session_id: str = "default",
    ):
        """Store one question/answer pair in memory and refresh ``self.history``

        :param app_id: App ID the message is stored under
        :type app_id: str
        :param question: User message
        :type question: str
        :param answer: AI message
        :type answer: str
        :param metadata: Metadata attached to both messages, defaults to None
        :type metadata: Optional[dict[str, Any]], optional
        :param session_id: Session ID the message is stored under, defaults to "default"
        :type session_id: str, optional
        """
        chat_message = ChatMessage()
        chat_message.add_user_message(question, metadata=metadata)
        chat_message.add_ai_message(answer, metadata=metadata)
        self.memory.add(app_id=app_id, chat_message=chat_message, session_id=session_id)
        self.update_history(app_id=app_id, session_id=session_id)

    def _format_history(self) -> str:
        """Format history to be used in prompt

        A history of ``None`` (the value set in ``__init__``) is treated like an empty
        history, so callers can fall back with ``self._format_history() or "No history"``.

        :return: Formatted history
        :rtype: str
        """
        return "\n".join(self.history or [])

    def _format_memories(self, memories: list[dict]) -> str:
        """Format memories to be used in prompt

        :param memories: Memories to format
        :type memories: list[dict]
        :return: Formatted memories
        :rtype: str
        """
        return "\n".join(memory["text"] for memory in memories)

    def generate_prompt(self, input_query: str, contexts: list[str], **kwargs: Any) -> str:
        """
        Generates a prompt based on the given query and context, ready to be
        passed to an LLM

        Recognized keyword arguments are ``web_search_result`` (appended to the context
        when truthy) and ``memories`` (used with the default prompt when history is set).

        :param input_query: The query to use.
        :type input_query: str
        :param contexts: List of similar documents to the query used as context.
        :type contexts: list[str]
        :return: The prompt
        :rtype: str
        """
        context_string = " | ".join(contexts)
        web_search_result = kwargs.get("web_search_result", "")
        memories = kwargs.get("memories", None)
        if web_search_result:
            context_string = self._append_search_and_context(context_string, web_search_result)

        if self.config._validate_prompt_history(self.config.prompt):
            # The configured prompt has a history placeholder: always fill it.
            return self.config.prompt.substitute(
                context=context_string, query=input_query, history=self._format_history() or "No history"
            )

        if self.history:
            # History is present, but not included in the prompt.
            if self.config.prompt.template == DEFAULT_PROMPT:
                if memories:
                    # swap in the template with Mem0 memory template
                    return DEFAULT_PROMPT_WITH_MEM0_MEMORY_TEMPLATE.substitute(
                        context=context_string,
                        query=input_query,
                        history=self._format_history(),
                        memories=self._format_memories(memories),
                    )
                # swap in the template with history
                return DEFAULT_PROMPT_WITH_HISTORY_TEMPLATE.substitute(
                    context=context_string, query=input_query, history=self._format_history()
                )
            # If we can't swap in the default, we still proceed but tell users that the history is ignored.
            logger.warning(
                "Your bot contains a history, but prompt does not include `$history` key. History is ignored."
            )

        # basic use case, no history (or history that had to be ignored).
        return self.config.prompt.substitute(context=context_string, query=input_query)

    @staticmethod
    def _append_search_and_context(context: str, web_search_result: str) -> str:
        """Append web search context to existing context

        :param context: Existing context
        :type context: str
        :param web_search_result: Web search result
        :type web_search_result: str
        :return: Concatenated web search result
        :rtype: str
        """
        return f"{context}\nWeb Search Result: {web_search_result}"

    def get_answer_from_llm(self, prompt: str):
        """
        Gets an answer based on the given query and context by passing it
        to an LLM.

        :param prompt: The prompt to send to the LLM.
        :type prompt: str
        :return: Whatever ``get_llm_model_answer`` returns for the prompt.
        """
        return self.get_llm_model_answer(prompt)

    @staticmethod
    def access_search_and_get_results(input_query: str):
        """
        Search the internet for additional context

        :param input_query: search query
        :type input_query: str
        :raises ImportError: If ``DuckDuckGoSearchRun`` cannot be imported from ``langchain.tools``
        :return: Search results, as returned by ``DuckDuckGoSearchRun.run``
        """
        try:
            from langchain.tools import DuckDuckGoSearchRun
        except ImportError:
            raise ImportError(
                "Searching requires extra dependencies. Install with `pip install duckduckgo-search==6.1.5`"
            ) from None
        search = DuckDuckGoSearchRun()
        logger.info(f"Access search to get answers for {input_query}")
        return search.run(input_query)

    @staticmethod
    def _stream_response(answer: Any, token_info: Optional[dict[str, Any]] = None) -> Generator[Any, Any, None]:
        """Generator to be used as streaming response

        Chunks are yielded as they arrive; once the stream is exhausted the full answer
        (and the token info, when given) is logged.

        :param answer: Answer chunk from llm
        :type answer: Any
        :param token_info: Token usage information to log after the stream, defaults to None
        :type token_info: Optional[dict[str, Any]], optional
        :yield: Answer chunk from llm
        :rtype: Generator[Any, Any, None]
        """
        streamed_answer = ""
        for chunk in answer:
            streamed_answer = streamed_answer + chunk
            yield chunk
        logger.info(f"Answer: {streamed_answer}")
        if token_info:
            logger.info(f"Token Info: {token_info}")

    def query(
        self,
        input_query: str,
        contexts: list[str],
        config: Optional[BaseLlmConfig] = None,
        dry_run=False,
        memories=None,
    ):
        """
        Queries the vector database based on the given input query.
        Gets relevant doc based on the query and then passes it to an
        LLM as context to get the answer.

        :param input_query: The query to use.
        :type input_query: str
        :param contexts: Embeddings retrieved from the database to be used as context.
        :type contexts: list[str]
        :param config: The `BaseLlmConfig` instance to use as configuration options. This is used for one method call.
        To persistently use a config, declare it during app init., defaults to None
        :type config: Optional[BaseLlmConfig], optional
        :param dry_run: A dry run does everything except send the resulting prompt to
        the LLM. The purpose is to test the prompt, not the response., defaults to False
        :type dry_run: bool, optional
        :param memories: Forwarded to ``generate_prompt`` as the ``memories`` keyword, defaults to None
        :return: ``contexts`` unchanged when ``config.query_type`` is "Images"; the prompt on a dry run;
        otherwise the answer (or a generator for streamed answers), paired with the token info
        when ``config.token_usage`` is enabled
        """
        try:
            if config:
                # A config instance passed to this method will only be applied temporarily, for one call.
                # So we will save the previous config and restore it at the end of the execution.
                # For this we use the serializer.
                prev_config = self.config.serialize()
                self.config = config

            if config is not None and config.query_type == "Images":
                return contexts

            if self.is_docs_site_instance:
                self.config.prompt = DOCS_SITE_PROMPT_TEMPLATE
                self.config.number_documents = 5
            k = {}
            if self.config.online:
                k["web_search_result"] = self.access_search_and_get_results(input_query)
            k["memories"] = memories
            prompt = self.generate_prompt(input_query, contexts, **k)
            logger.info(f"Prompt: {prompt}")
            if dry_run:
                return prompt

            if self.config.token_usage:
                answer, token_info = self.get_answer_from_llm(prompt)
            else:
                answer = self.get_answer_from_llm(prompt)
            if isinstance(answer, str):
                logger.info(f"Answer: {answer}")
                if self.config.token_usage:
                    return answer, token_info
                return answer
            else:
                # this is a streamed response and needs to be handled differently.
                if self.config.token_usage:
                    return self._stream_response(answer, token_info)
                return self._stream_response(answer)
        finally:
            if config:
                # Restore previous config
                self.config = BaseLlmConfig.deserialize(prev_config)

    def chat(
        self,
        input_query: str,
        contexts: list[str],
        config: Optional[BaseLlmConfig] = None,
        dry_run=False,
        session_id: Optional[str] = None,
    ):
        """
        Queries the vector database on the given input query.
        Gets relevant doc based on the query and then passes it to an
        LLM as context to get the answer.

        Maintains the whole conversation in memory.

        :param input_query: The query to use.
        :type input_query: str
        :param contexts: Embeddings retrieved from the database to be used as context.
        :type contexts: list[str]
        :param config: The `BaseLlmConfig` instance to use as configuration options. This is used for one method call.
        To persistently use a config, declare it during app init., defaults to None
        :type config: Optional[BaseLlmConfig], optional
        :param dry_run: A dry run does everything except send the resulting prompt to
        the LLM. The purpose is to test the prompt, not the response., defaults to False
        :type dry_run: bool, optional
        :param session_id: Session ID to use for the conversation, defaults to None.
        It is accepted for interface compatibility and is not read inside this method.
        :type session_id: str, optional
        :return: The prompt on a dry run; otherwise the answer (or a generator for streamed answers),
        paired with the token info when ``config.token_usage`` is enabled
        """
        try:
            if config:
                # A config instance passed to this method will only be applied temporarily, for one call.
                # So we will save the previous config and restore it at the end of the execution.
                # For this we use the serializer.
                prev_config = self.config.serialize()
                self.config = config

            if self.is_docs_site_instance:
                self.config.prompt = DOCS_SITE_PROMPT_TEMPLATE
                self.config.number_documents = 5
            k = {}
            if self.config.online:
                k["web_search_result"] = self.access_search_and_get_results(input_query)

            prompt = self.generate_prompt(input_query, contexts, **k)
            logger.info(f"Prompt: {prompt}")

            if dry_run:
                return prompt

            # Only unpack a (answer, token_info) pair when token usage is enabled; otherwise the
            # model returns the bare answer and unpacking it would fail. Same handling as in `query`.
            if self.config.token_usage:
                answer, token_info = self.get_answer_from_llm(prompt)
            else:
                answer = self.get_answer_from_llm(prompt)
            if isinstance(answer, str):
                logger.info(f"Answer: {answer}")
                if self.config.token_usage:
                    return answer, token_info
                return answer
            else:
                # this is a streamed response and needs to be handled differently.
                if self.config.token_usage:
                    return self._stream_response(answer, token_info)
                return self._stream_response(answer)
        finally:
            if config:
                # Restore previous config
                self.config = BaseLlmConfig.deserialize(prev_config)

    @staticmethod
    def _get_messages(prompt: str, system_prompt: Optional[str] = None) -> list[LCBaseMessage]:
        """
        Construct a list of langchain messages

        :param prompt: User prompt
        :type prompt: str
        :param system_prompt: System prompt, defaults to None
        :type system_prompt: Optional[str], optional
        :return: List of messages, the system message (if any) first
        :rtype: list[BaseMessage]
        """
        from langchain.schema import HumanMessage, SystemMessage

        messages = []
        if system_prompt:
            messages.append(SystemMessage(content=system_prompt))
        messages.append(HumanMessage(content=prompt))
        return messages
| #351 |