repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...
git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...
2fa351d6 docs: add automaton and perps launch sources 16d ago
| #1 | import os |
| #2 | import queue |
| #3 | import re |
| #4 | import tempfile |
| #5 | import threading |
| #6 | |
| #7 | import streamlit as st |
| #8 | |
| #9 | from embedchain import App |
| #10 | from embedchain.config import BaseLlmConfig |
| #11 | from embedchain.helpers.callbacks import StreamingStdOutCallbackHandlerYield, generate |
| #12 | |
| #13 | |
def embedchain_bot(db_path, api_key):
    """Build an Embedchain ``App`` from an inline configuration.

    Args:
        db_path: Directory handed to the Chroma vector store as its ``dir``.
        api_key: OpenAI API key, passed to both the LLM and the embedder.

    Returns:
        Whatever ``App.from_config`` returns for the assembled configuration.
    """
    llm_section = {
        "provider": "openai",
        "config": {
            "model": "gpt-4o-mini",
            "temperature": 0.5,
            "max_tokens": 1000,
            "top_p": 1,
            "stream": True,
            "api_key": api_key,
        },
    }
    vectordb_section = {
        "provider": "chroma",
        "config": {"collection_name": "chat-pdf", "dir": db_path, "allow_reset": True},
    }
    embedder_section = {"provider": "openai", "config": {"api_key": api_key}}
    chunker_section = {"chunk_size": 2000, "chunk_overlap": 0, "length_function": "len"}

    # Sections are inserted in the same order as the original literal.
    full_config = {
        "llm": llm_section,
        "vectordb": vectordb_section,
        "embedder": embedder_section,
        "chunker": chunker_section,
    }
    return App.from_config(config=full_config)
| #36 | |
| #37 | |
def get_db_path():
    """Create a new temporary directory and return its path.

    The directory is made with ``tempfile.mkdtemp`` and is not removed here.
    """
    return tempfile.mkdtemp()
| #41 | |
| #42 | |
def get_ec_app(api_key):
    """Return the Embedchain app stored in the Streamlit session.

    On first use a new app is built over a fresh temporary database directory
    and stored under ``st.session_state.app``. Later calls return that stored
    app; ``api_key`` is only read when the app is created.
    """
    if "app" not in st.session_state:
        print("Creating app")
        storage_dir = get_db_path()
        created = embedchain_bot(storage_dir, api_key)
        st.session_state.app = created
        return created

    print("Found app in session state")
    return st.session_state.app
| #53 | |
| #54 | |
# Sidebar: collect the OpenAI key and ingest uploaded PDFs into the knowledge base.
with st.sidebar:
    openai_access_token = st.text_input("OpenAI API Key", key="api_key", type="password")
    # Bare string expressions, kept exactly as written (presumably rendered by
    # Streamlit's "magic" feature — confirm it is enabled for this app).
    "WE DO NOT STORE YOUR OPENAI KEY."
    "Just paste your OpenAI API key here and we'll use it to power the chatbot. [Get your OpenAI API key](https://platform.openai.com/api-keys)"  # noqa: E501

    if st.session_state.api_key:
        app = get_ec_app(st.session_state.api_key)

    pdf_files = st.file_uploader("Upload your PDF files", accept_multiple_files=True, type="pdf")
    add_pdf_files = st.session_state.get("add_pdf_files", [])
    # Register the list before the loop: it is mutated in place, so files that
    # were ingested are still remembered if a later file fails and st.stop()
    # ends the run early.
    st.session_state["add_pdf_files"] = add_pdf_files
    for pdf_file in pdf_files:
        file_name = pdf_file.name
        if file_name in add_pdf_files:
            # Already ingested on a previous run of the script.
            continue
        try:
            if not st.session_state.api_key:
                st.error("Please enter your OpenAI API Key")
                st.stop()
            temp_file_name = None
            try:
                # The upload is written to a real file and its path is handed
                # to app.add(). delete=False keeps the file after the handle
                # closes, so it must be removed explicitly below.
                with tempfile.NamedTemporaryFile(mode="wb", delete=False, prefix=file_name, suffix=".pdf") as f:
                    temp_file_name = f.name
                    f.write(pdf_file.getvalue())
                st.markdown(f"Adding {file_name} to knowledge base...")
                app.add(temp_file_name, data_type="pdf_file")
                st.markdown("")
                add_pdf_files.append(file_name)
            finally:
                # Remove the temp file on failure as well as success, so a
                # failed write or ingestion does not leak it.
                if temp_file_name:
                    os.remove(temp_file_name)
            st.session_state.messages.append({"role": "assistant", "content": f"Added {file_name} to knowledge base!"})
        except Exception as e:
            st.error(f"Error adding {file_name} to knowledge base: {e}")
            st.stop()
| #88 | |
# Page header.
st.title("📄 Embedchain - Chat with PDF")
st.markdown(
    '<p style="font-size: 17px; color: #aaa;">🚀 An <a href="https://github.com/embedchain/embedchain">Embedchain</a> app powered by OpenAI!</p>',  # noqa: E501
    unsafe_allow_html=True,
)

# Seed the conversation with a greeting the first time the session runs.
if "messages" not in st.session_state:
    greeting = {
        "role": "assistant",
        "content": """
Hi! I'm chatbot powered by Embedchain, which can answer questions about your pdf documents.\n
Upload your pdf documents here and I'll answer your questions about them!
""",
    }
    st.session_state.messages = [greeting]

# Replay the stored conversation on every script run.
for entry in st.session_state.messages:
    with st.chat_message(entry["role"]):
        st.markdown(entry["content"])
| #107 | |
# Handle a new user prompt: stream the answer, then append de-duplicated sources.
if prompt := st.chat_input("Ask me anything!"):
    if not st.session_state.api_key:
        st.error("Please enter your OpenAI API Key", icon="🤖")
        st.stop()

    app = get_ec_app(st.session_state.api_key)

    with st.chat_message("user"):
        st.session_state.messages.append({"role": "user", "content": prompt})
        st.markdown(prompt)

    with st.chat_message("assistant"):
        msg_placeholder = st.empty()
        msg_placeholder.markdown("Thinking...")
        full_response = ""

        # The callback receives this queue and generate(q) below reads from it.
        q = queue.Queue()

        def app_response(result):
            """Run the chat call in a worker thread and store its outcome in *result*.

            On success ``result`` gets ``"answer"`` and ``"citations"``. On
            failure it gets ``"error"`` holding the exception, so the main
            script can report it instead of failing on a missing key.
            """
            try:
                llm_config = app.llm.config.as_dict()
                llm_config["callbacks"] = [StreamingStdOutCallbackHandlerYield(q=q)]
                config = BaseLlmConfig(**llm_config)
                answer, citations = app.chat(prompt, config=config, citations=True)
            except Exception as exc:  # thread boundary: hand the failure back to the caller
                result["error"] = exc
            else:
                result["answer"] = answer
                result["citations"] = citations

        results = {}
        thread = threading.Thread(target=app_response, args=(results,))
        thread.start()

        # NOTE(review): if the worker fails before the stream is terminated,
        # generate(q) may never finish — confirm its stop condition.
        for answer_chunk in generate(q):
            full_response += answer_chunk
            msg_placeholder.markdown(full_response)

        thread.join()
        if "error" in results:
            st.error(f"Error generating response: {results['error']}", icon="🤖")
            st.stop()

        citations = results["citations"]
        if citations:
            full_response += "\n\n**Sources**:\n"
            # Reduce ".../<name>.<middle>.pdf" to "<name>.pdf"; compiled once, outside the loop.
            pattern = re.compile(r"([^/]+)\.[^\.]+\.pdf$")
            # Dict keys de-duplicate while keeping first-seen order, so the
            # source list is deterministic.
            sources = {}
            for citation in citations:
                source = citation[1]["url"]
                match = pattern.search(source)
                if match:
                    source = match.group(1) + ".pdf"
                sources[source] = None
            full_response += "".join(f"- {source}\n" for source in sources)

        msg_placeholder.markdown(full_response)
        print("Answer: ", full_response)
        st.session_state.messages.append({"role": "assistant", "content": full_response})
| #161 |