Skip to content

Commit

Permalink
Merge pull request #28 from linjungz/staging
Browse files Browse the repository at this point in the history
Merge to master
  • Loading branch information
linjungz authored Aug 5, 2023
2 parents 4fb57cf + 95bc0be commit 0b5a7ba
Show file tree
Hide file tree
Showing 6 changed files with 251 additions and 97 deletions.
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
.venv/*
.env
__pycache__/*
data/uploaded/*
data/vector_store/*
data/source_documents/*
data/archive/*
*.code-workspace
*.DS_STORE
19 changes: 17 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,25 @@ pip install -r requirements.txt

## Configuration

> In this project we support both the OpenAI API and the Azure OpenAI Service. Some environment variables are common to the two APIs while others are unique to one of them. The following table lists all the supported environment variables:

| Environment Variables | Azure OpenAI Service | OpenAI |
| --- | --- | --- |
| OPENAI_API_BASE | :white_check_mark: | |
| OPENAI_API_KEY | :white_check_mark: | :white_check_mark: |
| OPENAI_GPT_DEPLOYMENT_NAME | :white_check_mark: | |
| OPENAI_EMBEDDING_DEPLOYMENT_NAME | :white_check_mark: | :white_check_mark: |
| CHAT_MODEL_NAME | | :white_check_mark: |
| REQUEST_TIMEOUT | :white_check_mark: | :white_check_mark: |
| VECTORDB_PATH | :white_check_mark: | :white_check_mark: |
| TEMPERATURE | :white_check_mark: | :white_check_mark: |
| CHUNK_SIZE | :white_check_mark: | :white_check_mark: |
| CHUNK_OVERLAP | :white_check_mark: | :white_check_mark: |


### Azure OpenAI Services

1. Obtain your Azure OpenAI API key, Endpoint and Deployment Name from the [Azure Portal](https://portal.azure.com/).

2. Create `.env` in the root dir and set the environment variables in the file:

```
Expand All @@ -82,11 +97,11 @@ Here's where you can find the deployment names for GPT and Embedding:
### OpenAI

1. Obtain your OpenAI API key from the [platform.openai.com](https://platform.openai.com/account/api-keys).

2. Create `.env` in the root dir and set the environment variable in the file:

```
OPENAI_API_KEY=your-key-here
CHAT_MODEL_NAME="gpt-4-0314"
```

## Usage: Web
Expand Down
33 changes: 28 additions & 5 deletions chat_cli.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,31 @@
from chatbot import DocChatbot
import typer
from typing_extensions import Annotated
import os
from dotenv import load_dotenv

import glob



app = typer.Typer()
docChatbot = DocChatbot()

# Load variables from a local .env file so the vector-store location can be
# configured per deployment without code changes.
load_dotenv()

# Directory where vector indexes are stored. Falls back to the repo-local
# default when the environment variable is not set, warning the user in red.
# (The previous hard-coded assignment was a dead store and has been removed.)
VECTORDB_PATH = os.getenv("VECTORDB_PATH")
if VECTORDB_PATH is None:
    typer.echo(typer.style("VECTORDB_PATH environment variable not found and default path ./data/vector_store will be used.", fg=typer.colors.RED))
    VECTORDB_PATH = "./data/vector_store"

@app.command()
def ingest(
path : Annotated[str, typer.Option(help="Path to the documents to be ingested, support glob pattern", show_default=False)],
name : Annotated[str, typer.Option(help="Name of the index to be created", show_default=False)]):
"""
Ingests documents into a vector database.
Args:
path: The path to the documents to be ingested (supports glob patterns).
name: The name of the index to be created.
"""
#support for glob in doc_path
file_list = glob.glob(path)
# print(file_list)
Expand All @@ -23,22 +35,33 @@ def ingest(

@app.command()
def chat(name: str = "index"):
    """
    Start an interactive CLI chat session over a local vector index.

    Args:
        name: The name of the index to be used (default is "index").

    Commands understood at the prompt:
        "exit"  - quit the session.
        "reset" - clear the conversation history and continue.
    """
    docChatbot.load_vector_db_from_local(VECTORDB_PATH, name)
    docChatbot.init_chatchain()

    chat_history = []

    while True:
        # Green prompt for questions; answers are styled yellow below.
        question_prompt = typer.style("Question:", fg=typer.colors.GREEN)
        query = input(question_prompt)
        if query == "exit":
            break
        if query == "reset":
            # Forget prior turns but keep the session alive.
            chat_history = []
            continue

        result_answer, result_source = docChatbot.get_answer_with_source(query, chat_history)

        styled_answer = typer.style(f"A: {result_answer}", fg=typer.colors.YELLOW)
        print(f"Q: {query}\n{styled_answer}")
        print("Source Documents:")
        for doc in result_source:
            print(doc.metadata)
Expand Down
51 changes: 35 additions & 16 deletions chat_web_st.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,50 @@
import streamlit as st
from datetime import datetime

# One chatbot instance per script run; Streamlit re-executes this file on
# every interaction, so the selected/initialized bot is kept in session_state.
docChatBot = DocChatbot()
available_indexes = docChatBot.get_available_indexes("./data/vector_store")

# First entry is a placeholder meaning "no existing store selected".
index_options = ["-- Existing Vector Stores --"] + available_indexes

with st.sidebar:
    st.title("💬 Chat with Your Doc")
    st.write("Upload a document and ask questions about it.")

    with st.form("Upload and Process", True):
        # Dropdown for selecting an existing index, or keep the placeholder
        # and upload a file to create a new one.
        selected_index = st.selectbox('Select an existing vector store or upload a file to create one, then press Process button', index_options)

        uploaded_file = st.file_uploader("Upload documents", type=["pdf", "md", "txt", "docx", ".csv", ".xml"])
        submitted = st.form_submit_button("Process")

        if submitted:
            try:
                if selected_index == "-- Existing Vector Stores --":
                    # No existing store chosen: build a new one from the upload.
                    if uploaded_file:
                        ext_name = os.path.splitext(uploaded_file.name)[-1]
                        if ext_name not in [".pdf", ".md", ".txt", ".docx", ".csv", ".xml"]:
                            st.error("Unsupported file type.")
                            st.stop()
                        # Save the uploaded file to local with a timestamped name
                        # to avoid collisions between uploads.
                        timestamp = int(datetime.timestamp(datetime.now()))
                        local_file_name = f"""./data/uploaded/{timestamp}{ext_name}"""
                        with open(local_file_name, "wb") as f:
                            f.write(uploaded_file.getbuffer())

                        docChatBot.init_vector_db_from_documents([local_file_name])
                else:
                    docChatBot.load_vector_db_from_local("./data/vector_store", selected_index)

                st.session_state['docChatBot'] = docChatBot
                st.session_state["messages"] = [{"role": "assistant", "content": "Hi!😊"}]

                st.success("Vector db initialized.")
                st.balloons()
            except Exception as e:
                st.error(f"An error occurred while processing the file: {str(e)}")
                st.stop()

with st.container():
    "[Github Repo Link](https://github.com/linjungz/chat-with-your-doc)"
Expand Down
Loading

0 comments on commit 0b5a7ba

Please sign in to comment.