Skip to content

Commit

Permalink
Merge pull request #28 from linjungz/staging
Browse files Browse the repository at this point in the history
Merge to master
  • Loading branch information
linjungz authored Aug 5, 2023
2 parents 4fb57cf + 95bc0be commit 0b5a7ba
Show file tree
Hide file tree
Showing 6 changed files with 251 additions and 97 deletions.
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
.venv/*
.env
__pycache__/*
data/uploaded/*
data/vector_store/*
data/source_documents/*
data/archive/*
*.code-workspace
*.DS_STORE
19 changes: 17 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,25 @@ pip install -r requirements.txt

## Configuration

> In this project we support both the OpenAI API and the Azure OpenAI Service. Some environment variables are common to the two APIs while others are unique to one of them. The following table lists all the supported environment variables:

| Environment Variables | Azure OpenAI Service | OpenAI |
| --- | --- | --- |
| OPENAI_API_BASE | :white_check_mark: | |
| OPENAI_API_KEY | :white_check_mark: | :white_check_mark: |
| OPENAI_GPT_DEPLOYMENT_NAME | :white_check_mark: | |
| OPENAI_EMBEDDING_DEPLOYMENT_NAME | :white_check_mark: | :white_check_mark: |
| CHAT_MODEL_NAME | | :white_check_mark: |
| REQUEST_TIMEOUT | :white_check_mark: | :white_check_mark: |
| VECTORDB_PATH | :white_check_mark: | :white_check_mark: |
| TEMPERATURE | :white_check_mark: | :white_check_mark: |
| CHUNK_SIZE | :white_check_mark: | :white_check_mark: |
| CHUNK_OVERLAP | :white_check_mark: | :white_check_mark: |


### Azure OpenAI Services

1. Obtain your Azure OpenAI API key, Endpoint and Deployment Name from the [Azure Portal](https://portal.azure.com/).

2. Create `.env` in the root dir and set the environment variables in the file:

```
Expand All @@ -82,11 +97,11 @@ Here's where you can find the deployment names for GPT and Embedding:
### OpenAI

1. Obtain your OpenAI API key from the [platform.openai.com](https://platform.openai.com/account/api-keys).

2. Create `.env` in the root dir and set the environment variable in the file:

```
OPENAI_API_KEY=your-key-here
CHAT_MODEL_NAME="gpt-4-0314"
```

## Usage: Web
Expand Down
33 changes: 28 additions & 5 deletions chat_cli.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,31 @@
from chatbot import DocChatbot
import typer
from typing_extensions import Annotated
import os
from dotenv import load_dotenv

import glob



app = typer.Typer()
docChatbot = DocChatbot()

# Load variables from a local .env file so the vector-store location can be
# configured per deployment without code changes.
load_dotenv()

# Directory where vector indexes are stored. Falls back to the repo-local
# default when the environment variable is not set, warning the user in red.
# (The previous hard-coded assignment was a dead store and has been removed.)
VECTORDB_PATH = os.getenv("VECTORDB_PATH")
if VECTORDB_PATH is None:
    typer.echo(typer.style("VECTORDB_PATH environment variable not found and default path ./data/vector_store will be used.", fg=typer.colors.RED))
    VECTORDB_PATH = "./data/vector_store"

@app.command()
def ingest(
path : Annotated[str, typer.Option(help="Path to the documents to be ingested, support glob pattern", show_default=False)],
name : Annotated[str, typer.Option(help="Name of the index to be created", show_default=False)]):
"""
Ingests documents into a vector database.
Args:
path: The path to the documents to be ingested (supports glob patterns).
name: The name of the index to be created.
"""
#support for glob in doc_path
file_list = glob.glob(path)
# print(file_list)
Expand All @@ -23,22 +35,33 @@ def ingest(

@app.command()
def chat(name: str = "index"):
    """
    Start an interactive CLI chat session over a local vector index.

    Args:
        name: The name of the index to be used (default is "index").

    Commands understood at the prompt:
        "exit"  - quit the session.
        "reset" - clear the conversation history and continue.
    """
    docChatbot.load_vector_db_from_local(VECTORDB_PATH, name)
    docChatbot.init_chatchain()

    chat_history = []

    while True:
        # Green prompt for questions; answers are styled yellow below.
        question_prompt = typer.style("Question:", fg=typer.colors.GREEN)
        query = input(question_prompt)
        if query == "exit":
            break
        if query == "reset":
            # Forget prior turns but keep the session alive.
            chat_history = []
            continue

        result_answer, result_source = docChatbot.get_answer_with_source(query, chat_history)

        styled_answer = typer.style(f"A: {result_answer}", fg=typer.colors.YELLOW)
        print(f"Q: {query}\n{styled_answer}")
        print("Source Documents:")
        for doc in result_source:
            print(doc.metadata)
Expand Down
51 changes: 35 additions & 16 deletions chat_web_st.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,50 @@
import streamlit as st
from datetime import datetime

# One chatbot instance per script run; Streamlit re-executes this file on
# every interaction, so the selected/initialized bot is kept in session_state.
docChatBot = DocChatbot()
available_indexes = docChatBot.get_available_indexes("./data/vector_store")

# First entry is a placeholder meaning "no existing store selected".
index_options = ["-- Existing Vector Stores --"] + available_indexes

with st.sidebar:
    st.title("💬 Chat with Your Doc")
    st.write("Upload a document and ask questions about it.")

    with st.form("Upload and Process", True):
        # Dropdown for selecting an existing index, or keep the placeholder
        # and upload a file to create a new one.
        selected_index = st.selectbox('Select an existing vector store or upload a file to create one, then press Process button', index_options)

        uploaded_file = st.file_uploader("Upload documents", type=["pdf", "md", "txt", "docx", ".csv", ".xml"])
        submitted = st.form_submit_button("Process")

        if submitted:
            try:
                if selected_index == "-- Existing Vector Stores --":
                    # No existing store chosen: build a new one from the upload.
                    if uploaded_file:
                        ext_name = os.path.splitext(uploaded_file.name)[-1]
                        if ext_name not in [".pdf", ".md", ".txt", ".docx", ".csv", ".xml"]:
                            st.error("Unsupported file type.")
                            st.stop()
                        # Save the uploaded file to local with a timestamped name
                        # to avoid collisions between uploads.
                        timestamp = int(datetime.timestamp(datetime.now()))
                        local_file_name = f"""./data/uploaded/{timestamp}{ext_name}"""
                        with open(local_file_name, "wb") as f:
                            f.write(uploaded_file.getbuffer())

                        docChatBot.init_vector_db_from_documents([local_file_name])
                else:
                    docChatBot.load_vector_db_from_local("./data/vector_store", selected_index)

                st.session_state['docChatBot'] = docChatBot
                st.session_state["messages"] = [{"role": "assistant", "content": "Hi!😊"}]

                st.success("Vector db initialized.")
                st.balloons()
            except Exception as e:
                st.error(f"An error occurred while processing the file: {str(e)}")
                st.stop()

with st.container():
    "[Github Repo Link](https://github.com/linjungz/chat-with-your-doc)"
Expand Down
Loading

0 comments on commit 0b5a7ba

Please sign in to comment.