Skip to content

Commit

Permalink
Merge pull request #22 from linjungz/staging
Browse files Browse the repository at this point in the history
Add Support for OpenAI API
  • Loading branch information
linjungz authored Jul 9, 2023
2 parents dd5d866 + 958b853 commit 4fb57cf
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 42 deletions.
23 changes: 18 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
# chat-with-your-doc

`chat-with-your-doc` is a demonstration application that leverages the capabilities of Azure OpenAI ChatGPT/GPT-4 and LangChain to enable users to chat with their documents. This repository hosts the codebase, instructions, and resources needed to set up and run the application.
`chat-with-your-doc` is a demonstration application that leverages the capabilities of ChatGPT/GPT-4 and LangChain to enable users to chat with their documents. This repository hosts the codebase, instructions, and resources needed to set up and run the application.

## Introduction

The primary goal of this project is to simplify the interaction with documents and extract valuable information using natural language. This project is built using LangChain and Azure OpenAI GPT-4/ChatGPT to deliver a smooth and natural conversational experience to the user.
The primary goal of this project is to simplify the interaction with documents and extract valuable information using natural language. This project is built using LangChain and GPT-4/ChatGPT to deliver a smooth and natural conversational experience to the user, with support for both `Azure OpenAI Services` and `OpenAI`.

![](static/web_ui.png)

## Updates

- 20230703 [v0.3.0](https://github.com/linjungz/chat-with-your-doc/releases/tag/v0.3.0): Web UI changed to Streamlit, with support for streaming
- 20230709: Add Support for OpenAI API
- 20230703: Web UI changed to Streamlit, with support for streaming

## Features

- Upload documents as external knowledge base for Azure OpenAI GPT-4/ChatGPT.
- Upload documents as external knowledge base for GPT-4/ChatGPT, support both `Azure OpenAI Services` and `OpenAI`
- Support various formats including PDF, DOCX, PPTX, TXT, etc.
- Chat with the document content, ask questions, and get relevant answers based on the context.
- User-friendly interface to ensure seamless interaction.
Expand Down Expand Up @@ -63,9 +64,11 @@ pip install -r requirements.txt

## Configuration

### Azure OpenAI Services

1. Obtain your Azure OpenAI API key, Endpoint and Deployment Name from the [Azure Portal](https://portal.azure.com/).

2. Create `.env` in the root dir and set the environment variable in the file:
2. Create `.env` in the root dir and set the environment variables in the file:

```
OPENAI_API_BASE=https://your-endpoint.openai.azure.com
Expand All @@ -76,6 +79,16 @@ OPENAI_EMBEDDING_DEPLOYMENT_NAME=your-embedding-deployment-name
Here's where you can find the deployment names for GPT and Embedding:
![Alt text](./static/deployment.png)

### OpenAI

1. Obtain your OpenAI API key from [platform.openai.com](https://platform.openai.com/account/api-keys).

2. Create `.env` in the root dir and set the environment variable in the file:

```
OPENAI_API_KEY=your-key-here
```

## Usage: Web

This will initialize the application based on `Streamlit` and open up the user interface in your default web browser. You can now upload a document to create a knowledge base and start a conversation with it.
Expand Down
117 changes: 80 additions & 37 deletions chatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import openai
from dotenv import load_dotenv
from langchain.chat_models import AzureChatOpenAI
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.callbacks.base import BaseCallbackHandler

Expand All @@ -18,6 +19,8 @@
from typing import List
import streamlit

REQUEST_TIMEOUT = 10

class StreamHandler(BaseCallbackHandler):
def __init__(self, container, initial_text=""):
self.container = container
Expand All @@ -29,57 +32,98 @@ def on_llm_new_token(self, token: str, **kwargs) -> None:


class DocChatbot:
llm: AzureChatOpenAI
condens_question_llm: AzureChatOpenAI
llm: ChatOpenAI
condens_question_llm: ChatOpenAI
embeddings: OpenAIEmbeddings
vector_db: FAISS
chatchain: BaseConversationalRetrievalChain

def __init__(self) -> None:
#init for OpenAI GPT-4 and Embeddings
#init for LLM and Embeddings
load_dotenv()
assert(os.getenv("OPENAI_API_KEY") is not None)
api_key = str(os.getenv("OPENAI_API_KEY"))
embedding_deployment = "text-embedding-ada-002"

#check if user is using API from openai.com or Azure OpenAI Service by inspecting the api key
if api_key.startswith("sk-"):
# user is using API from openai.com
assert(len(api_key) == 51)

self.llm = ChatOpenAI(
temperature=0,
openai_api_key=api_key,
request_timeout=REQUEST_TIMEOUT,
) # type: ignore
else:
# user is using Azure OpenAI Service
assert(os.getenv("OPENAI_GPT_DEPLOYMENT_NAME") is not None)
assert(os.getenv("OPENAI_API_BASE") is not None)
assert(len(api_key) == 32)

self.llm = AzureChatOpenAI(
deployment_name=os.getenv("OPENAI_GPT_DEPLOYMENT_NAME"),
temperature=0,
openai_api_version="2023-05-15",
openai_api_type="azure",
openai_api_base=os.getenv("OPENAI_API_BASE"),
openai_api_key=api_key,
request_timeout=REQUEST_TIMEOUT,
) # type: ignore

self.llm = AzureChatOpenAI(
deployment_name=os.getenv("OPENAI_GPT_DEPLOYMENT_NAME"),
temperature=0,
openai_api_version="2023-05-15",
openai_api_type="azure",
openai_api_base=os.getenv("OPENAI_API_BASE"),
openai_api_key=os.getenv("OPENAI_API_KEY"),
request_timeout=30,
) # type: ignore
embedding_deployment = os.getenv("OPENAI_EMBEDDING_DEPLOYMENT_NAME")

self.condens_question_llm = self.llm

self.embeddings = OpenAIEmbeddings(
deployment=os.getenv("OPENAI_EMBEDDING_DEPLOYMENT_NAME"),
deployment=embedding_deployment,
chunk_size=1
) # type: ignore

def init_streaming(self, condense_question_container, answer_container) -> None:
self.llm = AzureChatOpenAI(
deployment_name=os.getenv("OPENAI_GPT_DEPLOYMENT_NAME"),
temperature=0,
openai_api_version="2023-05-15",
openai_api_type="azure",
openai_api_base=os.getenv("OPENAI_API_BASE"),
openai_api_key=os.getenv("OPENAI_API_KEY"),
request_timeout=30,
streaming=True,
callbacks=[StreamHandler(answer_container)]
) # type: ignore

self.condens_question_llm = AzureChatOpenAI(
deployment_name=os.getenv("OPENAI_GPT_DEPLOYMENT_NAME"),
temperature=0,
openai_api_version="2023-05-15",
openai_api_type="azure",
openai_api_base=os.getenv("OPENAI_API_BASE"),
openai_api_key=os.getenv("OPENAI_API_KEY"),
request_timeout=30,
streaming=True,
callbacks=[StreamHandler(condense_question_container, "🤔...")]
) # type: ignore
api_key = str(os.getenv("OPENAI_API_KEY"))
if api_key.startswith("sk-"):
# user is using API from openai.com
self.llm = ChatOpenAI(
temperature=0,
openai_api_key=api_key,
request_timeout=REQUEST_TIMEOUT,
streaming=True,
callbacks=[StreamHandler(answer_container)]
) # type: ignore

self.condens_question_llm = ChatOpenAI(
temperature=0,
openai_api_key=api_key,
request_timeout=REQUEST_TIMEOUT,
streaming=True,
callbacks=[StreamHandler(condense_question_container, "🤔...")]
) # type: ignore
else:
# user is using Azure OpenAI Service
self.llm = AzureChatOpenAI(
deployment_name=os.getenv("OPENAI_GPT_DEPLOYMENT_NAME"),
temperature=0,
openai_api_version="2023-05-15",
openai_api_type="azure",
openai_api_base=os.getenv("OPENAI_API_BASE"),
openai_api_key=os.getenv("OPENAI_API_KEY"),
request_timeout=REQUEST_TIMEOUT,
streaming=True,
callbacks=[StreamHandler(answer_container)]
) # type: ignore

self.condens_question_llm = AzureChatOpenAI(
deployment_name=os.getenv("OPENAI_GPT_DEPLOYMENT_NAME"),
temperature=0,
openai_api_version="2023-05-15",
openai_api_type="azure",
openai_api_base=os.getenv("OPENAI_API_BASE"),
openai_api_key=os.getenv("OPENAI_API_KEY"),
request_timeout=REQUEST_TIMEOUT,
streaming=True,
callbacks=[StreamHandler(condense_question_container, "🤔...")]
) # type: ignore

def init_chatchain(self, chain_type : str = "stuff") -> None:
# init for ConversationalRetrievalChain
Expand Down Expand Up @@ -180,7 +224,6 @@ def init_vector_db_from_documents(self, file_list: List[str]):

print("Generating embeddings and ingesting to vector db.")
self.vector_db = FAISS.from_documents(docs, self.embeddings)
print(self.vector_db)
print("Vector db initialized.")


0 comments on commit 4fb57cf

Please sign in to comment.