diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..3d68e9f --- /dev/null +++ b/Dockerfile @@ -0,0 +1,18 @@ +# Use an official Python runtime as a parent image +FROM python:3.10-slim-buster + +# Set the working directory to /app +WORKDIR /app + +# Copy the current directory contents into the container at /app +COPY . /app +COPY ./data/* /app/data/ + +# Install any needed packages specified in requirements.txt +RUN pip install -r requirements.txt + +# Expose the port that the Gradio app will run on +EXPOSE 7860 + +# Run the command to start the Gradio app +CMD ["python", "chat_web.py"] diff --git a/README.md b/README.md index d7c78dd..3ef39ec 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,15 @@ cd chat-with-your-doc 3. Install the required Python packages: +Create virtual environment: + +```bash +python3 -m venv .venv +source .venv/bin/activate +``` + +Install dependencies: + ```bash pip install -r requirements.txt ``` @@ -66,32 +75,31 @@ The CLI application is built to support both `ingest` and `chat` commands. Pytho This command would take the documents as input, split the texts, generate the embeddings and store in a vector store `FAISS`. The vector store would be store locally for later used for chat. -```bash -$ python chat_cli.py ingest --help - - Usage: chat_cli.py ingest [OPTIONS] DOC_PATH INDEX_NAME +![](./static/cli_ingest.png) -Arguments: -doc_path TEXT Path to the documents to be ingested, support glob pattern [required] -index_name TEXT Name of the index to be created [default: None] [required] - -Options: ---help Show this message and exit. +For example if you want to put all the PDFs in the directory into one single vector store named `surface`, you could run: + +```bash +$ python chat_cli.py ingest --path "./data/source_documents/*.pdf" --name surface ``` +Note that the path should be enclosed with double quotes to avoid shell expansion. 
### **Chat** -This command would start a interactive chat, with documents as a external knowledge base in a vector store. You could choose which knowledge base to load for chat. +This command would start an interactive chat, with documents as an external knowledge base in a vector store. You could choose which knowledge base to load for chat. -```bash -$ python chat_cli.py chat --help +![CLI Chat](./static/cli_chat.png) -Usage: chat_cli.py chat [OPTIONS] +Two sample documents about Surface have been provided in the [data/source_document](data/source_documents) directory and already ingested into the default vector store `index`, stored in the [data/vector_store](data/vector_store). You could run the following command to start a chat with the documents: -Options: ---index-name TEXT [default: index] ---help Show this message and exit. +```bash +$ python chat_cli.py chat +``` + +Or you could specify the vector store to load for chat: +```bash +$ python chat_cli.py chat --name surface ``` ## Usage: Web diff --git a/chat_cli.py b/chat_cli.py index 2ead252..0c415e5 100644 --- a/chat_cli.py +++ b/chat_cli.py @@ -12,19 +12,19 @@ @app.command() def ingest( - doc_path : Annotated[str, typer.Argument(help="Path to the documents to be ingested, support glob pattern", show_default=False)], - index_name : Annotated[str, typer.Argument(help="Name of the index to be created")]): + path : Annotated[str, typer.Option(help="Path to the documents to be ingested, support glob pattern", show_default=False)], + name : Annotated[str, typer.Option(help="Name of the index to be created", show_default=False)]): #support for glob in doc_path - file_list = glob.glob(doc_path) + file_list = glob.glob(path) # print(file_list) docChatbot.init_vector_db_from_documents(file_list) - docChatbot.save_vector_db_to_local(VECTORDB_PATH, index_name) + docChatbot.save_vector_db_to_local(VECTORDB_PATH, name) @app.command() -def chat(index_name : str = "index"): +def chat(name : str = "index"): - 
docChatbot.load_vector_db_from_local(VECTORDB_PATH, index_name) + docChatbot.load_vector_db_from_local(VECTORDB_PATH, name) docChatbot.init_chatchain() chat_history = [] diff --git a/chat_web.py b/chat_web.py index a7b1e1e..bad5a26 100644 --- a/chat_web.py +++ b/chat_web.py @@ -71,8 +71,19 @@ def get_answer(message, chat_history): a = "" if chat[1] == None else chat[1] ch.append((q, a)) + #todo: need to handle exception result_answer, result_source = docChatbot.get_answer_with_source(message, ch) + output_source = "\n\n" + i = 0 + for doc in result_source: + reference_html = f"""
Reference [{i+1}] {os.path.basename(doc.metadata["source"])} P{doc.metadata['page']+1} \n""" + reference_html += f"""{doc.page_content}\n""" + reference_html += f"""
""" + output_source += reference_html + i += 1 + #todo: show referenced pdf content in web ui + chat_history.append((message, result_answer)) return "", chat_history @@ -146,4 +157,7 @@ def get_answer(message, chat_history): -demo.launch() \ No newline at end of file +demo.launch( + server_name="0.0.0.0", + server_port=8000 +) \ No newline at end of file diff --git a/chatbot.py b/chatbot.py index 29528fd..7692e74 100644 --- a/chatbot.py +++ b/chatbot.py @@ -24,22 +24,23 @@ class DocChatbot: def __init__(self) -> None: #init for OpenAI GPT-4 and Embeddings load_dotenv() - openai.api_type = "azure" - openai.api_version = "2023-03-15-preview" - openai.api_base = os.getenv("OPENAI_API_BASE") - openai.api_key = os.getenv("OPENAI_API_KEY") self.llm = AzureChatOpenAI( deployment_name=os.getenv("OPENAI_DEPLOYMENT_NAME"), temperature=0, - openai_api_version="2023-03-15-preview" + openai_api_version="2023-05-15", + openai_api_type="azure", + openai_api_base=os.getenv("OPENAI_API_BASE"), + openai_api_key=os.getenv("OPENAI_API_KEY"), + request_timeout=30 ) self.embeddings = OpenAIEmbeddings(model="text-embedding-ada-002", chunk_size=1) def init_chatchain(self, chain_type : str = "stuff") -> None: # init for ConversationalRetrievalChain - CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template("""Given the following conversation and a follow up question, rephrase the follow up question. + CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template("""Given the following conversation and a follow up question, rephrase the follow up question. + The follow up question should be in the same language with the input. For example, if the input is in Chinese, the follow up question or the standalone question below should be in Chinese too. 
Chat History: {chat_history} @@ -103,8 +104,8 @@ def init_vector_db_from_documents(self, file_list: List[str]): docs.extend(doc) print("Processed document: " + file) + print("Generating embeddings and ingesting to vector db.") self.vector_db = FAISS.from_documents(docs, OpenAIEmbeddings(chunk_size=1)) - print("Generated embeddings and ingested to vector db.") - + print("Vector db initialized.") \ No newline at end of file diff --git a/data/source_documents/FY2223-Q3-JFM-8-K-Final.pdf b/data/source_documents/FY2223-Q3-JFM-8-K-Final.pdf deleted file mode 100644 index 07c71ff..0000000 Binary files a/data/source_documents/FY2223-Q3-JFM-8-K-Final.pdf and /dev/null differ diff --git a/data/source_documents/surface.pdf b/data/source_documents/surface.pdf new file mode 100644 index 0000000..747d8c0 Binary files /dev/null and b/data/source_documents/surface.pdf differ diff --git a/data/source_documents/Surface_Pro_9_Guide.pdf b/data/source_documents/surface9_service.pdf similarity index 100% rename from data/source_documents/Surface_Pro_9_Guide.pdf rename to data/source_documents/surface9_service.pdf diff --git a/data/vector_store/index.faiss b/data/vector_store/index.faiss index feba2b9..1437598 100644 Binary files a/data/vector_store/index.faiss and b/data/vector_store/index.faiss differ diff --git a/data/vector_store/index.pkl b/data/vector_store/index.pkl index fd0bfe8..d5276d4 100644 Binary files a/data/vector_store/index.pkl and b/data/vector_store/index.pkl differ diff --git a/data/vector_store/pgq3.faiss b/data/vector_store/pgq3.faiss deleted file mode 100644 index cad828b..0000000 Binary files a/data/vector_store/pgq3.faiss and /dev/null differ diff --git a/data/vector_store/pgq3.pkl b/data/vector_store/pgq3.pkl deleted file mode 100644 index 528cc16..0000000 Binary files a/data/vector_store/pgq3.pkl and /dev/null differ diff --git a/requirements.txt b/requirements.txt index 2ba3ef2..9311e95 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,15 @@ 
aiofiles==23.1.0 aiohttp==3.8.4 aiosignal==1.3.1 -altair==4.2.2 +altair==5.0.0 anyio==3.6.2 -argilla==1.6.0 +argilla==1.7.0 async-timeout==4.0.2 attrs==23.1.0 backoff==2.2.1 -certifi==2022.12.7 +certifi==2023.5.7 cffi==1.15.1 +chardet==5.1.0 charset-normalizer==3.1.0 click==8.1.3 commonmark==0.9.1 @@ -17,17 +18,16 @@ cryptography==40.0.2 cycler==0.11.0 dataclasses-json==0.5.7 Deprecated==1.2.13 -entrypoints==0.4 et-xmlfile==1.1.0 faiss-cpu==1.7.4 fastapi==0.95.1 ffmpy==0.3.0 filelock==3.12.0 -fonttools==4.39.3 +fonttools==4.39.4 frozenlist==1.3.3 -fsspec==2023.4.0 -gradio==3.28.1 -gradio_client==0.1.4 +fsspec==2023.5.0 +gradio==3.30.0 +gradio_client==0.2.4 greenlet==2.0.2 h11==0.14.0 httpcore==0.16.3 @@ -39,8 +39,8 @@ Jinja2==3.1.2 joblib==1.2.0 jsonschema==4.17.3 kiwisolver==1.4.4 -langchain==0.0.153 -linkify-it-py==2.0.1 +langchain==0.0.191 +linkify-it-py==2.0.2 lxml==4.9.2 Markdown==3.4.3 markdown-it-py==2.2.0 @@ -58,17 +58,15 @@ nltk==3.8.1 numexpr==2.8.4 numpy==1.23.5 olefile==0.46 -openai==0.27.0 +openai==0.27.6 openapi-schema-pydantic==1.2.4 openpyxl==3.1.2 -orjson==3.8.11 +orjson==3.8.12 packaging==23.1 pandas==1.5.3 -pdfminer==20191125 pdfminer.six==20221105 Pillow==9.5.0 pycparser==2.21 -pycryptodome==3.17 pydantic==1.10.7 pydub==0.25.1 Pygments==2.15.1 @@ -84,28 +82,28 @@ python-multipart==0.0.6 python-pptx==0.6.21 pytz==2023.3 PyYAML==6.0 -regex==2023.3.23 -requests==2.29.0 +regex==2023.5.5 +requests==2.30.0 rfc3986==1.5.0 rich==13.0.1 semantic-version==2.10.0 six==1.16.0 sniffio==1.3.0 -SQLAlchemy==2.0.12 +SQLAlchemy==2.0.13 starlette==0.26.1 tenacity==8.2.2 -tiktoken==0.3.3 +tiktoken==0.4.0 toolz==0.12.0 tqdm==4.65.0 typer==0.9.0 typing-inspect==0.8.0 typing_extensions==4.5.0 -tzdata==2023.3 uc-micro-py==1.0.2 -unstructured==0.6.2 -urllib3==1.26.15 +unstructured==0.7.1 +urllib3==2.0.2 uvicorn==0.22.0 -websockets==11.0.2 +websockets==11.0.3 wrapt==1.14.1 +xlrd==2.0.1 XlsxWriter==3.1.0 yarl==1.9.2 diff --git a/static/cli_chat.png b/static/cli_chat.png 
new file mode 100644 index 0000000..22e236f Binary files /dev/null and b/static/cli_chat.png differ diff --git a/static/cli_ingest.png b/static/cli_ingest.png new file mode 100644 index 0000000..d7c1686 Binary files /dev/null and b/static/cli_ingest.png differ