Skip to content

Commit 9091266

Browse files
committed
End2End APP LLM Model Using GEMINI
1 parent 1f872ca commit 9091266

File tree

6 files changed

+170
-0
lines changed

6 files changed

+170
-0
lines changed

app.py

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import streamlit as st
2+
from src.helper import *
3+
4+
5+
6+
def user_input(user_question):
    """Answer a question with the stored conversation chain and render the chat.

    The chain kept in ``st.session_state.conversation`` is called with the
    question, the returned ``chat_history`` is saved to
    ``st.session_state.chatHistory``, and every message is written to the page.

    Args:
        user_question: The question text entered by the user.
    """
    conversation = st.session_state.get("conversation")
    if conversation is None:
        # No chain exists until PDFs have been processed; calling None would
        # raise "TypeError: 'NoneType' object is not callable".
        st.warning("Please upload and process your PDF files before asking a question.")
        return

    response = conversation({'question': user_question})
    st.session_state.chatHistory = response['chat_history']

    # Messages are rendered alternately: even positions as the user's turn,
    # odd positions as the reply.
    for i, message in enumerate(st.session_state.chatHistory):
        if i % 2 == 0:
            st.write("User: ", message.content)
        else:
            st.write("Reply: ", message.content)
14+
15+
16+
def main():
    """Build the Streamlit page: question box, session state, and PDF sidebar.

    The main area takes a question and forwards it to ``user_input``. The
    sidebar accepts PDF uploads and, on "Submit & Process", extracts their
    text, chunks it, builds the vector store, and stores a fresh conversation
    chain in ``st.session_state.conversation``.
    """
    st.set_page_config("Information Retrieval")
    st.header("Information Retrieval System💁")

    user_question = st.text_input("Ask a Question from the PDF Files")

    # Initialise the session keys once so later reads never hit a missing key.
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chatHistory" not in st.session_state:
        st.session_state.chatHistory = None
    if user_question:
        user_input(user_question)

    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader(
            "Upload your PDF Files and Click on the Submit & Process Button",
            accept_multiple_files=True,
            type=["pdf"],  # reject non-PDF uploads before they reach the PDF reader
        )
        if st.button("Submit & Process"):
            if not pdf_docs:
                # Nothing uploaded: processing would only produce an empty index.
                st.warning("Please upload at least one PDF file before processing.")
            else:
                with st.spinner("Processing..."):
                    raw_text = get_pdf_text(pdf_docs)
                    text_chunks = get_text_chunks(raw_text)
                    if text_chunks:
                        vector_store = get_vector_store(text_chunks)
                        st.session_state.conversation = get_conversational_chain(vector_store)
                if text_chunks:
                    st.success("Done")
                else:
                    # E.g. image-only PDFs: there is nothing to embed or index.
                    st.error("No extractable text was found in the uploaded PDF files.")
39+
40+
41+
42+
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
43+
main()

requirements.txt

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
python-dotenv
2+
google-generativeai
3+
langchain
4+
PyPDF2
5+
faiss-cpu
6+
streamlit
7+
-e .

research/trials.ipynb

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"from dotenv import load_dotenv\n",
10+
"import os"
11+
]
12+
},
13+
{
14+
"cell_type": "markdown",
15+
"metadata": {},
16+
"source": []
17+
}
18+
],
19+
"metadata": {
20+
"kernelspec": {
21+
"display_name": "GenAI",
22+
"language": "python",
23+
"name": "python3"
24+
},
25+
"language_info": {
26+
"codemirror_mode": {
27+
"name": "ipython",
28+
"version": 3
29+
},
30+
"file_extension": ".py",
31+
"mimetype": "text/x-python",
32+
"name": "python",
33+
"nbconvert_exporter": "python",
34+
"pygments_lexer": "ipython3",
35+
"version": "3.13.0"
36+
}
37+
},
38+
"nbformat": 4,
39+
"nbformat_minor": 2
40+
}

research/trials.py

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# %%

from dotenv import load_dotenv
import os

# %%
# Read variables from a local .env file into the process environment, then
# look up the Google API key.
load_dotenv()
GOOGLE_AI_KEY = os.getenv("GOOGLE_API_KEY")
# Report only whether the key was found; printing the secret itself would leak
# it into terminal scrollback, logs, and saved notebook output.
print("GOOGLE_API_KEY loaded:", GOOGLE_AI_KEY is not None)
# %%

setup.py

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from setuptools import find_packages, setup
2+
3+
# Distribution metadata for the local package, collected in one mapping and
# handed to setuptools in a single call.
_PACKAGE_METADATA = {
    'name': 'LLMGenerativeAI',
    'version': '0.0.0',
    'author': 'Hamza BEKOURY',
    'author_email': '[email protected]',
    # Packages are discovered automatically by setuptools.
    'packages': find_packages(),
    # No install-time dependencies are declared here.
    'install_requires': [],
}

setup(**_PACKAGE_METADATA)

src/helper.py

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import os
2+
from PyPDF2 import PdfReader
3+
from langchain.text_splitter import RecursiveCharacterTextSplitter
4+
from langchain_community.vectorstores import FAISS # Updated import
5+
from langchain.chains import ConversationalRetrievalChain
6+
from langchain.memory import ConversationBufferMemory
7+
from dotenv import load_dotenv
8+
from langchain_google_genai import (
9+
ChatGoogleGenerativeAI,
10+
GoogleGenerativeAIEmbeddings,
11+
HarmCategory,
12+
HarmBlockThreshold,
13+
)
14+
from langchain_core.prompts import ChatPromptTemplate
15+
import google.generativeai as genai
16+
17+
# Load environment variables (including any defined in a local .env file) and
# configure the Google Generative AI client with the API key.
load_dotenv()
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    # os.environ accepts only strings, so assigning a missing key (None) would
    # fail with an opaque TypeError; fail early with an actionable message.
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Define it in the environment or in a .env file."
    )
os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY
genai.configure(api_key=GOOGLE_API_KEY)
22+
23+
def get_pdf_text(pdf_docs):
    """Return the concatenated text of every page of every given PDF.

    Args:
        pdf_docs: Iterable of PDF sources, each passed to ``PdfReader``.

    Returns:
        One string with the extracted text of all pages, in document and page
        order. An empty iterable yields an empty string.
    """
    page_texts = []
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # Defensive: treat a missing extraction result (e.g. a page with no
            # text layer, depending on the PyPDF2 version) as empty text rather
            # than failing on None.
            page_texts.append(page.extract_text() or "")
    # Join once instead of growing a string with += on every page.
    return "".join(page_texts)
30+
31+
def get_text_chunks(text, chunk_size=1000, chunk_overlap=20):
    """Split text into chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        text: The full text to split.
        chunk_size: Passed to the splitter as ``chunk_size``. Defaults to 1000,
            the value this function previously hard-coded.
        chunk_overlap: Passed to the splitter as ``chunk_overlap``. Defaults to
            20, the value this function previously hard-coded.

    Returns:
        The chunks returned by the splitter's ``split_text``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text)
35+
36+
def get_vector_store(text_chunks):
    """Embed the text chunks and index them in a FAISS vector store.

    Args:
        text_chunks: Non-empty list of text chunks to embed.

    Returns:
        The FAISS vector store built from the chunks.

    Raises:
        ValueError: If ``text_chunks`` is empty; there is nothing to index.
    """
    if not text_chunks:
        # Validate up front so the caller gets a clear message instead of a
        # failure from inside the embedding/indexing libraries.
        raise ValueError("text_chunks is empty: no text available to build the vector store.")

    embeddings = GoogleGenerativeAIEmbeddings(
        google_api_key=GOOGLE_API_KEY,
        model="models/embedding-001",
    )
    return FAISS.from_texts(text_chunks, embedding=embeddings)
43+
44+
def get_conversational_chain(vector_store):
    """Create a conversational retrieval chain over the given vector store.

    A Gemini chat model, a buffer memory stored under the ``chat_history`` key,
    and the store's retriever are combined with
    ``ConversationalRetrievalChain.from_llm``.

    Args:
        vector_store: Vector store whose ``as_retriever()`` supplies the retriever.

    Returns:
        The assembled conversation chain.
    """
    # Safety override applied to the chat model for the dangerous-content category.
    safety_overrides = {
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
    }
    chat_model = ChatGoogleGenerativeAI(
        model="gemini-1.5-pro",
        temperature=0.7,
        max_tokens=512,
        safety_settings=safety_overrides,
    )

    # Memory returns message objects and keeps them under "chat_history".
    history = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    return ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        retriever=vector_store.as_retriever(),
        memory=history,
    )

0 commit comments

Comments
 (0)