Skip to content

Commit cf09ea9

Browse files
NekxislatekvoSOSNE
authored
App dockerization (#38)
Co-authored-by: LatekVon <[email protected]> Co-authored-by: SOSNE <[email protected]>
1 parent 192f47e commit cf09ea9

29 files changed

+603
-679
lines changed

.dockerignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
webui/frontend

README.md

+29-7
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,31 @@
22

33
#### Langchain project aiming at achieving perpetual research with the help of a chain of ai researching agents.
44

5-
- Install and launch Ollama: `ollama serve`
6-
- Create new environment: `conda env create -n ResearchChain -f environment.yml`
7-
- Activate the new environment: `conda activate ResearchChain`
8-
- Pull the model you intend to use: `ollama pull zephyr:7b-beta-q5_K_M` (default)
9-
- Pull an embedding model: `ollama pull nomic-embed-text` (default)
10-
- Run: `python3 main.py`
5+
### Running ResearchChain
6+
Deploys every worker, database, and utility simultaneously:
7+
> sudo docker-compose -f docker/docker-compose.yml up
8+
9+
Please note that the webui frontend has to be launched separately — see below.
10+
11+
### Running webui front-end `user interaction`
12+
13+
The frontend is launched separately from the back end; run the following commands to start it.
14+
- go to frontend directory: `cd webui/frontend/`
15+
- install dependencies: `npm install`
16+
- start react project: `npm run dev`
17+
- open `http://localhost:3000/` in your browser
18+
19+
### Accessing postgres database
20+
- postgres can be accessed via `pgAdmin`, which is already included in the docker compose,
21+
so there is no need for any additional packages
22+
- go to `localhost:8081/browser/`
23+
- click `add new server`
24+
- in `name`, write `postgres`
25+
- go to `connection` tab
26+
- in `hostname/address` write `postgres`
27+
- in `username` write `admin` and in `password` write `pass`
28+
- click `save`, the database should be immediately available
29+
- there, you'll see connection statistics as well as the entire schema
1130

1231
#### Other notes
1332

@@ -35,9 +54,12 @@
3554
> to supply constant 24/7 knowledge and news analysis,
3655
> and to expand its knowledge base by scheduling crawls based on the provided areas of interest.
3756
38-
### Flow of operations with WebUI app.
57+
### Flow of operations with WebUI app
3958
![Flow chart explaining flow of research chain when WebUI is used as the scheduling app.](./assets/rc_flow.png "Research chain flow chart.")
4059

60+
### Database schema
61+
![Database schema](./assets/db_schema.png "Database schema.")
62+
4163
---
4264
### Contributing
4365

assets/db_schema.png

161 KB
Loading

core/databases/db_base.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
from sqlalchemy.orm import DeclarativeBase
55

66
import os

# Database engine shared by all db_* modules.
# The connection string targets the docker-compose "postgres" service with the
# compose-file credentials; it can be overridden via the DATABASE_URL
# environment variable without touching the code.
# NOTE(review): the hardcoded credentials are fine for local compose, but
# confirm they are never used in a production deployment.
engine = create_engine(
    os.environ.get(
        "DATABASE_URL",
        "postgresql://postgres:pass@postgres:5432/postgres",
    ),
    pool_size=20,
    max_overflow=0,  # fail fast instead of opening connections beyond the pool
)

logging.basicConfig()
# silence per-statement SQL logging; only critical engine errors surface
logging.getLogger("sqlalchemy.engine").setLevel(logging.CRITICAL)

core/databases/db_completion_tasks.py

+50-49
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def db_add_completion_task(prompt, mode) -> str:
3939
executing=False,
4040
execution_date=0,
4141
completed=False,
42-
completion_result="", # providing default value
42+
completion_result="N/A",
4343
completion_date=0,
4444
required_crawl_tasks=[],
4545
)
@@ -53,75 +53,76 @@ def db_add_completion_task(prompt, mode) -> str:
5353
def db_get_completion_tasks_by_page(
    page: int, per_page: int = defaults.ITEMS_PER_PAGE
) -> list[CompletionTask]:
    """Fetch one page of completion tasks.

    The (page, per_page) pair is translated into a slice over the task
    table via page_to_range; row order is the database default.
    """
    first, last = page_to_range(page, per_page)
    with Session(engine) as session:
        # keep loaded attributes readable after the session closes
        session.expire_on_commit = False
        return list(session.scalars(select(CompletionTask).slice(first, last)))
6463

6564

6665
def db_get_completion_task_by_uuid(uuid: str) -> CompletionTask:
    """Return the single completion task identified by *uuid*.

    Raises (via ``.one()``) when zero or more than one row matches.

    Note: the parameter was previously annotated ``int``, but task uuids are
    strings throughout this module (see db_set_completion_task_executing),
    so the annotation is corrected to ``str``.
    """
    with Session(engine) as session:
        # keep loaded attributes readable after the session closes
        session.expire_on_commit = False

        query = select(CompletionTask).where(CompletionTask.uuid == uuid)
        return session.scalars(query).one()
7372

7473

7574
def db_set_completion_task_executing(uuid: str):
    """Flag the completion task *uuid* as executing and stamp the start time."""
    mark_executing = (
        update(CompletionTask)
        .where(CompletionTask.uuid == uuid)
        .values(executing=True, execution_date=gen_unix_time())
    )
    with Session(engine) as session:
        session.execute(mark_executing)
        session.commit()
8783

8884

8985
def db_get_incomplete_completion_tasks(amount: int = 1):
    """Claim up to *amount* pending tasks (neither completed nor executing).

    Every returned task is immediately flagged as executing so other
    workers are less likely to pick it up again.
    NOTE(review): the select-then-flag sequence is not atomic, so two
    workers could still claim the same task — confirm whether that matters.
    """
    pending = (
        select(CompletionTask)
        .where(CompletionTask.completed.is_(False))
        .where(CompletionTask.executing.is_(False))
        .limit(amount)
    )
    with Session(engine) as session:
        # keep loaded attributes readable after the session closes
        session.expire_on_commit = False
        claimed = list(session.scalars(pending).all())

    for task in claimed:
        db_set_completion_task_executing(task.uuid)

    return claimed
102103

103104

104105
def db_release_executing_tasks(uuid_list: list[str]):
    """Clear the executing flag and execution timestamp for every uuid given."""
    release = (
        update(CompletionTask)
        .where(CompletionTask.uuid.in_(uuid_list))
        .values(executing=False, execution_date=0)
    )
    with Session(engine) as session:
        session.execute(release)
        session.commit()
114114

115115

116116
def db_update_completion_task_after_summarizing(summary: str, uuid: str):
    """Store *summary* on task *uuid* and mark it completed with a timestamp."""
    finish = (
        update(CompletionTask)
        .where(CompletionTask.uuid == uuid)
        .values(
            completed=True,
            completion_result=summary,
            completion_date=gen_unix_time(),
        )
    )
    with Session(engine) as session:
        session.execute(finish)
        session.commit()

core/databases/db_crawl_tasks.py

+70-66
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,8 @@
99
ForeignKey,
1010
)
1111
from sqlalchemy.orm import Mapped, mapped_column, Session, relationship
12-
from core.databases import defaults
1312

14-
# from core.databases.models import CrawlTask, EmbeddingProgression
13+
from core.databases import defaults
1514
from core.databases.db_base import Base, engine
1615
from core.tools import utils
1716
from core.tools.utils import gen_unix_time, page_to_range
@@ -76,68 +75,75 @@ def db_add_crawl_task(prompt: str, mode: Literal["news", "wiki", "docs"] = "wiki
7675
return new_uuid
7776

7877

79-
def db_set_crawl_executing(uuid: str):
80-
session = Session(engine)
78+
def db_get_crawl_tasks_by_page(
    page: int, per_page: int = defaults.ITEMS_PER_PAGE
) -> list[CrawlTask]:
    """Fetch one page of crawl tasks.

    The (page, per_page) pair is translated into a slice over the task
    table via page_to_range; row order is the database default.
    """
    first, last = page_to_range(page, per_page)
    with Session(engine) as session:
        # keep loaded attributes readable after the session closes
        session.expire_on_commit = False
        return list(session.scalars(select(CrawlTask).slice(first, last)))
8788

88-
session.commit()
8989

90+
def db_get_crawl_task_by_uuid(uuid: str) -> CrawlTask:
    """Return the single crawl task identified by *uuid*.

    Raises (via ``.one()``) when zero or more than one row matches.

    Note: the parameter was previously annotated ``int``, but crawl-task
    uuids are strings everywhere else in this module (see
    db_set_crawl_executing), so the annotation is corrected to ``str``.
    """
    with Session(engine) as session:
        # keep loaded attributes readable after the session closes
        session.expire_on_commit = False

        query = select(CrawlTask).where(CrawlTask.uuid == uuid)
        return session.scalars(query).one()
9397

94-
session.execute(
95-
update(CrawlTask)
96-
.where(CrawlTask.uuid.is_(uuid))
97-
.values(completed=True, completion_date=gen_unix_time())
98-
)
99-
100-
session.commit()
10198

99+
def db_set_crawl_executing(uuid: str):
    """Flag the crawl task *uuid* as executing and stamp the start time."""
    mark_executing = (
        update(CrawlTask)
        .where(CrawlTask.uuid == uuid)
        .values(executing=True, execution_date=gen_unix_time())
    )
    with Session(engine) as session:
        session.execute(mark_executing)
        session.commit()
106108

107-
query = select(CrawlTask).where(CrawlTask.completed.is_(False))
108-
crawl_task = session.scalars(query).one_or_none()
109109

110-
if crawl_task is not None:
111-
db_set_crawl_executing(crawl_task.uuid)
110+
def db_set_crawl_completed(uuid: str):
    """Flag the crawl task *uuid* as completed and stamp the completion time."""
    mark_done = (
        update(CrawlTask)
        .where(CrawlTask.uuid == uuid)
        .values(completed=True, completion_date=gen_unix_time())
    )
    with Session(engine) as session:
        session.execute(mark_done)
        session.commit()
114119

115120

116121
# fixme cont. and this function should only return n of inComp and nonExec tasks, for workers
117122
def db_get_incomplete_crawl_task():
    """Claim a single pending crawl task, or return None when none exist.

    A task that is neither completed nor executing is selected and
    immediately flagged as executing before being returned.
    NOTE(review): select-then-flag is not atomic across workers — confirm
    whether concurrent double-claims are acceptable here.
    """
    pending = (
        select(CrawlTask)
        .where(CrawlTask.completed.is_(False))
        .where(CrawlTask.executing.is_(False))
        .limit(1)
    )
    with Session(engine) as session:
        # keep loaded attributes readable after the session closes
        session.expire_on_commit = False
        task = session.scalars(pending).one_or_none()

    if task is not None:
        db_set_crawl_executing(task.uuid)

    return task
132139

133140

134141
def db_is_task_completed(uuid: str):
    """Return the ``completed`` flag of the crawl task *uuid*.

    Raises (via ``.one()``) when the uuid does not match exactly one row.
    """
    with Session(engine) as session:
        task = session.scalars(
            select(CrawlTask).where(CrawlTask.uuid == uuid)
        ).one()
        return task.completed
141147

142148

143149
def db_are_tasks_completed(uuid_list: list[str]):
@@ -156,15 +162,14 @@ def db_are_tasks_completed(uuid_list: list[str]):
156162

157163

158164
def db_is_crawl_task_fully_embedded(uuid: str, model_name: str):
    """Return True when *model_name* has embedded at least as many documents
    as were scheduled for the crawl task *uuid*.

    Robustness fix: a model absent from the progression mapping now counts
    as zero progress instead of raising KeyError on the direct indexing the
    previous version used.
    """
    with Session(engine) as session:
        crawl_task = session.scalars(
            select(CrawlTask).where(CrawlTask.uuid == uuid)
        ).one()

        scheduled = crawl_task.base_amount_scheduled
        # `or 0` also covers a key present with a None value
        done = crawl_task.embedding_progression.get(model_name) or 0

        return done >= scheduled
168173

169174

170175
def db_are_crawl_tasks_fully_embedded(uuid_list: str, model_name: str):
@@ -177,25 +182,24 @@ def db_are_crawl_tasks_fully_embedded(uuid_list: str, model_name: str):
177182

178183

179184
def db_increment_task_embedding_progression(uuid: str, model_name: str):
    """Increment the per-model embedding counter on crawl task *uuid*.

    Bug fix: the previous code indexed ``embedding_progression[model_name]``
    directly, which raised KeyError the first time a model reported progress,
    making the ``else: current_count = 1`` initialization branch unreachable.
    ``dict.get`` (with ``or 0`` to also cover an explicit None value) lets a
    first-time model start at 1 as intended.
    """
    with Session(engine) as session:
        crawl_task = session.scalars(
            select(CrawlTask).where(CrawlTask.uuid == uuid)
        ).one()

        progression = crawl_task.embedding_progression
        progression[model_name] = (progression.get(model_name) or 0) + 1

        session.execute(
            update(CrawlTask)
            .where(CrawlTask.uuid == crawl_task.uuid)
            .values(embedding_progression=progression)
        )

        session.commit()

0 commit comments

Comments
 (0)