9
9
ForeignKey ,
10
10
)
11
11
from sqlalchemy .orm import Mapped , mapped_column , Session , relationship
12
- from core .databases import defaults
13
12
14
- # from core.databases.models import CrawlTask, EmbeddingProgression
13
+ from core .databases import defaults
15
14
from core .databases .db_base import Base , engine
16
15
from core .tools import utils
17
16
from core .tools .utils import gen_unix_time , page_to_range
@@ -76,68 +75,75 @@ def db_add_crawl_task(prompt: str, mode: Literal["news", "wiki", "docs"] = "wiki
76
75
return new_uuid
77
76
78
77
79
- def db_set_crawl_executing (uuid : str ):
80
- session = Session (engine )
78
+ def db_get_crawl_tasks_by_page (
79
+ page : int , per_page : int = defaults .ITEMS_PER_PAGE
80
+ ) -> list [CrawlTask ]:
81
+ with Session (engine ) as session :
82
+ session .expire_on_commit = False
81
83
82
- session .execute (
83
- update (CrawlTask )
84
- .where (CrawlTask .uuid .is_ (uuid ))
85
- .values (executing = True , execution_date = gen_unix_time ())
86
- )
84
+ start , stop = page_to_range (page , per_page )
85
+ query = select (CrawlTask ).slice (start , stop )
86
+ results = list (session .scalars (query ))
87
+ return results
87
88
88
- session .commit ()
89
89
90
+ def db_get_crawl_task_by_uuid (uuid : int ) -> CrawlTask :
91
+ with Session (engine ) as session :
92
+ session .expire_on_commit = False
90
93
91
- def db_set_crawl_completed (uuid : str ):
92
- session = Session (engine )
94
+ query = select (CrawlTask ).where (CrawlTask .uuid == uuid )
95
+ result = session .scalars (query ).one ()
96
+ return result
93
97
94
- session .execute (
95
- update (CrawlTask )
96
- .where (CrawlTask .uuid .is_ (uuid ))
97
- .values (completed = True , completion_date = gen_unix_time ())
98
- )
99
-
100
- session .commit ()
101
98
99
+ def db_set_crawl_executing (uuid : str ):
100
+ with Session (engine ) as session :
101
+ session .execute (
102
+ update (CrawlTask )
103
+ .where (CrawlTask .uuid == uuid )
104
+ .values (executing = True , execution_date = gen_unix_time ())
105
+ )
102
106
103
- # fixme: this function should return a list of all tasks for management purposes (see below)
104
- def db_get_crawl_task ():
105
- session = Session (engine )
107
+ session .commit ()
106
108
107
- query = select (CrawlTask ).where (CrawlTask .completed .is_ (False ))
108
- crawl_task = session .scalars (query ).one_or_none ()
109
109
110
- if crawl_task is not None :
111
- db_set_crawl_executing (crawl_task .uuid )
110
+ def db_set_crawl_completed (uuid : str ):
111
+ with Session (engine ) as session :
112
+ session .execute (
113
+ update (CrawlTask )
114
+ .where (CrawlTask .uuid == uuid )
115
+ .values (completed = True , completion_date = gen_unix_time ())
116
+ )
112
117
113
- return crawl_task
118
+ session . commit ()
114
119
115
120
116
121
# fixme cont. and this function should only return n of inComp and nonExec tasks, for workers
117
122
def db_get_incomplete_crawl_task ():
118
- session = Session (engine )
123
+ with Session (engine ) as session :
124
+ session .expire_on_commit = False
119
125
120
- query = (
121
- select (CrawlTask )
122
- .where (CrawlTask .completed .is_ (False ))
123
- .where (CrawlTask .executing .is_ (False ))
124
- )
126
+ query = (
127
+ select (CrawlTask )
128
+ .where (CrawlTask .completed .is_ (False ))
129
+ .where (CrawlTask .executing .is_ (False ))
130
+ .limit (1 )
131
+ )
125
132
126
- crawl_task = session .scalars (query ).one_or_none ()
133
+ crawl_task = session .scalars (query ).one_or_none ()
127
134
128
- if crawl_task is not None :
129
- db_set_crawl_executing (crawl_task .uuid )
135
+ if crawl_task is not None :
136
+ db_set_crawl_executing (crawl_task .uuid )
130
137
131
- return crawl_task
138
+ return crawl_task
132
139
133
140
134
141
def db_is_task_completed (uuid : str ):
135
- session = Session (engine )
136
-
137
- query = select (CrawlTask ).where (CrawlTask .uuid .is_ (uuid ))
138
- crawl_task = session .scalars (query ).one_or_none ()
142
+ with Session (engine ) as session :
143
+ query = select (CrawlTask ).where (CrawlTask .uuid == uuid )
144
+ crawl_task = session .scalars (query ).one ()
139
145
140
- return crawl_task .completed
146
+ return crawl_task .completed
141
147
142
148
143
149
def db_are_tasks_completed (uuid_list : list [str ]):
@@ -156,15 +162,14 @@ def db_are_tasks_completed(uuid_list: list[str]):
156
162
157
163
158
164
def db_is_crawl_task_fully_embedded (uuid : str , model_name : str ):
159
- session = Session (engine )
160
-
161
- query = select (CrawlTask ).where (CrawlTask .uuid .is_ (uuid ))
162
- crawl_task = session .scalars (query ).one ()
165
+ with Session (engine ) as session :
166
+ query = select (CrawlTask ).where (CrawlTask .uuid == uuid )
167
+ crawl_task = session .scalars (query ).one ()
163
168
164
- baseline_count = crawl_task .base_amount_scheduled
165
- current_count = crawl_task .embedding_progression [model_name ]
169
+ baseline_count = crawl_task .base_amount_scheduled
170
+ current_count = crawl_task .embedding_progression [model_name ]
166
171
167
- return current_count >= baseline_count
172
+ return current_count >= baseline_count
168
173
169
174
170
175
def db_are_crawl_tasks_fully_embedded (uuid_list : str , model_name : str ):
@@ -177,25 +182,24 @@ def db_are_crawl_tasks_fully_embedded(uuid_list: str, model_name: str):
177
182
178
183
179
184
def db_increment_task_embedding_progression (uuid : str , model_name : str ):
180
- session = Session (engine )
181
-
182
- query = select (CrawlTask ).where (CrawlTask .uuid .is_ (uuid ))
183
- crawl_task = session .scalars (query ).one ()
185
+ with Session (engine ) as session :
186
+ query = select (CrawlTask ).where (CrawlTask .uuid == uuid )
187
+ crawl_task = session .scalars (query ).one ()
184
188
185
- current_progression = crawl_task .embedding_progression
186
- current_count = current_progression [model_name ]
189
+ current_progression = crawl_task .embedding_progression
190
+ current_count = current_progression [model_name ]
187
191
188
- if current_count is not None :
189
- current_count += 1
190
- else :
191
- current_count = 1
192
+ if current_count is not None :
193
+ current_count += 1
194
+ else :
195
+ current_count = 1
192
196
193
- current_progression [model_name ] = current_count
197
+ current_progression [model_name ] = current_count
194
198
195
- session .execute (
196
- update (CrawlTask )
197
- .where (CrawlTask .uuid . is_ ( crawl_task .uuid ) )
198
- .values (embedding_progression = current_progression )
199
- )
199
+ session .execute (
200
+ update (CrawlTask )
201
+ .where (CrawlTask .uuid == crawl_task .uuid )
202
+ .values (embedding_progression = current_progression )
203
+ )
200
204
201
- session .commit ()
205
+ session .commit ()
0 commit comments