Skip to content

Commit 2f0d248

Browse files
committed
Apply some limits and fixup
1 parent bcc685a commit 2f0d248

File tree

5 files changed

+99
-41
lines changed

5 files changed

+99
-41
lines changed

backend/ibutsu_server/controllers/artifact_controller.py

+2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import json
2+
from datetime import datetime
23

34
import connexion
45
import magic
@@ -119,6 +120,7 @@ def upload_artifact(body):
119120
filename=filename,
120121
result_id=data["resultId"],
121122
content=file_.read(),
123+
upload_date=datetime.utcnow(),
122124
data=additional_metadata,
123125
)
124126
session.add(artifact)

backend/ibutsu_server/db/models.py

+1
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ class Artifact(Model, FileMixin):
6868
result_id = Column(PortableUUID(), ForeignKey("results.id"), nullable=False)
6969
filename = Column(Text)
7070
data = Column(mutable_json_type(dbtype=PortableJSON(), nested=True))
71+
upload_date = Column(DateTime)
7172

7273

7374
class Group(Model, ModelMixin):

backend/ibutsu_server/scripts/mongo2postgres.py

+90-35
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
"""
88
from argparse import ArgumentParser
99
from datetime import datetime
10+
from datetime import timedelta
1011
from datetime import timezone
1112
from uuid import UUID
1213

@@ -27,7 +28,10 @@
2728
UUID_1_EPOCH = datetime(1582, 10, 15, tzinfo=timezone.utc)
2829
UUID_TICKS = 10000000
2930
UUID_VARIANT_1 = 0b1000000000000000
30-
ROWS_TO_COMMIT_AT_ONCE = 100000
31+
ROWS_TO_COMMIT_AT_ONCE = 10000
32+
MONTHS_TO_KEEP = 3
33+
MIGRATION_TIME_FRAME = MONTHS_TO_KEEP * 30 * 24 * 60 * 60  # MONTHS_TO_KEEP months (30-day months) in seconds
34+
MIGRATION_LIMIT = 10000000000 # mostly for testing purposes
3135

3236
Base = declarative_base()
3337
session = None
@@ -36,9 +40,9 @@
3640
TABLE_MAP = [
3741
("groups", Group),
3842
("projects", Project),
43+
("widgetConfigs", WidgetConfig),
3944
("runs", Run),
4045
("results", Result),
41-
("widgetConfigs", WidgetConfig),
4246
]
4347
FILE_MAP = [
4448
# only convert artifacts, the reports in the existing DB aren't particularly useful
@@ -60,6 +64,28 @@
6064
"run",
6165
]
6266

67+
# indexes for the tables
68+
INDEXES = {
69+
"results": [
70+
"CREATE INDEX results_env ON results(env);",
71+
"CREATE INDEX results_component ON results(component);",
72+
"CREATE INDEX results_project ON results(project_id);",
73+
"CREATE INDEX results_start_time ON results(start_time);",
74+
"CREATE INDEX results_run ON results(run_id);",
75+
],
76+
"runs": [
77+
"CREATE INDEX runs_env ON runs(env);",
78+
"CREATE INDEX runs_component ON runs(component);",
79+
"CREATE INDEX runs_project ON runs(project_id);",
80+
"CREATE INDEX runs_start_time ON runs(start_time);",
81+
],
82+
"widget_configs": ["CREATE INDEX widget_configs_project ON widget_configs(project_id);"],
83+
"artifacts": [
84+
"CREATE INDEX artifact_result ON artifacts(result_id);",
85+
"CREATE INDEX artifact_upload_date ON artifacts(upload_date);",
86+
],
87+
}
88+
6389

6490
def is_uuid(candidate):
6591
"""Determine if this is a uuid"""
@@ -112,15 +138,25 @@ def setup_postgres(postgres_url):
112138
def migrate_table(collection, Model, vprint):
113139
"""Migrate a collection from MongoDB into a table in PostgreSQL"""
114140
# apply the known indexes for this table (see INDEXES above)
115-
# indexes = collection.list_indexes()
116-
# conn = Base.metadata.bind.connect()
117-
# for idx in indexes:
118-
# if idx['name'] == '_id_':
119-
# continue
120-
# vprint('.', end='')
121-
# sql = convert_index(idx, Model.__tablename__)
122-
# conn.execute(sql)
123-
for idx, row in enumerate(collection.find()):
141+
conn = Base.metadata.bind.connect()
142+
for sql_index in INDEXES.get(Model.__tablename__, []):
143+
vprint(".", end="")
144+
conn.execute(sql_index)
145+
146+
# for runs and results, sort by descending start_time
147+
if Model.__tablename__ == "runs" or Model.__tablename__ == "results":
148+
sort = [("start_time", -1)]
149+
most_recent_record = collection.find_one(sort=sort)
150+
most_recent_start_time = most_recent_record["start_time"]
151+
# only include most recent runs and results
152+
filter_ = {"start_time": {"$gt": most_recent_start_time - MIGRATION_TIME_FRAME}}
153+
else:
154+
sort = None
155+
filter_ = None
156+
157+
for idx, row in enumerate(collection.find(filter_, sort=sort)):
158+
if idx > MIGRATION_LIMIT:
159+
break
124160
vprint(".", end="")
125161
mongo_id = row.pop("_id")
126162
# overwrite id with PSQL uuid
@@ -136,54 +172,71 @@ def migrate_table(collection, Model, vprint):
136172

137173
# promote some metadata fields to the appropriate column
138174
for field in FIELDS_TO_PROMOTE:
139-
if row.get("metadata") and field in row["metadata"]:
175+
if row.get("metadata") and row["metadata"].get(field):
140176
row[field] = row["metadata"][field]
141177
# convert some ObjectId's to UUID's
142178
for field in ID_FIELDS:
143-
if field == "project":
144-
if row.get("metadata") and field in row["metadata"]:
179+
if row.get("metadata") and row["metadata"].get(field):
180+
if field == "project":
145181
row["project_id"] = convert_objectid_to_uuid(row["metadata"][field])
146182
# also update the metadata field
147183
row["metadata"][field] = row["project_id"]
148-
elif field == "run":
149-
if row.get("metadata") and field in row["metadata"]:
184+
elif field == "run":
150185
row["run_id"] = convert_objectid_to_uuid(row["metadata"][field])
151186
# also update the metadata field
152187
row["metadata"][field] = row["run_id"]
153-
elif field in ["result_id", "resultId"]:
154-
if row.get("metadata") and field in row["metadata"]:
188+
elif field in ["result_id", "resultId"]:
155189
row["result_id"] = convert_objectid_to_uuid(row["metadata"][field])
156-
else:
157-
if row.get("metadata") and field in row["metadata"]:
190+
else:
158191
row["metadata"][field] = convert_objectid_to_uuid(row["metadata"][field])
192+
159193
obj = Model.from_dict(**row)
160194
session.add(obj)
161195
if idx % ROWS_TO_COMMIT_AT_ONCE == 0:
162196
session.commit()
163197
session.commit()
198+
# at the end of the session do a little cleanup
199+
if Model.__tablename__ == "runs" or Model.__tablename__ == "results":
200+
conn = Base.metadata.bind.connect()
201+
# delete any results or runs without start_time
202+
sql_delete = f"DELETE FROM {Model.__tablename__} where start_time IS NULL;"
203+
conn.execute(sql_delete)
164204
vprint(" done")
165205

166206

167207
def migrate_file(collection, Model, vprint):
168208
"""Migrate a GridFS collection from MongoDB into a table in PostgreSQL"""
169209
# Access the underlying collection object
170210
# apply the known indexes for this table (see INDEXES above)
171-
# indexes = collection._collection.list_indexes()
172-
# conn = Base.metadata.bind.connect()
173-
# for idx in indexes:
174-
# if idx['name'] == '_id_':
175-
# continue
176-
# vprint('.', end='')
177-
# sql = convert_index(idx, Model.__tablename__)
178-
# conn.execute(sql)
179-
for idx, row in enumerate(collection.find({})):
211+
conn = Base.metadata.bind.connect()
212+
for sql_index in INDEXES.get(Model.__tablename__, []):
213+
vprint(".", end="")
214+
conn.execute(sql_index)
215+
216+
# for artifacts, sort by descending _id to find the most recent record
217+
if Model.__tablename__ == "artifacts":
218+
sort = [("_id", -1)] # in MongoDB sorting by id is like sorting by a 'created' field
219+
most_recent_record = [x for x in collection.find(limit=1, sort=sort)][0]
220+
most_recent_upload_date = most_recent_record.upload_date
221+
# only include the most recent artifacts
222+
filter_ = {
223+
"uploadDate": {"$gt": (most_recent_upload_date - timedelta(days=30 * MONTHS_TO_KEEP))}
224+
}
225+
else:
226+
sort = None
227+
filter_ = None
228+
229+
for idx, row in enumerate(collection.find(filter_, sort=sort)):
230+
if idx > MIGRATION_LIMIT:
231+
break
180232
vprint(".", end="")
181233
pg_id = convert_objectid_to_uuid(row._id)
182-
data = {}
234+
data = dict()
183235
data["metadata"] = row.metadata
184236
data["id"] = pg_id
185237
data["filename"] = row.filename
186238
data["content"] = row.read()
239+
data["upload_date"] = row.upload_date
187240
for field in ID_FIELDS:
188241
if field == "resultId":
189242
data["result_id"] = convert_objectid_to_uuid(row.metadata[field])
@@ -198,14 +251,15 @@ def migrate_file(collection, Model, vprint):
198251
vprint(" done")
199252

200253

201-
def migrate_tables(mongo, vprint):
254+
def migrate_tables(mongo, vprint, migrate_files=False):
202255
"""Migrate all the tables"""
203256
for collection, model in TABLE_MAP:
204257
vprint("Migrating {} ".format(collection), end="")
205258
migrate_table(mongo[collection], model, vprint)
206-
for collection, model in FILE_MAP:
207-
vprint("Migrating {} ".format(collection), end="")
208-
migrate_file(GridFSBucket(mongo, collection), model, vprint)
259+
if migrate_files:
260+
for collection, model in FILE_MAP:
261+
vprint("Migrating {} ".format(collection), end="")
262+
migrate_file(GridFSBucket(mongo, collection), model, vprint)
209263

210264

211265
def build_mongo_connection(url):
@@ -225,6 +279,7 @@ def parse_args():
225279
parser.add_argument("mongo_url", help="URL to MongoDB database")
226280
parser.add_argument("postgres_url", help="URL to PostgreSQL database")
227281
parser.add_argument("-v", "--verbose", action="store_true", help="Say what I'm doing")
282+
parser.add_argument("-f", "--files", action="store_true", help="Migrate artifact files")
228283
return parser.parse_args()
229284

230285

@@ -234,7 +289,7 @@ def main():
234289
mongo_url, database = build_mongo_connection(args.mongo_url)
235290
mongo = get_mongo(mongo_url, database)
236291
setup_postgres(args.postgres_url)
237-
migrate_tables(mongo, vprint)
292+
migrate_tables(mongo, vprint, args.files)
238293

239294

240295
if __name__ == "__main__":

frontend/src/result-list.js

+3-3
Original file line numberDiff line numberDiff line change
@@ -341,10 +341,10 @@ export class ResultList extends React.Component {
341341
let filters = this.state.filters;
342342
const project = getActiveProject();
343343
if (project) {
344-
filters['metadata.project'] = {'val': project.id, 'op': 'eq'};
344+
filters['project_id'] = {'val': project.id, 'op': 'eq'};
345345
}
346-
else if (Object.prototype.hasOwnProperty.call(filters, 'metadata.project')) {
347-
delete filters['metadata.project']
346+
else if (Object.prototype.hasOwnProperty.call(filters, 'project_id')) {
347+
delete filters['project_id']
348348
}
349349
if (filters) {
350350
params['apply_max'] = true; // if filters are applied limit the documents returned

frontend/src/run-list.js

+3-3
Original file line numberDiff line numberDiff line change
@@ -323,10 +323,10 @@ export class RunList extends React.Component {
323323
let filters = this.state.filters;
324324
const project = getActiveProject();
325325
if (project) {
326-
filters['metadata.project'] = {'val': project.id, 'op': 'eq'};
326+
filters['project_id'] = {'val': project.id, 'op': 'eq'};
327327
}
328-
else if (Object.prototype.hasOwnProperty.call(filters, 'metadata.project')) {
329-
delete filters['metadata.project']
328+
else if (Object.prototype.hasOwnProperty.call(filters, 'project_id')) {
329+
delete filters['project_id']
330330
}
331331
params['pageSize'] = this.state.pageSize;
332332
params['page'] = this.state.page;

0 commit comments

Comments
 (0)