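"""Scheduler daemon for the aifish video-inference pipeline.

Polls the database for newly decrypted videos, copies them into the model's
'processing' folder, parses the model's line-delimited JSON output into
aifishdata and track rows, and cleans up files stranded by service restarts.

Illustrative invocation (option values here are examples, not project
defaults; unset options fall back to config.defaults / the ENVIRONMENT file):

    python run_aifish.py --dbname edge --dbuser edge \
        --output_dir /videos --thalos_cam_name cam1
"""
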
from datetime import datetime, timezone, timedelta
from dateutil import parser
import click
from collections import defaultdict
import json
import os
from pathlib import Path
import re
import requests
from requests import Response
import schedule
import shutil
import subprocess
from subprocess import CompletedProcess
import sys
import time
from model import Base as ModelBase, VideoFile, AifishData, Track
import sqlalchemy as sa
from sqlalchemy.orm import sessionmaker as SessionMaker, Query
from sqlalchemy.orm.session import Session
from flask.config import Config as FlaskConfig
flaskconfig = FlaskConfig(root_path='')
flaskconfig.from_object('config.defaults')
if 'ENVIRONMENT' in os.environ:
flaskconfig.from_envvar('ENVIRONMENT')
# select video_files.* from video_files
# join (
# select COALESCE(max(workday_counts.workday), '1970-01-01') most_recent_active_workday
# from (
# select date(start_datetime AT TIME ZONE 'utc' - interval '8 hours' ) as workday,
# count(*) as count
# from video_files
# where decrypted_path is not null
# group by workday
# ) workday_counts
# where workday_counts.count > 4
# ) workdays
# on video_files.start_datetime >= workdays.most_recent_active_workday + time with time zone '08:00Z'
# left join aifishdata
# on video_files.decrypted_path = aifishdata.video_uri
# where video_files.decrypted_path is not null
# and aifishdata.video_uri is null
# and video_files.cam_name = 'cam1'
# order by video_files.decrypted_datetime asc;
def next_videos(session: Session, thalos_cam_name):
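    """Return decrypted videos awaiting aifish processing for one camera.

    Only videos from the most recent "active" workday are returned: a workday
    starts at 08:00 UTC, and a day counts as active once it has more than
    num_vids_required decrypted videos.
    """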
    workday_start_hour_at_utc_interval = '8 hours'
    workday_start_hour_at_utc_timestr = '08:00Z'
    num_vids_required = 4
results: Query[VideoFile] = session.query(VideoFile).from_statement(sa.text(
"""
select video_files.* from video_files
join (
select COALESCE(max(workday_counts.workday), '1970-01-01') most_recent_active_workday
from (
                select date(start_datetime AT TIME ZONE 'utc' - cast(:timei as interval)) as workday,
count(*) as count
from video_files
where decrypted_path is not null
group by workday
) workday_counts
where workday_counts.count > :numvids
) workdays
        on video_files.start_datetime >= workdays.most_recent_active_workday + cast(:times as time with time zone)
left join aifishdata
on video_files.decrypted_path = aifishdata.video_uri
where video_files.decrypted_path is not null
and aifishdata.video_uri is null
and video_files.cam_name = :cam_name
order by video_files.decrypted_datetime asc;
""")).params(
{
"timei": workday_start_hour_at_utc_interval,
"times": workday_start_hour_at_utc_timestr,
"numvids": num_vids_required,
"cam_name": thalos_cam_name,
})
return list(results)
def v2_next_videos(session: Session, thalos_cam_name):
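    """Simpler variant of next_videos (not currently called in this file):
    return every decrypted video for the camera that has no aifishdata row
    yet, oldest first.
    """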
results: Query[VideoFile] = session.query(VideoFile).from_statement(sa.text(
"""
select video_files.* from video_files
left join aifishdata
on video_files.decrypted_path = aifishdata.video_uri
where video_files.decrypted_path is not null
and video_files.start_datetime is not null
and aifishdata.video_uri is null
and video_files.cam_name = :cam_name
order by video_files.start_datetime asc;
""")).params(
{
"cam_name": thalos_cam_name,
})
return list(results)
MAGIC_VALUE_5_MiB = 5 * 1024 * 1024  # (currently unused)
def parse_json(session: Session, decrypted_path: Path, json_out_file: Path, only_tracks=False):
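    """Parse one line-delimited JSON output file from the aifish model.

    Detections with class_name == 'fish' are grouped by track id into Track
    rows; a summary row (track count and mean detection confidence) is
    upserted into aifishdata. With only_tracks=True, only Track rows are
    written.
    """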
with json_out_file.open() as f:
detections = [json.loads(line) for line in f]
    if len(detections) == 0:
        # model produced no output lines; fall through so a zero-count row
        # is still recorded below
        click.echo(f"aifish output {json_out_file.name} contains no detections")

    fish_detections = list(filter(lambda d: d.get('class_name') == 'fish', detections))

    if len(fish_detections) == 0:
        # no fish detected: record a zero count/confidence row and stop
if only_tracks:
return
session.execute(sa.text("""insert into aifishdata ( video_uri, output_uri,
count, detection_confidence )
values ( :decrypted_path, :json_out_file , :cnt, :mean_c)
on conflict (video_uri) do update set
output_uri = :json_out_file,
count = :cnt,
detection_confidence = :mean_c
;"""), {
"decrypted_path": str(decrypted_path.absolute()),
"json_out_file":str(json_out_file.absolute()),
"cnt": 0,
"mean_c": 0,
}
)
session.commit()
return
    # note: last_frame and frames are computed here but not used further down
    last_frame = max(map(lambda d: d.get('frame'), detections))
    frames = []
    detectionconfidences = list(filter(lambda x: x is not None, map(lambda d: d.get('object_confidence'), fish_detections)))
tracks = defaultdict(list)
for d in fish_detections:
tracks[d.get('track')].append(d)
cnt = len(tracks.keys())
done_tracks = []
    for track_id, track_detections in tracks.items():
        # renamed from `detections` to avoid shadowing the outer list
        frame_nums = list(map(lambda d: d.get('frame'), track_detections))
min_frame = min(frame_nums)
max_frame = max(frame_nums)
t = Track()
t.video_uri = str(decrypted_path.absolute())
t.cocoannotations_uri = str(json_out_file.absolute())
t.track_id = track_id
t.first_framenum = min_frame
t.last_framenum = max_frame
t.confidences = [0 for i in range(1 + max_frame - min_frame)]
        for d in track_detections:
t.confidences[d.get('frame') - min_frame] = d.get('object_confidence') or 0
done_tracks.append(t)
session.add_all(done_tracks)
session.commit()
if only_tracks:
return
if len(detectionconfidences) > 0:
meandetectionconfidence = float(sum(detectionconfidences)) / float(len(detectionconfidences))
else:
meandetectionconfidence = 0
# with sessionmaker() as session:
session.execute(sa.text("""insert into aifishdata ( video_uri, output_uri,
count, detection_confidence )
values ( :decrypted_path, :json_out_file , :cnt, :mean_c)
on conflict (video_uri) do update set
output_uri = :json_out_file,
count = :cnt,
detection_confidence = :mean_c
;"""), {
"decrypted_path": str(decrypted_path.absolute()),
"json_out_file":str(json_out_file.absolute()),
"cnt":cnt,
"mean_c":meandetectionconfidence,
}
)
session.commit()
VIDEO_TOO_SMALL = 1024*1024
def enqueue(output_dir: Path, sessionmaker: SessionMaker, thalos_cam_name: str):
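    """Queue unprocessed videos for the aifish model.

    For each video returned by next_videos() (falling back to the re-encoded
    copy when the original fails existence/size checks), copy it into the
    model's 'processing' folder and upsert an aifishdata row with status
    'queued'. output_dir is where the model's .json output is expected.
    """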
video_files: list[VideoFile] = []
with sessionmaker() as session:
video_files = next_videos(session, thalos_cam_name)
# print(video_files)
while len(video_files) > 0:
video_file: VideoFile = video_files.pop(0)
# print(video_file)
decrypted_path = Path(video_file.decrypted_path)
# use_reencoded = False
v_source_path = str(decrypted_path.absolute())
v_source_name = decrypted_path.name
if not decrypted_path.exists() or not decrypted_path.is_file() or decrypted_path.stat().st_size < VIDEO_TOO_SMALL:
click.echo(f"original video file {decrypted_path.name} failed basic checks. Using reencoded")
# use_reencoded = True
if video_file.reencoded_path is None:
click.echo(f"video not reencoded, skipping video")
continue
reencoded_path = Path(video_file.reencoded_path)
v_source_path = str(reencoded_path.absolute())
v_source_name = reencoded_path.name
if not reencoded_path.exists() or not reencoded_path.is_file() or reencoded_path.stat().st_size < VIDEO_TOO_SMALL:
click.echo(f"reencoded_video {reencoded_path.name} fails basic checks. skipping video")
continue
        # strip the final extension (if any) to build the output .json name
        json_out_file: Path = output_dir / (Path(v_source_name).stem + ".json")
aifish_processing_path = decrypted_path.parent / 'processing' / v_source_name
# decrypted_path.rename(aifish_processing_path)
# aifish_processing_path.touch()
# with aifish_processing_path.open('a') as _:
# pass
shutil.copy(v_source_path, aifish_processing_path)
with sessionmaker() as session:
session.execute(sa.text("""insert into aifishdata ( video_uri, processing_uri, output_uri, status )
values ( :video_uri, :processing_uri, :output_uri, :status )
on conflict (video_uri) DO UPDATE SET status = :status ;"""), {
"video_uri": str(decrypted_path.absolute()),
"processing_uri": str(aifish_processing_path.absolute()),
"output_uri": str(json_out_file.absolute()),
"status": "queued"
}
)
session.commit()
MAGIC_VALUE_1_MINUTE = 60
def parse(output_dir: Path, sessionmaker: SessionMaker):
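    """Match finished model outputs to queued aifishdata rows.

    Picks up .json files in output_dir that have been idle for at least a
    minute, records the model runtime, removes the processing copy, and
    parses the output into the database via parse_json().
    """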
    # only pick .json files that haven't been modified in the last minute
    json_files = filter(lambda x: x.is_file() and x.name.endswith('.json'), output_dir.iterdir())
    epoch_now = int(time.time())
    settled_files = filter(lambda x: x.stat().st_mtime + MAGIC_VALUE_1_MINUTE < epoch_now, json_files)
    # absolute filenames of finished model outputs
    found_aifish_files = list(map(lambda x: str(x.absolute()), settled_files))
click.echo("found {} .json files".format(str(len(found_aifish_files))))
with sessionmaker() as session:
results: Query[AifishData] = session.query(AifishData).where( AifishData.status == 'queued' )
for pending_aifishdata in results:
# click.echo("found {} queued row".format(str(pending_aifishdata)))
if pending_aifishdata.output_uri in found_aifish_files:
video = Path(pending_aifishdata.video_uri)
processing = Path(pending_aifishdata.processing_uri)
output = Path(pending_aifishdata.output_uri)
otime = output.stat().st_mtime
if processing.exists():
ptime = processing.stat().st_mtime
pending_aifishdata.runtimems = (otime - ptime) * 1000.0
pending_aifishdata.status = "parsing"
session.commit()
if processing.exists():
processing.unlink()
parse_json(session, video, output)
pending_aifishdata.status = "done"
session.commit()
def errors(sessionmaker: SessionMaker):
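    """Poll the local model HTTP server's /errors endpoint and record failures.

    Videos the model skipped under its SKIP performance mode are marked
    'runningskiphalf'; all other errors mark the video 'errored' with the
    message attached. Note: these upserts target the ondeckdata table (not
    aifishdata), which appears to be carried over from the ondeck variant of
    this script.
    """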
try:
r: Response = requests.get('http://127.0.0.1:5000/errors')
click.echo("errors resp: {} body: {}".format(repr(r), repr(r.json())))
for error in r.json():
input_path = error.get('input_path')
error_message = error.get('error_message')
if error_message.startswith('Task performance mode set to SKIP'):
with sessionmaker() as session:
session.execute(sa.text("""insert into ondeckdata ( video_uri, status )
values ( :decrypted_path, :skiphalfstatus )
on conflict (video_uri) do update set
status = :skiphalfstatus
;"""), {
"decrypted_path": input_path,
"skiphalfstatus": "runningskiphalf"
}
)
session.commit()
continue
with sessionmaker() as session:
session.execute(sa.text("""insert into ondeckdata ( video_uri, cocoannotations_uri )
values ( :decrypted_path, :error_str )
on conflict (video_uri) do update set
status = 'errored', cocoannotations_uri = :error_str
;"""), {
"decrypted_path": input_path,
"error_str": "ondeck model failure. stdout, stderr: " + error_message
}
)
session.commit()
except requests.exceptions.RequestException as e:
click.echo("ondeck model errors request exception: {}".format(e))
return
LOST_TIME_BUFFER = timedelta(minutes=30)
def lost_inprogress(sessionmaker: SessionMaker, aifish_processing_dir: Path):
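    """Clean up videos stranded in the processing folder.

    Uses journalctl to find when aifish_model.service last started; any
    .avi/.mkv whose mtime (plus a buffer) predates that start is treated as
    lost: the file is deleted and its 'queued' aifishdata row marked 'errored'.
    """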
    last_start_time_s = subprocess.run('journalctl -o short-iso -u aifish_model.service | grep systemd | grep Started | tail -n 1 | sed "s/edge.*//"', shell=True, text=True, capture_output=True)
    if not last_start_time_s.stdout.strip():
        # no service start found in the journal; nothing can safely be declared lost
        return
    last_start_time_dt = parser.parse(last_start_time_s.stdout)
check_these = list(filter(
lambda f: f.is_file()
and (f.name.endswith('.avi')
or f.name.endswith('.mkv'))
and datetime.fromtimestamp(f.stat().st_mtime, tz=timezone.utc) + LOST_TIME_BUFFER < last_start_time_dt,
aifish_processing_dir.iterdir()
))
if len(check_these) > 0:
abs_names = list(map(lambda f: str(f.absolute()), check_these))
with sessionmaker() as session:
rows: Query = session.query(AifishData) \
.filter(AifishData.processing_uri.in_(abs_names)) \
.filter(AifishData.status == 'queued')
for lost_file in rows.all():
click.echo(f'found lost file in progress - deleting: {lost_file.processing_uri}')
Path(lost_file.processing_uri).unlink()
lost_file.status = 'errored'
session.commit()
def ensure_is_dir(p: Path):
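    """Create directory p if it is missing; exit the program if that fails."""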
if p is None:
return
a = str(p.absolute())
if not p.exists() or not p.is_dir():
click.echo(f"folder {a} does not exist. Creating")
        try:
            p.mkdir()
        except OSError:
            # creation failure is re-checked below and exits with a clear message
            pass
if not p.exists() or not p.is_dir():
click.echo(f"Could not create folder {a}. Exiting")
sys.exit(1)
@click.command()
@click.option('--dbname', default=flaskconfig.get('DBNAME'))
@click.option('--dbuser', default=flaskconfig.get('DBUSER'))
@click.option('--output_dir', default=flaskconfig.get('VIDEO_OUTPUT_DIR'))
@click.option('--engine', default=flaskconfig.get('ONDECK_MODEL_ENGINE'))
@click.option('--thalos_cam_name', default=flaskconfig.get('THALOS_CAM_NAME'))
@click.option('--print_queue', is_flag=True)
@click.option('--parsetesta')
@click.option('--parsetestb')
@click.option('--testlostinprogress', is_flag=True)
def main(dbname, dbuser, output_dir, engine, thalos_cam_name, print_queue, parsetesta, parsetestb, testlostinprogress):
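    """Entry point: prepare folders and the database, handle the one-shot
    test flags (--parsetesta/--parsetestb, --print_queue, --testlostinprogress),
    then run the enqueue/errors/parse jobs on a schedule forever.
    """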
video_output_dir = Path(output_dir)
aifish_processing_dir = video_output_dir / 'processing'
aifish_output_dir = video_output_dir / 'output'
ensure_is_dir(aifish_processing_dir)
ensure_is_dir(aifish_output_dir)
if engine:
engine = Path(engine)
sa_engine = sa.create_engine("postgresql+psycopg2://%s@/%s"%(dbuser, dbname), echo=True)
sessionmaker = SessionMaker(sa_engine)
ModelBase.metadata.create_all(sa_engine)
if parsetesta and parsetestb:
with sessionmaker() as session:
parse_json(session, Path(parsetesta), Path(parsetestb))
return
if print_queue:
with sessionmaker() as session:
video_files = next_videos(session, thalos_cam_name)
for v in video_files:
click.echo(v.decrypted_path)
return
if testlostinprogress:
lost_inprogress(sessionmaker, aifish_processing_dir)
return
def runonce_enqueue(aifish_output_dir, sessionmaker, thalos_cam_name):
enqueue(aifish_output_dir, sessionmaker, thalos_cam_name)
return schedule.CancelJob
schedule.every(1).seconds.do(runonce_enqueue, aifish_output_dir, sessionmaker, thalos_cam_name)
schedule.every(5).minutes.do(enqueue, aifish_output_dir, sessionmaker, thalos_cam_name )
def runonce_errors(sessionmaker):
errors(sessionmaker)
return schedule.CancelJob
schedule.every(1).seconds.do(runonce_errors, sessionmaker)
schedule.every(1).minutes.do(errors, sessionmaker)
def runonce_parse(aifish_output_dir, sessionmaker):
parse(aifish_output_dir, sessionmaker)
return schedule.CancelJob
schedule.every(1).seconds.do(runonce_parse, aifish_output_dir, sessionmaker)
schedule.every(1).minutes.do(parse, aifish_output_dir, sessionmaker )
# def runonce_lost_inprogress(sessionmaker, aifish_processing_dir):
# lost_inprogress(sessionmaker, aifish_processing_dir)
# return schedule.CancelJob
# schedule.every(1).seconds.do(runonce_lost_inprogress, sessionmaker, aifish_processing_dir)
# schedule.every(5).minutes.do(lost_inprogress, sessionmaker, aifish_processing_dir )
    while True:
n = schedule.idle_seconds()
if n is None:
# no more jobs
break
elif n > 0:
# sleep exactly the right amount of time
click.echo("sleeping for: {}".format(n))
time.sleep(n)
schedule.run_pending()
if __name__ == '__main__':
main()