-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathextract_dataset.py
90 lines (72 loc) · 2.93 KB
/
extract_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
""" data_processing.py
Recover image frames from the video recordings of each task and convert them, together with the recorded actions, to NumPy arrays.
"""
import os
import sys
import glob
import numpy as np
import cv2
from compile_labels import load_actions
# Root directory of the dataset; defaults to data/ relative to the
# repository root and can be overridden through the environment.
MINERL_DATA_ROOT = os.getenv('MINERL_DATA_ROOT', 'data/')

all_images_list = []
all_actions_np = None

# Find all MineRLBasalt tasks
tasks = glob.glob(os.path.join(MINERL_DATA_ROOT, 'MineRLBasalt*'))

# For each task, gather the frames and actions of every recording and
# write them to <task>/images.npy and <task>/actions.npy.
for task in tasks:
    # Rebinding drops the references to the previous task's data so it can
    # be reclaimed before the next (large) allocation.
    all_images_list = []
    all_actions_np = None
    print(f'Extract all data from {task}')
    dataset_addrs = glob.glob(os.path.join(task, '*'))

    images_path = os.path.join(task, 'images.npy')
    actions_path = os.path.join(task, 'actions.npy')

    # Delete previous data. Done with os.remove rather than a shell command
    # so that paths containing spaces or shell metacharacters are handled
    # safely and the script does not depend on `rm` being available.
    for stale_path in (images_path, actions_path):
        try:
            os.remove(stale_path)
        except FileNotFoundError:
            pass

    # Process data of each dataset
    for dataset_addr in dataset_addrs:
        # The glob above ran before the deletion, so it may still list the
        # compiled .npy outputs; those are not recordings.
        if dataset_addr.endswith('npy'):
            continue
        print(f' Extracting data from {dataset_addr}')

        frames = []
        try:
            # Load action data
            actions = load_actions(
                labels_dataset_addr=None,
                relative_label_addr=None,
                dataset_addr=dataset_addr)

            # Load image data: read until the video is completed.
            cap = cv2.VideoCapture(
                os.path.join(dataset_addr, 'recording.mp4'))
            try:
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break
                    frames.append(frame)
            finally:
                # Always free the decoder/file handle, even on error.
                cap.release()

            # Stack with the previous actions (or start the array), but do
            # not commit yet: frames and actions are stored together only
            # once both are known to be usable.
            if frames:
                if all_actions_np is None:
                    stacked_actions = actions
                else:
                    stacked_actions = np.vstack((all_actions_np, actions))
        except Exception:
            # Best effort: a broken recording is reported and skipped.
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            print(' Invalid dataset (no video data)')
            continue

        if not frames:
            # A missing or unreadable video raises nothing in OpenCV; it
            # simply yields no frames. Skip it so that no actions are stored
            # without matching images.
            print(' Invalid dataset (no video data)')
            continue

        all_images_list.extend(frames)
        all_actions_np = stacked_actions

    if not all_images_list or all_actions_np is None:
        # Nothing usable was found; converting an empty list would fail on
        # the 4-D indexing below.
        print(f' [*] No valid data extracted for task {task}, nothing saved')
        continue

    # Save all images and all labels to disk
    all_images_np = np.array(all_images_list, dtype=np.float32)
    # Convert images to the 0-1 range (in place, to avoid a second full-size
    # copy) and reorder the channels from BGR to RGB.
    all_images_np /= 255.0
    all_images_np = all_images_np[:, :, :, [2, 1, 0]]
    print(f' [*] Extracted {all_images_np.shape[0]} images for task {task}')
    with open(images_path, 'wb') as f:
        np.save(f, all_images_np)
    # Free the image array before handling the next task.
    del all_images_np

    print(f' [*] Extracted {all_actions_np.shape[0]} actions for task {task}')
    with open(actions_path, 'wb') as f:
        np.save(f, all_actions_np)