bulk_trainer_alt_aug_fixed_bs.py (forked from autorope/donkeycar)
import donkeycar.train as dkt
import donkeycar.config as dkconfig
import os
import numpy as np
import pickle
from time import time

'''
This is a bulk trainer that lets me run multiple training sessions on my
specific machine. You will need to adapt it extensively for your own use.
I'm running two GTX 1070 GPUs on a 6-core i7 under Ubuntu 16.04.
This version uses a fixed batch size and the alternative augmentation.
'''
from testing_files.training_experiments.exp17b import e_def as exp_n

# On my setup the GPUs have IDs 0 and 2. gpu_idx indexes into the 'gpu_ids'
# list below; run a second copy of this script with gpu_idx = 1 to load the
# other GPU (the modulo test in the experiment loop splits the work).
gpu_idx = 0
experiment_date = '2018-08-02_shuffled'  # folder the experiment will be stored in
hist_pkl_name = 'exp17b_shuff'           # name for the experiment's history file
num_runs = 5                             # training sessions to run per experiment
# aug_order[agidx] gives the order of the augmentation flag in the aug loop
# below; agidx = 1 selects [1, 0], i.e. augmented runs first, then un-augmented.
agidx = 1
aug_order = [[0, 1], [1, 0]]
# tubs of data to use
imu_tubs = ['../d2IMU/data/smoothed_imu/2018-07-08_3n_smooth_run_1',
            '../d2IMU/data/smoothed_imu/2018-07-09_imu_run_2']
tub_names = ','.join(imu_tubs)

# If augmenting data, these flags select which augmentations to perform; color
# balance and noise are the most expensive. clean_percent is the average share
# of training data per batch that is left un-augmented: with batch_size = 64,
# clean_percent = 0.10 means roughly 6.4 samples per batch are not augmented.
aug_args = {'vary_color_balance': True, 'vary_sharpness': False,
            'vary_bright': True, 'vary_contrast': True, 'add_noise': True,
            'vary_sat': True, 'clean_percent': 0.15}
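
# A quick sanity check of clean_percent (an illustrative helper, not part of
# the original script): the expected number of un-augmented samples per batch
# is simply batch_size * clean_percent, e.g. 128 * 0.15 = ~19 samples.
def expected_clean_samples(batch_size, clean_percent):
    """Average number of samples per batch that skip augmentation."""
    return batch_size * clean_percent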
path_for_training_config = '../d2IMU/config.py'  # config file to use as a base
cfg = dkconfig.load_config(path_for_training_config)
cfg.MAX_EPOCHS = 30

# These are the NVIDIA GPU IDs that show up when I run
# echo $CUDA_VISIBLE_DEVICES.
gpu_ids = ['gpu-0', 'gpu-2']
# make sure the output directories exist
os.makedirs('models', exist_ok=True)
os.makedirs('models/' + experiment_date, exist_ok=True)

experiments = exp_n(tub_names)
all_history = {}
exp_counter = 0
for experiment in experiments:
    exp_counter += 1
    # split the experiment list between the two script copies (one per GPU)
    if exp_counter % 2 == gpu_idx:
        continue
    # reshuffle the training/validation split for each experiment
    experiment['reset_split'] = True
    model_base_name = experiment['model_base_name']
    base_name = ('models/' + experiment_date + '/' + str(experiment['exp'])
                 + '_' + model_base_name)
    os.makedirs(base_name, exist_ok=True)
    bs = 128
    # Initialise the history slots once per experiment rather than inside the
    # aug loop; re-initialising them there would discard the runs recorded for
    # the first augmentation setting.
    all_history[experiment['exp']] = {bs: {'run': {}}}
    run = 1
    for aug in aug_order[agidx]:
        experiment['batch_size'] = bs  # let's move away from a hardwired batch size
        cfg.BATCH_SIZE = bs            # the original project stores this as a config option
        os.makedirs(base_name + '/' + str(bs), exist_ok=True)
        for i in range(1, num_runs + 1):
            # Set the GPU ID so that we can run separate experiments on
            # different GPUs and balance the training load.
            experiment['gpu'] = gpu_ids[gpu_idx]
            if aug == 1:
                # indicate in the model name that data augmentation was used
                aug_text = '_aug'
                # aug_args could be moved into the exp files themselves to
                # facilitate looping over multiple augmentation settings
                experiment['aug_args'] = aug_args
                experiment['no_aug_percent'] = aug_args['clean_percent']
                experiment['aug'] = True
            else:
                aug_text = ''
                experiment['aug'] = False
            model_file_name = (base_name + '/' + str(bs) + '/' + model_base_name
                               + '_' + str(experiment['gpu']) + aug_text
                               + '_v' + str(run))
            experiment['model_name'] = model_file_name
            start_time = time()
            print('Training experiment %d, %25s'
                  % (experiment['exp'], experiment['model_base_name']))
            history = dkt.multi_train(cfg, **experiment)
            end_time = time()
            print('Time for experiment', experiment['exp'], ':',
                  end_time - start_time)
            # per-output Keras history keys (out_0/out_1 are the model's two outputs)
            hist_order = ['loss', 'val_loss', 'out_0_loss', 'out_1_loss',
                          'val_out_0_loss', 'val_out_1_loss']
            hist_rows = []
            hist_dict = {}
            # build a row-based history table; after transposing, each row is one epoch
            for ho in hist_order:
                hist_rows.append(history.history[ho])
                hist_dict[ho] = history.history[ho]
            np_rows = np.asarray(hist_rows)
            hist = np_rows.T
            # column 1 is val_loss, so this finds the epoch with the best validation loss
            min_validation_loss_row = np.argmin(hist[:, 1])
            num_epochs = len(hist)
            best_epoch = hist[min_validation_loss_row, :]
            min_val_loss = best_epoch[1]
            data = {'history': hist_dict, 'table': hist, 'exp': experiment['exp'],
                    'bs': bs, 'gpu_id': gpu_ids[gpu_idx], 'run': run,
                    'aug': experiment['aug'], 'epochs': num_epochs,
                    'best_epoch': best_epoch, 'val_loss': min_val_loss,
                    'file': model_file_name, 'aug_args': aug_args}
            all_history[experiment['exp']][bs]['run'][run] = data
            run += 1
            # rewrite the history pickle after every run so partial results
            # survive an interrupted sweep
            with open('models/' + experiment_date + '/' + hist_pkl_name
                      + '_' + gpu_ids[gpu_idx] + '.pkl', 'wb') as hist_file:
                pickle.dump(all_history, hist_file)
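
# To inspect the results later, reload the pickle and scan for the best run
# per experiment. A minimal reader-side sketch (illustrative, left commented
# out; the path assumes the gpu-0 file written above):
#
#   import pickle
#   with open('models/2018-08-02_shuffled/exp17b_shuff_gpu-0.pkl', 'rb') as f:
#       results = pickle.load(f)
#   for exp, by_bs in results.items():
#       for bs, bucket in by_bs.items():
#           best = min(bucket['run'].values(), key=lambda d: d['val_loss'])
#           print(exp, bs, best['file'], 'val_loss = %.4f' % best['val_loss'])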