-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathdata_utils.py
50 lines (41 loc) · 1.32 KB
/
data_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import numpy as np
import cv2
import os
import pandas as pd
from six.moves import cPickle
# The official validation and test sets were released without labels, so the labelled training data was split into train, validation and test sets.
# Directory holding the training images. The loading loop further down
# derives each image's numeric id from its file name.
train_dir = '/mnt/boneage-training-dataset/'

# Accumulators filled by the loading loop: one image array, one age label and
# one gender flag per image.
X_train = []
y_age = []
y_gender = []

# Label table; the loading loop reads its `id`, `boneage` and `male` columns.
df = pd.read_csv('../train.csv')

# Number of rows in the label table. The previous `df.as_matrix().shape[0]`
# fails on pandas >= 1.0, where `as_matrix()` no longer exists; `len(df)`
# gives the same value on every pandas version.
# NOTE(review): `m` and `k` are not read anywhere else in this script; they are
# kept only so the module-level names stay unchanged.
m = len(df)
path = train_dir
k = 0

# Parenthesised form prints the same text under both Python 2 and Python 3.
print('Loading data set...')
# Build the id -> label lookups once instead of scanning the whole DataFrame
# twice for every image. drop_duplicates() keeps the first row per id, which
# matches the previous "first matching row wins" selection, and tolist()
# yields plain Python scalars so the pickled label lists hold no NumPy types.
_labels = df.drop_duplicates(subset='id')
_label_ids = _labels['id'].tolist()
age_by_id = dict(zip(_label_ids, _labels['boneage'].tolist()))
male_by_id = dict(zip(_label_ids, _labels['male'].tolist()))

# os.listdir() returns entries in arbitrary order; sorting makes the order of
# the saved samples reproducible across runs and machines.
for file_name in sorted(os.listdir(path)):
    # The image id is the file name without its extension (e.g. "1377.png").
    stem = os.path.splitext(file_name)[0]
    try:
        image_id = int(stem)
    except ValueError:
        print('Skipping %s: file name is not a numeric id' % file_name)
        continue

    if image_id not in age_by_id:
        print('Skipping %s: id %d has no row in the label table'
              % (file_name, image_id))
        continue

    # cv2.imread() signals an unreadable file by returning None rather than
    # raising, so check before handing the result to cvtColor().
    img = cv2.imread(os.path.join(path, file_name))
    if img is None:
        print('Skipping %s: could not be read as an image' % file_name)
        continue

    # OpenCV loads images as BGR; convert to RGB and resize to 224x224.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224))

    # Append the image and both labels together, only after the image loaded
    # successfully, so the three lists always stay index-aligned.
    X_train.append(np.asarray(img, dtype=np.uint8))
    y_age.append(age_by_id[image_id])
    y_gender.append(1 if male_by_id[image_id] else 0)

print('')
print('100% completed loading data')
# Save data
# Each list is written to its own pickle file. The `with` blocks guarantee the
# file handle is closed even if cPickle.dump() raises part-way through.
with open('data.pkl', 'wb') as train_pkl:
    cPickle.dump(X_train, train_pkl, protocol=cPickle.HIGHEST_PROTOCOL)

with open('data_age.pkl', 'wb') as train_age_pkl:
    cPickle.dump(y_age, train_age_pkl, protocol=cPickle.HIGHEST_PROTOCOL)

with open('data_gender.pkl', 'wb') as train_gender_pkl:
    cPickle.dump(y_gender, train_gender_pkl, protocol=cPickle.HIGHEST_PROTOCOL)