12
12
13
13
# Directory (relative to config.ROOT_PATH) where models are persisted.
MODEL_PATH = '/models'
# Number of hex characters in a model handle.
HANDLE_LENGTH = 10

keras_cache = {} # Local thread memory cache.
16
16
17
17
def clear_thread_cache():
    """ Empties the thread-local keras model cache.

    Bug fix: without the ``global`` declaration the assignment created a
    new local variable and left the module-level ``keras_cache`` untouched,
    so the cache was never actually cleared.
    """
    global keras_cache
    keras_cache = {}
19
19
20
20
def handle2path(handle):
    """ Translates a handle to a full path on the FS. """
    model_dir = config.ROOT_PATH + MODEL_PATH
    return model_dir + "/" + handle
22
23
23
24
def path2handle(path):
    """ Translates a full path to a handle (the last path component). """
    segments = path.split('/')
    return segments[-1]
25
27
26
28
def new_model ():
29
+ """ Construcs an empty Model assiging it a new random hex ID and persiting it to disk.
30
+ Returns: The Model instance.
31
+ """
27
32
filename = random_hex ()
28
33
while os .path .isfile (filename ):
29
34
filename = random_hex ()
@@ -38,24 +43,34 @@ def new_model():
38
43
return model
39
44
40
45
def save_model(model):
    """ Saves the given model to disk and refreshes its memcached entry.

    The JSON payload is serialized once and reused for both the file write
    and the cache update (the original called ``to_json()`` twice).
    """
    payload = model.to_json()
    # 'w' truncates and writes; the file is never read back here, so the
    # read/write 'w+' mode of the original was unnecessary.
    with open(model.model_path, 'w') as f:
        f.write(payload)
    config.get_mc().set(path2handle(model.model_path), payload)
44
50
45
51
def get_model(handle):
    """ Fetches the model from memcached or disk with a matching handle.

    Returns: The Model instance if the model is found, None otherwise.
    """
    mem_try = config.get_mc().get(handle)
    if mem_try:
        m = Model()
        m.from_json(mem_try)
        return m
    model_path = config.ROOT_PATH + MODEL_PATH + "/" + handle
    try:
        with open(model_path, "r") as f:
            model = Model()
            model.from_json(f.read())
        # Warm the cache so the next lookup skips the disk read.
        config.get_mc().set(handle, model.to_json())
        return model
    except (IOError, OSError, ValueError):
        # Narrowed from a bare except: only a missing/unreadable file or
        # malformed JSON means "not found"; programming errors and
        # KeyboardInterrupt now propagate instead of being swallowed.
        return None
57
69
58
70
def parse_val (value ):
71
+ """ Infers the type of the value by trying to parse it to different formats.
72
+ Returns: The parse value and the type.
73
+ """
59
74
if not value :
60
75
return value , None
61
76
tests = [
@@ -83,6 +98,8 @@ def parse_val(value):
83
98
return value .decode ('utf-8' , 'ignore' ), 'str'
84
99
85
100
def persist_keras_model (handle , model ):
101
+ """ Persists a keras model to disk.
102
+ """
86
103
model_dir = config .ROOT_PATH + MODEL_PATH
87
104
88
105
# Clear first all previously persisted models.
@@ -94,6 +111,9 @@ def persist_keras_model(handle, model):
94
111
model .save (os .path .join (model_dir , name ))
95
112
96
113
def _load_keras_model (handle ):
114
+ """ Loads a keras model from disk.
115
+ Returns: The keras model instance if found, None otherwise.
116
+ """
97
117
name = handle + '_keras'
98
118
print 'load ' + name
99
119
model_dir = config .ROOT_PATH + MODEL_PATH
@@ -106,6 +126,9 @@ def _load_keras_model(handle):
106
126
return model
107
127
108
128
def load_keras_model (handle ):
129
+ """ Loads a keras model from cache or disk.
130
+ Returns: The keras model instance.
131
+ """
109
132
if handle in keras_cache :
110
133
print 'From thread cache'
111
134
return keras_cache [handle ]
@@ -114,12 +137,20 @@ def load_keras_model(handle):
114
137
return model
115
138
116
139
def delete_model(handle):
    """ Deletes all on-disk models whose filename starts with the given
    handle, and evicts the memcached entry. """
    model_dir = config.ROOT_PATH + MODEL_PATH
    for f in os.listdir(model_dir):
        # str.startswith replaces re.search(handle + ".*", f): the handle was
        # embedded unescaped in a regex (an injection/metacharacter hazard)
        # and the trailing ".*" was a no-op. Persisted files are named
        # "<handle>" or "<handle>_keras", so a prefix match is the intent.
        if f.startswith(handle):
            os.remove(os.path.join(model_dir, f))
    config.get_mc().delete(handle)
146
+
121
147
122
148
def load_csvs (file_list ):
149
+ """ Loads csv from files and returns the parsed value dictionary.
150
+ Params: The list of files.
151
+ Returns: Three dictionaries. The first is feature-name -> value_list, the second one feature_name -> type and the
152
+ third one feature_name -> [min_value, max_value] if applies.
153
+ """
123
154
print 'File of csvs to load ' + unicode (file_list )
124
155
data = {}
125
156
types = {}
@@ -129,7 +160,7 @@ def load_csvs(file_list):
129
160
reader = csv .reader (read_f )
130
161
headers = []
131
162
for row in reader :
132
- if not headers :
163
+ if not headers : # If first row, load the headers assuming they are contained in the first row.
133
164
headers = row
134
165
output_headers = 0
135
166
for h in headers :
@@ -138,7 +169,7 @@ def load_csvs(file_list):
138
169
output_headers += 1 if h .startswith ('output_' ) else 0
139
170
if not output_headers :
140
171
return 'No outputs defined in CSV. Please define columns as outputs by preppending \' output_\' .' , ''
141
- else :
172
+ else : # If not first row, parse values assuming the headers dictionary has been already filled.
142
173
for idx , value in enumerate (row ):
143
174
val , typ = parse_val (value )
144
175
data [headers [idx ]].append (val )
@@ -147,22 +178,25 @@ def load_csvs(file_list):
147
178
types [headers [idx ]] = typ
148
179
else :
149
180
print 'WARN: CSV %s not found' % f
181
+
150
182
# Fix '' values, and standardize formats.
151
183
for header , column in data .iteritems ():
152
184
for idx , value in enumerate (column ):
153
185
if not value :
154
186
data [header ][idx ] = 0 if types [header ] != 'str' else ''
155
187
else :
156
188
data [header ][idx ] = unicode (data [header ][idx ]) if types [header ] == 'str' else data [header ][idx ]
189
+
157
190
# Normalize numeric inputs to -1 to 1.
158
191
norms = {}
159
-
160
192
for header , column in data .iteritems ():
161
193
if types [header ] != 'str' :
162
194
floor = float (min (column ))
163
195
ceil = float (max (column ))
164
196
norms [header ] = (floor , ceil )
165
197
data [header ] = [(x - floor )/ (ceil - floor ) for x in column ]
198
+
199
+ # Run some last verifications so that all features have the same amount of rows.
166
200
length = 0
167
201
for header , column in data .iteritems ():
168
202
if not length :
@@ -173,6 +207,7 @@ def load_csvs(file_list):
173
207
return data , types , norms
174
208
175
209
def random_hex():
    """ Creates a random hex string ID of HANDLE_LENGTH characters.

    Returns: A zero-padded lowercase hex string.
    """
    ran = random.randrange(16 ** HANDLE_LENGTH)
    # Derive the padding width from HANDLE_LENGTH instead of the hard-coded
    # "%010x", so the constant and the format can no longer drift apart.
    return "%0*x" % (HANDLE_LENGTH, ran)
178
213
0 commit comments