util.py
import theano
import theano.tensor as T
import numpy as np
import pickle
import hashlib
import json
import enum
import inspect
import os
import itertools
import collections

EPSILON = np.array(1e-8, np.float32)

def identity(x):
    return x

def init_params(shape, stddev=0.1, shift=0.0):
    """Get an initial value for a parameter"""
    return np.float32(np.random.normal(shift, stddev, shape))

def do_layer(activation, ipt, weights, biases):
    """
    Perform a layer operation, i.e. out = activ( xW + b )
        activation: An activation function
        ipt: Tensor of shape (n_batch, X)
        weights: Tensor of shape (X, Y)
        biases: Tensor of shape (Y)

    Returns: Tensor of shape (n_batch, Y)
    """
    xW = T.dot(ipt, weights)
    b = T.shape_padleft(biases)
    return activation( xW + b )

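# Illustrative usage of do_layer (not from the original file; the layer sizes
# and variable names below are made up for the example):
#
#   W = theano.shared(init_params([784, 256]), name="W")
#   b = theano.shared(init_params([256]), name="b")
#   x = T.matrix("x")                           # shape (n_batch, 784)
#   hidden = do_layer(T.nnet.sigmoid, x, W, b)  # shape (n_batch, 256)
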
def broadcast_concat(tensors, axis):
    """
    Broadcast tensors together, then concatenate along axis
    """
    ndim = tensors[0].ndim
    assert all(t.ndim == ndim for t in tensors), "ndims don't match for broadcast_concat: {}".format(tensors)
    broadcast_shapes = []
    for i in range(ndim):
        if i == axis:
            broadcast_shapes.append(1)
        else:
            # Use the size of the first tensor that is not broadcastable along
            # this dimension; if every tensor is broadcastable there, use 1
            dim_size = next((t.shape[i] for t in tensors if not t.broadcastable[i]), 1)
            broadcast_shapes.append(dim_size)
    broadcasted_tensors = []
    for t in tensors:
        tile_reps = [bshape if t.broadcastable[i] else 1 for i, bshape in enumerate(broadcast_shapes)]
        # Entries of tile_reps are either the literal int 1 or a symbolic shape,
        # so only skip tiling when every entry is statically known to be 1 (a
        # plain `== 1` on a symbolic shape can't be evaluated at graph-build time)
        if all(isinstance(rep, int) and rep == 1 for rep in tile_reps):
            # Don't need to broadcast this tensor
            broadcasted_tensors.append(t)
        else:
            broadcasted_tensors.append(T.tile(t, tile_reps))
    return T.concatenate(broadcasted_tensors, axis)

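# Illustrative usage (not from the original file): concatenate a per-batch
# tensor with a single vector that gets broadcast across the batch:
#
#   a = T.tensor3("a")                     # shape (n_batch, n, k1)
#   b = T.shape_padleft(T.matrix("b"), 1)  # shape (1, n, k2), broadcastable in dim 0
#   c = broadcast_concat([a, b], 2)        # shape (n_batch, n, k1 + k2)
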
def pad_to(tensor, shape):
    """
    Pads tensor to shape with zeros
    """
    current = tensor
    for i in range(len(shape)):
        # Build the zero block to append along axis i: dimensions before i have
        # already been padded to their final size (fs), dimensions after i still
        # have the tensor's original size (ts), and axis i itself needs the
        # (fs - ts) missing entries
        padding = T.zeros([(fs-ts if i==j else fs if j<i else ts) for j,(ts,fs) in enumerate(zip(tensor.shape, shape))])
        current = T.concatenate([current, padding], i)
    return current

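# Illustrative usage (not from the original file):
#
#   x = T.ones([2, 3])
#   pad_to(x, [4, 5]).eval().shape   # (4, 5); the new entries are zero
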
def save_params(params, file):
    """
    Save params into a pickle file
    """
    values = [param.get_value() for param in params]
    pickle.dump(values, file)

def load_params(params, file):
    """
    Load params from a pickle file
    """
    values = pickle.load(file)
    for param, value in zip(params, values):
        try:
            param.set_value(value)
        except Exception:
            # value may itself be a shared variable rather than a raw
            # numpy array, so unwrap it and try again
            param.set_value(value.get_value())

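# Illustrative round-trip (not from the original file; `params` is assumed to
# be a list of theano shared variables):
#
#   with open("model.p", "wb") as f:
#       save_params(params, f)
#   with open("model.p", "rb") as f:
#       load_params(params, f)
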
def set_params(params, saved_params):
    """
    Copies saved_params into params (both must be sequences of theano shared variables)
    """
    for param, saved in zip(params, saved_params):
        param.set_value(saved.get_value())

def reduce_log_sum(tensor, axis=None, guaranteed_finite=False):
    """
    Sum probabilities in the log domain, i.e. return
        log(e^vec[0] + e^vec[1] + ...)
          = log(e^x e^(vec[0]-x) + e^x e^(vec[1]-x) + ...)
          = log(e^x [e^(vec[0]-x) + e^(vec[1]-x) + ...])
          = log(e^x) + log(e^(vec[0]-x) + e^(vec[1]-x) + ...)
          = x + log(e^(vec[0]-x) + e^(vec[1]-x) + ...)

    For numerical stability, we choose x = max(vec).

    Note that if x is -inf, that means all values are -inf,
    so the answer should be -inf. In this case, choose x = 0.
    """
    maxval = T.max(tensor, axis)
    maxval_full = T.max(tensor, axis, keepdims=True)
    if not guaranteed_finite:
        # Replace -inf maxima with 0 so that exp(tensor - maxval_full) stays defined
        maxval = T.switch(T.isfinite(maxval), maxval, T.zeros_like(maxval))
        maxval_full = T.switch(T.isfinite(maxval_full), maxval_full, T.zeros_like(maxval_full))
    reduced_sum = T.sum(T.exp(tensor - maxval_full), axis)
    logsum = maxval + T.log(reduced_sum)
    return logsum

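# Illustrative check (not from the original file): the log-sum-exp of log
# probabilities recovers the log of their sum without underflow:
#
#   logp = T.log(np.float32([0.5, 0.25, 0.25]))
#   reduce_log_sum(logp).eval()   # ~ 0.0 == log(1.0)
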
def shape_padaxes(tensor, axes):
    # Insert a broadcastable dimension at each of the given axes
    for axis in axes:
        tensor = T.shape_padaxis(tensor, axis)
    return tensor

idx_map = collections.defaultdict(lambda: 0)

def get_unique_name(cls):
    # Produce names of the form "<ClassName><n>", with a separate counter per class
    name = "{}{}".format(cls.__name__, idx_map[cls])
    idx_map[cls] += 1
    return name

def independent_best(tensor):
    """
    tensor should be a tensor of probabilities
    Return a new tensor of maximum likelihood, i.e. 1 in each position
    where p >= 0.5, else 0
    """
    return T.cast(T.ge(tensor, 0.5), 'floatX')

def categorical_best(tensor):
    """
    tensor should be a tensor of shape (..., categories)
    Return a new tensor of the same shape but one-hot at the position of the
    best category
    """
    flat_tensor = tensor.reshape([-1, tensor.shape[-1]])
    argmax_posns = T.argmax(flat_tensor, 1)
    flat_snapped = T.zeros_like(flat_tensor)
    # Put a 1 at the argmax column of every row
    flat_snapped = T.set_subtensor(flat_snapped[T.arange(flat_tensor.shape[0]), argmax_posns], 1.0)
    snapped = flat_snapped.reshape(tensor.shape)
    return snapped

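# Illustrative contrast of the two snapping modes (not from the original file):
#
#   probs = T.as_tensor_variable(np.float32([[0.7, 0.6, 0.1]]))
#   independent_best(probs).eval()   # [[1., 1., 0.]] - each p >= 0.5 independently
#   categorical_best(probs).eval()   # [[1., 0., 0.]] - one-hot at the argmax
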
def make_dropout_mask(shape, keep_frac, srng):
    # Sample a binary keep/drop mask and rescale by 1/keep_frac (inverted
    # dropout), with a leading broadcastable dimension for the batch
    return T.shape_padleft(T.cast(srng.binomial(shape, p=keep_frac), 'float32') / keep_frac)

def apply_dropout(ipt, dropout):
    return ipt * dropout

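# Illustrative usage (not from the original file; reuses the hypothetical
# `hidden` tensor from the do_layer example above):
#
#   from theano.tensor.shared_randomstreams import RandomStreams
#   srng = RandomStreams(seed=12345)
#   mask = make_dropout_mask([256], 0.5, srng)   # keep each unit with p = 0.5
#   hidden_dropped = apply_dropout(hidden, mask)
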
def object_hash(thing):
    class EnumEncoder(json.JSONEncoder):
        def default(self, obj):
            if isinstance(obj, enum.Enum):
                return obj.name
            return super().default(obj)
    # Hash the canonical (sorted-key) JSON form, so equal objects hash equally
    strform = json.dumps(thing, sort_keys=True, cls=EnumEncoder)
    h = hashlib.sha1()
    h.update(strform.encode('utf-8'))
    return h.hexdigest()

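# Illustrative usage (not from the original file): any JSON-serializable
# structure (plus enum members) gets a stable hex digest:
#
#   object_hash({"layers": 2, "activation": "tanh"})
#   # -> the same 40-character sha1 hex string on every run
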
def get_compatible_kwargs(function, kwargs):
    # Fill in defaults for any parameters of `function` missing from kwargs,
    # and raise if a required (default-less) parameter is absent
    kwargs = dict(kwargs)
    sig = inspect.signature(function)
    for param in sig.parameters.values():
        if param.name not in kwargs:
            if param.default is inspect.Parameter.empty:
                raise TypeError("kwargs missing required argument '{}'".format(param.name))
            else:
                kwargs[param.name] = param.default
    return kwargs

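# Illustrative usage (not from the original file; `train` is a made-up function):
#
#   def train(learning_rate, batch_size=16):
#       ...
#   get_compatible_kwargs(train, {"learning_rate": 0.01})
#   # -> {"learning_rate": 0.01, "batch_size": 16}
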
def find_recent_params(outputdir):
    # Look for checkpoint files named "params<N>.p" and return (N, path) for
    # the largest N, or None if there are none
    files_list = list(os.listdir(outputdir))
    numbers = [int(x[6:-2]) for x in files_list if x[:6] == "params" and x[-2:] == ".p"]
    if len(numbers) == 0:
        return None
    most_recent = max(numbers)
    return most_recent, os.path.join(outputdir, "params{}.p".format(most_recent))
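
# Illustrative usage (not from the original file; "output" is a hypothetical
# checkpoint directory, and `params` a list of shared variables):
#
#   found = find_recent_params("output")
#   if found is not None:
#       iteration, path = found
#       with open(path, "rb") as f:
#           load_params(params, f)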