4
4
# for full license information.
5
5
# ==============================================================================
6
6
7
+ import os
7
8
import math
8
9
import numpy as np
9
10
from .. import Function
@@ -65,6 +66,64 @@ def test_output_to_retain():
65
66
66
67
assert np .allclose (var_map [z_output ], np .asarray (in1_value )+ 20 )
67
68
69
def test_eval_sparse_dense(tmpdir, device_id):
    """Compare evaluation of the same data fed three different ways.

    A small CTF file is written to ``tmpdir`` and read through a
    ``MinibatchSource``; the resulting minibatch is evaluated through
    ``times(raw_input, I)``.  The same indices are then fed as a list of
    CSR matrices and as a ``one_hot`` batch, and each result is compared
    element-wise against the reader-based result.

    Args:
        tmpdir: pytest temporary-directory fixture; the CTF file is
            created inside it.
        device_id: device identifier passed on to ``cntk_device``.
    """
    from cntk import Axis
    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
    from cntk.ops import input_variable, times
    from scipy.sparse import csr_matrix

    input_vocab_dim = label_vocab_dim = 69

    ctf_data = '''\
0 |S0 3:1 |# <s> |S1 3:1 |# <s>
0 |S0 4:1 |# A |S1 32:1 |# ~AH
0 |S0 5:1 |# B |S1 36:1 |# ~B
0 |S0 4:1 |# A |S1 31:1 |# ~AE
0 |S0 7:1 |# D |S1 38:1 |# ~D
0 |S0 12:1 |# I |S1 47:1 |# ~IY
0 |S0 1:1 |# </s> |S1 1:1 |# </s>
2 |S0 60:1 |# <s> |S1 3:1 |# <s>
2 |S0 61:1 |# A |S1 32:1 |# ~AH
'''
    ctf_file = str(tmpdir / '2seqtest.txt')
    with open(ctf_file, 'w') as f:
        f.write(ctf_data)

    mbs = MinibatchSource(CTFDeserializer(ctf_file, StreamDefs(
        features=StreamDef(field='S0', shape=input_vocab_dim, is_sparse=True),
        labels=StreamDef(field='S1', shape=label_vocab_dim, is_sparse=True)
    )), randomize=False, epoch_size=2)

    batch_axis = Axis.default_batch_axis()
    input_seq_axis = Axis('inputAxis')
    # NOTE(review): label_seq_axis is not referenced below - confirm whether
    # it is still needed.
    label_seq_axis = Axis('labelAxis')

    input_dynamic_axes = [batch_axis, input_seq_axis]
    raw_input = input_variable(
        shape=input_vocab_dim, dynamic_axes=input_dynamic_axes,
        name='raw_input', is_sparse=True)

    mb_valid = mbs.next_minibatch(minibatch_size_in_samples=100,
                                  input_map={raw_input: mbs.streams.features})

    # Multiplying by the identity leaves the values unchanged, so the output
    # mirrors the input data.
    z = times(raw_input, np.eye(input_vocab_dim))
    e_reader = z.eval(mb_valid)

    # CSR with the raw_input encoding in ctf_data
    one_hot_data = [
        [3, 4, 5, 4, 7, 12, 1],
        [60, 61]
    ]
    data = [csr_matrix(np.eye(input_vocab_dim, dtype=np.float32)[d]) for d in
            one_hot_data]
    e_csr = z.eval({raw_input: data}, device=cntk_device(device_id))
    # Builtin all() consumes the generator; np.all() on a generator object
    # never evaluates the comparisons and would always pass.
    assert all(np.allclose(a, b) for a, b in zip(e_reader, e_csr))

    # One-hot with the raw_input encoding in ctf_data
    data = one_hot(one_hot_data, num_classes=input_vocab_dim)
    e_hot = z.eval({raw_input: data}, device=cntk_device(device_id))
    assert all(np.allclose(a, b) for a, b in zip(e_reader, e_hot))
68
127
69
128
@pytest .mark .parametrize ("batch_index_data" , [
70
129
[2 ,3 ],
@@ -73,93 +132,65 @@ def test_output_to_retain():
73
132
def test_eval_sparse_no_seq(batch_index_data, device_id):
    """Evaluate a scaled-identity ``times`` on a CSR batch without sequences.

    The rows of the identity matrix selected by ``batch_index_data`` are fed
    as a CSR value to both a sparse and a dense input variable; the output
    must equal those rows multiplied by the scale factor.
    """
    dim = 10
    multiplier = 2
    # Dense reference data does not depend on the variable's sparsity.
    dense_rows = np.eye(dim)[batch_index_data]
    expected = dense_rows * multiplier
    for var_is_sparse in (True, False):
        in1 = input_variable(shape=(dim,), is_sparse=var_is_sparse)
        z = times(in1, multiplier * np.eye(dim))
        feed = {in1: csr(dense_rows)}
        result = z.eval(feed, device=cntk_device(device_id))
        assert np.allclose(result, [expected])
103
143
104
144
@pytest.mark.parametrize("batch", [
    [[csr([0, 1, 2, 0])]],
    [
        [csr([0, 2, 0, 7]), csr([10, 20, 0, 0])],
        [csr([0, 0, 0, 3])]
    ],
    # same as before, but sequence being encoded as one matrix
    [
        csr([[0, 2, 0, 7], [10, 20, 0, 0]]),
        csr([0, 0, 0, 3])
    ]
])
def test_eval_sparse_seq_1(batch, device_id):
    """Evaluate a scaled-identity ``times`` on batches of sparse sequences.

    Each sequence in ``batch`` is either a list of single-row CSR matrices
    or one multi-row CSR matrix.  The batch is fed to both a sparse and a
    dense input variable and every per-sequence output must equal the dense
    sequence multiplied by the scale factor.
    """
    dim = 4
    multiplier = 2
    # One 2-D dense array per sequence.  Iterating a sequence yields its rows
    # in both encodings (assumes csr is scipy.sparse.csr_matrix - TODO
    # confirm); stacking them avoids a (len, 1, dim) shape that would
    # broadcast against the (len, dim) result.
    expected = [np.vstack([row.toarray() for row in seq]) * multiplier
                for seq in batch]
    for var_is_sparse in [True, False]:
        in1 = input_variable(shape=(dim,), is_sparse=var_is_sparse)
        z = times(in1, multiplier * np.eye(dim))
        result = z.eval({in1: batch}, device=cntk_device(device_id))

        # Builtin all() consumes the generator; np.all() on a generator
        # object never evaluates the comparisons and would always pass.
        assert all(np.allclose(a, b) for a, b in zip(result, expected))
127
167
128
168
129
169
@pytest.mark.parametrize("one_hot_batch", [
    ([[2, 5],
      [0, 1, 6]]),
    ([[1], [1], [2], [3]]),
    ([[1, 5],
      [4]]),
])
def test_eval_one_hot_seq(one_hot_batch, device_id):
    """Evaluate a scaled-identity ``times`` on one-hot encoded sequences.

    ``one_hot_batch`` holds, per sequence, the class indices to encode.  The
    batch produced by ``one_hot`` is fed to both a sparse and a dense input
    variable, and each per-sequence output is compared against the matching
    identity-matrix rows multiplied by the scale factor.
    """
    dim = 10
    multiplier = 2
    # Dense expectation; independent of the variable's sparsity.
    expected = [np.eye(dim)[seq] * multiplier for seq in one_hot_batch]
    for var_is_sparse in [True, False]:
        in1 = input_variable(shape=(dim,), is_sparse=var_is_sparse)
        # The times node yields a dense value that can be compared directly.
        z = times(in1, np.eye(dim) * multiplier)
        batch = one_hot(one_hot_batch, num_classes=dim,
                        device=cntk_device(device_id))
        result = z.eval({in1: batch}, device=cntk_device(device_id))
        # Builtin all() consumes the generator; np.all() on a generator
        # object never evaluates the comparisons and would always pass.
        assert all(np.allclose(a, b) for a, b in zip(result, expected))
151
188
152
189
@pytest.mark.parametrize("one_hot_batch, dim", [
    ([[11]], 10),
    ([[0, 1]], 1),
])
def test_eval_one_hot_bad(one_hot_batch, dim, device_id):
    """Expect ``one_hot`` to raise ``ValueError`` for the parametrized inputs.

    In each parametrized case the batch contains an index that is not below
    ``dim``.
    """
    target_device = cntk_device(device_id)
    with pytest.raises(ValueError):
        one_hot(one_hot_batch, num_classes=dim, device=target_device)
165
196
0 commit comments