Skip to content

Commit bf2ce56

Browse files
wilrich-msft authored and mahilleb-msft committed
Support for GPU/sparse
1 parent a6bc7a8 commit bf2ce56

File tree

8 files changed

+184
-119
lines changed

8 files changed

+184
-119
lines changed

Source/CNTKv2LibraryDll/CompositeFunction.cpp

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1408,7 +1408,7 @@ namespace CNTK
14081408
{
14091409
// Ensure that only a subset of this function's outputs are being asked to be evaluated
14101410
if (functionOutputs.find(outputVarValuePair.first) == functionOutputs.end())
1411-
InvalidArgument("Requested output is not an Ouptut of the Function");
1411+
InvalidArgument("Requested output is not an Output of the Function");
14121412

14131413
auto& requiredArgumentsForCurrentOutput = GetArgumentDependencies(outputVarValuePair.first);
14141414
requiredArguments.insert(requiredArgumentsForCurrentOutput.begin(), requiredArgumentsForCurrentOutput.end());

Source/Math/Matrix.cpp

+10 -2
Original file line number | Diff line number | Diff line change
@@ -1091,7 +1091,15 @@ Matrix<ElemType>& Matrix<ElemType>::DoGatherColumnsOf(ElemType beta, const Matri
10911091
{ m_CPUMatrix->DoGatherColumnsOf(beta, *idx.m_CPUMatrix, *a.m_CPUMatrix, alpha); },
10921092
{ m_GPUMatrix->DoGatherColumnsOf(beta, *idx.m_GPUMatrix, *a.m_GPUMatrix, alpha); },
10931093
{ m_CPUSparseMatrix->DoGatherColumnsOf(beta, *idx.m_CPUMatrix, *a.m_CPUSparseMatrix, alpha); },
1094-
{ NOT_IMPLEMENTED; });
1094+
{
1095+
// TODO replace by more performant version directly on GPU that does not require the round-trip over CPU.
1096+
Matrix<ElemType> tempIdx(CPUDEVICE); tempIdx.AssignValuesOf(idx);
1097+
CPUSparseMatrix<ElemType> tempA(a.GetFormat(), a.GetNumRows(), a.GetNumCols(), a.m_GPUSparseMatrix->GetNumNZElements());
1098+
1099+
a.m_GPUSparseMatrix->CopyToCPUSparseMatrix(tempA);
1100+
tempA.DoGatherColumnsOf(beta, *tempIdx.m_CPUMatrix, tempA, alpha);
1101+
m_GPUSparseMatrix->SetValue(tempA);
1102+
});
10951103

10961104
return *this;
10971105
}
@@ -3621,7 +3629,7 @@ void Matrix<ElemType>::DecideAndMoveToRightDevice(const Matrix<ElemType>& a, con
36213629
template <class ElemType>
36223630
void Matrix<ElemType>::DecideAndMoveToRightDevice(const Matrix<ElemType>& a, const Matrix<ElemType>& b, const Matrix<ElemType>& c, const Matrix<ElemType>& d)
36233631
{
3624-
// this function is only called for one operator, so for now we keep it imple
3632+
// this function is only called for one operator, so for now we keep it simple
36253633
DecideAndMoveToRightDevice(a, b, c);
36263634
d._transferToDevice(a.GetDeviceId()); // BUGBUG: Is this correct in case a,b,c share the same preferredDevice?
36273635
}

bindings/python/cntk/io/__init__.py

+24 -19
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ def shape(self):
5151
def mask(self):
5252
'''
5353
The mask object of the minibatch. In it, `2` marks the beginning of a
54-
sequence, `1` marks a sequence element as valid, and `0` markse it as
54+
sequence, `1` marks a sequence element as valid, and `0` marks it as
5555
invalid.
5656
'''
5757
return self.m_data.mask().to_numpy()
@@ -296,6 +296,7 @@ class Deserializer(dict):
296296
Deserializer type Description
297297
========================== ============
298298
:class:`ImageDeserializer` Deserializer for images that uses OpenCV
299+
:class:`CTFDeserializer` Deserializer for text of the `CNTKTextReader format <https://github.com/microsoft/cntk/wiki/CNTKTextFormat-Reader>`_
299300
========================== ============
300301
301302
Args:
@@ -312,16 +313,19 @@ def __init__(self, type):
312313
class ImageDeserializer(Deserializer):
313314
'''
314315
This class configures the image reader that reads images and corresponding
315-
labels from a file of the form
316+
labels from a file of the form::
316317
317-
<full path to image><tab><numerical label (0-based class id)>
318+
<full path to image> <tab> <numerical label (0-based class id)>
319+
or::
320+
321+
sequenceId <tab> path <tab> label
318322
319323
Args:
320324
filename (str): file name of the map file that associates images to
321325
classes
322326
323327
See also:
324-
https://github.com/microsoft/cntk/wiki/Image-reader
328+
`Image reader definition <https://github.com/microsoft/cntk/wiki/Image-reader>`_
325329
'''
326330

327331
def __init__(self, filename, streams=None):
@@ -447,24 +451,22 @@ def mean(filename):
447451

448452
# TODO color transpose
449453

450-
#
451-
# CNTKTextFormatReader
452-
# TODO get away from cntk_py.text_format_minibatch_source and set it up
453-
# similarly to ImageDeserializer
454-
#
455-
456454

457-
#class TextFormatDeserializer(Deserializer): # TODO: either call it CNTKTextFormat or CTF. TextFormat is confusable with plain text
458455
class CTFDeserializer(Deserializer):
459456
'''
460-
This class configures the text reader that reads text-encoded files from a file with lines of the form
461-
[Sequence_Id](Sample)+
462-
where
463-
Sample=|Input_Name (Value )*
457+
This class configures the text reader that reads text-encoded files from a
458+
file with lines of the form::
459+
460+
[Sequence_Id](Sample)+
461+
462+
where::
463+
464+
Sample=|Input_Name (Value )*
465+
464466
Args:
465467
filename (str): file name containing the text input
466468
See also:
467-
https://github.com/Microsoft/CNTK/wiki/CNTKTextFormat-Reader
469+
`CNTKTextReader format <https://github.com/microsoft/cntk/wiki/CNTKTextFormat-Reader>`_
468470
'''
469471

470472
def __init__(self, filename, streams=None):
@@ -483,8 +485,11 @@ def map_input(self, node, dim, format="dense", alias=None):
483485
'''
484486
Maps node (either node instance or node name) to a part of the text input,
485487
either specified by the node name or the alias in the text file.
486-
Example: for node name 'Apples' an input line could look like this:
487-
|Apples 0 1 2 3 4 5 6 7 8 9
488+
489+
Example: for node name 'input0' an input line could look like this::
490+
491+
|input0 3 7 1 0 2
492+
488493
Args:
489494
node (str or input node): node or its name
490495
dim (int): specifies the dimension of the input value vector
@@ -493,7 +498,7 @@ def map_input(self, node, dim, format="dense", alias=None):
493498
format (str, default 'dense'): 'dense' or 'sparse'. Specifies the input type.
494499
alias (str, default None): None or alias name. Optional abbreviated name that
495500
is used in the text file to avoid repeating long input names. For details please
496-
see https://github.com/Microsoft/CNTK/wiki/CNTKTextFormat-Reader
501+
see `CNTKTextReader format <https://github.com/microsoft/cntk/wiki/CNTKTextFormat-Reader>`_
497502
'''
498503
if not isinstance(node, str):
499504
node = node.name()

bindings/python/cntk/learner.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -369,7 +369,8 @@ def momentum_sgd(parameters, lr, momentum,
369369
with truncation
370370
371371
Returns:
372-
Instance of a :class:`cntk.learner.Learner` that can be passed to the :class:`cntk.trainer.Trainer`
372+
Instance of a :class:`~cntk.learner.Learner` that can be passed to the
373+
:class:`~cntk.trainer.Trainer`
373374
'''
374375
_verify_learning_rate_type(lr)
375376
_verify_momentum_type(momentum)

bindings/python/cntk/ops/__init__.py

+3 -3
Original file line number | Diff line number | Diff line change
@@ -338,7 +338,7 @@ def pooling(operand, pooling_type, pooling_window_shape, strides=(1,), auto_padd
338338
339339
Args:
340340
operand: pooling input
341-
pooling_type: one of :const:`cntk.ops.MAX_POOLING` or :const:`cntk.ops.AVG_POOLING`
341+
pooling_type: one of :const:`~cntk.ops.MAX_POOLING` or :const:`~cntk.ops.AVG_POOLING`
342342
pooling_window_shape: dimensions of the pooling window
343343
strides (default 1): strides.
344344
auto_padding: automatic padding flags for each input dimension.
@@ -1071,7 +1071,7 @@ def softmax(x, name=''):
10711071
'''
10721072
from cntk.cntk_py import softmax
10731073
x = sanitize_input(x)
1074-
return softmax(x)
1074+
return softmax(x, name)
10751075

10761076

10771077
@typemap
@@ -1095,7 +1095,7 @@ def hardmax(x, name=''):
10951095
'''
10961096
from cntk.cntk_py import hardmax
10971097
x = sanitize_input(x)
1098-
return hardmax(x)
1098+
return hardmax(x, name)
10991099

11001100

11011101
@typemap

bindings/python/cntk/ops/variables.py

+4 -1
Original file line number | Diff line number | Diff line change
@@ -203,7 +203,10 @@ def __init__(self, value=None, shape=None, dtype=None, device=None, name=''):
203203
if np.isscalar(value):
204204
super().__init__(utils.sanitize_shape(shape), sanitize_dtype_cntk(dtype), value)
205205
else:
206-
ndav = sanitize_value(shape, value, dtype, device)
206+
if isinstance(value, cntk_py.Value):
207+
ndav = value.data()
208+
else:
209+
ndav = sanitize_value(shape, value, dtype, device)
207210
super().__init__(ndav, name)
208211

209212

bindings/python/cntk/tests/trainer_test.py

+98 -67
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
# for full license information.
55
# ==============================================================================
66

7+
import os
78
import math
89
import numpy as np
910
from .. import Function
@@ -65,6 +66,64 @@ def test_output_to_retain():
6566

6667
assert np.allclose(var_map[z_output], np.asarray(in1_value)+20)
6768

69+
def test_eval_sparse_dense(tmpdir, device_id):
70+
from cntk import Axis
71+
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
72+
from cntk.device import cpu, gpu, set_default_device
73+
from cntk.ops import input_variable, times
74+
from scipy.sparse import csr_matrix
75+
76+
input_vocab_dim = label_vocab_dim = 69
77+
78+
ctf_data = '''\
79+
0 |S0 3:1 |# <s> |S1 3:1 |# <s>
80+
0 |S0 4:1 |# A |S1 32:1 |# ~AH
81+
0 |S0 5:1 |# B |S1 36:1 |# ~B
82+
0 |S0 4:1 |# A |S1 31:1 |# ~AE
83+
0 |S0 7:1 |# D |S1 38:1 |# ~D
84+
0 |S0 12:1 |# I |S1 47:1 |# ~IY
85+
0 |S0 1:1 |# </s> |S1 1:1 |# </s>
86+
2 |S0 60:1 |# <s> |S1 3:1 |# <s>
87+
2 |S0 61:1 |# A |S1 32:1 |# ~AH
88+
'''
89+
ctf_file = str(tmpdir/'2seqtest.txt')
90+
with open(ctf_file, 'w') as f:
91+
f.write(ctf_data)
92+
93+
mbs = MinibatchSource(CTFDeserializer(ctf_file, StreamDefs(
94+
features = StreamDef(field='S0', shape=input_vocab_dim, is_sparse=True),
95+
labels = StreamDef(field='S1', shape=label_vocab_dim, is_sparse=True)
96+
)), randomize=False, epoch_size = 2)
97+
98+
batch_axis = Axis.default_batch_axis()
99+
input_seq_axis = Axis('inputAxis')
100+
label_seq_axis = Axis('labelAxis')
101+
102+
input_dynamic_axes = [batch_axis, input_seq_axis]
103+
raw_input = input_variable(
104+
shape=input_vocab_dim, dynamic_axes=input_dynamic_axes,
105+
name='raw_input', is_sparse=True)
106+
107+
mb_valid = mbs.next_minibatch(minibatch_size_in_samples=100,
108+
input_map={raw_input : mbs.streams.features})
109+
110+
z = times(raw_input, np.eye(input_vocab_dim))
111+
e_reader = z.eval(mb_valid)
112+
113+
# CSR with the raw_input encoding in ctf_data
114+
one_hot_data = [
115+
[3, 4, 5, 4, 7, 12, 1],
116+
[60, 61]
117+
]
118+
data = [csr_matrix(np.eye(input_vocab_dim, dtype=np.float32)[d]) for d in
119+
one_hot_data]
120+
e_csr = z.eval({raw_input: data}, device=cntk_device(device_id))
121+
assert np.all(np.allclose(a, b) for a,b in zip(e_reader, e_csr))
122+
123+
# One-hot with the raw_input encoding in ctf_data
124+
data = one_hot(one_hot_data, num_classes=input_vocab_dim)
125+
e_hot = z.eval({raw_input: data}, device=cntk_device(device_id))
126+
assert np.all(np.allclose(a, b) for a,b in zip(e_reader, e_hot))
68127

69128
@pytest.mark.parametrize("batch_index_data", [
70129
[2,3],
@@ -73,93 +132,65 @@ def test_output_to_retain():
73132
def test_eval_sparse_no_seq(batch_index_data, device_id):
74133
dim = 10
75134
multiplier = 2
76-
in1 = input_variable(shape=(dim,), is_sparse=True)
77-
z = times(in1, np.eye(dim).astype(np.float32))
78-
z *= multiplier
79-
batch = (np.eye(dim)[batch_index_data]).astype(np.float32)
80-
expected = batch * multiplier
81-
sparse_val = csr(batch)
82-
result = z.eval({in1: sparse_val}, device=cntk_device(device_id))
83-
assert np.allclose(result, [expected])
84-
85-
@pytest.mark.parametrize("batch_index_data", [
86-
[[2,3], [0,1,6]],
87-
])
88-
def test_eval_sparse_seq_0(batch_index_data, device_id):
89-
if cntk_device(device_id)!=cpu(): # FIXME
90-
pytest.skip("sparse is not yet supported on GPU")
91-
dim = 10
92-
multiplier = 2
93-
in1 = input_variable(shape=(dim,), is_sparse=True)
94-
z = times(in1, np.eye(dim).astype(np.float32))
95-
z *= multiplier
96-
batch = [(np.eye(dim)[seq_index_data]).astype(np.float32) for
97-
seq_index_data in batch_index_data]
98-
expected = batch * multiplier
99-
sparse_val = [csr(seq) for seq in batch]
100-
result = z.eval({in1: sparse_val}, device=cntk_device(device_id))
101-
assert np.all(np.allclose(a,b) \
102-
for a,b in zip(result, expected))
135+
for var_is_sparse in [True, False]:
136+
in1 = input_variable(shape=(dim,), is_sparse=var_is_sparse)
137+
z = times(in1, multiplier*np.eye(dim))
138+
batch = np.eye(dim)[batch_index_data]
139+
expected = batch * multiplier
140+
sparse_val = csr(batch)
141+
result = z.eval({in1: sparse_val}, device=cntk_device(device_id))
142+
assert np.allclose(result, [expected])
103143

104144
@pytest.mark.parametrize("batch", [
105-
#[[csr([0,1,2,0])]],
106-
[
107-
[csr([0, 2, 0, 7]), csr([10, 20, 0, 0])],
108-
[csr([0, 0, 0, 3])]
145+
[[csr([0,1,2,0])]],
146+
[
147+
[csr([0, 2, 0, 7]), csr([10, 20, 0, 0])],
148+
[csr([0, 0, 0, 3])]
149+
],
150+
# same as before, but sequence being encoded as one matrix
151+
[
152+
csr([[0, 2, 0, 7], [10, 20, 0, 0]]),
153+
csr([0, 0, 0, 3])
109154
]
110-
])
155+
])
111156
def test_eval_sparse_seq_1(batch, device_id):
112-
if cntk_device(device_id)!=cpu(): # FIXME
113-
pytest.skip("sparse is not yet supported on GPU")
114157
dim = 4
115158
multiplier = 2
116-
# FIXME
117-
in1 = input_variable(shape=(dim,), is_sparse=True)
118-
# in1 = input_variable(shape=(dim,))
119-
z = times(in1, multiplier*np.eye(dim))#np.eye(dim).astype(np.float32))
120-
121-
expected = [[m.todense() * multiplier for m in seq] for seq in batch]
122-
123-
result = z.eval({in1: batch}, device=cntk_device(device_id))
159+
for var_is_sparse in [True, False]:
160+
in1 = input_variable(shape=(dim,), is_sparse=var_is_sparse)
161+
z = times(in1, multiplier*np.eye(dim))
162+
expected = [[m.todense() * multiplier for m in seq] for seq in batch]
163+
result = z.eval({in1: batch}, device=cntk_device(device_id))
124164

125-
assert np.all(np.allclose(a,b) \
126-
for a,b in zip(result, expected))
165+
assert np.all(np.allclose(a,b) \
166+
for a,b in zip(result, expected))
127167

128168

129169
@pytest.mark.parametrize("one_hot_batch", [
130170
([[2,5],
131171
[0,1,6]]),
132-
([[1],
133-
[1],[2],[3]]),
172+
([[1],[1],[2],[3]]),
173+
([[1,5],
174+
[4]]),
134175
])
135176
def test_eval_one_hot_seq(one_hot_batch, device_id):
136-
if cntk_device(device_id)!=cpu(): # FIXME
137-
pytest.skip("sparse is not yet supported on GPU")
138177
dim = 10
139178
multiplier = 2
140-
# FIXME
141-
# in1 = input_variable(shape=(dim,), is_sparse=True)
142-
in1 = input_variable(shape=(dim,))
143-
# Convert CNTK node value to dense so that we can compare it later
144-
z = times(in1, np.eye(dim).astype(np.float32))
145-
z *= multiplier
146-
# Convert expectation to dense
147-
expected = [np.eye(dim)[seq]*multiplier for seq in one_hot_batch]
148-
batch = one_hot(one_hot_batch, num_classes=dim, device=cntk_device(device_id))
149-
assert np.all(np.allclose(a,b) \
150-
for a,b in zip(z.eval({in1: batch}, device=cntk_device(device_id)), expected))
179+
for var_is_sparse in [True, False]:
180+
in1 = input_variable(shape=(dim,), is_sparse=var_is_sparse)
181+
# Convert CNTK node value to dense so that we can compare it later
182+
z = times(in1, np.eye(dim)*multiplier)
183+
# Convert expectation to dense
184+
expected = [np.eye(dim)[seq]*multiplier for seq in one_hot_batch]
185+
batch = one_hot(one_hot_batch, num_classes=dim, device=cntk_device(device_id))
186+
assert np.all(np.allclose(a,b) \
187+
for a,b in zip(z.eval({in1: batch}, device=cntk_device(device_id)), expected))
151188

152189
@pytest.mark.parametrize("one_hot_batch, dim", [
153190
([[11]], 10),
154191
([[0, 1]], 1),
155192
])
156-
# FIXME
157-
def _test_eval_one_hot_bad(one_hot_batch, dim, device_id):
158-
in1 = input_variable(shape=dim)
159-
# Convert CNTK node value to dense so that we can compare it later
160-
z = times(in1, np.eye(dim).astype(np.float32))
161-
# Convert expectation to dense
162-
batch = one_hot(one_hot_batch, num_classes=dim, device=cntk_device(device_id))
193+
def test_eval_one_hot_bad(one_hot_batch, dim, device_id):
163194
with pytest.raises(ValueError):
164-
z.eval({in1: batch})
195+
batch = one_hot(one_hot_batch, num_classes=dim, device=cntk_device(device_id))
165196

0 commit comments

Comments
 (0)