Skip to content

Commit e3ccc41

Browse files
committed
Add some code to expand convolution to 3d (still work in progress).
1 parent f3a29ea commit e3ccc41

10 files changed

+365
-165
lines changed

lib/nnc/ccv_cnnp_model_addons.c

+13-8
Original file line numberDiff line numberDiff line change
@@ -1102,16 +1102,18 @@ static void _ccv_cnnp_convolution_build(ccv_cnnp_model_t* const super, ccv_nnc_s
11021102
assert(output_size == 1);
11031103
const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
11041104
int i;
1105-
const int nd = CCV_NNC_MAX_DIM + 2;
1105+
const int k_nd = ccv_nnc_tensor_nd(self->kdim);
1106+
const int nd = k_nd + 2;
11061107
ccv_nnc_tensor_param_t weights_params = params;
11071108
if (self->format)
11081109
weights_params.format = self->format;
11091110
ccv_nnc_tensor_set_n(&weights_params, self->filters);
1110-
assert(ccv_nnc_tensor_get_c(params) % self->groups == 0);
1111-
ccv_nnc_tensor_set_c(&weights_params, nd, ccv_nnc_tensor_get_c(params) / self->groups);
1111+
const int c = ccv_nnc_tensor_get_c(params);
1112+
assert(c % self->groups == 0);
1113+
ccv_nnc_tensor_set_c(&weights_params, nd, c / self->groups);
11121114
const int hw = ccv_nnc_tensor_hw(weights_params, nd);
11131115
assert(hw >= 0);
1114-
for (i = 0; i < CCV_NNC_MAX_DIM; i++)
1116+
for (i = 0; i < k_nd; i++)
11151117
weights_params.dim[i + hw] = self->kdim[i];
11161118
if (!self->weights.graph)
11171119
self->weights = ccv_nnc_tensor_symbol_new(graph, weights_params, "weights");
@@ -1122,12 +1124,13 @@ static void _ccv_cnnp_convolution_build(ccv_cnnp_model_t* const super, ccv_nnc_s
11221124
memset(bias_params.dim, 0, sizeof(bias_params.dim));
11231125
bias_params.dim[0] = self->filters;
11241126
ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_FORWARD(self->groups, self->filters);
1125-
for (i = 0; i < CCV_NNC_MAX_DIM; i++)
1127+
for (i = 0; i < k_nd; i++)
11261128
cmd.info.size.dim[i] = self->kdim[i];
1129+
cmd.info.size.dim[k_nd] = c;
11271130
memcpy(cmd.info.convolution.dilation, self->dilation, sizeof(self->dilation));
11281131
ccv_nnc_tensor_param_t output_params;
11291132
// Dilate weight size based on the dilation factor.
1130-
for (i = 0; i < CCV_NNC_MAX_DIM; i++)
1133+
for (i = 0; i < k_nd; i++)
11311134
weights_params.dim[i + hw] = (self->kdim[i] - 1) * ccv_max(self->dilation[i], 1) + 1;
11321135
ccv_nnc_hint_tensor_auto(cmd, (ccv_nnc_tensor_param_t []){
11331136
params,
@@ -1235,8 +1238,9 @@ static void _ccv_cnnp_convolution_transpose_build(ccv_cnnp_model_t* const super,
12351238
ccv_nnc_tensor_param_t weights_params = params;
12361239
if (self->format)
12371240
weights_params.format = self->format;
1238-
ccv_nnc_tensor_set_n(&weights_params, ccv_nnc_tensor_get_c(params));
1239-
assert(ccv_nnc_tensor_get_c(params) % self->groups == 0);
1241+
const int c = ccv_nnc_tensor_get_c(params);
1242+
ccv_nnc_tensor_set_n(&weights_params, c);
1243+
assert(c % self->groups == 0);
12401244
ccv_nnc_tensor_set_c(&weights_params, nd, self->filters / self->groups);
12411245
const int hw = ccv_nnc_tensor_hw(weights_params, nd);
12421246
assert(hw >= 0);
@@ -1253,6 +1257,7 @@ static void _ccv_cnnp_convolution_transpose_build(ccv_cnnp_model_t* const super,
12531257
ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_TRANSPOSE_FORWARD(self->groups, self->filters, self->output_padding);
12541258
for (i = 0; i < CCV_NNC_MAX_DIM; i++)
12551259
cmd.info.size.dim[i] = self->kdim[i];
1260+
cmd.info.size.dim[CCV_NNC_MAX_DIM] = c;
12561261
memcpy(cmd.info.convolution_transpose.dilation, self->dilation, sizeof(self->dilation));
12571262
ccv_nnc_tensor_param_t output_params;
12581263
// Dilate weight size based on the dilation factor.

lib/nnc/ccv_nnc_cmd.c

+16-12
Original file line numberDiff line numberDiff line change
@@ -155,19 +155,21 @@ int ccv_nnc_hint_verify(const ccv_nnc_hint_t hint, const ccv_nnc_cmd_param_t cmd
155155
int i;
156156
assert(a.format == b.format);
157157
const int nd = ccv_nnc_tensor_nd(a.dim);
158-
assert(nd == CCV_NNC_MAX_DIM + 1 || nd == CCV_NNC_MAX_DIM + 2);
158+
const int size_nd = ccv_max(2, ccv_nnc_tensor_nd(cmd.size.dim) - 1);
159+
assert(size_nd == 2 || size_nd == 3); // Support 3D convolution.
160+
assert(nd == size_nd + 1 || nd == size_nd + 2);
159161
int hw;
160162
if ((a.format == CCV_TENSOR_FORMAT_CHWN) ||
161-
(a.format == CCV_TENSOR_FORMAT_NHWC && nd == CCV_NNC_MAX_DIM + 1))
163+
(a.format == CCV_TENSOR_FORMAT_NHWC && nd == size_nd + 1))
162164
hw = 0;
163-
else if ((a.format == CCV_TENSOR_FORMAT_NHWC && nd == CCV_NNC_MAX_DIM + 2) ||
164-
(a.format == CCV_TENSOR_FORMAT_NCHW && nd == CCV_NNC_MAX_DIM + 1))
165+
else if ((a.format == CCV_TENSOR_FORMAT_NHWC && nd == size_nd + 2) ||
166+
(a.format == CCV_TENSOR_FORMAT_NCHW && nd == size_nd + 1))
165167
hw = 1;
166-
else if (a.format == CCV_TENSOR_FORMAT_NCHW && nd == CCV_NNC_MAX_DIM + 2)
168+
else if (a.format == CCV_TENSOR_FORMAT_NCHW && nd == size_nd + 2)
167169
hw = 2;
168170
else
169171
assert(0 && "unknown format");
170-
for (i = 0; i < CCV_NNC_MAX_DIM; i++)
172+
for (i = 0; i < size_nd; i++)
171173
{
172174
if ((hint.border.begin[i] + hint.border.end[i] + a.dim[i + hw] - cmd.size.dim[i]) % hint.stride.dim[i] != 0)
173175
return -1;
@@ -186,23 +188,25 @@ ccv_nnc_hint_t ccv_nnc_hint_auto(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_te
186188
assert(a.format == b.format);
187189
const int a_nd = ccv_nnc_tensor_nd(a.dim);
188190
const int b_nd = ccv_nnc_tensor_nd(b.dim);
191+
const int size_nd = ccv_max(2, ccv_nnc_tensor_nd(cmd.size.dim) - 1);
192+
assert(size_nd == 2 || size_nd == 3); // Support 3D convolution.
189193
// Is not auto hint deducible dimensions.
190-
if (a_nd != b_nd || (a_nd != CCV_NNC_MAX_DIM + 1 && a_nd != CCV_NNC_MAX_DIM + 2))
194+
if (a_nd != b_nd || (a_nd != size_nd + 1 && a_nd != size_nd + 2))
191195
return ccv_nnc_no_hint;
192196
int hw;
193197
if ((a.format == CCV_TENSOR_FORMAT_CHWN) ||
194-
(a.format == CCV_TENSOR_FORMAT_NHWC && a_nd == CCV_NNC_MAX_DIM + 1))
198+
(a.format == CCV_TENSOR_FORMAT_NHWC && a_nd == size_nd + 1))
195199
hw = 0;
196-
else if ((a.format == CCV_TENSOR_FORMAT_NHWC && a_nd == CCV_NNC_MAX_DIM + 2) ||
197-
(a.format == CCV_TENSOR_FORMAT_NCHW && a_nd == CCV_NNC_MAX_DIM + 1))
200+
else if ((a.format == CCV_TENSOR_FORMAT_NHWC && a_nd == size_nd + 2) ||
201+
(a.format == CCV_TENSOR_FORMAT_NCHW && a_nd == size_nd + 1))
198202
hw = 1;
199-
else if (a.format == CCV_TENSOR_FORMAT_NCHW && a_nd == CCV_NNC_MAX_DIM + 2)
203+
else if (a.format == CCV_TENSOR_FORMAT_NCHW && a_nd == size_nd + 2)
200204
hw = 2;
201205
else
202206
assert(0 && "unknown format");
203207
ccv_nnc_hint_t hint_auto = {};
204208
// 0-dim is reserved for channels
205-
for (i = 0; i < CCV_NNC_MAX_DIM; i++)
209+
for (i = 0; i < size_nd; i++)
206210
{
207211
// Cannot have one of the dim is zero, we cannot auto the hint, return no hint.
208212
assert(a.dim[i + hw] && b.dim[i + hw]);

lib/nnc/ccv_nnc_easy.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -317,16 +317,16 @@ static inline void ccv_nnc_tensor_view_get_stride(const ccv_nnc_tensor_view_t* c
317317
int x;
318318
const int nd = ccv_nnc_tensor_nd(tv->info.dim);
319319
const int offset = ccv_max(CCV_NNC_MAX_DIM + 2 - nd, 0);
320-
stride[CCV_NNC_MAX_DIM + 2] = 0;
320+
stride[ccv_max(nd, CCV_NNC_MAX_DIM + 2)] = 0;
321321
if (CCV_IS_TENSOR_VIEW(tv))
322322
{
323-
for (x = offset; x < CCV_NNC_MAX_DIM + 2; x++)
323+
for (x = offset; x < ccv_max(nd, CCV_NNC_MAX_DIM + 2); x++)
324324
stride[x] = tv->stride[x - offset];
325325
for (x = 0; x < offset; x++)
326326
stride[x] = stride[offset];
327327
} else {
328328
int cstride = 1;
329-
for (x = CCV_NNC_MAX_DIM + 1; x >= offset; x--)
329+
for (x = ccv_max(CCV_NNC_MAX_DIM + 1, nd - 1); x >= offset; x--)
330330
{
331331
stride[x] = cstride;
332332
cstride *= tv->info.dim[x - offset];

lib/nnc/ccv_nnc_internal.h

+6-2
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,9 @@ static inline void ccv_nnc_hint_tensor_forward(const ccv_nnc_cmd_param_t cmd, co
6262
assert(nd == CCV_NNC_MAX_DIM + 1 || nd == CCV_NNC_MAX_DIM + 2);
6363
int hw = ccv_nnc_tensor_hw(a, nd);
6464
assert(hw >= 0);
65-
for (i = 0; i < CCV_NNC_MAX_DIM; i++)
65+
const int size_nd = ccv_nnc_tensor_nd(cmd.size.dim) - 1;
66+
assert(size_nd == 2 || size_nd == 3); // Support 3D convolution.
67+
for (i = 0; i < size_nd; i++)
6668
{
6769
int stride = ccv_max(1, hint.stride.dim[i]);
6870
b->dim[i + hw] = (a.dim[i + hw] + hint.border.begin[i] + hint.border.end[i] - cmd.size.dim[i]) / stride + 1;
@@ -77,7 +79,9 @@ static inline void ccv_nnc_hint_tensor_backward(const ccv_nnc_cmd_param_t cmd, c
7779
assert(nd == CCV_NNC_MAX_DIM + 1 || nd == CCV_NNC_MAX_DIM + 2);
7880
int hw = ccv_nnc_tensor_hw(a, nd);
7981
assert(hw >= 0);
80-
for (i = 0; i < CCV_NNC_MAX_DIM; i++)
82+
const int size_nd = ccv_nnc_tensor_nd(cmd.size.dim) - 1;
83+
assert(size_nd == 2 || size_nd == 3); // Support 3D convolution.
84+
for (i = 0; i < size_nd; i++)
8185
{
8286
int stride = ccv_max(1, hint.stride.dim[i]);
8387
b->dim[i + hw] = (a.dim[i + hw] - 1) * stride - hint.border.begin[i] - hint.border.end[i] + cmd.size.dim[i];

0 commit comments

Comments (0)