Skip to content

Commit 876d52f

Browse files
committed
montgomery conversion for cuda
1 parent dfc3a53 commit 876d52f

13 files changed

+183
-147
lines changed

icicle_v3/backend/cpu/src/curve/cpu_mont_conversion.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ using namespace curve_config;
1010
using namespace icicle;
1111

1212
template <typename T>
13-
eIcicleError cpu_convert_mont(
14-
const Device& device, const T* input, size_t n, bool is_into, const ConvertMontgomeryConfig& config, T* output)
13+
eIcicleError
14+
cpu_convert_mont(const Device& device, const T* input, size_t n, bool is_into, const VecOpsConfig& config, T* output)
1515
{
1616
for (size_t i = 0; i < n; ++i) {
1717
output[i] = is_into ? T::to_montgomery(input[i]) : T::from_montgomery(input[i]);

icicle_v3/backend/cuda/CMakeLists.txt

+4-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,10 @@ target_link_libraries(icicle_cuda_field PRIVATE ${CUDA_LIBRARIES}) # Link to CUD
3737

3838
# curve API library
3939
if (CURVE)
40-
add_library(icicle_cuda_curve SHARED src/curve/cuda_msm.cu)
40+
add_library(icicle_cuda_curve SHARED
41+
src/curve/cuda_msm.cu
42+
src/curve/cuda_mont.cu
43+
)
4144
target_include_directories(icicle_cuda_curve PRIVATE include)
4245
target_link_libraries(icicle_cuda_curve PUBLIC icicle_device icicle_curve)
4346
set_target_properties(icicle_cuda_curve PROPERTIES OUTPUT_NAME "icicle_cuda_curve_${FIELD}")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#include <cuda.h>
2+
#include <stdexcept>
3+
4+
#include "icicle/errors.h"
5+
#include "icicle/vec_ops.h"
6+
#include "gpu-utils/error_handler.h"
7+
8+
namespace montgomery {
9+
#define MAX_THREADS_PER_BLOCK 256
10+
11+
template <typename E, bool is_into>
12+
__global__ void MontgomeryKernel(const E* input, int n, E* output)
13+
{
14+
int tid = blockIdx.x * blockDim.x + threadIdx.x;
15+
if (tid < n) { output[tid] = is_into ? E::to_montgomery(input[tid]) : E::from_montgomery(input[tid]); }
16+
}
17+
18+
template <typename E, bool is_into>
19+
cudaError_t ConvertMontgomery(const E* input, size_t n, const VecOpsConfig& config, E* output)
20+
{
21+
cudaStream_t cuda_stream = reinterpret_cast<cudaStream_t>(config.stream);
22+
23+
E *d_alloc_out = nullptr, *d_alloc_in = nullptr, *d_out;
24+
const E* d_in;
25+
if (!config.is_a_on_device) {
26+
CHK_IF_RETURN(cudaMallocAsync(&d_alloc_in, n * sizeof(E), cuda_stream));
27+
CHK_IF_RETURN(cudaMemcpyAsync(d_alloc_in, input, n * sizeof(E), cudaMemcpyHostToDevice, cuda_stream));
28+
d_in = d_alloc_in;
29+
} else {
30+
d_in = input;
31+
}
32+
33+
if (!config.is_result_on_device) {
34+
CHK_IF_RETURN(cudaMallocAsync(&d_alloc_out, n * sizeof(E), cuda_stream));
35+
d_out = d_alloc_out;
36+
} else {
37+
d_out = output;
38+
}
39+
40+
int num_threads = MAX_THREADS_PER_BLOCK;
41+
int num_blocks = (n + num_threads - 1) / num_threads;
42+
MontgomeryKernel<E, is_into><<<num_blocks, num_threads, 0, cuda_stream>>>(d_in, n, d_out);
43+
44+
if (d_alloc_in) { CHK_IF_RETURN(cudaFreeAsync(d_alloc_in, cuda_stream)); }
45+
if (d_alloc_out) {
46+
CHK_IF_RETURN(cudaMemcpyAsync(output, d_out, n * sizeof(E), cudaMemcpyDeviceToHost, cuda_stream));
47+
CHK_IF_RETURN(cudaFreeAsync(d_out, cuda_stream));
48+
}
49+
if (!config.is_async) return CHK_STICKY(cudaStreamSynchronize(cuda_stream));
50+
51+
return CHK_LAST();
52+
}
53+
54+
} // namespace montgomery
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#include <cuda.h>
2+
#include <stdexcept>
3+
4+
#include "icicle/errors.h"
5+
#include "icicle/curves/montgomery_conversion.h"
6+
#include "gpu-utils/error_handler.h"
7+
#include "error_translation.h"
8+
#include "cuda_mont.cuh"
9+
10+
#include "icicle/curves/curve_config.h"
11+
using namespace curve_config;
12+
using namespace icicle;
13+
14+
namespace icicle {
15+
16+
template <typename T>
17+
eIcicleError
18+
cuda_convert_mont(const Device& device, const T* input, size_t n, bool is_into, const VecOpsConfig& config, T* output)
19+
{
20+
cudaError_t err = is_into ? montgomery::ConvertMontgomery<T, true>(input, n, config, output)
21+
: montgomery::ConvertMontgomery<T, false>(input, n, config, output);
22+
return translateCudaError(err);
23+
}
24+
25+
REGISTER_AFFINE_CONVERT_MONTGOMERY_BACKEND("CUDA", cuda_convert_mont<affine_t>);
26+
REGISTER_PROJECTIVE_CONVERT_MONTGOMERY_BACKEND("CUDA", cuda_convert_mont<projective_t>);
27+
28+
#ifdef G2
29+
REGISTER_AFFINE_G2_CONVERT_MONTGOMERY_BACKEND("CUDA", cuda_convert_mont<g2_affine_t>);
30+
REGISTER_PROJECTIVE_G2_CONVERT_MONTGOMERY_BACKEND("CUDA", cuda_convert_mont<g2_projective_t>);
31+
#endif // G2
32+
33+
} // namespace icicle

icicle_v3/backend/cuda/src/field/cuda_mont.cu

+3-48
Original file line numberDiff line numberDiff line change
@@ -5,64 +5,19 @@
55
#include "icicle/vec_ops.h"
66
#include "gpu-utils/error_handler.h"
77
#include "error_translation.h"
8+
#include "cuda_mont.cuh"
89

910
namespace icicle {
1011

11-
#define MAX_THREADS_PER_BLOCK 256
12-
13-
template <typename E>
14-
__global__ void MontgomeryKernel(const E* input, int n, bool is_into, E* output)
15-
{
16-
int tid = blockIdx.x * blockDim.x + threadIdx.x;
17-
if (tid < n) { output[tid] = is_into ? E::to_montgomery(input[tid]) : E::from_montgomery(input[tid]); }
18-
}
19-
20-
template <typename E>
21-
cudaError_t ConvertMontgomery(const E* input, int n, bool is_into, const VecOpsConfig& config, E* output)
22-
{
23-
cudaStream_t cuda_stream = reinterpret_cast<cudaStream_t>(config.stream);
24-
25-
E *d_alloc_out = nullptr, *d_alloc_in = nullptr, *d_out;
26-
const E* d_in;
27-
if (!config.is_a_on_device) {
28-
CHK_IF_RETURN(cudaMallocAsync(&d_alloc_in, n * sizeof(E), cuda_stream));
29-
CHK_IF_RETURN(cudaMemcpyAsync(d_alloc_in, input, n * sizeof(E), cudaMemcpyHostToDevice, cuda_stream));
30-
d_in = d_alloc_in;
31-
} else {
32-
d_in = input;
33-
}
34-
35-
if (!config.is_result_on_device) {
36-
CHK_IF_RETURN(cudaMallocAsync(&d_alloc_out, n * sizeof(E), cuda_stream));
37-
d_out = d_alloc_out;
38-
} else {
39-
d_out = output;
40-
}
41-
42-
int num_threads = MAX_THREADS_PER_BLOCK;
43-
int num_blocks = (n + num_threads - 1) / num_threads;
44-
MontgomeryKernel<E><<<num_blocks, num_threads, 0, cuda_stream>>>(d_in, n, is_into, d_out);
45-
46-
if (d_alloc_in) { CHK_IF_RETURN(cudaFreeAsync(d_alloc_in, cuda_stream)); }
47-
if (d_alloc_out) {
48-
CHK_IF_RETURN(cudaMemcpyAsync(output, d_out, n * sizeof(E), cudaMemcpyDeviceToHost, cuda_stream));
49-
CHK_IF_RETURN(cudaFreeAsync(d_out, cuda_stream));
50-
}
51-
if (!config.is_async) return CHK_STICKY(cudaStreamSynchronize(cuda_stream));
52-
53-
return CHK_LAST();
54-
}
55-
56-
/************************************ REGISTRATION ************************************/
57-
5812
#include "icicle/fields/field_config.h"
5913
using namespace field_config;
6014

6115
template <typename F>
6216
eIcicleError convert_montgomery_cuda(
6317
const Device& device, const F* input, uint64_t n, bool is_into, const VecOpsConfig& config, F* output)
6418
{
65-
auto err = ConvertMontgomery<F>(input, n, is_into, config, output);
19+
auto err = is_into ? montgomery::ConvertMontgomery<F, true>(input, n, config, output)
20+
: montgomery::ConvertMontgomery<F, false>(input, n, config, output);
6621
return translateCudaError(err);
6722
}
6823

icicle_v3/include/icicle/curves/montgomery_conversion.h

+5-35
Original file line numberDiff line numberDiff line change
@@ -9,48 +9,18 @@
99

1010
#include "icicle/curves/affine.h"
1111
#include "icicle/curves/projective.h"
12+
#include "icicle/vec_ops.h"
1213
#include "icicle/fields/field.h"
1314
#include "icicle/curves/curve_config.h"
1415

1516
using namespace curve_config;
1617

1718
namespace icicle {
1819

19-
/*************************** Frontend APIs ***************************/
20-
21-
struct ConvertMontgomeryConfig {
22-
icicleStreamHandle stream; /**< stream for async execution. */
23-
bool is_input_on_device;
24-
bool is_output_on_device;
25-
bool is_async;
26-
27-
ConfigExtension ext; /** backend specific extensions*/
28-
};
29-
30-
static ConvertMontgomeryConfig default_convert_montgomery_config()
31-
{
32-
ConvertMontgomeryConfig config = {
33-
nullptr, // stream
34-
false, // is_input_on_device
35-
false, // is_output_on_device
36-
false, // is_async
37-
};
38-
return config;
39-
}
40-
41-
template <typename T>
42-
eIcicleError
43-
points_convert_montgomery(const T* input, size_t n, bool is_into, const ConvertMontgomeryConfig& config, T* output);
44-
4520
/*************************** Backend registration ***************************/
4621

4722
using AffineConvertMontImpl = std::function<eIcicleError(
48-
const Device& device,
49-
const affine_t* input,
50-
size_t n,
51-
bool is_into,
52-
const ConvertMontgomeryConfig& config,
53-
affine_t* output)>;
23+
const Device& device, const affine_t* input, size_t n, bool is_into, const VecOpsConfig& config, affine_t* output)>;
5424

5525
void register_affine_convert_montgomery(const std::string& deviceType, AffineConvertMontImpl);
5626

@@ -67,7 +37,7 @@ namespace icicle {
6737
const projective_t* input,
6838
size_t n,
6939
bool is_into,
70-
const ConvertMontgomeryConfig& config,
40+
const VecOpsConfig& config,
7141
projective_t* output)>;
7242

7343
void register_projective_convert_montgomery(const std::string& deviceType, ProjectiveConvertMontImpl);
@@ -86,7 +56,7 @@ namespace icicle {
8656
const g2_affine_t* input,
8757
size_t n,
8858
bool is_into,
89-
const ConvertMontgomeryConfig& config,
59+
const VecOpsConfig& config,
9060
g2_affine_t* output)>;
9161

9262
void register_affine_g2_convert_montgomery(const std::string& deviceType, AffineG2ConvertMontImpl);
@@ -104,7 +74,7 @@ namespace icicle {
10474
const g2_projective_t* input,
10575
size_t n,
10676
bool is_into,
107-
const ConvertMontgomeryConfig& config,
77+
const VecOpsConfig& config,
10878
g2_projective_t* output)>;
10979

11080
void register_projective_g2_convert_montgomery(const std::string& deviceType, ProjectiveG2ConvertMontImpl);

icicle_v3/include/icicle/curves/projective.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ class Projective
231231
out[i] = (i % size < 100) ? rand_host() : out[i - 100];
232232
}
233233

234-
static void rand_host_many_affine(Affine<FF>* out, int size)
234+
static void rand_host_many(Affine<FF>* out, int size)
235235
{
236236
for (int i = 0; i < size; i++)
237237
out[i] = (i % size < 100) ? to_affine(rand_host()) : out[i - 100];

icicle_v3/include/icicle/vec_ops.h

+1-2
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,7 @@ namespace icicle {
5656
eIcicleError vector_mul(const S* vec_a, const S* vec_b, uint64_t n, const VecOpsConfig& config, S* output);
5757

5858
template <typename S>
59-
eIcicleError
60-
scalar_convert_montgomery(const S* input, uint64_t size, bool is_into, const VecOpsConfig& config, S* output);
59+
eIcicleError convert_montgomery(const S* input, uint64_t size, bool is_into, const VecOpsConfig& config, S* output);
6160

6261
/*************************** Backend registration ***************************/
6362

icicle_v3/src/curves/ffi_extern.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ extern "C" void CONCAT_EXPAND(CURVE, generate_projective_points)(projective_t* p
2727

2828
extern "C" void CONCAT_EXPAND(CURVE, generate_affine_points)(affine_t* points, int size)
2929
{
30-
projective_t::rand_host_many_affine(points, size);
30+
projective_t::rand_host_many(points, size);
3131
}
3232

3333
/********************************** G2 **********************************/
@@ -53,6 +53,6 @@ extern "C" void CONCAT_EXPAND(CURVE, g2_generate_projective_points)(g2_projectiv
5353

5454
extern "C" void CONCAT_EXPAND(CURVE, g2_generate_affine_points)(g2_affine_t* points, int size)
5555
{
56-
g2_projective_t::rand_host_many_affine(points, size);
56+
g2_projective_t::rand_host_many(points, size);
5757
}
5858
#endif // G2

icicle_v3/src/curves/montgomery_conversion.cpp

+12-20
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,14 @@ namespace icicle {
1010
ICICLE_DISPATCHER_INST(AffineConvertMont, affine_convert_montgomery, AffineConvertMontImpl);
1111

1212
extern "C" eIcicleError CONCAT_EXPAND(CURVE, affine_convert_montgomery)(
13-
const affine_t* input, size_t n, bool is_into, const ConvertMontgomeryConfig& config, affine_t* output)
13+
const affine_t* input, size_t n, bool is_into, const VecOpsConfig& config, affine_t* output)
1414
{
1515
return AffineConvertMont::execute(input, n, is_into, config, output);
1616
}
1717

1818
template <>
19-
eIcicleError points_convert_montgomery(
20-
const affine_t* input, size_t n, bool is_into, const ConvertMontgomeryConfig& config, affine_t* output)
19+
eIcicleError
20+
convert_montgomery(const affine_t* input, size_t n, bool is_into, const VecOpsConfig& config, affine_t* output)
2121
{
2222
return CONCAT_EXPAND(CURVE, affine_convert_montgomery)(input, n, is_into, config, output);
2323
}
@@ -26,15 +26,15 @@ namespace icicle {
2626
ICICLE_DISPATCHER_INST(AffineG2ConvertMont, affine_g2_convert_montgomery, AffineG2ConvertMontImpl);
2727

2828
extern "C" eIcicleError CONCAT_EXPAND(CURVE, affine_g2_convert_montgomery)(
29-
const g2_affine_t* input, size_t n, bool is_into, const ConvertMontgomeryConfig& config, g2_affine_t* output)
29+
const g2_affine_t* input, size_t n, bool is_into, const VecOpsConfig& config, g2_affine_t* output)
3030
{
3131
return AffineG2ConvertMont::execute(input, n, is_into, config, output);
3232
}
3333

3434
#ifndef G1_AFFINE_SAME_TYPE_AS_G2_AFFINE
3535
template <>
36-
eIcicleError points_convert_montgomery(
37-
const g2_affine_t* input, size_t n, bool is_into, const ConvertMontgomeryConfig& config, g2_affine_t* output)
36+
eIcicleError
37+
convert_montgomery(const g2_affine_t* input, size_t n, bool is_into, const VecOpsConfig& config, g2_affine_t* output)
3838
{
3939
return CONCAT_EXPAND(CURVE, affine_g2_convert_montgomery)(input, n, is_into, config, output);
4040
}
@@ -44,14 +44,14 @@ namespace icicle {
4444
ICICLE_DISPATCHER_INST(ProjectiveConvertMont, projective_convert_montgomery, ProjectiveConvertMontImpl);
4545

4646
extern "C" eIcicleError CONCAT_EXPAND(CURVE, projective_convert_montgomery)(
47-
const projective_t* input, size_t n, bool is_into, const ConvertMontgomeryConfig& config, projective_t* output)
47+
const projective_t* input, size_t n, bool is_into, const VecOpsConfig& config, projective_t* output)
4848
{
4949
return ProjectiveConvertMont::execute(input, n, is_into, config, output);
5050
}
5151

5252
template <>
53-
eIcicleError points_convert_montgomery(
54-
const projective_t* input, size_t n, bool is_into, const ConvertMontgomeryConfig& config, projective_t* output)
53+
eIcicleError convert_montgomery(
54+
const projective_t* input, size_t n, bool is_into, const VecOpsConfig& config, projective_t* output)
5555
{
5656
return CONCAT_EXPAND(CURVE, projective_convert_montgomery)(input, n, is_into, config, output);
5757
}
@@ -60,22 +60,14 @@ namespace icicle {
6060
ICICLE_DISPATCHER_INST(ProjectiveG2ConvertMont, projective_g2_convert_montgomery, ProjectiveG2ConvertMontImpl);
6161

6262
extern "C" eIcicleError CONCAT_EXPAND(CURVE, projective_g2_convert_montgomery)(
63-
const g2_projective_t* input,
64-
size_t n,
65-
bool is_into,
66-
const ConvertMontgomeryConfig& config,
67-
g2_projective_t* output)
63+
const g2_projective_t* input, size_t n, bool is_into, const VecOpsConfig& config, g2_projective_t* output)
6864
{
6965
return ProjectiveG2ConvertMont::execute(input, n, is_into, config, output);
7066
}
7167

7268
template <>
73-
eIcicleError points_convert_montgomery(
74-
const g2_projective_t* input,
75-
size_t n,
76-
bool is_into,
77-
const ConvertMontgomeryConfig& config,
78-
g2_projective_t* output)
69+
eIcicleError convert_montgomery(
70+
const g2_projective_t* input, size_t n, bool is_into, const VecOpsConfig& config, g2_projective_t* output)
7971
{
8072
return CONCAT_EXPAND(CURVE, projective_g2_convert_montgomery)(input, n, is_into, config, output);
8173
}

icicle_v3/src/vec_ops.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,8 @@ namespace icicle {
113113
}
114114

115115
template <>
116-
eIcicleError scalar_convert_montgomery(
117-
const scalar_t* input, uint64_t size, bool is_into, const VecOpsConfig& config, scalar_t* output)
116+
eIcicleError
117+
convert_montgomery(const scalar_t* input, uint64_t size, bool is_into, const VecOpsConfig& config, scalar_t* output)
118118
{
119119
return CONCAT_EXPAND(FIELD, scalar_convert_montgomery)(input, size, is_into, config, output);
120120
}

0 commit comments

Comments
 (0)