-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.cu
208 lines (165 loc) · 4.64 KB
/
main.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
#include <stdio.h>
#include <fstream>
#include <time.h>
// Generic utils
// One screen-buffer element. The x, y and z components are written out by
// write_image as the three integer values of each PPM pixel.
typedef float3 pixel;
// Prints a diagnostic when a CUDA runtime call did not return cudaSuccess.
//   status - value returned by the CUDA runtime call
//   file   - source file of the call site
//   line   - source line of the call site
// Failures are reported but are not fatal: execution continues afterwards.
inline void check_result_at(cudaError_t status, const char* file, int line) {
    if (status != cudaSuccess) {
        printf("Error %s at line %d in file %s\n",
               cudaGetErrorString(status), line, file);
        // exit(1);
    }
}
// check_result(call) is a macro so that __FILE__ and __LINE__ expand at the
// call site. Inside a plain function they would always name the helper itself,
// making every reported location identical.
#define check_result(value) check_result_at((value), __FILE__, __LINE__)
// Component-wise sum of two float3 vectors (device code only).
__device__ float3 operator+(const float3 &a, const float3 &b) {
    float3 sum;
    sum.x = a.x + b.x;
    sum.y = a.y + b.y;
    sum.z = a.z + b.z;
    return sum;
}
// Scales every component of the vector `a` by the scalar `b` (device code only).
__device__ float3 operator*(const float3 &a, const float &b) {
    float3 scaled;
    scaled.x = a.x * b;
    scaled.y = a.y * b;
    scaled.z = a.z * b;
    return scaled;
}
// Euclidean length of `vec`: the square root of the sum of squared components.
__device__ float length(const float3 &vec) {
    const float squared_len = vec.x * vec.x + vec.y * vec.y + vec.z * vec.z;
    return sqrt(squared_len);
}
// Returns `vec` multiplied by the reciprocal of its length.
// A zero-length input is not special-cased; the reciprocal is then a division by zero.
__device__ float3 normalize(const float3 vec) {
    return vec * (1.0f / length(vec));
}
// Raymarcher
// A ray as used by get_ray and march.
struct ray {
    float3 o;  // origin: the point the ray starts from
    float3 d;  // direction: get_ray stores a normalized vector here
};
// Builds the ray for screen coordinates (u, v).
// Every ray starts at the fixed point (-5, 0, 0) and points along the
// normalized vector (1, u, v).
__device__ ray get_ray(const float& u, const float& v) {
    const float3 origin = make_float3(-5.0f, 0.0f, 0.0f);
    const float3 toward = make_float3(1.0f, u, v);

    ray result;
    result.o = origin;
    result.d = normalize(toward);
    return result;
}
// Evaluates the Mandelbulb iteration at `pos` and returns 0.5 * log(r) * r / dr,
// which march() uses as the distance to advance along the ray.
//   pos - point at which to evaluate
// The iteration runs at most `Iterations` times with exponent `Power`, stopping
// early once the radius r exceeds `Bailout`.
// All arithmetic is kept in single precision: float literals and the f-suffixed
// math functions avoid the silent promotion to double that unsuffixed literals
// such as 0.5 and 1.0 caused.
// NOTE(review): r == 0 is not special-cased, so z.z / r divides by zero for a
// point exactly at the origin.
__device__ float mandelbulb_de(float3 pos) {
    // pos = fmod(fabs(pos), 4.0) - 2.0;
    const int Iterations = 4;
    const float Bailout = 4.0f;
    const float Power = 16.0f;

    float3 z = pos;
    float dr = 1.0f;  // running derivative term of the iteration
    float r = 0.0f;   // radius of z at the most recent iteration
    for (int i = 0; i < Iterations; i++) {
        r = length(z);
        if (r > Bailout) break;
        // convert to polar coordinates
        float theta = acosf(z.z / r);
        float phi = atan2f(z.y, z.x);
        dr = powf(r, Power - 1.0f) * Power * dr + 1.0f;
        // scale and rotate the point
        const float zr = powf(r, Power);
        theta = theta * Power;
        phi = phi * Power;
        // convert back to cartesian coordinates
        z = make_float3(sinf(theta) * cosf(phi),
                        sinf(phi) * sinf(theta),
                        cosf(theta)) * zr;
        z = z + pos;
        //z += pos * cos(time * 2.0);
    }
    return 0.5f * logf(r) * r / dr;
}
// Marches along ray `r`, advancing each step by the value mandelbulb_de
// returns for the current sample point.
// Stops when that value drops below min_distance or after max_ray_steps steps.
// Returns 1 - steps / max_ray_steps: 1.0 when the first sample already
// terminates, 0.0 when the step budget is exhausted.
// The return expression uses float literals so the subtraction stays in single
// precision; steps / 64 is exact in float, so the value is unchanged.
__device__ float march(ray r) {
    const int max_ray_steps = 64;
    const float min_distance = 0.002f;

    float total_dist = 0.0f;
    int steps = 0;
    while (steps < max_ray_steps) {
        const float3 p = r.o + r.d * total_dist;
        const float step_dist = mandelbulb_de(p);
        total_dist += step_dist;
        // Leave before incrementing so `steps` counts only completed steps.
        if (step_dist < min_distance) break;
        ++steps;
    }
    return 1.0f - (float) steps / (float) max_ray_steps;
}
// Main kernel
// Renders one pixel per thread into `screen_buffer`.
//   screen_buffer - output array of width * height pixels, indexed y * width + x
//   width, height - image size in pixels
// Expects a 2D launch: x/y pixel coordinates come from the x/y block and
// thread indices. Threads that fall outside the image do nothing, so the grid
// may overshoot the image size.
// Each pixel gets the grey value march() * 255 in all three components.
__global__ void d_main(
    pixel* screen_buffer,
    const size_t width,
    const size_t height
) {
    const size_t x = ((size_t) blockIdx.x * blockDim.x) + threadIdx.x;
    const size_t y = ((size_t) blockIdx.y * blockDim.y) + threadIdx.y;
    if (x >= width || y >= height) return;

    // Screen coordinates are scaled by the shorter side so the image is not
    // stretched, then shifted by half of each extent so (0, 0) is the image
    // centre. The previous offsets (ar * 0.5 and 0.5) were only correct when
    // width >= height; for that case these expressions give the same values.
    const float min_w_h = (float) (width < height ? width : height);
    const float u = (float) x / min_w_h - (0.5f * (float) width) / min_w_h;
    const float v = (float) y / min_w_h - (0.5f * (float) height) / min_w_h;

    const ray r = get_ray(u, v);
    const float c = march(r) * 255.0f;
    screen_buffer[y * width + x] = make_float3(c, c, c);
}
// Writes the screen buffer to `file_name` as a plain-text PPM ("P3") image
// with a maximum channel value of 255.
//   file_name   - path of the file to create or overwrite
//   screen_buff - host-readable array of width * height pixels, row-major
//   width       - image width in pixels
//   height      - image height in pixels
// Each component is truncated to int when written. If the file cannot be
// opened, the reason is printed to stderr and nothing is written.
void write_image(
    char* file_name,
    pixel* screen_buff,
    size_t width,
    size_t height
) {
    FILE* image = fopen(file_name, "w");
    if (image == NULL) {
        // fprintf on a NULL stream would crash; report and give up instead.
        perror(file_name);
        return;
    }
    fprintf(image, "P3\n");
    // %zu is the matching conversion for size_t; %i is undefined for it.
    fprintf(image, "%zu %zu\n", width, height);
    fprintf(image, "%i\n", 255);
    for (size_t y = 0; y < height; y++) {
        const pixel* row = screen_buff + y * width;
        for (size_t x = 0; x < width; x++) {
            const float3 px = row[x];
            fprintf(image, "%i %i %i\n", (int) px.x, (int) px.y, (int) px.z);
        }
    }
    fclose(image);
}
// Entry point: renders the image on the GPU and optionally writes it to disk.
// Arguments:
//   argv[1] - output file name
//   argv[2] - image width in pixels
//   argv[3] - image height in pixels
//   argv[4] - thread-block width
//   argv[5] - thread-block height
//   argv[6] - starts with 't' for test mode: only the timing line is printed
//             and no file is written
// Returns 0 on success, 1 on bad arguments or when a buffer cannot be allocated.
int main(int argc, char** argv) {
    // printf("Mandelbulb\n");
    if (argc < 7) {
        printf("Not enought params.\n");
        return 1;
    }
    char* file_name = argv[1];

    // Parse as int first: a negative value would wrap to a huge size_t, and a
    // zero group size would divide by zero in the grid computation below.
    const int width_arg = atoi(argv[2]);
    const int height_arg = atoi(argv[3]);
    const int group_width_arg = atoi(argv[4]);
    const int group_height_arg = atoi(argv[5]);
    if (width_arg <= 0 || height_arg <= 0 ||
        group_width_arg <= 0 || group_height_arg <= 0) {
        printf("Width, height and group sizes must be positive integers.\n");
        return 1;
    }
    const size_t width = (size_t) width_arg;
    const size_t height = (size_t) height_arg;
    const size_t num_pixels = width * height;
    const size_t group_width = (size_t) group_width_arg;
    const size_t group_height = (size_t) group_height_arg;
    const bool test = (*argv[6] == 't');

    // Setup buffers
    pixel* h_screen_buff = NULL;
    pixel* d_screen_buff = NULL;
    cudaError_t status = cudaMallocHost(&h_screen_buff, num_pixels * sizeof(pixel));
    check_result(status);
    if (status != cudaSuccess) {
        return 1;
    }
    status = cudaMalloc(&d_screen_buff, num_pixels * sizeof(pixel));
    check_result(status);
    if (status != cudaSuccess) {
        cudaFreeHost(h_screen_buff);
        return 1;
    }

    // Round the block count up so the right and bottom edges are covered when
    // the image size is not a multiple of the group size; the kernel ignores
    // threads that fall outside the image.
    dim3 grid_dim((unsigned int) ((width + group_width - 1) / group_width),
                  (unsigned int) ((height + group_height - 1) / group_height));
    dim3 block_dim((unsigned int) group_width, (unsigned int) group_height);

    // Execute on device
    clock_t t_start = clock();
    if (!test)
        printf("Starting kernel execution...\n");
    d_main<<<grid_dim, block_dim>>>(d_screen_buff, width, height);
    // A launch does not return a status; configuration errors are only
    // visible through cudaGetLastError.
    check_result(cudaGetLastError());
    // The launch is asynchronous: wait so the message below is accurate and
    // execution errors are reported here.
    check_result(cudaDeviceSynchronize());
    if (!test)
        printf("Kernel execution ended.\n");
    if (!test)
        printf("Reading screan buffer from device...\n");
    check_result(cudaMemcpy(h_screen_buff, d_screen_buff,
                            num_pixels * sizeof(pixel), cudaMemcpyDeviceToHost));
    if (!test)
        printf("Done.\n");
    // NOTE(review): clock() reports processor time rather than wall-clock time.
    printf("Time taken (ms): %i\n",
           (int) ((double) (clock() - t_start) / CLOCKS_PER_SEC * 1000.0f));
    if (!test) {
        printf("Writing to file...\n");
        write_image(file_name, h_screen_buff, width, height);
        printf("Done\n");
    }
    //for(size_t y = 0;y < height;y++) {
    //    for(size_t x = 0;x < width;x++) {
    //        printf("%i ", (int) h_screen_buff[y * width + x].x);
    //    }
    //    printf("\n");
    //}
    check_result(cudaFreeHost(h_screen_buff));
    check_result(cudaFree(d_screen_buff));
    return 0;
}