@@ -250,7 +250,7 @@ int main(int argc, char **argv) {
250
250
251
251
// Compute capital u, capital v, z and h
252
252
c1 = wtime ();
253
- #pragma acc parallel loop independent present(p[:SIZE],u[:SIZE],v[:SIZE]) deviceptr(cu,cv,z,h)
253
+ #pragma acc parallel loop collapse(2) present(p[:SIZE],u[:SIZE],v[:SIZE]) deviceptr(cu,cv,z,h)
254
254
for (i = 0 ;i < M ;i ++ ) {
255
255
for (j = 0 ;j < N ;j ++ ) {
256
256
int idx00 = (i * N_LEN ) + j ;
@@ -296,7 +296,7 @@ int main(int argc, char **argv) {
296
296
}
297
297
c1 = wtime ();
298
298
299
- #pragma acc parallel loop independent present(unew[:SIZE],vnew[:SIZE],pnew[:SIZE]) deviceptr(cu,cv,z,h,uold,vold,pold)
299
+ #pragma acc parallel loop collapse(2) present(unew[:SIZE],vnew[:SIZE],pnew[:SIZE]) deviceptr(cu,cv,z,h,uold,vold,pold)
300
300
for (i = 0 ;i < M ;i ++ ) {
301
301
for (j = 0 ;j < N ;j ++ ) {
302
302
int idx00 = (i * N_LEN ) + j ;
@@ -315,7 +315,7 @@ int main(int argc, char **argv) {
315
315
// Periodic continuation
316
316
#pragma acc parallel loop independent present(unew[:SIZE],vnew[:SIZE],pnew[:SIZE])
317
317
for (j = 0 ;j < N ;j ++ ) {
318
- #pragma acc cache(unew[M*N_LEN:N_LEN],vnew[:N_LEN],pnew[:N_LEN])
318
+ // #pragma acc cache(unew[M*N_LEN:N_LEN],vnew[:N_LEN],pnew[:N_LEN])
319
319
//printf("N loop unew %d -> %d, vnew %d -> %d , pnew %d -> %d\n",M*N_LEN+j,j,j+1,M*N_LEN +j + 1,j,M*N_LEN +j);
320
320
unew [j ] = unew [M * N_LEN + j ];
321
321
vnew [M * N_LEN + j + 1 ] = vnew [j + 1 ];
0 commit comments