diff --git a/src_c/IMB_cpu_exploit.c b/src_c/IMB_cpu_exploit.c
index e8388e10..d7643f9f 100644
--- a/src_c/IMB_cpu_exploit.c
+++ b/src_c/IMB_cpu_exploit.c
@@ -143,3 +143,109 @@ in: target_secs: desired runtime (about) of the current call
     }
 }
 
+
+/* Runs the CPU load while testing `count` null requests and returns MFlops.
+ * The array gets one spare slot because a VLA of length <= 0 is undefined. */
+double IMB_cpu_exploit_reworked_With_Test(float target_secs, int count,int initialize) {
+    int j, n = count > 0 ? count : 0;
+    MPI_Request REQUESTS[n + 1];
+    for (j = 0; j <= n; j++) { REQUESTS[j] = MPI_REQUEST_NULL; }
+    IMB_cpu_exploit_With_Test(target_secs, REQUESTS, n, initialize);
+    return MFlops;
+}
+
+void IMB_cpu_exploit_With_Test(float target_secs, MPI_Request* REQUEST, int count , int initialize) {
+/*
+
+                      Runs a CPU intensive code (matrix-vector multiply) for a
+                      user defined amount of CPU time and calls MPI_Testall on
+                      the given requests after every repetition of the kernel.
+
+Input variables:
+
+-target_secs          (type float)
+                      That many seconds (about) the kernel is run.
+                      Nothing is done when the value is <= 0.
+
+-REQUEST              (type MPI_Request*)
+                      Array of requests handed to MPI_Testall.
+
+-count                (type int)
+                      Number of entries of REQUEST passed to MPI_Testall.
+
+-initialize           (type int)
+                      1/0 for first / subsequent calls. If 1, the program
+                      determines how to run in order to achieve target CPU time
+                      and stores the elapsed time of this call in tCPU
+                      (microseconds).
+
+Side effects:
+
+                      While MFlops is negative, a calibration loop (without
+                      MPI_Testall) is run first; it sets MFlops and the static
+                      repetitions-per-second estimate Nrep.
+
+*/
+
+#define SIZE 10
+    static float a[SIZE][SIZE], x[SIZE], y[SIZE];
+    static int Nrep, target_reps;
+    double t1 = 0., t2;
+    int i, j, repeat, flag;
+
+    if (target_secs <= 0.) {
+        return;
+    }
+
+    if (MFlops < 0.) {
+        /* One-time calibration: time a fixed amount of work (about 50 MFlop). */
+        for (i = 0; i < SIZE; i++) {
+            x[i] = y[i] = 0.;
+            for (j = 0; j < SIZE; j++) {
+                a[i][j] = 1.;
+            }
+        }
+        Nrep = (50000000 / (2 * SIZE*SIZE)) + 1;
+        t1 = MPI_Wtime();
+        for (repeat = 0; repeat < Nrep; repeat++) {
+            for (i = 0; i < SIZE; i++) {
+                for (j = 0; j < SIZE; j++) {
+                    x[i] = x[i] + a[i][j] * y[j];
+                }
+            }
+        }
+        t2 = MPI_Wtime();
+
+        MFlops = (Nrep * 2 * SIZE * SIZE) * 1.e-6 / (t2 - t1);
+        Nrep = (int)(1. / (t2 - t1) * Nrep); /* kernel repetitions per second */
+        target_reps = 0;
+    }
+
+    if (initialize) {
+        /* Translate the requested time into a repetition count and start timing. */
+        target_reps = max(1, (int)(target_secs * Nrep));
+        t1 = MPI_Wtime();
+    }
+
+    /* The first (timed) and subsequent (untimed) calls run the same kernel. */
+    for (repeat = 0; repeat < target_reps; repeat++) {
+        for (i = 0; i < SIZE; i++) {
+            for (j = 0; j < SIZE; j++) {
+                x[i] = x[i] + a[i][j] * y[j];
+            }
+        }
+        /*
+         * MPI_Testall takes an array of statuses, so the matching "ignore"
+         * constant is MPI_STATUSES_IGNORE rather than MPI_STATUS_IGNORE.
+         * The completion flag is not inspected afterwards (presumably the call
+         * is only meant to let the MPI library make progress - confirm).
+         */
+        MPI_Testall(count, REQUEST, &flag, MPI_STATUSES_IGNORE);
+    }
+
+    if (initialize) {
+        t2 = MPI_Wtime();
+
+        tCPU = 1000000. * (t2 - t1);
+    }
+}
diff --git a/src_c/IMB_prototypes.h b/src_c/IMB_prototypes.h
index a43ef9ae..5f61b449 100644
--- a/src_c/IMB_prototypes.h
+++ b/src_c/IMB_prototypes.h
@@ -490,6 +490,9 @@ void IMB_warm_up(struct Bench* Bmark, struct comm_info* c_info, int size, struct
 void IMB_cpu_exploit(float target_secs, int initialize);
 double IMB_cpu_exploit_reworked(float target_secs, int initialize);
 
+void IMB_cpu_exploit_With_Test(float target_secs, MPI_Request* request, int count,int initialize); /* CPU load that calls MPI_Testall on `count` requests after each kernel repetition */
+double IMB_cpu_exploit_reworked_With_Test(float target_secs,int count, int initialize); /* same, run on `count` MPI_REQUEST_NULL requests; returns the global MFlops value */
+
 void IMB_general_info();
 
 void IMB_make_sys_info();
diff --git a/src_cpp/helpers/original_benchmark.h b/src_cpp/helpers/original_benchmark.h
index 59c373fc..301bbc9b 100644
--- a/src_cpp/helpers/original_benchmark.h
+++ b/src_cpp/helpers/original_benchmark.h
@@ -177,9 +177,15 @@ class OriginalBenchmark : public Benchmark {
                 glob.iter = c_info.n_lens - 1;
             }
 #ifdef MPIIO
-            if(c_info.w_rank == 0 &&
+            if(c_info.w_rank != 0 && do_nonblocking_)
+                IMB_cpu_exploit_reworked_With_Test(TARGET_CPU_SECS, 10,1);
+				// or 
+				// IMB_cpu_exploit_reworked(TARGET_CPU_SECS, 1);
+			if(c_info.w_rank == 0 &&
                do_nonblocking_) {
-                double MFlops = IMB_cpu_exploit_reworked(TARGET_CPU_SECS, 1);
+				double MFlops = IMB_cpu_exploit_reworked_With_Test(TARGET_CPU_SECS,10, 1);
+				// or
+				// double MFlops = IMB_cpu_exploit_reworked(TARGET_CPU_SECS, 1);
                 printf("\n\n# For nonblocking benchmarks:\n\n");
                 printf("# Function CPU_Exploit obtains an undisturbed\n");
                 printf("# performance of %7.2f MFlops\n",MFlops);