Skip to content
This repository was archived by the owner on May 27, 2021. It is now read-only.

Commit 899edb5

Browse files
Set kernel names directly
1 parent 8911e98 commit 899edb5

File tree

3 files changed

+3
-3
lines changed

3 files changed

+3
-3
lines changed

test/perf/matmul_kernels/wmma/cutlass-mma-turing.sh

+1 -1
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ for i in {7..14}; do
1616
N=$((2**i))
1717

1818
# runtime in ns
19-
runtime=$(nv-nsight-cu-cli -f --summary per-kernel --csv --units base -k Kernel ${CUTLASS_BUILD_PATH}/tools/profiler/cutlass_profiler --op_class=tensorop --A=f16:col --B=f16:col --C=f32 --accum=f32 --m=$N --n=$N --k=$N --inst_m=16 --inst_n=8 --inst_k=8 --warmup-iterations=1 --profiling-iterations=10 --verification-enabled=false 2>/dev/null | grep 'gpu__time_duration' | tail -1 | awk -F',' '{print $NF}' | sed 's/"//g')
19+
runtime=$(nv-nsight-cu-cli -f --summary per-kernel --csv --units base -k Kernel ${CUTLASS_BUILD_PATH}/tools/profiler/cutlass_profiler --m=$N --n=$N --k=$N --warmup-iterations=1 --profiling-iterations=10 --verification-enabled=false --kernels=cutlass_tensorop_s1688gemm_f16_128x128_nn 2>/dev/null | grep 'gpu__time_duration' | tail -1 | awk -F',' '{print $NF}' | sed 's/"//g')
2020

2121
printf "$N,$runtime\n" >>cutlass-mma-turing.csv
2222
done

test/perf/matmul_kernels/wmma/cutlass-mma.sh

+1 -1
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ for i in {7..14}; do
1616
N=$((2**i))
1717

1818
# runtime in ns
19-
runtime=$(nv-nsight-cu-cli -f --summary per-kernel --csv --units base -k Kernel ${CUTLASS_BUILD_PATH}/tools/profiler/cutlass_profiler --op_class=tensorop --A=f16:col --B=f16:col --C=f32 --accum=f32 --m=$N --n=$N --k=$N --inst_m=8 --inst_n=8 --inst_k=4 --warmup-iterations=1 --profiling-iterations=10 --verification-enabled=false 2>/dev/null | grep 'gpu__time_duration' | tail -1 | awk -F',' '{print $NF}' | sed 's/"//g')
19+
runtime=$(nv-nsight-cu-cli -f --summary per-kernel --csv --units base -k Kernel ${CUTLASS_BUILD_PATH}/tools/profiler/cutlass_profiler --m=$N --n=$N --k=$N --warmup-iterations=1 --profiling-iterations=10 --verification-enabled=false --kernels=cutlass_tensorop_s884gemm_f16_128x128_nn 2>/dev/null | grep 'gpu__time_duration' | tail -1 | awk -F',' '{print $NF}' | sed 's/"//g')
2020

2121
printf "$N,$runtime\n" >>cutlass-mma.csv
2222
done

test/perf/matmul_kernels/wmma/cutlass-wmma.sh

+1 -1
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ for i in {7..14}; do
1616
N=$((2**i))
1717

1818
# runtime in ns
19-
runtime=$(nv-nsight-cu-cli -f --summary per-kernel --csv --units base -k Kernel ${CUTLASS_BUILD_PATH}/tools/profiler/cutlass_profiler --op_class=wmmatensorop --A=f16:col --B=f16:col --C=f32 --accum=f32 --m=$N --n=$N --k=$N --warmup-iterations=1 --profiling-iterations=10 --verification-enabled=false 2>/dev/null | grep 'gpu__time_duration' | tail -1 | awk -F',' '{print $NF}' | sed 's/"//g')
19+
runtime=$(nv-nsight-cu-cli -f --summary per-kernel --csv --units base -k Kernel ${CUTLASS_BUILD_PATH}/tools/profiler/cutlass_profiler --m=$N --n=$N --k=$N --warmup-iterations=1 --profiling-iterations=10 --verification-enabled=false --kernels=cutlass_wmma_tensorop_s161616gemm_f16_128x128_nn 2>/dev/null | grep 'gpu__time_duration' | tail -1 | awk -F',' '{print $NF}' | sed 's/"//g')
2020

2121
printf "$N,$runtime\n" >>cutlass-wmma.csv
2222
done

0 commit comments

Comments
 (0)