|
| 1 | +B,M,N,K,kernelId,splitK,us,kernelName |
| 2 | +16,1,1280,8192,78,0,96.9067,bf16_batched_64x16x16x64_16x16_1x1_8x8x1_8x8x1_1x16x1x4_4x4x1_1x1_interwave_v2 |
| 3 | +16,32,1280,8192,28,0,112.8655,bf16_batched_256x32x128x128_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8x8x1_1x1_intrawave_v3 |
| 4 | +16,64,1280,8192,21,0,130.2174,bf16_batched_256x64x128x128_32x32_1x2_16x16x1_16x16x1_1x32x1x8_8x8x1_1x1_intrawave_v3 |
| 5 | +16,128,1280,8192,14,0,165.8107,bf16_batched_256x128x96x128_32x32_1x3_16x16x1_16x16x1_1x64x1x4_8x8x1_1x1_intrawave_v3 |
| 6 | +16,192,1280,8192,21,0,245.0521,bf16_batched_256x64x128x128_32x32_1x2_16x16x1_16x16x1_1x32x1x8_8x8x1_1x1_intrawave_v3 |
| 7 | +16,256,1280,8192,11,0,272.8916,bf16_batched_256x128x160x64_32x32_1x5_8x32x1_8x32x1_1x64x1x4_8x8x1_1x1_intrawave_v3 |
| 8 | +16,320,1280,8192,8,0,341.1548,bf16_batched_256x128x256x64_32x32_2x4_8x32x1_8x32x1_1x32x1x8_8x8x1_1x1_intrawave_v3 |
| 9 | +16,512,1280,8192,14,0,486.314,bf16_batched_256x128x96x128_32x32_1x3_16x16x1_16x16x1_1x64x1x4_8x8x1_1x1_intrawave_v3 |
| 10 | +16,1024,1280,8192,10,0,804.6945,bf16_batched_256x128x192x64_32x32_2x3_8x32x1_8x32x1_1x32x1x8_8x8x1_1x1_intrawave_v3 |
| 11 | +16,2048,1280,8192,41,0,1491.0997,bf16_batched_256x128x128x64_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8x8x1_1x1_intrawave_v5 |
| 12 | +16,4096,1280,8192,41,0,2898.0224,bf16_batched_256x128x128x64_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8x8x1_1x1_intrawave_v5 |
| 13 | +16,8192,1280,8192,8,0,5616.5567,bf16_batched_256x128x256x64_32x32_2x4_8x32x1_8x32x1_1x32x1x8_8x8x1_1x1_intrawave_v3 |
| 14 | +16,16384,1280,8192,8,0,11396.9711,bf16_batched_256x128x256x64_32x32_2x4_8x32x1_8x32x1_1x32x1x8_8x8x1_1x1_intrawave_v3 |
| 15 | +16,1,8192,1024,81,0,57.5454,bf16_batched_128x32x64x64_32x32_1x1_8x16x1_8x16x1_1x16x1x8_8x8x1_1x1_interwave_v2 |
| 16 | +16,32,8192,1024,25,0,67.8632,bf16_batched_256x32x224x128_16x16_1x7_16x16x1_16x16x1_1x32x1x8_4x4x1_1x1_intrawave_v3 |
| 17 | +16,64,8192,1024,20,0,88.4667,bf16_batched_256x64x160x128_16x16_2x5_16x16x1_16x16x1_1x64x1x4_8x8x1_2x1_intrawave_v3 |
| 18 | +16,128,8192,1024,13,0,124.6653,bf16_batched_256x128x128x64_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8x8x1_1x1_intrawave_v3 |
| 19 | +16,192,8192,1024,41,0,177.1559,bf16_batched_256x128x128x64_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8x8x1_1x1_intrawave_v5 |
| 20 | +16,256,8192,1024,13,0,192.2976,bf16_batched_256x128x128x64_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8x8x1_1x1_intrawave_v3 |
| 21 | +16,320,8192,1024,13,0,257.184,bf16_batched_256x128x128x64_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8x8x1_1x1_intrawave_v3 |
| 22 | +16,512,8192,1024,13,0,340.1269,bf16_batched_256x128x128x64_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8x8x1_1x1_intrawave_v3 |
| 23 | +16,1024,8192,1024,13,0,624.9993,bf16_batched_256x128x128x64_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8x8x1_1x1_intrawave_v3 |
| 24 | +16,2048,8192,1024,0,0,1176.1171,bf16_batched_256x256x256x32_32x32_4x4_4x64x1_4x64x1_1x32x1x8_8x8x1_1x1_intrawave_v4 |
| 25 | +16,4096,8192,1024,0,0,2271.2554,bf16_batched_256x256x256x32_32x32_4x4_4x64x1_4x64x1_1x32x1x8_8x8x1_1x1_intrawave_v4 |
| 26 | +16,8192,8192,1024,0,0,4531.6427,bf16_batched_256x256x256x32_32x32_4x4_4x64x1_4x64x1_1x32x1x8_8x8x1_1x1_intrawave_v4 |
| 27 | +16,16384,8192,1024,0,0,8533.7636,bf16_batched_256x256x256x32_32x32_4x4_4x64x1_4x64x1_1x32x1x8_8x8x1_1x1_intrawave_v4 |
0 commit comments