|
8 | 8 | Unum, Tnum = LoopVectorization.register_count() == 16 ? (2, 6) : (4, 6)
|
9 | 9 | end
|
10 | 10 | Unumt, Tnumt = LoopVectorization.register_count() == 16 ? (2, 6) : (5, 5)
|
11 |
| - if LoopVectorization.register_count() != 8 |
| 11 | + if (LoopVectorization.register_count() != 8) && ((LoopVectorization.pick_vector_width(Float64) != 2) ||(LoopVectorization.register_count() != 16)) |
12 | 12 | @test @inferred(LoopVectorization.matmul_params()) == (Unum, Tnum)
|
13 | 13 | end
|
14 | 14 |
|
|
30 | 30 | end
|
31 | 31 | )
|
32 | 32 | lsAmulBt1 = LoopVectorization.loopset(AmulBtq1)
|
33 |
| - if LoopVectorization.register_count() != 8 |
| 33 | + if (LoopVectorization.register_count() != 8) && ((LoopVectorization.pick_vector_width(Float64) != 2) ||(LoopVectorization.register_count() != 16)) |
34 | 34 | @test LoopVectorization.choose_order(lsAmulBt1) ==
|
35 | 35 | (Symbol[:n, :m, :k], :m, :n, :m, Unum, Tnum)
|
36 | 36 | end
|
|
43 | 43 | end
|
44 | 44 | )
|
45 | 45 | lsAmulB1 = LoopVectorization.loopset(AmulBq1)
|
46 |
| - if LoopVectorization.register_count() != 8 |
| 46 | + if (LoopVectorization.register_count() != 8) && ((LoopVectorization.pick_vector_width(Float64) != 2) ||(LoopVectorization.register_count() != 16)) |
47 | 47 | @test LoopVectorization.choose_order(lsAmulB1) ==
|
48 | 48 | (Symbol[:n, :m, :k], :m, :n, :m, Unum, Tnum)
|
49 | 49 | end
|
|
56 | 56 | end
|
57 | 57 | )
|
58 | 58 | lsAmulB2 = LoopVectorization.loopset(AmulBq2)
|
59 |
| - if LoopVectorization.register_count() != 8 |
| 59 | + if (LoopVectorization.register_count() != 8) && ((LoopVectorization.pick_vector_width(Float64) != 2) ||(LoopVectorization.register_count() != 16)) |
60 | 60 | @test LoopVectorization.choose_order(lsAmulB2) ==
|
61 | 61 | (Symbol[:n, :m, :k], :m, :n, :m, Unum, Tnum)
|
62 | 62 | end
|
|
70 | 70 | end
|
71 | 71 | )
|
72 | 72 | lsAmulB3 = LoopVectorization.loopset(AmulBq3)
|
73 |
| - if LoopVectorization.register_count() != 8 |
| 73 | + if (LoopVectorization.register_count() != 8) && ((LoopVectorization.pick_vector_width(Float64) != 2) ||(LoopVectorization.register_count() != 16)) |
74 | 74 | @test LoopVectorization.choose_order(lsAmulB3) ==
|
75 | 75 | (Symbol[:n, :m, :k], :m, :n, :m, Unum, Tnum)
|
76 |
| - end |
77 |
| - if LoopVectorization.register_count() != 8 |
78 | 76 | for (fA, fB, v, Un, Tn) ∈ [
|
79 | 77 | (identity, identity, :m, Unum, Tnum),
|
80 | 78 | (adjoint, identity, :k, Unumt, Tnumt),
|
|
178 | 176 | )
|
179 | 177 | lsAmuladd = LoopVectorization.loopset(Amuladdq)
|
180 | 178 | if LoopVectorization.register_count() != 8
|
| 179 | + if LoopVectorization.pick_vector_width(Float64) != 2 |
181 | 180 | @test LoopVectorization.choose_order(lsAmuladd) ==
|
182 | 181 | (Symbol[:n, :m, :k], :m, :n, :m, Unum, Tnum)
|
| 182 | + else |
| 183 | + @test LoopVectorization.choose_order(lsAmuladd) == |
| 184 | + (Symbol[:m, :n, :k], :n, :m, :m, Unum, Tnum) |
| 185 | + end |
183 | 186 | end
|
184 | 187 | Atmuladdq = :(
|
185 | 188 | for m ∈ axes(A, 2), n ∈ axes(B, 2)
|
|
410 | 413 | @test LoopVectorization.choose_order(lsr2amb) == ([:m, :n, :k], :m, :n, :m, 3, 7)
|
411 | 414 | end
|
412 | 415 | elseif LoopVectorization.register_count() == 16
|
| 416 | + if LoopVectorization.pick_vector_width(Float64) == 4 |
413 | 417 | # @test LoopVectorization.choose_order(lsr2amb) == ([:m, :n, :k], :m, :n, :m, 1, 6)
|
414 | 418 | # @test LoopVectorization.choose_order(lsr2amb) == ([:m, :n, :k], :m, :n, :m, 2, 4)
|
415 |
| - @test LoopVectorization.choose_order(lsr2amb) == ([:n, :m, :k], :n, :m, :m, 3, 3) |
| 419 | + @test LoopVectorization.choose_order(lsr2amb) == ([:n, :m, :k], :n, :m, :m, 3, 3) |
| 420 | + elseif LoopVectorization.pick_vector_width(Float64) == 2 |
| 421 | + @test LoopVectorization.choose_order(lsr2amb) == ([:m, :n, :k], :n, :m, :m, 3, 3) |
| 422 | + end |
416 | 423 | end
|
417 | 424 | function rank2AmulBavx!(C, Aₘ, Aₖ, B)
|
418 | 425 | @turbo for m ∈ axes(C, 1), n ∈ axes(C, 2)
|
|
0 commit comments