Skip to content

Commit 28a66fc

Browse files
committed
Test cleanup
Define `NNlib.leakyrelu(::AbstractSIMD)` for the Julia 1.6 test, and don't test `choose_order` for Westmere-like architectures (i.e., a SIMD width of 16 bytes and 16 registers), because we don't have a set of validated, reasonable results to compare with.
1 parent 51ee029 commit 28a66fc

File tree

2 files changed

+34
-11
lines changed

2 files changed

+34
-11
lines changed

test/forwarddiffext.jl

+5
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ function tovec(x::ForwardDiff.Dual{T,V,N}) where {T,V,N}
1515
return ret
1616
end
1717

18+
if LoopVectorization.ifelse !== Base.ifelse
19+
@inline function NNlib.leakyrelu(x::LoopVectorization.AbstractSIMD, a=NNlib.oftf(x, NNlib.leakyrelu_a))
20+
LoopVectorization.ifelse(x>0, float(x), NNlib.oftf(x, a*x)) # max(a*x, x) is 3x slower
21+
end
22+
end
1823

1924
vx0 = randnvec()
2025
vx1 = randnvec()

test/gemm.jl

+29-11
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@
88
Unum, Tnum = LoopVectorization.register_count() == 16 ? (2, 6) : (4, 6)
99
end
1010
Unumt, Tnumt = LoopVectorization.register_count() == 16 ? (2, 6) : (5, 5)
11-
if LoopVectorization.register_count() != 8
11+
if (LoopVectorization.register_count() != 8) && (
12+
(LoopVectorization.pick_vector_width(Float64) != 2) ||
13+
(LoopVectorization.register_count() != 16)
14+
)
1215
@test @inferred(LoopVectorization.matmul_params()) == (Unum, Tnum)
1316
end
1417

@@ -30,7 +33,10 @@
3033
end
3134
)
3235
lsAmulBt1 = LoopVectorization.loopset(AmulBtq1)
33-
if LoopVectorization.register_count() != 8
36+
if (LoopVectorization.register_count() != 8) && (
37+
(LoopVectorization.pick_vector_width(Float64) != 2) ||
38+
(LoopVectorization.register_count() != 16)
39+
)
3440
@test LoopVectorization.choose_order(lsAmulBt1) ==
3541
(Symbol[:n, :m, :k], :m, :n, :m, Unum, Tnum)
3642
end
@@ -43,7 +49,10 @@
4349
end
4450
)
4551
lsAmulB1 = LoopVectorization.loopset(AmulBq1)
46-
if LoopVectorization.register_count() != 8
52+
if (LoopVectorization.register_count() != 8) && (
53+
(LoopVectorization.pick_vector_width(Float64) != 2) ||
54+
(LoopVectorization.register_count() != 16)
55+
)
4756
@test LoopVectorization.choose_order(lsAmulB1) ==
4857
(Symbol[:n, :m, :k], :m, :n, :m, Unum, Tnum)
4958
end
@@ -56,7 +65,10 @@
5665
end
5766
)
5867
lsAmulB2 = LoopVectorization.loopset(AmulBq2)
59-
if LoopVectorization.register_count() != 8
68+
if (LoopVectorization.register_count() != 8) && (
69+
(LoopVectorization.pick_vector_width(Float64) != 2) ||
70+
(LoopVectorization.register_count() != 16)
71+
)
6072
@test LoopVectorization.choose_order(lsAmulB2) ==
6173
(Symbol[:n, :m, :k], :m, :n, :m, Unum, Tnum)
6274
end
@@ -70,11 +82,12 @@
7082
end
7183
)
7284
lsAmulB3 = LoopVectorization.loopset(AmulBq3)
73-
if LoopVectorization.register_count() != 8
85+
if (LoopVectorization.register_count() != 8) && (
86+
(LoopVectorization.pick_vector_width(Float64) != 2) ||
87+
(LoopVectorization.register_count() != 16)
88+
)
7489
@test LoopVectorization.choose_order(lsAmulB3) ==
7590
(Symbol[:n, :m, :k], :m, :n, :m, Unum, Tnum)
76-
end
77-
if LoopVectorization.register_count() != 8
7891
for (fA, fB, v, Un, Tn) ∈ [
7992
(identity, identity, :m, Unum, Tnum),
8093
(adjoint, identity, :k, Unumt, Tnumt),
@@ -177,7 +190,8 @@
177190
end
178191
)
179192
lsAmuladd = LoopVectorization.loopset(Amuladdq)
180-
if LoopVectorization.register_count() != 8
193+
if LoopVectorization.register_count() != 8 &&
194+
LoopVectorization.pick_vector_width(Float64) != 2
181195
@test LoopVectorization.choose_order(lsAmuladd) ==
182196
(Symbol[:n, :m, :k], :m, :n, :m, Unum, Tnum)
183197
end
@@ -410,9 +424,13 @@
410424
@test LoopVectorization.choose_order(lsr2amb) == ([:m, :n, :k], :m, :n, :m, 3, 7)
411425
end
412426
elseif LoopVectorization.register_count() == 16
413-
# @test LoopVectorization.choose_order(lsr2amb) == ([:m, :n, :k], :m, :n, :m, 1, 6)
414-
# @test LoopVectorization.choose_order(lsr2amb) == ([:m, :n, :k], :m, :n, :m, 2, 4)
415-
@test LoopVectorization.choose_order(lsr2amb) == ([:n, :m, :k], :n, :m, :m, 3, 3)
427+
if LoopVectorization.pick_vector_width(Float64) == 4
428+
# @test LoopVectorization.choose_order(lsr2amb) == ([:m, :n, :k], :m, :n, :m, 1, 6)
429+
# @test LoopVectorization.choose_order(lsr2amb) == ([:m, :n, :k], :m, :n, :m, 2, 4)
430+
@test LoopVectorization.choose_order(lsr2amb) == ([:n, :m, :k], :n, :m, :m, 3, 3)
431+
elseif LoopVectorization.pick_vector_width(Float64) == 2
432+
@test LoopVectorization.choose_order(lsr2amb) == ([:m, :n, :k], :n, :m, :m, 3, 3)
433+
end
416434
end
417435
function rank2AmulBavx!(C, Aₘ, Aₖ, B)
418436
@turbo for m ∈ axes(C, 1), n ∈ axes(C, 2)

0 commit comments

Comments
 (0)