Skip to content

Commit 97c09ac

Browse files
committed
pulley: Add some disas tests of suboptimal codegen patterns
1 parent 887e5c9 commit 97c09ac

File tree

2 files changed

+173
-0
lines changed

2 files changed

+173
-0
lines changed

tests/disas/pulley/coremark-1.wat

+102
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
;;! target = "pulley64"
2+
;;! test = "compile"
3+
4+
;; Small test of a loop extracted from "coremark-minimal.wasm" here:
5+
;; https://github.com/wasmi-labs/wasmi-benchmarks/blob/d045a88246d3ac9b0b80b188feda54b89ca126b5/benches/res/wasm/coremark-minimal.wasm
6+
;;
7+
;; This doesn't reproduce the exact regalloc decisions but does currently show
8+
;; something suboptimal for Pulley: the end of the loop is currently emitted
9+
;; as:
10+
;;
11+
;; * `br_if_not32` to exit the loop
12+
;; * `xmov` to move some registers in place
13+
;; * `jump` to resume the loop
14+
;;
15+
;; Ideally to minimize Pulley opcodes this would skip the `xmov` and `jump`
16+
;; with different register allocation and the back-edge would be a single
17+
;; conditional branch.
18+
19+
(module
20+
(memory 10)
21+
(func (param $p1 i32) (param $p2 i32) (param $cnt i32) ;; loop kernel: accumulates into $accum while $cnt counts down
22+
(param $stride i32)
23+
(result i32)
24+
(local $accum i32)
25+
loop
26+
local.get $accum ;; old $accum stays on the stack until the `i32.add` below
27+
28+
local.get $p1
29+
i32.load16_u ;; zero-extended 16-bit load from address $p1
30+
local.get $p2
31+
i32.load16_u ;; zero-extended 16-bit load from address $p2
32+
i32.mul
33+
local.tee $accum ;; $accum temporarily holds the product; a copy stays on the stack
34+
i32.const 2
35+
i32.shr_u
36+
i32.const 15
37+
i32.and ;; (product >> 2) & 15
38+
local.get $accum ;; re-read the product stored by the tee above
39+
i32.const 5
40+
i32.shr_u
41+
i32.const 127
42+
i32.and ;; (product >> 5) & 127
43+
i32.mul
44+
i32.add ;; old $accum + the product of the two masked fields
45+
local.set $accum
46+
47+
local.get $p2
48+
i32.const 2
49+
i32.add
50+
local.set $p2 ;; $p2 += 2
51+
52+
local.get $p1
53+
local.get $stride
54+
i32.add
55+
local.set $p1 ;; $p1 += $stride
56+
57+
local.get $cnt
58+
i32.const -1
59+
i32.add
60+
local.tee $cnt ;; $cnt -= 1; the new value stays on the stack as the branch condition
61+
62+
br_if 0 ;; label 0 is the enclosing loop: branch back to its head while $cnt != 0
63+
end
64+
65+
call $other ;; NOTE(review): presumably here so $accum must stay live across a call — confirm
66+
67+
(local.get $accum) ;; function result
68+
)
69+
70+
(func $other) ;; empty function, used only as the call target above
71+
)
72+
;; wasm[0]::function[0]:
73+
;; push_frame_save 16, x16
74+
;; xzero x6
75+
;; xload64le_offset8 x11, x0, 80
76+
;; xload64le_offset8 x12, x0, 88
77+
;; xbc32_bound_trap x2, x12, 2
78+
;; xload16le_u32_g32 x13, x11, x2, 0
79+
;; xbc32_bound_trap x3, x12, 2
80+
;; xload16le_u32_g32 x14, x11, x3, 0
81+
;; xsub32_u8 x4, x4, 1
82+
;; xmul32 x14, x13, x14
83+
;; xshr32_u_u6 x15, x14, 2
84+
;; xband32_s8 x15, x15, 15
85+
;; xshr32_u_u6 x14, x14, 5
86+
;; xband32_s8 x14, x14, 127
87+
;; xmadd32 x6, x15, x14, x6
88+
;; xmov x16, x6
89+
;; xadd32 x2, x2, x5
90+
;; xadd32_u8 x3, x3, 2
91+
;; br_if_not32 x4, 0xe // target = 0x53
92+
;; 4b: xmov x6, x16
93+
;; jump -0x43 // target = 0xb
94+
;; 53: call2 x0, x0, 0x10 // target = 0x63
95+
;; xmov x0, x16
96+
;; pop_frame_restore 16, x16
97+
;; ret
98+
;;
99+
;; wasm[0]::function[1]::other:
100+
;; push_frame
101+
;; pop_frame
102+
;; ret

tests/disas/pulley/fib.wat

+71
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
;;! target = "pulley64"
2+
;;! test = "compile"
3+
4+
;; Test of a recursive fibonacci routine and its codegen
5+
;;
6+
;; FIXME(#9942) this test currently has an extraneous `xmov` after the second
7+
;; call instruction.
8+
9+
(module
10+
(func $fib (export "fib") (param $n i32) (result i32) ;; recursive: returns 1 for n == 0 and n == 1, else fib(n - 1) + fib(n - 2)
11+
(if (result i32)
12+
(i32.eq
13+
(i32.const 0)
14+
(local.get $n)
15+
)
16+
(then
17+
(i32.const 1) ;; base case: n == 0
18+
)
19+
(else
20+
(if (result i32)
21+
(i32.eq
22+
(i32.const 1)
23+
(local.get $n)
24+
)
25+
(then
26+
(i32.const 1) ;; base case: n == 1
27+
)
28+
(else
29+
(i32.add
30+
;; fib(n - 1)
31+
(call $fib
32+
(i32.add
33+
(local.get $n)
34+
(i32.const -1) ;; n - 1 written as n + (-1)
35+
)
36+
)
37+
;; fib(n - 2)
38+
(call $fib
39+
(i32.add
40+
(local.get $n)
41+
(i32.const -2) ;; n - 2 written as n + (-2)
42+
)
43+
)
44+
)
45+
)
46+
)
47+
)
48+
)
49+
)
50+
)
51+
;; wasm[0]::function[0]::fib:
52+
;; push_frame_save 32, x17, x24, x29
53+
;; br_if_xeq32_i8 x2, 0, 0x47 // target = 0x4c
54+
;; br_if_xeq32_i8 x2, 1, 0x39 // target = 0x45
55+
;; 13: xsub32_u8 x14, x2, 1
56+
;; xmov x24, x0
57+
;; xmov x29, x2
58+
;; call3 x24, x24, x14, -0x1d // target = 0x0
59+
;; xmov x17, x0
60+
;; xmov x2, x29
61+
;; xmov x0, x24
62+
;; xsub32_u8 x14, x2, 2
63+
;; call3 x0, x0, x14, -0x32 // target = 0x0
64+
;; xmov x5, x17
65+
;; xadd32 x0, x5, x0
66+
;; jump 0xe // target = 0x4e
67+
;; 45: xone x0
68+
;; jump 0x7 // target = 0x4e
69+
;; 4c: xone x0
70+
;; pop_frame_restore 32, x17, x24, x29
71+
;; ret

0 commit comments

Comments
 (0)