Skip to content

Commit d3c8762

Browse files
committed
Fix aarch64 assembly for macOS/M1
Give up advocating the use of asm_linkage.h to unify the assembly work across platforms, and just pepper the files with #ifdef instead. Signed-off-by: Jorgen Lundman <[email protected]>
1 parent ef7d122 commit d3c8762

File tree

2 files changed

+123
-3
lines changed

2 files changed

+123
-3
lines changed

module/icp/asm-aarch64/blake3/b3_aarch64_sse2.S

+43-2
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@
3333

3434
#if defined(__aarch64__)
3535
.text
36+
#ifndef __APPLE__
3637
.section .note.gnu.property,"a",@note
38+
#endif
3739
.p2align 3
3840
.word 4
3941
.word 16
@@ -47,7 +49,9 @@
4749
.text
4850
.globl zfs_blake3_compress_in_place_sse2
4951
.p2align 2
52+
#ifndef __APPLE__
5053
.type zfs_blake3_compress_in_place_sse2,@function
54+
#endif
5155
zfs_blake3_compress_in_place_sse2:
5256
.cfi_startproc
5357
hint #25
@@ -79,28 +83,43 @@ zfs_blake3_compress_in_place_sse2:
7983
hint #29
8084
ret
8185
.Lfunc_end0:
82-
.size zfs_blake3_compress_in_place_sse2, .Lfunc_end0-zfs_blake3_compress_in_place_sse2
86+
#ifndef __APPLE__
87+
.size zfs_blake3_compress_in_place_sse2, .Lfunc_end0-zfs_blake3_compress_in_place_sse2
88+
#endif
8389
.cfi_endproc
8490

91+
92+
#ifndef __APPLE__
8593
.section .rodata.cst16,"aM",@progbits,16
94+
#endif
8695
.p2align 4
8796
.LCPI1_0:
8897
.xword -4942790177982912921
8998
.xword -6534734903820487822
9099
.text
91100
.p2align 2
101+
#ifndef __APPLE__
92102
.type compress_pre,@function
103+
#endif
93104
compress_pre:
94105
.cfi_startproc
95106
hint #34
96107
fmov s1, w3
97108
movi d0, #0x0000ff000000ff
98109
ldr q2, [x1]
99110
fmov d3, x4
111+
#ifndef __APPLE__
100112
adrp x8, .LCPI1_0
113+
#else
114+
adrp x8, .LCPI1_0@PAGE
115+
#endif
101116
mov v1.s[1], w5
102117
str q2, [x0]
118+
#ifndef __APPLE__
103119
ldr q4, [x8, :lo12:.LCPI1_0]
120+
#else
121+
ldr q4, [x8, :lo12:.LCPI1_0@PAGEOFF]
122+
#endif
104123
add x8, x2, #32
105124
ldr q5, [x1, #16]
106125
and v0.8b, v1.8b, v0.8b
@@ -546,12 +565,16 @@ compress_pre:
546565
stp q0, q1, [x0]
547566
ret
548567
.Lfunc_end1:
568+
#ifndef __APPLE__
549569
.size compress_pre, .Lfunc_end1-compress_pre
570+
#endif
550571
.cfi_endproc
551572

552573
.globl zfs_blake3_compress_xof_sse2
553574
.p2align 2
575+
#ifndef __APPLE__
554576
.type zfs_blake3_compress_xof_sse2,@function
577+
#endif
555578
zfs_blake3_compress_xof_sse2:
556579
.cfi_startproc
557580
hint #25
@@ -591,10 +614,14 @@ zfs_blake3_compress_xof_sse2:
591614
hint #29
592615
ret
593616
.Lfunc_end2:
617+
#ifndef __APPLE__
594618
.size zfs_blake3_compress_xof_sse2, .Lfunc_end2-zfs_blake3_compress_xof_sse2
619+
#endif
595620
.cfi_endproc
596621

622+
#ifndef __APPLE__
597623
.section .rodata.cst16,"aM",@progbits,16
624+
#endif
598625
.p2align 4
599626
.LCPI3_0:
600627
.word 0
@@ -604,7 +631,9 @@ zfs_blake3_compress_xof_sse2:
604631
.text
605632
.globl zfs_blake3_hash_many_sse2
606633
.p2align 2
634+
#ifndef __APPLE__
607635
.type zfs_blake3_hash_many_sse2,@function
636+
#endif
608637
zfs_blake3_hash_many_sse2:
609638
.cfi_startproc
610639
hint #25
@@ -650,13 +679,21 @@ zfs_blake3_hash_many_sse2:
650679
cmp x1, #4
651680
str x3, [sp, #40]
652681
b.lo .LBB3_6
682+
#ifndef __APPLE__
653683
adrp x8, .LCPI3_0
684+
#else
685+
adrp x8, .LCPI3_0@PAGE
686+
#endif
654687
sbfx w9, w5, #0, #1
655688
mov w10, #44677
656689
mov w11, #62322
657690
movk w10, #47975, lsl #16
658691
movk w11, #15470, lsl #16
692+
#ifndef __APPLE__
659693
ldr q0, [x8, :lo12:.LCPI3_0]
694+
#else
695+
ldr q0, [x8, :lo12:.LCPI3_0@PAGEOFF]
696+
#endif
660697
dup v1.4s, w9
661698
mov w9, #58983
662699
orr w8, w7, w19
@@ -2055,7 +2092,11 @@ zfs_blake3_hash_many_sse2:
20552092
hint #29
20562093
ret
20572094
.Lfunc_end3:
2095+
#ifndef __APPLE__
20582096
.size zfs_blake3_hash_many_sse2, .Lfunc_end3-zfs_blake3_hash_many_sse2
2097+
#endif
20592098
.cfi_endproc
2099+
#ifndef __APPLE__
20602100
.section ".note.GNU-stack","",@progbits
2061-
#endif
2101+
#endif
2102+
#endif

module/icp/asm-aarch64/blake3/b3_aarch64_sse41.S

+80-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@
3333

3434
#if defined(__aarch64__)
3535
.text
36+
#ifndef __APPLE__
3637
.section .note.gnu.property,"a",@note
38+
#endif
3739
.p2align 3
3840
.word 4
3941
.word 16
@@ -47,7 +49,9 @@
4749
.text
4850
.globl zfs_blake3_compress_in_place_sse41
4951
.p2align 2
52+
#ifndef __APPLE__
5053
.type zfs_blake3_compress_in_place_sse41,@function
54+
#endif
5155
zfs_blake3_compress_in_place_sse41:
5256
.cfi_startproc
5357
hint #25
@@ -79,10 +83,14 @@ zfs_blake3_compress_in_place_sse41:
7983
hint #29
8084
ret
8185
.Lfunc_end0:
86+
#ifndef __APPLE__
8287
.size zfs_blake3_compress_in_place_sse41, .Lfunc_end0-zfs_blake3_compress_in_place_sse41
88+
#endif
8389
.cfi_endproc
8490

91+
#ifndef __APPLE__
8592
.section .rodata.cst16,"aM",@progbits,16
93+
#endif
8694
.p2align 4
8795
.LCPI1_0:
8896
.xword -4942790177982912921
@@ -123,19 +131,33 @@ zfs_blake3_compress_in_place_sse41:
123131
.byte 12
124132
.text
125133
.p2align 2
134+
#ifndef __APPLE__
126135
.type compress_pre,@function
136+
#endif
127137
compress_pre:
128138
.cfi_startproc
129139
hint #34
130140
fmov s1, w3
131141
movi d0, #0x0000ff000000ff
132142
ldr q2, [x1]
143+
#ifndef __APPLE__
133144
adrp x8, .LCPI1_0
145+
#else
146+
adrp x8, .LCPI1_0@PAGE
147+
#endif
134148
mov v1.s[1], w5
135149
str q2, [x0]
150+
#ifndef __APPLE__
136151
ldr q4, [x8, :lo12:.LCPI1_0]
152+
#else
153+
ldr q4, [x8, :lo12:.LCPI1_0@PAGEOFF]
154+
#endif
137155
ldr q5, [x1, #16]
156+
#ifndef __APPLE__
138157
adrp x8, .LCPI1_1
158+
#else
159+
adrp x8, .LCPI1_1@PAGE
160+
#endif
139161
and v0.8b, v1.8b, v0.8b
140162
fmov d1, x4
141163
stp q5, q4, [x0, #16]
@@ -146,8 +168,13 @@ compress_pre:
146168
add v0.4s, v2.4s, v3.4s
147169
uzp2 v2.4s, v6.4s, v7.4s
148170
add v16.4s, v0.4s, v5.4s
171+
#ifndef __APPLE__
149172
ldr q0, [x8, :lo12:.LCPI1_1]
150173
adrp x8, .LCPI1_2
174+
#else
175+
ldr q0, [x8, :lo12:.LCPI1_1@PAGEOFF]
176+
adrp x8, .LCPI1_2@PAGE
177+
#endif
151178
eor v1.16b, v16.16b, v1.16b
152179
add v7.4s, v16.4s, v2.4s
153180
tbl v1.16b, { v1.16b }, v0.16b
@@ -158,7 +185,11 @@ compress_pre:
158185
orr v5.16b, v5.16b, v6.16b
159186
add v6.4s, v7.4s, v5.4s
160187
eor v7.16b, v1.16b, v6.16b
188+
#ifndef __APPLE__
161189
ldr q1, [x8, :lo12:.LCPI1_2]
190+
#else
191+
ldr q1, [x8, :lo12:.LCPI1_2@PAGEOFF]
192+
#endif
162193
add x8, x2, #32
163194
tbl v7.16b, { v7.16b }, v1.16b
164195
ld2 { v16.4s, v17.4s }, [x8]
@@ -556,12 +587,16 @@ compress_pre:
556587
stp q2, q3, [x0]
557588
ret
558589
.Lfunc_end1:
590+
#ifndef __APPLE__
559591
.size compress_pre, .Lfunc_end1-compress_pre
592+
#endif
560593
.cfi_endproc
561594

562595
.globl zfs_blake3_compress_xof_sse41
563596
.p2align 2
597+
#ifndef __APPLE__
564598
.type zfs_blake3_compress_xof_sse41,@function
599+
#endif
565600
zfs_blake3_compress_xof_sse41:
566601
.cfi_startproc
567602
hint #25
@@ -601,10 +636,14 @@ zfs_blake3_compress_xof_sse41:
601636
hint #29
602637
ret
603638
.Lfunc_end2:
639+
#ifndef __APPLE__
604640
.size zfs_blake3_compress_xof_sse41, .Lfunc_end2-zfs_blake3_compress_xof_sse41
641+
#endif
605642
.cfi_endproc
606643

644+
#ifndef __APPLE__
607645
.section .rodata.cst16,"aM",@progbits,16
646+
#endif
608647
.p2align 4
609648
.LCPI3_0:
610649
.word 0
@@ -653,7 +692,9 @@ zfs_blake3_compress_xof_sse41:
653692
.text
654693
.globl zfs_blake3_hash_many_sse41
655694
.p2align 2
695+
#ifndef __APPLE__
656696
.type zfs_blake3_hash_many_sse41,@function
697+
#endif
657698
zfs_blake3_hash_many_sse41:
658699
.cfi_startproc
659700
hint #34
@@ -687,25 +728,45 @@ zfs_blake3_hash_many_sse41:
687728
.cfi_offset b14, -136
688729
.cfi_offset b15, -144
689730
ldr x8, [sp, #520]
731+
#ifndef __APPLE__
690732
adrp x11, .LCPI3_1
733+
#else
734+
adrp x11, .LCPI3_1@PAGE
735+
#endif
691736
ldrb w9, [sp, #512]
737+
#ifndef __APPLE__
692738
adrp x10, .LCPI3_2
739+
#else
740+
adrp x10, .LCPI3_2@PAGE
741+
#endif
693742
cmp x1, #4
694743
b.lo .LBB3_6
744+
#ifndef __APPLE__
695745
adrp x12, .LCPI3_0
746+
#else
747+
adrp x12, .LCPI3_0@PAGE
748+
#endif
696749
sbfx w13, w5, #0, #1
697750
mov w15, #58983
698751
mov w16, #44677
699752
movk w15, #27145, lsl #16
700753
movk w16, #47975, lsl #16
754+
#ifndef __APPLE__
701755
ldr q0, [x12, :lo12:.LCPI3_0]
756+
#else
757+
ldr q0, [x12, :lo12:.LCPI3_0@PAGEOFF]
758+
#endif
702759
dup v1.4s, w13
703760
movi v13.4s, #64
704761
mov w13, #62322
705762
mov w14, #62778
706763
orr w12, w7, w6
707764
and v0.16b, v1.16b, v0.16b
765+
#ifndef __APPLE__
708766
ldr q1, [x11, :lo12:.LCPI3_1]
767+
#else
768+
ldr q1, [x11, :lo12:.LCPI3_1@PAGEOFF]
769+
#endif
709770
movk w13, #15470, lsl #16
710771
movk w14, #42319, lsl #16
711772
dup v14.4s, w15
@@ -876,7 +937,11 @@ zfs_blake3_hash_many_sse41:
876937
ushr v8.4s, v25.4s, #12
877938
shl v25.4s, v25.4s, #20
878939
orr v3.16b, v20.16b, v18.16b
940+
#ifndef __APPLE__
879941
ldr q18, [x10, :lo12:.LCPI3_2]
942+
#else
943+
ldr q18, [x10, :lo12:.LCPI3_2@PAGEOFF]
944+
#endif
880945
orr v13.16b, v17.16b, v26.16b
881946
orr v24.16b, v24.16b, v29.16b
882947
orr v14.16b, v25.16b, v8.16b
@@ -1935,11 +2000,21 @@ zfs_blake3_hash_many_sse41:
19352000
b .LBB3_2
19362001
.LBB3_6:
19372002
cbz x1, .LBB3_14
2003+
#ifndef __APPLE__
19382004
adrp x12, .LCPI3_3
19392005
ldr q0, [x11, :lo12:.LCPI3_1]
2006+
#else
2007+
adrp x12, .LCPI3_3@PAGE
2008+
ldr q0, [x11, :lo12:.LCPI3_1@PAGEOFF]
2009+
#endif
19402010
orr w11, w7, w6
2011+
#ifndef __APPLE__
19412012
ldr q2, [x10, :lo12:.LCPI3_2]
19422013
ldr q1, [x12, :lo12:.LCPI3_3]
2014+
#else
2015+
ldr q2, [x10, :lo12:.LCPI3_2@PAGEOFF]
2016+
ldr q1, [x12, :lo12:.LCPI3_3@PAGEOFF]
2017+
#endif
19432018
and x12, x5, #0x1
19442019
.LBB3_8:
19452020
movi v3.4s, #64
@@ -2392,7 +2467,11 @@ zfs_blake3_hash_many_sse41:
23922467
ldp d15, d14, [sp], #144
23932468
ret
23942469
.Lfunc_end3:
2470+
#ifndef __APPLE__
23952471
.size zfs_blake3_hash_many_sse41, .Lfunc_end3-zfs_blake3_hash_many_sse41
2472+
#endif
23962473
.cfi_endproc
2474+
#ifndef __APPLE__
23972475
.section ".note.GNU-stack","",@progbits
2398-
#endif
2476+
#endif
2477+
#endif

0 commit comments

Comments
 (0)