Skip to content

Commit

Permalink
Test asm slice reads/writes in race tests (#286)
Browse files Browse the repository at this point in the history
* Test asm slice reads/writes in race tests
* Update CI and go.mod

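When calling asm functions in race mode, explicitly report the input slices as reads and the output slices as writes (limited to the byte range the asm routine actually processes where that is computed), so that races on those slices are detected.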
  • Loading branch information
klauspost committed Aug 31, 2024
1 parent 67157af commit 6a9df69
Show file tree
Hide file tree
Showing 15 changed files with 329 additions and 63 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
build:
strategy:
matrix:
go-version: [1.20.x, 1.21.x, 1.22.x]
go-version: [1.21.x, 1.22.x, 1.23.x]
os: [ubuntu-latest, macos-latest, windows-latest]
env:
CGO_ENABLED: 0
Expand Down
40 changes: 36 additions & 4 deletions _gen/gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,14 @@ func (r *reedSolomon) canGFNI(byteCount int, inputs, outputs int) (_, _ *func(ma
if pshufb {
w.WriteString(`
func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
n := stop-start
func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) (n int) {
n = stop - start
if raceEnabled {
defer func() {
raceReadSlices(in, start, n)
raceWriteSlices(out, start, n)
}()
}
`)

Expand All @@ -197,8 +203,14 @@ func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out)))
}
func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int {
n := (stop-start)
func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) (n int) {
n = stop - start
if raceEnabled {
defer func() {
raceReadSlices(in, start, n)
raceWriteSlices(out, start, n)
}()
}
`)

Expand All @@ -223,6 +235,11 @@ func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int {
func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int {
n := (stop-start) & (maxInt - (64 - 1))
if raceEnabled {
raceReadSlices(in, start, n)
raceWriteSlices(out, start, n)
}
`)

w.WriteString(`switch len(in) {
Expand All @@ -242,6 +259,11 @@ func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int {
func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int {
n := (stop-start) & (maxInt - (64 - 1))
if raceEnabled {
raceReadSlices(in, start, n)
raceWriteSlices(out, start, n)
}
`)

w.WriteString(`switch len(in) {
Expand All @@ -264,6 +286,11 @@ func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int
func galMulSlicesAvxGFNI(matrix []uint64, in, out [][]byte, start, stop int) int {
n := (stop-start) & (maxInt - (32 - 1))
if raceEnabled {
raceReadSlices(in, start, n)
raceWriteSlices(out, start, n)
}
`)

w.WriteString(`switch len(in) {
Expand All @@ -283,6 +310,11 @@ func galMulSlicesAvxGFNI(matrix []uint64, in, out [][]byte, start, stop int) int
func galMulSlicesAvxGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int {
n := (stop-start) & (maxInt - (32 - 1))
if raceEnabled {
raceReadSlices(in, start, n)
raceWriteSlices(out, start, n)
}
`)

w.WriteString(`switch len(in) {
Expand Down
117 changes: 102 additions & 15 deletions galois_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,20 +53,32 @@ func galMulSlice(c byte, in, out []byte, o *options) {
}
if o.useAVX2 {
if len(in) >= bigSwitchover {
galMulAVX2_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done := (len(in) >> 6) << 6
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}
galMulAVX2_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
in = in[done:]
out = out[done:]
}
if len(in) > 32 {
galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done := (len(in) >> 5) << 5
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}
galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out)
in = in[done:]
out = out[done:]
}
} else if o.useSSSE3 {
galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done := (len(in) >> 4) << 4
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}
galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out)
in = in[done:]
out = out[done:]
}
Expand All @@ -85,20 +97,32 @@ func galMulSliceXor(c byte, in, out []byte, o *options) {

if o.useAVX2 {
if len(in) >= bigSwitchover {
galMulAVX2Xor_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done := (len(in) >> 6) << 6
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}
galMulAVX2Xor_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
in = in[done:]
out = out[done:]
}
if len(in) >= 32 {
galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done := (len(in) >> 5) << 5
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}
galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
in = in[done:]
out = out[done:]
}
} else if o.useSSSE3 {
galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done := (len(in) >> 4) << 4
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}
galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
in = in[done:]
out = out[done:]
}
Expand All @@ -117,20 +141,32 @@ func sliceXor(in, out []byte, o *options) {
if o.useSSE2 {
if len(in) >= bigSwitchover {
if o.useAVX2 {
avx2XorSlice_64(in, out)
done := (len(in) >> 6) << 6
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}
avx2XorSlice_64(in, out)
in = in[done:]
out = out[done:]
} else {
sSE2XorSlice_64(in, out)
done := (len(in) >> 6) << 6
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}
sSE2XorSlice_64(in, out)
in = in[done:]
out = out[done:]
}
}
if len(in) >= 16 {
sSE2XorSlice(in, out)
done := (len(in) >> 4) << 4
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}
sSE2XorSlice(in, out)
in = in[done:]
out = out[done:]
}
Expand Down Expand Up @@ -462,9 +498,17 @@ func fftDIT2(x, y []byte, log_m ffe, o *options) {
}
if o.useAVX2 {
tmp := &multiply256LUT[log_m]
if raceEnabled {
raceReadSlice(y)
raceWriteSlice(x)
}
fftDIT2_avx2(x, y, tmp)
} else if o.useSSSE3 {
tmp := &multiply256LUT[log_m]
if raceEnabled {
raceReadSlice(y)
raceWriteSlice(x)
}
fftDIT2_ssse3(x, y, tmp)
} else {
// Reference version:
Expand All @@ -480,11 +524,15 @@ func fftDIT28(x, y []byte, log_m ffe8, o *options) {
}

if o.useAVX2 {
done := (len(y) >> 6) << 6
if raceEnabled {
raceReadSlice(y[:done])
raceWriteSlice(x[:done])
}
fftDIT28_avx2(x, y, &multiply256LUT8[log_m])
if len(x)&63 == 0 {
return
}
done := (len(y) >> 6) << 6
y = y[done:]
x = x[done:]
}
Expand All @@ -499,11 +547,15 @@ func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
}

if o.useAVX2 {
done := (len(y) >> 6) << 6
if raceEnabled {
raceReadSlice(y[:done])
raceWriteSlice(x[:done])
}
ifftDIT28_avx2(x, y, &multiply256LUT8[log_m])
if len(x)&63 == 0 {
return
}
done := (len(y) >> 6) << 6
y = y[done:]
x = x[done:]
}
Expand All @@ -514,14 +566,22 @@ func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
func mulAdd8(x, y []byte, log_m ffe8, o *options) {
if o.useAVX2 {
t := &multiply256LUT8[log_m]
galMulAVX2Xor_64(t[:16], t[16:32], y, x)
done := (len(y) >> 6) << 6
if raceEnabled {
raceReadSlice(y[:done])
raceWriteSlice(x[:done])
}
galMulAVX2Xor_64(t[:16], t[16:32], y, x)
y = y[done:]
x = x[done:]
} else if o.useSSSE3 {
t := &multiply256LUT8[log_m]
galMulSSSE3Xor(t[:16], t[16:32], y, x)
done := (len(y) >> 4) << 4
if raceEnabled {
raceReadSlice(y[:done])
raceWriteSlice(x[:done])
}
galMulSSSE3Xor(t[:16], t[16:32], y, x)
y = y[done:]
x = x[done:]
}
Expand All @@ -535,9 +595,19 @@ func ifftDIT2(x, y []byte, log_m ffe, o *options) {
}
if o.useAVX2 {
tmp := &multiply256LUT[log_m]
if raceEnabled {
raceReadSlice(y)
raceWriteSlice(x)
}

ifftDIT2_avx2(x, y, tmp)
} else if o.useSSSE3 {
tmp := &multiply256LUT[log_m]
if raceEnabled {
raceReadSlice(y)
raceWriteSlice(x)
}

ifftDIT2_ssse3(x, y, tmp)
} else {
// Reference version:
Expand All @@ -552,9 +622,17 @@ func mulgf16(x, y []byte, log_m ffe, o *options) {
}
if o.useAVX2 {
tmp := &multiply256LUT[log_m]
if raceEnabled {
raceReadSlice(y)
raceWriteSlice(x)
}
mulgf16_avx2(x, y, tmp)
} else if o.useSSSE3 {
tmp := &multiply256LUT[log_m]
if raceEnabled {
raceReadSlice(y)
raceWriteSlice(x)
}
mulgf16_ssse3(x, y, tmp)
} else {
refMul(x, y, log_m)
Expand All @@ -564,14 +642,23 @@ func mulgf16(x, y []byte, log_m ffe, o *options) {
func mulgf8(out, in []byte, log_m ffe8, o *options) {
if o.useAVX2 {
t := &multiply256LUT8[log_m]
galMulAVX2_64(t[:16], t[16:32], in, out)
done := (len(in) >> 6) << 6
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}

galMulAVX2_64(t[:16], t[16:32], in, out)
in = in[done:]
out = out[done:]
} else if o.useSSSE3 {
t := &multiply256LUT8[log_m]
galMulSSSE3(t[:16], t[16:32], in, out)
done := (len(in) >> 4) << 4
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}
galMulSSSE3(t[:16], t[16:32], in, out)
in = in[done:]
out = out[done:]
}
Expand Down
13 changes: 10 additions & 3 deletions galois_arm64.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,12 @@ func galMulSlice(c byte, in, out []byte, o *options) {
return
}
var done int
galMulNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done = (len(in) >> 5) << 5
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}
galMulNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out)

remain := len(in) - done
if remain > 0 {
Expand All @@ -50,9 +54,12 @@ func galMulSliceXor(c byte, in, out []byte, o *options) {
sliceXor(in, out, o)
return
}
var done int
done := (len(in) >> 5) << 5
if raceEnabled {
raceReadSlice(in[:done])
raceWriteSlice(out[:done])
}
galMulXorNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done = (len(in) >> 5) << 5

remain := len(in) - done
if remain > 0 {
Expand Down
Loading

0 comments on commit 6a9df69

Please sign in to comment.