Skip to content

Commit

Permalink
[DeviceSaniitizer] Force outline call for setting private shadow memo…
Browse files Browse the repository at this point in the history
…ry (#14818)

By default, address sanitizer will inline call for setting private
shadow memory with small size. However, if work group size is too large,
the private shadow memory may allocate failed. We need to check if
shadow base is null before trying to poison it.

---------

Co-authored-by: Yang Zhao <[email protected]>
  • Loading branch information
zhaomaosu and AllanZyne authored Jul 31, 2024
1 parent a66958b commit f203826
Show file tree
Hide file tree
Showing 10 changed files with 101 additions and 23 deletions.
31 changes: 31 additions & 0 deletions libdevice/sanitizer_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -862,4 +862,35 @@ __asan_set_shadow_dynamic_local(uptr ptr, uint32_t num_args) {
__spirv_ocl_printf(__mem_set_shadow_dynamic_local_end);
}

///
/// ASAN initialize shdadow memory of private memory
///

static __SYCL_CONSTANT__ const char __mem_set_shadow_private_begin[] =
"[kernel] BEGIN __asan_set_shadow_private\n";
static __SYCL_CONSTANT__ const char __mem_set_shadow_private_end[] =
"[kernel] END __asan_set_shadow_private\n";
static __SYCL_CONSTANT__ const char __mem_set_shadow_private[] =
"[kernel] set_shadow_private(beg=%p, end=%p, val:%02X)\n";

DEVICE_EXTERN_C_NOINLINE void __asan_set_shadow_private(uptr begin, uptr size,
char val) {
if (__AsanDebug)
__spirv_ocl_printf(__mem_set_shadow_private_begin);

auto *launch_info = (__SYCL_GLOBAL__ const LaunchInfo *)__AsanLaunchInfo;
if (launch_info->PrivateShadowOffset == 0)
return;

if (__AsanDebug)
__spirv_ocl_printf(__mem_set_shadow_private, (void *)begin,
(void *)(begin + size), val & 0xFF);

for (size_t i = 0; i < size; i++)
((__SYCL_GLOBAL__ u8 *)begin)[i] = val;

if (__AsanDebug)
__spirv_ocl_printf(__mem_set_shadow_private_end);
}

#endif // __SPIR__ || __SPIRV__
37 changes: 27 additions & 10 deletions llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1042,6 +1042,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
FunctionCallee AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1],
AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1];
FunctionCallee AsanSetShadowFunc[0x100] = {};
FunctionCallee AsanSetShadowPrivateFunc;
FunctionCallee AsanPoisonStackMemoryFunc, AsanUnpoisonStackMemoryFunc;
FunctionCallee AsanAllocaPoisonFunc, AsanAllocasUnpoisonFunc;

Expand Down Expand Up @@ -1257,10 +1258,11 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
// ShadowMask is not zero. If ShadowMask[i] is zero, we assume that
// ShadowBytes[i] is constantly zero and doesn't need to be overwritten.
void copyToShadow(ArrayRef<uint8_t> ShadowMask, ArrayRef<uint8_t> ShadowBytes,
IRBuilder<> &IRB, Value *ShadowBase);
IRBuilder<> &IRB, Value *ShadowBase,
bool ForceOutline = false);
void copyToShadow(ArrayRef<uint8_t> ShadowMask, ArrayRef<uint8_t> ShadowBytes,
size_t Begin, size_t End, IRBuilder<> &IRB,
Value *ShadowBase);
Value *ShadowBase, bool ForceOutline = false);
void copyToShadowInline(ArrayRef<uint8_t> ShadowMask,
ArrayRef<uint8_t> ShadowBytes, size_t Begin,
size_t End, IRBuilder<> &IRB, Value *ShadowBase);
Expand Down Expand Up @@ -3593,6 +3595,9 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) {
AsanSetShadowFunc[Val] =
M.getOrInsertFunction(Name.str(), IRB.getVoidTy(), IntptrTy, IntptrTy);
}
AsanSetShadowPrivateFunc =
M.getOrInsertFunction("__asan_set_shadow_private", IRB.getVoidTy(),
IntptrTy, IntptrTy, IRB.getInt8Ty());

AsanAllocaPoisonFunc = M.getOrInsertFunction(
kAsanAllocaPoison, IRB.getVoidTy(), IntptrTy, IntptrTy);
Expand Down Expand Up @@ -3655,14 +3660,17 @@ void FunctionStackPoisoner::copyToShadowInline(ArrayRef<uint8_t> ShadowMask,

void FunctionStackPoisoner::copyToShadow(ArrayRef<uint8_t> ShadowMask,
ArrayRef<uint8_t> ShadowBytes,
IRBuilder<> &IRB, Value *ShadowBase) {
copyToShadow(ShadowMask, ShadowBytes, 0, ShadowMask.size(), IRB, ShadowBase);
IRBuilder<> &IRB, Value *ShadowBase,
bool ForceOutline) {
copyToShadow(ShadowMask, ShadowBytes, 0, ShadowMask.size(), IRB, ShadowBase,
ForceOutline);
}

void FunctionStackPoisoner::copyToShadow(ArrayRef<uint8_t> ShadowMask,
ArrayRef<uint8_t> ShadowBytes,
size_t Begin, size_t End,
IRBuilder<> &IRB, Value *ShadowBase) {
IRBuilder<> &IRB, Value *ShadowBase,
bool ForceOutline) {
assert(ShadowMask.size() == ShadowBytes.size());
size_t Done = Begin;
for (size_t i = Begin, j = Begin + 1; i < End; i = j++) {
Expand All @@ -3671,14 +3679,20 @@ void FunctionStackPoisoner::copyToShadow(ArrayRef<uint8_t> ShadowMask,
continue;
}
uint8_t Val = ShadowBytes[i];
if (!AsanSetShadowFunc[Val])
if (!AsanSetShadowFunc[Val] && !ForceOutline)
continue;

// Skip same values.
for (; j < End && ShadowMask[j] && Val == ShadowBytes[j]; ++j) {
}

if (j - i >= ASan.MaxInlinePoisoningSize) {
if (ForceOutline) {
RTCI.createRuntimeCall(
IRB, AsanSetShadowPrivateFunc,
{IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i)),
ConstantInt::get(IntptrTy, j - i),
ConstantInt::get(IRB.getInt8Ty(), Val)});
} else if (j - i >= ASan.MaxInlinePoisoningSize) {
copyToShadowInline(ShadowMask, ShadowBytes, Done, i, IRB, ShadowBase);
RTCI.createRuntimeCall(
IRB, AsanSetShadowFunc[Val],
Expand All @@ -3688,7 +3702,8 @@ void FunctionStackPoisoner::copyToShadow(ArrayRef<uint8_t> ShadowMask,
}
}

copyToShadowInline(ShadowMask, ShadowBytes, Done, End, IRB, ShadowBase);
if (!ForceOutline)
copyToShadowInline(ShadowMask, ShadowBytes, Done, End, IRB, ShadowBase);
}

// Fake stack allocator (asan_fake_stack.h) has 11 size classes
Expand Down Expand Up @@ -4062,7 +4077,8 @@ void FunctionStackPoisoner::processStaticAllocas() {
ASan.memToShadow(LocalStackBase, IRB, kSpirOffloadPrivateAS);
// As mask we must use most poisoned case: red zones and after scope.
// As bytes we can use either the same or just red zones only.
copyToShadow(ShadowAfterScope, ShadowAfterScope, IRB, ShadowBase);
copyToShadow(ShadowAfterScope, ShadowAfterScope, IRB, ShadowBase,
TargetTriple.isSPIROrSPIRV());

if (!StaticAllocaPoisonCallVec.empty()) {
const auto &ShadowInScope = GetShadowBytes(SVD, L);
Expand Down Expand Up @@ -4132,7 +4148,8 @@ void FunctionStackPoisoner::processStaticAllocas() {
IRBuilder<> IRBElse(ElseTerm);
copyToShadow(ShadowAfterScope, ShadowClean, IRBElse, ShadowBase);
} else {
copyToShadow(ShadowAfterScope, ShadowClean, IRBRet, ShadowBase);
copyToShadow(ShadowAfterScope, ShadowClean, IRBRet, ShadowBase,
TargetTriple.isSPIROrSPIRV());
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -asan-stack=0 -asan-globals=0 -asan-constructor-kind=none -asan-spir-privates=1 -asan-use-after-return=never -S | FileCheck %s
; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -asan-stack=0 -asan-globals=0 -asan-constructor-kind=none -asan-mapping-scale=4 -asan-spir-privates=1 -asan-use-after-return=never -S | FileCheck %s

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown"
Expand All @@ -21,11 +21,15 @@ define spir_kernel void @kernel() #0 {
entry:
%p.i = alloca [4 x i32], align 4
; CHECK: %shadow_ptr = call i64 @__asan_mem_to_shadow(i64 %0, i32 0)
; CHECK: call void @__asan_set_shadow_private(i64 %4, i64 2, i8 -15)
; CHECK: call void @__asan_set_shadow_private(i64 %5, i64 1, i8 -13)
call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %p.i)
call void @llvm.memcpy.p0.p1.i64(ptr align 4 %p.i, ptr addrspace(1) align 4 @__const._ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv.p, i64 16, i1 false)
%arraydecay.i = getelementptr inbounds [4 x i32], ptr %p.i, i64 0, i64 0
%0 = addrspacecast ptr %arraydecay.i to ptr addrspace(4)
%call.i = call spir_func i32 @_Z3fooPii(ptr addrspace(4) %0)
; CHECK: call void @__asan_set_shadow_private(i64 %7, i64 2, i8 0)
; CHECK: call void @__asan_set_shadow_private(i64 %8, i64 1, i8 0)
ret void
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// REQUIRES: linux
// RUN: %{build} %device_asan_flags -O2 -g -o %t
// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK %s

#include <sycl/detail/core.hpp>

#include <sycl/usm.hpp>

int main() {
sycl::queue Q;
constexpr std::size_t N = 12345678;
auto *array = sycl::malloc_device<char>(N, Q);

Q.submit([&](sycl::handler &h) {
h.parallel_for<class MyKernelR_4>(
sycl::nd_range<1>(N + 1, 1),
[=](sycl::nd_item<1> item) { ++array[item.get_global_id(0)]; });
});
Q.wait();
// CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on Device USM
// CHECK: {{READ of size 1 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(12345678, 0, 0\)}}
// CHECK: {{ #0 .* .*large_group_size.cpp:}}[[@LINE-5]]

sycl::free(array, Q);
return 0;
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

int main() {
sycl::queue Q;
constexpr std::size_t N = 12345;
constexpr std::size_t N = 12;
#if defined(MALLOC_HOST)
auto *array = sycl::malloc_host<char>(N, Q);
#elif defined(MALLOC_SHARED)
Expand All @@ -34,7 +34,7 @@ int main() {
// CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on Device USM
// CHECK-HOST: ERROR: DeviceSanitizer: out-of-bounds-access on Host USM
// CHECK-SHARED: ERROR: DeviceSanitizer: out-of-bounds-access on Shared USM
// CHECK: {{READ of size 1 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(12345, 0, 0\)}}
// CHECK: {{READ of size 1 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(12, 0, 0\)}}
// CHECK: {{ #0 .* .*parallel_for_char.cpp:}}[[@LINE-7]]

sycl::free(array, Q);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

int main() {
sycl::queue Q;
constexpr std::size_t N = 123456;
constexpr std::size_t N = 12;
#if defined(MALLOC_HOST)
auto *array = sycl::malloc_host<double>(N, Q);
#elif defined(MALLOC_SHARED)
Expand All @@ -34,7 +34,7 @@ int main() {
// CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on Device USM
// CHECK-HOST: ERROR: DeviceSanitizer: out-of-bounds-access on Host USM
// CHECK-SHARED: ERROR: DeviceSanitizer: out-of-bounds-access on Shared USM
// CHECK: {{READ of size 8 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(123456, 0, 0\)}}
// CHECK: {{READ of size 8 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(12, 0, 0\)}}
// CHECK: {{ #0 .* .*parallel_for_double.cpp:}}[[@LINE-7]]

sycl::free(array, Q);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ __attribute__((noinline)) void foo(int *array, size_t i) { array[i] = 1; }
// CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on Device USM
// CHECK-HOST: ERROR: DeviceSanitizer: out-of-bounds-access on Host USM
// CHECK-SHARED: ERROR: DeviceSanitizer: out-of-bounds-access on Shared USM
// CHECK: {{WRITE of size 4 at kernel <.*MyKernel> LID\(0, 0, 0\) GID\(123, 0, 0\)}}
// CHECK: {{WRITE of size 4 at kernel <.*MyKernel> LID\(0, 0, 0\) GID\(12, 0, 0\)}}
// CHECK: {{ #0 foo\(int\*, unsigned long\) .*parallel_for_func.cpp:}}[[@LINE-5]]

int main() {
sycl::queue Q;
constexpr std::size_t N = 123;
constexpr std::size_t N = 12;
#if defined(MALLOC_HOST)
auto *array = sycl::malloc_host<int>(N, Q);
#elif defined(MALLOC_SHARED)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

int main() {
sycl::queue Q;
constexpr std::size_t N = 512;
constexpr std::size_t N = 12;
#if defined(MALLOC_HOST)
auto *array = sycl::malloc_host<int>(N, Q);
#elif defined(MALLOC_SHARED)
Expand All @@ -34,7 +34,7 @@ int main() {
// CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on Device USM
// CHECK-HOST: ERROR: DeviceSanitizer: out-of-bounds-access on Host USM
// CHECK-SHARED: ERROR: DeviceSanitizer: out-of-bounds-access on Shared USM
// CHECK: {{READ of size 4 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(512, 0, 0\)}}
// CHECK: {{READ of size 4 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(12, 0, 0\)}}
// CHECK: {{ #0 .* .*parallel_for_int.cpp:}}[[@LINE-7]]

sycl::free(array, Q);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

int main() {
sycl::queue Q;
constexpr std::size_t N = 1024;
constexpr std::size_t N = 12;
#if defined(MALLOC_HOST)
auto *array = sycl::malloc_host<short>(N, Q);
#elif defined(MALLOC_SHARED)
Expand All @@ -34,7 +34,7 @@ int main() {
// CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on Device USM
// CHECK-HOST: ERROR: DeviceSanitizer: out-of-bounds-access on Host USM
// CHECK-SHARED: ERROR: DeviceSanitizer: out-of-bounds-access on Shared USM
// CHECK: {{READ of size 2 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(1024, 0, 0\)}}
// CHECK: {{READ of size 2 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(12, 0, 0\)}}
// CHECK: {{ #0 .* .*parallel_for_short.cpp:}}[[@LINE-7]]

sycl::free(array, Q);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

int main() {
sycl::queue Q;
constexpr std::size_t N = 12345;
constexpr std::size_t N = 12;
#if defined(MALLOC_HOST)
auto *array = sycl::malloc_host<char>(N, Q);
#elif defined(MALLOC_SHARED)
Expand All @@ -33,7 +33,7 @@ int main() {
// CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on Device USM
// CHECK-HOST: ERROR: DeviceSanitizer: out-of-bounds-access on Host USM
// CHECK-SHARED: ERROR: DeviceSanitizer: out-of-bounds-access on Shared USM
// CHECK: READ of size 1 at kernel {{<.*MyKernel.*>}} LID({{.*}}, 0, 0) GID(12345, 0, 0)
// CHECK: READ of size 1 at kernel {{<.*MyKernel.*>}} LID({{.*}}, 0, 0) GID(12, 0, 0)
// CHECK: {{ #0 .* .*parallel_no_local_size.cpp:}}[[@LINE-7]]

sycl::free(array, Q);
Expand Down

0 comments on commit f203826

Please sign in to comment.