Skip to content

Commit 27d0861

Browse files
committed
Revert "[OpenMP] Replace AMDGPU fences with generic scoped fences (#119619)"
This reverts commit f4ee5a6.
1 parent 363d50e commit 27d0861

File tree

2 files changed

+39
-13
lines changed

2 files changed

+39
-13
lines changed

Diff for: offload/DeviceRTL/include/Synchronization.h

-8
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,6 @@ enum OrderingTy {
2626
seq_cst = __ATOMIC_SEQ_CST,
2727
};
2828

29-
enum ScopeTy {
30-
system = __MEMORY_SCOPE_SYSTEM,
31-
device_ = __MEMORY_SCOPE_DEVICE,
32-
workgroup = __MEMORY_SCOPE_WRKGRP,
33-
wavefront = __MEMORY_SCOPE_WVFRNT,
34-
single = __MEMORY_SCOPE_SINGLE,
35-
};
36-
3729
enum MemScopeTy {
3830
all, // All threads on all devices
3931
device, // All threads on the device

Diff for: offload/DeviceRTL/src/Synchronization.cpp

+39-5
Original file line numberDiff line numberDiff line change
@@ -232,16 +232,50 @@ void namedBarrier() {
232232
fence::team(atomic::release);
233233
}
234234

235+
// Sema checking of amdgcn_fence is aggressive. The intention is to patch clang
236+
// so that it is usable within a template environment and so that a runtime
237+
// value of the memory order is expanded to this switch within clang/llvm.
235238
void fenceTeam(atomic::OrderingTy Ordering) {
236-
return __scoped_atomic_thread_fence(Ordering, atomic::workgroup);
239+
switch (Ordering) {
240+
default:
241+
__builtin_unreachable();
242+
case atomic::aquire:
243+
return __builtin_amdgcn_fence(atomic::aquire, "workgroup");
244+
case atomic::release:
245+
return __builtin_amdgcn_fence(atomic::release, "workgroup");
246+
case atomic::acq_rel:
247+
return __builtin_amdgcn_fence(atomic::acq_rel, "workgroup");
248+
case atomic::seq_cst:
249+
return __builtin_amdgcn_fence(atomic::seq_cst, "workgroup");
250+
}
237251
}
238-
239252
void fenceKernel(atomic::OrderingTy Ordering) {
240-
return __scoped_atomic_thread_fence(Ordering, atomic::device_);
253+
switch (Ordering) {
254+
default:
255+
__builtin_unreachable();
256+
case atomic::aquire:
257+
return __builtin_amdgcn_fence(atomic::aquire, "agent");
258+
case atomic::release:
259+
return __builtin_amdgcn_fence(atomic::release, "agent");
260+
case atomic::acq_rel:
261+
return __builtin_amdgcn_fence(atomic::acq_rel, "agent");
262+
case atomic::seq_cst:
263+
return __builtin_amdgcn_fence(atomic::seq_cst, "agent");
264+
}
241265
}
242-
243266
void fenceSystem(atomic::OrderingTy Ordering) {
244-
return __scoped_atomic_thread_fence(Ordering, atomic::system);
267+
switch (Ordering) {
268+
default:
269+
__builtin_unreachable();
270+
case atomic::aquire:
271+
return __builtin_amdgcn_fence(atomic::aquire, "");
272+
case atomic::release:
273+
return __builtin_amdgcn_fence(atomic::release, "");
274+
case atomic::acq_rel:
275+
return __builtin_amdgcn_fence(atomic::acq_rel, "");
276+
case atomic::seq_cst:
277+
return __builtin_amdgcn_fence(atomic::seq_cst, "");
278+
}
245279
}
246280

247281
void syncWarp(__kmpc_impl_lanemask_t) {

0 commit comments

Comments
 (0)