Skip to content

Commit 4e7f53a

Browse files
committed
make SC fences stronger, to be correct wrt C++20
1 parent 9542bc4 commit 4e7f53a

File tree

6 files changed

+120
-147
lines changed

6 files changed

+120
-147
lines changed

README.md

+4-3
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,10 @@ Further caveats that Miri users should be aware of:
6868
not support networking. System API support varies between targets; if you run
6969
on Windows it is a good idea to use `--target x86_64-unknown-linux-gnu` to get
7070
better support.
71-
* Weak memory emulation may [produce weak behaviors](https://github.com/rust-lang/miri/issues/2301)
72-
when `SeqCst` fences are used that are not actually permitted by the Rust memory model, and it
73-
cannot produce all behaviors possibly observable on real hardware.
71+
* Weak memory emulation is not complete: there are legal behaviors that Miri will never produce.
72+
However, Miri produces many behaviors that are hard to observe on real hardware, so it can help
73+
quite a bit in finding weak memory concurrency bugs. To be really sure about complicated atomic
74+
code, use specialized tools such as [loom](https://github.com/tokio-rs/loom).
7475

7576
Moreover, Miri fundamentally cannot ensure that your code is *sound*. [Soundness] is the property of
7677
never causing undefined behavior when invoked from arbitrary safe code, even in combination with

src/concurrency/data_race.rs

+18-14
Original file line numberDiff line numberDiff line change
@@ -111,12 +111,6 @@ pub(super) struct ThreadClockSet {
111111
/// have been released by this thread by a release fence.
112112
fence_release: VClock,
113113

114-
/// Timestamps of the last SC fence performed by each
115-
/// thread, updated when this thread performs an SC fence.
116-
/// This is never acquired into the thread's clock, it
117-
/// just limits which old writes can be seen in weak memory emulation.
118-
pub(super) fence_seqcst: VClock,
119-
120114
/// Timestamps of the last SC write performed by each
121115
/// thread, updated when this thread performs an SC fence.
122116
/// This is never acquired into the thread's clock, it
@@ -344,11 +338,13 @@ pub struct GlobalState {
344338
/// active vector-clocks catch up with the threads timestamp.
345339
reuse_candidates: RefCell<FxHashSet<VectorIdx>>,
346340

347-
/// The timestamp of last SC fence performed by each thread
341+
/// We make SC fences act like RMWs on a global location.
342+
/// To implement that, they all release and acquire into this clock.
348343
last_sc_fence: RefCell<VClock>,
349344

350-
/// The timestamp of last SC write performed by each thread
351-
last_sc_write: RefCell<VClock>,
345+
/// The timestamp of last SC write performed by each thread.
346+
/// Threads only update their own index here!
347+
last_sc_write_per_thread: RefCell<VClock>,
352348

353349
/// Track when an outdated (weak memory) load happens.
354350
pub track_outdated_loads: bool,
@@ -883,9 +879,17 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
883879
clocks.apply_release_fence();
884880
}
885881
if atomic == AtomicFenceOrd::SeqCst {
886-
data_race.last_sc_fence.borrow_mut().set_at_index(&clocks.clock, index);
887-
clocks.fence_seqcst.join(&data_race.last_sc_fence.borrow());
888-
clocks.write_seqcst.join(&data_race.last_sc_write.borrow());
882+
// Behave like an RMW on the global fence location. This takes full care of
883+
// all the SC fence requirements, including C++17 §32.4 [atomics.order]
884+
// paragraph 6 (which would limit what future reads can see). It also rules
885+
// out many legal behaviors, but we don't currently have a model that would
886+
// be more precise.
887+
let mut sc_fence_clock = data_race.last_sc_fence.borrow_mut();
888+
sc_fence_clock.join(&clocks.clock);
889+
clocks.clock.join(&sc_fence_clock);
890+
// Also establish some sort of order with the last SC write that happened, globally
891+
// (but this is only respected by future reads).
892+
clocks.write_seqcst.join(&data_race.last_sc_write_per_thread.borrow());
889893
}
890894

891895
// Increment timestamp in case of release semantics.
@@ -1555,7 +1559,7 @@ impl GlobalState {
15551559
thread_info: RefCell::new(IndexVec::new()),
15561560
reuse_candidates: RefCell::new(FxHashSet::default()),
15571561
last_sc_fence: RefCell::new(VClock::default()),
1558-
last_sc_write: RefCell::new(VClock::default()),
1562+
last_sc_write_per_thread: RefCell::new(VClock::default()),
15591563
track_outdated_loads: config.track_outdated_loads,
15601564
};
15611565

@@ -1857,7 +1861,7 @@ impl GlobalState {
18571861
// SC ATOMIC STORE rule in the paper.
18581862
pub(super) fn sc_write(&self, thread_mgr: &ThreadManager<'_>) {
18591863
let (index, clocks) = self.active_thread_state(thread_mgr);
1860-
self.last_sc_write.borrow_mut().set_at_index(&clocks.clock, index);
1864+
self.last_sc_write_per_thread.borrow_mut().set_at_index(&clocks.clock, index);
18611865
}
18621866

18631867
// SC ATOMIC READ rule in the paper.

src/concurrency/weak_memory.rs

+11-13
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,17 @@
1111
//! This implementation is not fully correct under the revised C++20 model and may generate behaviours C++20
1212
//! disallows (<https://github.com/rust-lang/miri/issues/2301>).
1313
//!
14-
//! A modification is made to the paper's model to partially address C++20 changes.
15-
//! Specifically, if an SC load reads from an atomic store of any ordering, then a later SC load cannot read from
16-
//! an earlier store in the location's modification order. This is to prevent creating a backwards S edge from the second
17-
//! load to the first, as a result of C++20's coherence-ordered before rules.
18-
//! (This seems to rule out behaviors that were actually permitted by the RC11 model that C++20
19-
//! intended to copy (https://plv.mpi-sws.org/scfix/paper.pdf); a change was introduced when
20-
//! translating the math to English. According to Viktor Vafeiadis, this difference is harmless. So
21-
//! we stick to what the standard says, and allow fewer behaviors.)
14+
//! Modifications are made to the paper's model to address C++20 changes:
15+
//! - If an SC load reads from an atomic store of any ordering, then a later SC load cannot read
16+
//! from an earlier store in the location's modification order. This is to prevent creating a
17+
//! backwards S edge from the second load to the first, as a result of C++20's coherence-ordered
18+
//! before rules. (This seems to rule out behaviors that were actually permitted by the RC11 model
19+
//! that C++20 intended to copy (<https://plv.mpi-sws.org/scfix/paper.pdf>); a change was
20+
//! introduced when translating the math to English. According to Viktor Vafeiadis, this
21+
//! difference is harmless. So we stick to what the standard says, and allow fewer behaviors.)
22+
//! - SC fences are treated like AcqRel RMWs to a global clock, to ensure they induce enough
23+
//! synchronization with the surrounding accesses. This rules out legal behavior, but it is really
24+
//! hard to be more precise here.
2225
//!
2326
//! Rust follows the C++20 memory model (except for the Consume ordering and some operations not performable through C++'s
2427
//! `std::atomic<T>` API). It is therefore possible for this implementation to generate behaviours never observable when the
@@ -340,11 +343,6 @@ impl<'tcx> StoreBuffer {
340343
// then we cannot read-from anything earlier in modification order.
341344
// C++20 §6.9.2.2 [intro.races] paragraph 16
342345
false
343-
} else if store_elem.timestamp <= clocks.fence_seqcst[store_elem.store_index] {
344-
// The current load, which may be sequenced-after an SC fence, cannot read-before
345-
// the last store sequenced-before an SC fence in another thread.
346-
// C++17 §32.4 [atomics.order] paragraph 6
347-
false
348346
} else if store_elem.timestamp <= clocks.write_seqcst[store_elem.store_index]
349347
&& store_elem.is_seqcst
350348
{

tests/fail/should-pass/cpp20_rwc_syncs.rs

-87
This file was deleted.

tests/fail/should-pass/cpp20_rwc_syncs.stderr

-20
This file was deleted.

tests/pass/0weak_memory_consistency.rs

+87-10
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
// Available: https://ss265.host.cs.st-andrews.ac.uk/papers/n3132.pdf.
2323

2424
use std::sync::atomic::Ordering::*;
25-
use std::sync::atomic::{AtomicBool, AtomicI32, fence};
25+
use std::sync::atomic::{AtomicBool, AtomicI32, Ordering, fence};
2626
use std::thread::spawn;
2727

2828
#[derive(Copy, Clone)]
@@ -45,8 +45,8 @@ fn static_atomic_bool(val: bool) -> &'static AtomicBool {
4545
}
4646

4747
// Spins until it acquires a pre-determined value.
48-
fn acquires_value(loc: &AtomicI32, val: i32) -> i32 {
49-
while loc.load(Acquire) != val {
48+
fn loads_value(loc: &AtomicI32, ord: Ordering, val: i32) -> i32 {
49+
while loc.load(ord) != val {
5050
std::hint::spin_loop();
5151
}
5252
val
@@ -69,7 +69,7 @@ fn test_corr() {
6969
}); // | |
7070
#[rustfmt::skip] // |synchronizes-with |happens-before
7171
let j3 = spawn(move || { // | |
72-
acquires_value(&y, 1); // <------------------+ |
72+
loads_value(&y, Acquire, 1); // <------------+ |
7373
x.load(Relaxed) // <----------------------------------------------+
7474
// The two reads on x are ordered by hb, so they cannot observe values
7575
// differently from the modification order. If the first read observed
@@ -94,12 +94,12 @@ fn test_wrc() {
9494
}); // | |
9595
#[rustfmt::skip] // |synchronizes-with |
9696
let j2 = spawn(move || { // | |
97-
acquires_value(&x, 1); // <------------------+ |
97+
loads_value(&x, Acquire, 1); // <------------+ |
9898
y.store(1, Release); // ---------------------+ |happens-before
9999
}); // | |
100100
#[rustfmt::skip] // |synchronizes-with |
101101
let j3 = spawn(move || { // | |
102-
acquires_value(&y, 1); // <------------------+ |
102+
loads_value(&y, Acquire, 1); // <------------+ |
103103
x.load(Relaxed) // <-----------------------------------------------+
104104
});
105105

@@ -125,7 +125,7 @@ fn test_message_passing() {
125125
#[rustfmt::skip] // |synchronizes-with | happens-before
126126
let j2 = spawn(move || { // | |
127127
let x = x; // avoid field capturing | |
128-
acquires_value(&y, 1); // <------------------+ |
128+
loads_value(&y, Acquire, 1); // <------------+ |
129129
unsafe { *x.0 } // <---------------------------------------------+
130130
});
131131

@@ -268,9 +268,6 @@ fn test_iriw_sc_rlx() {
268268
let x = static_atomic_bool(false);
269269
let y = static_atomic_bool(false);
270270

271-
x.store(false, Relaxed);
272-
y.store(false, Relaxed);
273-
274271
let a = spawn(move || x.store(true, Relaxed));
275272
let b = spawn(move || y.store(true, Relaxed));
276273
let c = spawn(move || {
@@ -290,6 +287,84 @@ fn test_iriw_sc_rlx() {
290287
assert!(c || d);
291288
}
292289

290+
// Similar to `test_iriw_sc_rlx` but with fences instead of SC accesses.
291+
fn test_cpp20_sc_fence_fix() {
292+
let x = static_atomic_bool(false);
293+
let y = static_atomic_bool(false);
294+
295+
let thread1 = spawn(|| {
296+
let a = x.load(Relaxed);
297+
fence(SeqCst);
298+
let b = y.load(Relaxed);
299+
(a, b)
300+
});
301+
302+
let thread2 = spawn(|| {
303+
x.store(true, Relaxed);
304+
});
305+
let thread3 = spawn(|| {
306+
y.store(true, Relaxed);
307+
});
308+
309+
let thread4 = spawn(|| {
310+
let c = y.load(Relaxed);
311+
fence(SeqCst);
312+
let d = x.load(Relaxed);
313+
(c, d)
314+
});
315+
316+
let (a, b) = thread1.join().unwrap();
317+
thread2.join().unwrap();
318+
thread3.join().unwrap();
319+
let (c, d) = thread4.join().unwrap();
320+
let bad = a == true && b == false && c == true && d == false;
321+
assert!(!bad);
322+
}
323+
324+
// https://plv.mpi-sws.org/scfix/paper.pdf
325+
// 2.2 Second Problem: SC Fences are Too Weak
326+
fn test_cpp20_rwc_syncs() {
327+
/*
328+
int main() {
329+
atomic_int x = 0;
330+
atomic_int y = 0;
331+
{{{ x.store(1,mo_relaxed);
332+
||| { r1=x.load(mo_relaxed).readsvalue(1);
333+
fence(mo_seq_cst);
334+
r2=y.load(mo_relaxed); }
335+
||| { y.store(1,mo_relaxed);
336+
fence(mo_seq_cst);
337+
r3=x.load(mo_relaxed); }
338+
}}}
339+
return 0;
340+
}
341+
*/
342+
let x = static_atomic(0);
343+
let y = static_atomic(0);
344+
345+
let j1 = spawn(move || {
346+
x.store(1, Relaxed);
347+
});
348+
349+
let j2 = spawn(move || {
350+
loads_value(&x, Relaxed, 1);
351+
fence(SeqCst);
352+
y.load(Relaxed)
353+
});
354+
355+
let j3 = spawn(move || {
356+
y.store(1, Relaxed);
357+
fence(SeqCst);
358+
x.load(Relaxed)
359+
});
360+
361+
j1.join().unwrap();
362+
let b = j2.join().unwrap();
363+
let c = j3.join().unwrap();
364+
365+
assert!((b, c) != (0, 0));
366+
}
367+
293368
pub fn main() {
294369
for _ in 0..50 {
295370
test_single_thread();
@@ -301,5 +376,7 @@ pub fn main() {
301376
test_sc_store_buffering();
302377
test_sync_through_rmw_and_fences();
303378
test_iriw_sc_rlx();
379+
test_cpp20_sc_fence_fix();
380+
test_cpp20_rwc_syncs();
304381
}
305382
}

0 commit comments

Comments
 (0)