Skip to content

Commit a6624f1

Browse files
authored
Remove delay-based backpressure in favor of explicit queue limits (#1515)
Right now, we have a backpressure delay that's a function of `(job count, bytes in flight)`. The actual queue length is set implicitly by the shape of that function (e.g. its gain) and the actual downstairs IO time. If a downstairs slows down due to load, then the queue length has to grow to compensate! Having an implicitly-defined queue length is tricky, because it's not well controlled. Queue length also affects latency in subtle ways, so directly controlling the queue length would be helpful. This PR removes the delay-based backpressure implementation in favor of a simple pair of semaphores: there are a certain number of job and byte permits available, and the `Guest` has to acquire them before sending a job to the `Upstairs`. In other words, writes will be accepted as fast as possible _until_ we run out of permits; we will then shift to a one-in, one-out operation mode where jobs have to be completed by the downstairs before a new job is submitted. The maximum queue length (either in jobs or bytes) is well known and set by global constants. Architecturally, we replace the `BackpressureGuard` with a new `IOLimitGuard`, which is claimed by the `Guest` and stored in relevant `BlockOp` variants (then moved into the `DownstairsIO`). It still uses RAII to automatically release permits when dropped, and we still manually drop it early (once a job is complete on a particular downstairs).
1 parent 61b1df7 commit a6624f1

13 files changed

+551
-680
lines changed

cmon/src/main.rs

-8
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ enum DtraceDisplay {
4141
Replaced,
4242
ExtentLiveRepair,
4343
ExtentLimit,
44-
Backpressure,
4544
NextJobId,
4645
JobDelta,
4746
DsDelay,
@@ -61,7 +60,6 @@ impl fmt::Display for DtraceDisplay {
6160
DtraceDisplay::Replaced => write!(f, "replaced"),
6261
DtraceDisplay::ExtentLiveRepair => write!(f, "extent_live_repair"),
6362
DtraceDisplay::ExtentLimit => write!(f, "extent_under_repair"),
64-
DtraceDisplay::Backpressure => write!(f, "backpressure"),
6563
DtraceDisplay::NextJobId => write!(f, "next_job_id"),
6664
DtraceDisplay::JobDelta => write!(f, "job_delta"),
6765
DtraceDisplay::DsDelay => write!(f, "ds_delay"),
@@ -229,9 +227,6 @@ fn print_dtrace_header(dd: &[DtraceDisplay]) {
229227
DtraceDisplay::ExtentLimit => {
230228
print!(" {:>4}", "EXTL");
231229
}
232-
DtraceDisplay::Backpressure => {
233-
print!(" {:>5}", "BAKPR");
234-
}
235230
DtraceDisplay::NextJobId => {
236231
print!(" {:>7}", "NEXTJOB");
237232
}
@@ -348,9 +343,6 @@ fn print_dtrace_row(d_out: Arg, dd: &[DtraceDisplay], last_job_id: &mut u64) {
348343
DtraceDisplay::ExtentLimit => {
349344
print!(" {:4}", d_out.ds_extent_limit);
350345
}
351-
DtraceDisplay::Backpressure => {
352-
print!(" {:>5}", d_out.up_backpressure);
353-
}
354346
DtraceDisplay::NextJobId => {
355347
print!(" {:>7}", d_out.next_job_id);
356348
}

tools/dtrace/single_up_info.d

+1-2
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ crucible_upstairs*:::up-status
4141
* I'm not very happy about this, but if we don't print it all on one
4242
* line, then multiple sessions will clobber each others output.
4343
*/
44-
printf("%8s %17s %17s %17s %5s %5s %9s %5s %10s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s\n",
44+
printf("%8s %17s %17s %17s %5s %5s %5s %10s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s\n",
4545

4646
substr(session_id, 0, 8),
4747

@@ -62,7 +62,6 @@ crucible_upstairs*:::up-status
6262
* Job ID delta and backpressure
6363
*/
6464
json(copyinstr(arg1), "ok.next_job_id"),
65-
json(copyinstr(arg1), "ok.up_backpressure"),
6665
json(copyinstr(arg1), "ok.write_bytes_out"),
6766

6867
/*

tools/dtrace/sled_upstairs_info.d

+1-2
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ crucible_upstairs*:::up-status
4646
* we don't print it all on one line, then multiple sessions will
4747
* clobber each others output.
4848
*/
49-
printf("%5d %8s %17s %17s %17s %5s %5s %9s %5s %10s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s\n",
49+
printf("%5d %8s %17s %17s %17s %5s %5s %5s %10s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s\n",
5050

5151
pid,
5252
substr(session_id, 0, 8),
@@ -62,7 +62,6 @@ crucible_upstairs*:::up-status
6262

6363
/* Job ID and backpressure */
6464
json(copyinstr(arg1), "ok.next_job_id"),
65-
json(copyinstr(arg1), "ok.up_backpressure"),
6665
json(copyinstr(arg1), "ok.write_bytes_out"),
6766

6867
/* In progress jobs on the work list for each downstairs */

tools/dtrace/upstairs_info.d

+2-3
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ tick-1s
2121
{
2222
printf("%6s ", "PID");
2323
printf("%17s %17s %17s", "DS STATE 0", "DS STATE 1", "DS STATE 2");
24-
printf(" %5s %5s %9s %5s", "UPW", "DSW", "JOBID", "BAKPR");
24+
printf(" %5s %5s %9s", "UPW", "DSW", "JOBID");
2525
printf(" %10s", "WRITE_BO");
2626
printf(" %5s %5s %5s", "IP0", "IP1", "IP2");
2727
printf(" %5s %5s %5s", "D0", "D1", "D2");
@@ -49,10 +49,9 @@ crucible_upstairs*:::up-status
4949
printf(" %5s", json(copyinstr(arg1), "ok.ds_count"));
5050

5151
/*
52-
* Job ID and backpressure
52+
* Job ID and outstanding bytes
5353
*/
5454
printf(" %9s", json(copyinstr(arg1), "ok.next_job_id"));
55-
printf(" %5s", json(copyinstr(arg1), "ok.up_backpressure"));
5655
printf(" %10s", json(copyinstr(arg1), "ok.write_bytes_out"));
5756

5857
/*

0 commit comments

Comments
 (0)