From 67c883e51a2cd1ce30f36947443bc5811a499ea3 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Thu, 4 May 2017 10:53:43 +0200
Subject: [PATCH 4/4] block, bfq: improve and refactor throughput-boosting
 logic

When a queue associated with a process remains empty, there are cases
where throughput gets boosted if the device is idled to await the
arrival of a new I/O request for that queue. Currently, BFQ assumes
that one of these cases is when the device has no internal queueing
(regardless of the properties of the I/O being served). Unfortunately,
this condition has proved to be too general. So, this commit refines it
as "the device has no internal queueing and is rotational".

This refinement provides a significant throughput boost with random
I/O, on flash-based storage without internal queueing. For example, on
a HiKey board, throughput increases by up to 125%, growing, e.g., from
6.9MB/s to 15.6MB/s with two or three random readers in parallel.

This commit also refactors the code related to device idling, for the
following reason. Finding the change that provides the above large
improvement has been slightly more difficult than it had to be,
because the logic that decides whether to idle the device is still
scattered across three functions. Almost all of the logic is in the
function bfq_bfqq_may_idle, but (1) part of the decision is made in
bfq_update_idle_window, and (2) the function bfq_bfqq_must_idle may
switch off idling regardless of the output of bfq_bfqq_may_idle. In
addition, both bfq_update_idle_window and bfq_bfqq_must_idle make
their decisions as a function of parameters that are used, for similar
purposes, also in bfq_bfqq_may_idle. This commit addresses this issue
by moving all the logic into bfq_bfqq_may_idle.

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Luca Miccio <lucmiccio@gmail.com>
---
 block/bfq-iosched.c | 141 ++++++++++++++++++++++++++++------------------------
 block/bfq.h         |  12 ++---
 2 files changed, 83 insertions(+), 70 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index d15ef0b87d12..68ed264e5e3c 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -693,10 +693,10 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
 	unsigned int old_wr_coeff;
 	bool busy = bfq_already_existing && bfq_bfqq_busy(bfqq);
 
-	if (bic->saved_idle_window)
-		bfq_mark_bfqq_idle_window(bfqq);
+	if (bic->saved_has_short_ttime)
+		bfq_mark_bfqq_has_short_ttime(bfqq);
 	else
-		bfq_clear_bfqq_idle_window(bfqq);
+		bfq_clear_bfqq_has_short_ttime(bfqq);
 
 	if (bic->saved_IO_bound)
 		bfq_mark_bfqq_IO_bound(bfqq);
@@ -2060,7 +2060,7 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
 	if (!bic)
 		return;
 
-	bic->saved_idle_window = bfq_bfqq_idle_window(bfqq);
+	bic->saved_has_short_ttime = bfq_bfqq_has_short_ttime(bfqq);
 	bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq);
 	bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq);
 	bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node);
@@ -3226,9 +3226,9 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
 	}
 
 	bfq_log_bfqq(bfqd, bfqq,
-		"expire (%d, slow %d, num_disp %d, idle_win %d, weight %d)",
+		"expire (%d, slow %d, num_disp %d, short_ttime %d, weight %d)",
 		reason, slow, bfqq->dispatched,
-		bfq_bfqq_idle_window(bfqq), entity->weight);
+		bfq_bfqq_has_short_ttime(bfqq), entity->weight);
 
 	/*
 	 * Increase, decrease or leave budget unchanged according to
@@ -3310,35 +3310,55 @@ static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq)
 static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
 {
 	struct bfq_data *bfqd = bfqq->bfqd;
-	bool idling_boosts_thr, idling_boosts_thr_without_issues,
+	bool rot_without_queueing =
+		!blk_queue_nonrot(bfqd->queue) && !bfqd->hw_tag,
+		bfqq_sequential_and_IO_bound,
+		idling_boosts_thr, idling_boosts_thr_without_issues,
 		idling_needed_for_service_guarantees,
 		asymmetric_scenario;
 
 	if (bfqd->strict_guarantees)
 		return true;
 
+	/*
+	 * Idling is performed only if slice_idle > 0. In addition, we
+	 * do not idle if
+	 * (a) bfqq is async
+	 * (b) bfqq is in the idle io prio class: in this case we do
+	 *     not idle because we want to minimize the bandwidth that
+	 *     queues in this class can steal to higher-priority queues
+	 */
+	if (bfqd->bfq_slice_idle == 0 || !bfq_bfqq_sync(bfqq) ||
+	    bfq_class_idle(bfqq))
+		return false;
+
+	bfqq_sequential_and_IO_bound = !BFQQ_SEEKY(bfqq) &&
+		bfq_bfqq_IO_bound(bfqq) && bfq_bfqq_has_short_ttime(bfqq);
 	/*
 	 * The next variable takes into account the cases where idling
 	 * boosts the throughput.
 	 *
 	 * The value of the variable is computed considering, first, that
 	 * idling is virtually always beneficial for the throughput if:
-	 * (a) the device is not NCQ-capable, or
-	 * (b) regardless of the presence of NCQ, the device is rotational
-	 *     and the request pattern for bfqq is I/O-bound and sequential.
+	 * (a) the device is not NCQ-capable and rotational, or
+	 * (b) regardless of the presence of NCQ, the device is rotational and
+	 *     the request pattern for bfqq is I/O-bound and sequential, or
+	 * (c) regardless of whether it is rotational, the device is
+	 *     not NCQ-capable and the request pattern for bfqq is
+	 *     I/O-bound and sequential.
 	 *
 	 * Secondly, and in contrast to the above item (b), idling an
 	 * NCQ-capable flash-based device would not boost the
 	 * throughput even with sequential I/O; rather it would lower
 	 * the throughput in proportion to how fast the device
 	 * is. Accordingly, the next variable is true if any of the
-	 * above conditions (a) and (b) is true, and, in particular,
-	 * happens to be false if bfqd is an NCQ-capable flash-based
-	 * device.
+	 * above conditions (a), (b) or (c) is true, and, in
+	 * particular, happens to be false if bfqd is an NCQ-capable
+	 * flash-based device.
 	 */
-	idling_boosts_thr = !bfqd->hw_tag ||
-		(!blk_queue_nonrot(bfqd->queue) && bfq_bfqq_IO_bound(bfqq) &&
-		 bfq_bfqq_idle_window(bfqq));
+	idling_boosts_thr = rot_without_queueing ||
+		((!blk_queue_nonrot(bfqd->queue) || !bfqd->hw_tag) &&
+		 bfqq_sequential_and_IO_bound);
 
 	/*
 	 * The value of the next variable,
@@ -3509,12 +3529,10 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
 		asymmetric_scenario && !bfq_bfqq_in_large_burst(bfqq);
 
 	/*
-	 * We have now all the components we need to compute the return
-	 * value of the function, which is true only if both the following
-	 * conditions hold:
-	 * 1) bfqq is sync, because idling make sense only for sync queues;
-	 * 2) idling either boosts the throughput (without issues), or
-	 *    is necessary to preserve service guarantees.
+	 * We have now all the components we need to compute the
+	 * return value of the function, which is true only if idling
+	 * either boosts the throughput (without issues), or is
+	 * necessary to preserve service guarantees.
 	 */
 	bfq_log_bfqq(bfqd, bfqq, "may_idle: sync %d idling_boosts_thr %d",
 		     bfq_bfqq_sync(bfqq), idling_boosts_thr);
@@ -3526,9 +3544,8 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
 		     bfq_bfqq_IO_bound(bfqq),
 		     idling_needed_for_service_guarantees);
 
-	return bfq_bfqq_sync(bfqq) &&
-		(idling_boosts_thr_without_issues ||
-		 idling_needed_for_service_guarantees);
+	return idling_boosts_thr_without_issues ||
+		idling_needed_for_service_guarantees;
 }
 
 /*
@@ -3544,10 +3561,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
  */
 static bool bfq_bfqq_must_idle(struct bfq_queue *bfqq)
 {
-	struct bfq_data *bfqd = bfqq->bfqd;
-
-	return RB_EMPTY_ROOT(&bfqq->sort_list) && bfqd->bfq_slice_idle != 0 &&
-	       bfq_bfqq_may_idle(bfqq);
+	return RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_may_idle(bfqq);
 }
 
 /*
@@ -4006,7 +4020,6 @@ static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq,
 	case IOPRIO_CLASS_IDLE:
 		bfqq->new_ioprio_class = IOPRIO_CLASS_IDLE;
 		bfqq->new_ioprio = 7;
-		bfq_clear_bfqq_idle_window(bfqq);
 		break;
 	}
 
@@ -4070,8 +4083,14 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 	bfq_set_next_ioprio_data(bfqq, bic);
 
 	if (is_sync) {
+		/*
+		 * No need to mark as has_short_ttime if in
+		 * idle_class, because no device idling is performed
+		 * for queues in idle class
+		 */
 		if (!bfq_class_idle(bfqq))
-			bfq_mark_bfqq_idle_window(bfqq);
+			/* tentatively mark as has_short_ttime */
+			bfq_mark_bfqq_has_short_ttime(bfqq);
 		bfq_mark_bfqq_sync(bfqq);
 		bfq_mark_bfqq_just_created(bfqq);
 	} else
@@ -4206,18 +4225,19 @@ bfq_update_io_seektime(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 		blk_rq_sectors(rq) < BFQQ_SECT_THR_NONROT);
 }
 
-/*
- * Disable idle window if the process thinks too long or seeks so much that
- * it doesn't matter.
- */
-static void bfq_update_idle_window(struct bfq_data *bfqd,
-				   struct bfq_queue *bfqq,
-				   struct bfq_io_cq *bic)
+static void bfq_update_has_short_ttime(struct bfq_data *bfqd,
+				       struct bfq_queue *bfqq,
+				       struct bfq_io_cq *bic)
 {
-	int enable_idle;
+	bool has_short_ttime = true;
 
-	/* Don't idle for async or idle io prio class. */
-	if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq))
+	/*
+	 * No need to update has_short_ttime if bfqq is async or in
+	 * idle io prio class, or if bfq_slice_idle is zero, because
+	 * no device idling is performed for bfqq in this case.
+	 */
+	if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq) ||
+	    bfqd->bfq_slice_idle == 0)
 		return;
 
 	/* Idle window just restored, statistics are meaningless. */
@@ -4225,27 +4245,22 @@ static void bfq_update_idle_window(struct bfq_data *bfqd,
 			     bfqd->bfq_wr_min_idle_time))
 		return;
 
-	enable_idle = bfq_bfqq_idle_window(bfqq);
-
+	/* Think time is infinite if no process is linked to
+	 * bfqq. Otherwise check average think time to
+	 * decide whether to mark as has_short_ttime
+	 */
 	if (atomic_read(&bic->icq.ioc->active_ref) == 0 ||
-	    bfqd->bfq_slice_idle == 0 ||
-	    (bfqd->hw_tag && BFQQ_SEEKY(bfqq) &&
-	     bfqq->wr_coeff == 1))
-		enable_idle = 0;
-	else if (bfq_sample_valid(bic->ttime.ttime_samples)) {
-		if (bic->ttime.ttime_mean > bfqd->bfq_slice_idle &&
-		    bfqq->wr_coeff == 1)
-			enable_idle = 0;
-		else
-			enable_idle = 1;
-	}
-	bfq_log_bfqq(bfqd, bfqq, "update_idle_window: enable_idle %d",
-		     enable_idle);
+	    (bfq_sample_valid(bic->ttime.ttime_samples) &&
+	     bic->ttime.ttime_mean > bfqd->bfq_slice_idle))
+		has_short_ttime = false;
+
+	bfq_log_bfqq(bfqd, bfqq, "update_has_short_ttime: has_short_ttime %d",
+		     has_short_ttime);
 
-	if (enable_idle)
-		bfq_mark_bfqq_idle_window(bfqq);
+	if (has_short_ttime)
+		bfq_mark_bfqq_has_short_ttime(bfqq);
 	else
-		bfq_clear_bfqq_idle_window(bfqq);
+		bfq_clear_bfqq_has_short_ttime(bfqq);
 }
 
 /*
@@ -4261,14 +4276,12 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 		bfqq->meta_pending++;
 
 	bfq_update_io_thinktime(bfqd, bic);
+	bfq_update_has_short_ttime(bfqd, bfqq, bic);
 	bfq_update_io_seektime(bfqd, bfqq, rq);
-	if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 ||
-	    !BFQQ_SEEKY(bfqq))
-		bfq_update_idle_window(bfqd, bfqq, bic);
 
 	bfq_log_bfqq(bfqd, bfqq,
-		     "rq_enqueued: idle_window=%d (seeky %d)",
-		     bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq));
+		     "rq_enqueued: has_short_ttime=%d (seeky %d)",
+		     bfq_bfqq_has_short_ttime(bfqq), BFQQ_SEEKY(bfqq));
 
 	bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
 
diff --git a/block/bfq.h b/block/bfq.h
index 1fa6ed3c604e..231277cff67d 100644
--- a/block/bfq.h
+++ b/block/bfq.h
@@ -349,11 +349,11 @@ struct bfq_io_cq {
 #endif
 
 	/*
-	 * Snapshot of the idle window before merging; taken to
-	 * remember this value while the queue is merged, so as to be
-	 * able to restore it in case of split.
+	 * Snapshot of the has_short_time flag before merging; taken
+	 * to remember its value while the queue is merged, so as to
+	 * be able to restore it in case of split.
 	 */
-	bool saved_idle_window;
+	bool saved_has_short_ttime;
 	/*
 	 * Same purpose as the previous two fields for the I/O bound
 	 * classification of a queue.
@@ -610,7 +610,7 @@ enum bfqq_state_flags {
 	 */
 	BFQ_BFQQ_FLAG_must_alloc,	/* must be allowed rq alloc */
 	BFQ_BFQQ_FLAG_fifo_expire,	/* FIFO checked in this slice */
-	BFQ_BFQQ_FLAG_idle_window,	/* slice idling enabled */
+	BFQ_BFQQ_FLAG_has_short_ttime,	/* queue has a short think time */
 	BFQ_BFQQ_FLAG_sync,		/* synchronous queue */
 	BFQ_BFQQ_FLAG_IO_bound,		/*
 					 * bfqq has timed-out at least once
@@ -649,7 +649,7 @@ BFQ_BFQQ_FNS(wait_request);
 BFQ_BFQQ_FNS(non_blocking_wait_rq);
 BFQ_BFQQ_FNS(must_alloc);
 BFQ_BFQQ_FNS(fifo_expire);
-BFQ_BFQQ_FNS(idle_window);
+BFQ_BFQQ_FNS(has_short_ttime);
 BFQ_BFQQ_FNS(sync);
 BFQ_BFQQ_FNS(IO_bound);
 BFQ_BFQQ_FNS(in_large_burst);
-- 
2.14.1