From 81aaea92bcd825975fbed1b7c2f85e259b29b6b8 Mon Sep 17 00:00:00 2001
From: Masahito S <firelzrd@gmail.com>
Date: Mon, 20 Jan 2025 07:24:54 +0900
Subject: [PATCH] linux6.6.67-bore5.9.6

---
 include/linux/sched.h      |  21 +-
 include/linux/sched/bore.h |  40 ++++
 init/Kconfig               |  17 ++
 kernel/Kconfig.hz          |  17 ++
 kernel/fork.c              |  10 +
 kernel/sched/Makefile      |   1 +
 kernel/sched/bore.c        | 443 +++++++++++++++++++++++++++++++++++++
 kernel/sched/core.c        |   6 +
 kernel/sched/debug.c       |  61 ++++-
 kernel/sched/fair.c        |  87 +++++++-
 kernel/sched/features.h    |   4 +
 kernel/sched/sched.h       |   9 +
 12 files changed, 710 insertions(+), 6 deletions(-)
 create mode 100644 include/linux/sched/bore.h
 create mode 100644 kernel/sched/bore.c

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4809f27b52..ce5c55cbd9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -548,6 +548,15 @@ struct sched_statistics {
 #endif /* CONFIG_SCHEDSTATS */
 } ____cacheline_aligned;
 
+#ifdef CONFIG_SCHED_BORE
+struct sched_burst_cache {
+	u8			score;
+	u32			count;
+	u64			timestamp;
+	spinlock_t		lock;
+};
+#endif // CONFIG_SCHED_BORE
+
 struct sched_entity {
 	/* For load-balancing: */
 	struct load_weight		load;
@@ -556,12 +565,22 @@ struct sched_entity {
 	u64				min_deadline;
 
 	struct list_head		group_node;
-	unsigned int			on_rq;
+	unsigned char			on_rq;
+	unsigned char			rel_deadline;
 
 	u64				exec_start;
 	u64				sum_exec_runtime;
 	u64				prev_sum_exec_runtime;
 	u64				vruntime;
+#ifdef CONFIG_SCHED_BORE
+	u64				burst_time;
+	u8				prev_burst_penalty;
+	u8				curr_burst_penalty;
+	u8				burst_penalty;
+	u8				burst_score;
+	struct sched_burst_cache	child_burst;
+	struct sched_burst_cache	group_burst;
+#endif // CONFIG_SCHED_BORE
 	s64				vlag;
 	u64				slice;
 
diff --git a/include/linux/sched/bore.h b/include/linux/sched/bore.h
new file mode 100644
index 0000000000..32f15d7c1f
--- /dev/null
+++ b/include/linux/sched/bore.h
@@ -0,0 +1,40 @@
+
+#include <linux/sched.h>
+#include <linux/sched/cputime.h>
+
+#ifndef _LINUX_SCHED_BORE_H
+#define _LINUX_SCHED_BORE_H
+#define SCHED_BORE_VERSION "5.9.6"
+
+#ifdef CONFIG_SCHED_BORE
+extern u8   __read_mostly sched_bore;
+extern u8   __read_mostly sched_burst_exclude_kthreads;
+extern u8   __read_mostly sched_burst_smoothness_long;
+extern u8   __read_mostly sched_burst_smoothness_short;
+extern u8   __read_mostly sched_burst_fork_atavistic;
+extern u8   __read_mostly sched_burst_parity_threshold;
+extern u8   __read_mostly sched_burst_penalty_offset;
+extern uint __read_mostly sched_burst_penalty_scale;
+extern uint __read_mostly sched_burst_cache_stop_count;
+extern uint __read_mostly sched_burst_cache_lifetime;
+extern uint __read_mostly sched_deadline_boost_mask;
+
+extern void update_burst_score(struct sched_entity *se);
+extern void update_burst_penalty(struct sched_entity *se);
+
+extern void restart_burst(struct sched_entity *se);
+extern void restart_burst_rescale_deadline(struct sched_entity *se);
+
+extern int sched_bore_update_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp, loff_t *ppos);
+
+extern void sched_clone_bore(
+	struct task_struct *p, struct task_struct *parent, u64 clone_flags, u64 now);
+
+extern void reset_task_bore(struct task_struct *p);
+extern void sched_bore_init(void);
+
+extern void reweight_entity(
+	struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight);
+#endif // CONFIG_SCHED_BORE
+#endif // _LINUX_SCHED_BORE_H
diff --git a/init/Kconfig b/init/Kconfig
index 60ed7713b5..629dac0eb4 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1276,6 +1276,23 @@ config CHECKPOINT_RESTORE
 
 	  If unsure, say N here.
 
+config SCHED_BORE
+	bool "Burst-Oriented Response Enhancer"
+	default y
+	help
+	  In desktop and mobile computing, one might prefer interactive
+	  tasks to stay responsive no matter what runs in the background.
+
+	  Enabling this kernel feature modifies the scheduler to distinguish
+	  tasks by their burst time (runtime since they last slept or
+	  yielded) and to prioritize those that behave less bursty.
+	  Such tasks usually include window compositors, widget backends,
+	  terminal emulators, video playback, games and so on.
+	  At a small cost to scheduling fairness, this may improve
+	  responsiveness, especially under heavy background workloads.
+
+	  If unsure, say Y here.
+
 config SCHED_AUTOGROUP
 	bool "Automatic process group scheduling"
 	select CGROUPS
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 38ef6d0688..253c566b59 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -55,5 +55,22 @@ config HZ
 	default 300 if HZ_300
 	default 1000 if HZ_1000
 
+config MIN_BASE_SLICE_NS
+	int "Default value for min_base_slice_ns"
+	default 2000000
+	help
+	  The BORE Scheduler automatically calculates the optimal base
+	  slice for the configured HZ using the following equation:
+
+	  base_slice_ns =
+	    1000000000/HZ * DIV_ROUNDUP(min_base_slice_ns, 1000000000/HZ)
+
+	  This option sets the default lower bound limit of the base slice
+	  to prevent the loss of task throughput due to overscheduling.
+
+	  Setting this value too high can cause the system to boot with
+	  an unnecessarily large base slice, resulting in high scheduling
+	  latency and poor system responsiveness.
+
 config SCHED_HRTICK
 	def_bool HIGH_RES_TIMERS
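
As a worked illustration of the rounding rule described in the help text above, the stand-alone user-space sketch below (not part of the patch; base_slice_ns() is a hypothetical helper) computes the resulting base slice for a few common HZ choices, assuming the default min_base_slice_ns of 2000000.

#include <stdio.h>
#include <stdint.h>

/* Smallest whole multiple of the tick period that is >= min_base_slice,
 * mirroring: base_slice_ns = 1000000000/HZ * DIV_ROUNDUP(min_base_slice_ns, 1000000000/HZ) */
static uint64_t base_slice_ns(uint64_t hz, uint64_t min_base_slice)
{
	uint64_t tick_ns = 1000000000ULL / hz;
	uint64_t ticks = (min_base_slice + tick_ns - 1) / tick_ns; /* DIV_ROUNDUP */
	if (ticks < 1)
		ticks = 1;
	return tick_ns * ticks;
}

int main(void)
{
	printf("HZ=1000 -> %llu ns\n", (unsigned long long)base_slice_ns(1000, 2000000)); /* 2000000 */
	printf("HZ=300  -> %llu ns\n", (unsigned long long)base_slice_ns(300, 2000000));  /* 3333333 */
	printf("HZ=250  -> %llu ns\n", (unsigned long long)base_slice_ns(250, 2000000));  /* 4000000 */
	return 0;
}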
diff --git a/kernel/fork.c b/kernel/fork.c
index 23efaa2c42..d8f36a8c2e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -101,12 +101,18 @@
 #include <linux/iommu.h>
 #include <linux/tick.h>
 
+#ifdef CONFIG_USER_NS
+#include <linux/user_namespace.h>
+#endif
+
 #include <asm/pgalloc.h>
 #include <linux/uaccess.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include <linux/sched/bore.h>
+
 #include <trace/events/sched.h>
 
 #define CREATE_TRACE_POINTS
@@ -2619,6 +2625,10 @@ __latent_entropy struct task_struct *copy_process(
 	p->start_time = ktime_get_ns();
 	p->start_boottime = ktime_get_boottime_ns();
 
+#ifdef CONFIG_SCHED_BORE
+	if (likely(p->pid))
+		sched_clone_bore(p, current, clone_flags, p->start_time);
+#endif // CONFIG_SCHED_BORE
 	/*
 	 * Make it visible to the rest of the system, but dont wake it up yet.
 	 * Need tasklist lock for parent etc handling!
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 976092b7bd..293aad6754 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -32,3 +32,4 @@ obj-y += core.o
 obj-y += fair.o
 obj-y += build_policy.o
 obj-y += build_utility.o
+obj-y += bore.o
diff --git a/kernel/sched/bore.c b/kernel/sched/bore.c
new file mode 100644
index 0000000000..b63dc1759e
--- /dev/null
+++ b/kernel/sched/bore.c
@@ -0,0 +1,443 @@
+/*
+ * Burst-Oriented Response Enhancer (BORE) CPU Scheduler
+ * Copyright (C) 2021-2024 Masahito Suzuki <firelzrd@gmail.com>
+ */
+#include <linux/cpuset.h>
+#include <linux/sched/task.h>
+#include <linux/sched/bore.h>
+#include "sched.h"
+
+#ifdef CONFIG_SCHED_BORE
+u8   __read_mostly sched_bore = 1;
+u8   __read_mostly sched_burst_exclude_kthreads = 1;
+u8   __read_mostly sched_burst_smoothness_long = 1;
+u8   __read_mostly sched_burst_smoothness_short = 0;
+u8   __read_mostly sched_burst_fork_atavistic = 2;
+u8   __read_mostly sched_burst_parity_threshold = 2;
+u8   __read_mostly sched_burst_penalty_offset = 24;
+uint __read_mostly sched_burst_penalty_scale = 1280;
+uint __read_mostly sched_burst_cache_stop_count = 64;
+uint __read_mostly sched_burst_cache_lifetime = 75000000;
+uint __read_mostly sched_deadline_boost_mask = ENQUEUE_INITIAL
+	| ENQUEUE_WAKEUP;
+static int __maybe_unused sixty_four = 64;
+static int __maybe_unused maxval_u8 = 255;
+static int __maybe_unused maxval_12_bits = 4095;
+
+#define MAX_BURST_PENALTY (39U <<2)
+
+static inline u32 log2plus1_u64_u32f8(u64 v) {
+	u32 integral = fls64(v);
+	u8  fractional = v << (64 - integral) >> 55;
+	return integral << 8 | fractional;
+}
+
+static inline u32 calc_burst_penalty(u64 burst_time) {
+	u32 greed, tolerance, penalty, scaled_penalty;
+
+	greed = log2plus1_u64_u32f8(burst_time);
+	tolerance = sched_burst_penalty_offset << 8;
+	penalty = max(0, (s32)(greed - tolerance));
+	scaled_penalty = penalty * sched_burst_penalty_scale >> 16;
+
+	return min(MAX_BURST_PENALTY, scaled_penalty);
+}
+
+static inline u64 __scale_slice(u64 delta, u8 score)
+{return mul_u64_u32_shr(delta, sched_prio_to_wmult[score], 22);}
+
+static inline u64 __unscale_slice(u64 delta, u8 score)
+{return mul_u64_u32_shr(delta, sched_prio_to_weight[score], 10);}
+
+static void reweight_task_by_prio(struct task_struct *p, int prio) {
+	struct sched_entity *se = &p->se;
+	unsigned long weight = scale_load(sched_prio_to_weight[prio]);
+
+	reweight_entity(cfs_rq_of(se), se, weight);
+	se->load.inv_weight = sched_prio_to_wmult[prio];
+}
+
+static inline u8 effective_prio(struct task_struct *p) {
+	u8 prio = p->static_prio - MAX_RT_PRIO;
+	if (likely(sched_bore))
+		prio += p->se.burst_score;
+	return min(39, prio);
+}
+
+void update_burst_score(struct sched_entity *se) {
+	if (!entity_is_task(se)) return;
+	struct task_struct *p = task_of(se);
+	u8 prev_prio = effective_prio(p);
+
+	u8 burst_score = 0;
+	if (!((p->flags & PF_KTHREAD) && likely(sched_burst_exclude_kthreads)))
+		burst_score = se->burst_penalty >> 2;
+	se->burst_score = burst_score;
+
+	u8 new_prio = effective_prio(p);
+	if (new_prio != prev_prio)
+		reweight_task_by_prio(p, new_prio);
+}
+
+void update_burst_penalty(struct sched_entity *se) {
+	se->curr_burst_penalty = calc_burst_penalty(se->burst_time);
+	se->burst_penalty = max(se->prev_burst_penalty, se->curr_burst_penalty);
+	update_burst_score(se);
+}
+
+static inline u32 binary_smooth(u32 new, u32 old) {
+	int increment = new - old;
+	return (0 <= increment)?
+		old + ( increment >> (int)sched_burst_smoothness_long):
+		old - (-increment >> (int)sched_burst_smoothness_short);
+}
+
+static void revolve_burst_penalty(struct sched_entity *se) {
+	se->prev_burst_penalty =
+		binary_smooth(se->curr_burst_penalty, se->prev_burst_penalty);
+	se->burst_time = 0;
+	se->curr_burst_penalty = 0;
+}
+
+inline void restart_burst(struct sched_entity *se) {
+	revolve_burst_penalty(se);
+	se->burst_penalty = se->prev_burst_penalty;
+	update_burst_score(se);
+}
+
+void restart_burst_rescale_deadline(struct sched_entity *se) {
+	s64 vscaled, wremain, vremain = se->deadline - se->vruntime;
+	struct task_struct *p = task_of(se);
+	u8 prev_prio = effective_prio(p);
+	restart_burst(se);
+	u8 new_prio = effective_prio(p);
+	if (prev_prio > new_prio) {
+		wremain = __unscale_slice(abs(vremain), prev_prio);
+		vscaled = __scale_slice(wremain, new_prio);
+		if (unlikely(vremain < 0))
+			vscaled = -vscaled;
+		se->deadline = se->vruntime + vscaled;
+	}
+}
+
+static inline bool task_is_bore_eligible(struct task_struct *p)
+{return p && p->sched_class == &fair_sched_class && !p->exit_state;}
+
+static void reset_task_weights_bore(void) {
+	struct task_struct *task;
+	struct rq *rq;
+	struct rq_flags rf;
+
+	write_lock_irq(&tasklist_lock);
+	for_each_process(task) {
+		if (!task_is_bore_eligible(task)) continue;
+		rq = task_rq(task);
+		rq_pin_lock(rq, &rf);
+		update_rq_clock(rq);
+		reweight_task_by_prio(task, effective_prio(task));
+		rq_unpin_lock(rq, &rf);
+	}
+	write_unlock_irq(&tasklist_lock);
+}
+
+int sched_bore_update_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp, loff_t *ppos) {
+	int ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
+	if (ret || !write)
+		return ret;
+
+	reset_task_weights_bore();
+
+	return 0;
+}
+
+#define for_each_child(p, t) \
+	list_for_each_entry(t, &(p)->children, sibling)
+
+static u32 count_entries_upto2(struct list_head *head) {
+	struct list_head *next = head->next;
+	return (next != head) + (next->next != head);
+}
+
+static inline void init_task_burst_cache_lock(struct task_struct *p) {
+	spin_lock_init(&p->se.child_burst.lock);
+	spin_lock_init(&p->se.group_burst.lock);
+}
+
+static inline bool burst_cache_expired(struct sched_burst_cache *bc, u64 now)
+{return (s64)(bc->timestamp + sched_burst_cache_lifetime - now) < 0;}
+
+static void update_burst_cache(struct sched_burst_cache *bc,
+		struct task_struct *p, u32 cnt, u32 sum, u64 now) {
+	u8 avg = cnt ? sum / cnt : 0;
+	bc->score = max(avg, p->se.burst_penalty);
+	bc->count = cnt;
+	bc->timestamp = now;
+}
+
+static inline void update_child_burst_direct(struct task_struct *p, u64 now) {
+	u32 cnt = 0, sum = 0;
+	struct task_struct *child;
+
+	for_each_child(p, child) {
+		if (!task_is_bore_eligible(child)) continue;
+		cnt++;
+		sum += child->se.burst_penalty;
+	}
+
+	update_burst_cache(&p->se.child_burst, p, cnt, sum, now);
+}
+
+static inline u8 inherit_burst_direct(
+	struct task_struct *p, u64 now, u64 clone_flags) {
+	struct task_struct *parent = p;
+	struct sched_burst_cache *bc;
+
+	if (clone_flags & CLONE_PARENT)
+		parent = parent->real_parent;
+
+	bc = &parent->se.child_burst;
+	guard(spinlock)(&bc->lock);
+	if (burst_cache_expired(bc, now))
+		update_child_burst_direct(parent, now);
+
+	return bc->score;
+}
+
+static void update_child_burst_topological(
+	struct task_struct *p, u64 now, u32 depth, u32 *acnt, u32 *asum) {
+	u32 cnt = 0, dcnt = 0, sum = 0;
+	struct task_struct *child, *dec;
+	struct sched_burst_cache *bc __maybe_unused;
+
+	for_each_child(p, child) {
+		dec = child;
+		while ((dcnt = count_entries_upto2(&dec->children)) == 1)
+			dec = list_first_entry(&dec->children, struct task_struct, sibling);
+
+		if (!dcnt || !depth) {
+			if (!task_is_bore_eligible(dec)) continue;
+			cnt++;
+			sum += dec->se.burst_penalty;
+			continue;
+		}
+		bc = &dec->se.child_burst;
+		spin_lock(&bc->lock);
+		if (!burst_cache_expired(bc, now)) {
+			cnt += bc->count;
+			sum += (u32)bc->score * bc->count;
+			if (sched_burst_cache_stop_count <= cnt) {
+				spin_unlock(&bc->lock);
+				break;
+			}
+			spin_unlock(&bc->lock);
+			continue;
+		}
+		update_child_burst_topological(dec, now, depth - 1, &cnt, &sum);
+		spin_unlock(&bc->lock);
+	}
+
+	update_burst_cache(&p->se.child_burst, p, cnt, sum, now);
+	*acnt += cnt;
+	*asum += sum;
+}
+
+static inline u8 inherit_burst_topological(
+	struct task_struct *p, u64 now, u64 clone_flags) {
+	struct task_struct *anc = p;
+	struct sched_burst_cache *bc;
+	u32 cnt = 0, sum = 0;
+	u32 base_child_cnt = 0;
+
+	if (clone_flags & CLONE_PARENT) {
+		anc = anc->real_parent;
+		base_child_cnt = 1;
+	}
+
+	for (struct task_struct *next;
+		anc != (next = anc->real_parent) &&
+			count_entries_upto2(&anc->children) <= base_child_cnt;) {
+		anc = next;
+		base_child_cnt = 1;
+	}
+
+	bc = &anc->se.child_burst;
+	guard(spinlock)(&bc->lock);
+	if (burst_cache_expired(bc, now))
+		update_child_burst_topological(
+			anc, now, sched_burst_fork_atavistic - 1, &cnt, &sum);
+
+	return bc->score;
+}
+
+static inline void update_tg_burst(struct task_struct *p, u64 now) {
+	struct task_struct *task;
+	u32 cnt = 0, sum = 0;
+
+	for_each_thread(p, task) {
+		if (!task_is_bore_eligible(task)) continue;
+		cnt++;
+		sum += task->se.burst_penalty;
+	}
+
+	update_burst_cache(&p->se.group_burst, p, cnt, sum, now);
+}
+
+static inline u8 inherit_burst_tg(struct task_struct *p, u64 now) {
+	struct task_struct *parent = rcu_dereference(p->group_leader);
+	struct sched_burst_cache *bc = &parent->se.group_burst;
+	guard(spinlock)(&bc->lock);
+	if (burst_cache_expired(bc, now))
+		update_tg_burst(parent, now);
+
+	return bc->score;
+}
+
+void sched_clone_bore(struct task_struct *p,
+		struct task_struct *parent, u64 clone_flags, u64 now) {
+	struct sched_entity *se = &p->se;
+	u8 penalty;
+
+	init_task_burst_cache_lock(p);
+
+	if (!task_is_bore_eligible(p)) return;
+
+	if (clone_flags & CLONE_THREAD) {
+		rcu_read_lock();
+		penalty = inherit_burst_tg(parent, now);
+		rcu_read_unlock();
+	} else {
+		read_lock(&tasklist_lock);
+		penalty = likely(sched_burst_fork_atavistic) ?
+			inherit_burst_topological(parent, now, clone_flags):
+			inherit_burst_direct(parent, now, clone_flags);
+		read_unlock(&tasklist_lock);
+	}
+
+	revolve_burst_penalty(se);
+	se->burst_penalty = se->prev_burst_penalty =
+		max(se->prev_burst_penalty, penalty);
+	se->child_burst.timestamp = 0;
+	se->group_burst.timestamp = 0;
+}
+
+void reset_task_bore(struct task_struct *p) {
+	p->se.burst_time = 0;
+	p->se.prev_burst_penalty = 0;
+	p->se.curr_burst_penalty = 0;
+	p->se.burst_penalty = 0;
+	p->se.burst_score = 0;
+	memset(&p->se.child_burst, 0, sizeof(struct sched_burst_cache));
+	memset(&p->se.group_burst, 0, sizeof(struct sched_burst_cache));
+}
+
+void __init sched_bore_init(void) {
+	printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification %s by Masahito Suzuki", SCHED_BORE_VERSION);
+	reset_task_bore(&init_task);
+	init_task_burst_cache_lock(&init_task);
+}
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table sched_bore_sysctls[] = {
+	{
+		.procname	= "sched_bore",
+		.data		= &sched_bore,
+		.maxlen		= sizeof(u8),
+		.mode		= 0644,
+		.proc_handler	= sched_bore_update_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "sched_burst_exclude_kthreads",
+		.data		= &sched_burst_exclude_kthreads,
+		.maxlen		= sizeof(u8),
+		.mode		= 0644,
+		.proc_handler	= proc_dou8vec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "sched_burst_smoothness_long",
+		.data		= &sched_burst_smoothness_long,
+		.maxlen		= sizeof(u8),
+		.mode		= 0644,
+		.proc_handler	= proc_dou8vec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "sched_burst_smoothness_short",
+		.data		= &sched_burst_smoothness_short,
+		.maxlen		= sizeof(u8),
+		.mode		= 0644,
+		.proc_handler	= proc_dou8vec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "sched_burst_fork_atavistic",
+		.data		= &sched_burst_fork_atavistic,
+		.maxlen		= sizeof(u8),
+		.mode		= 0644,
+		.proc_handler	= proc_dou8vec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_THREE,
+	},
+	{
+		.procname	= "sched_burst_parity_threshold",
+		.data		= &sched_burst_parity_threshold,
+		.maxlen		= sizeof(u8),
+		.mode		= 0644,
+		.proc_handler	= proc_dou8vec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &maxval_u8,
+	},
+	{
+		.procname	= "sched_burst_penalty_offset",
+		.data		= &sched_burst_penalty_offset,
+		.maxlen		= sizeof(u8),
+		.mode		= 0644,
+		.proc_handler	= proc_dou8vec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &sixty_four,
+	},
+	{
+		.procname	= "sched_burst_penalty_scale",
+		.data		= &sched_burst_penalty_scale,
+		.maxlen		= sizeof(uint),
+		.mode		= 0644,
+		.proc_handler	= proc_douintvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &maxval_12_bits,
+	},
+	{
+		.procname	= "sched_burst_cache_stop_count",
+		.data		= &sched_burst_cache_stop_count,
+		.maxlen		= sizeof(uint),
+		.mode		= 0644,
+		.proc_handler	= proc_douintvec,
+	},
+	{
+		.procname	= "sched_burst_cache_lifetime",
+		.data		= &sched_burst_cache_lifetime,
+		.maxlen		= sizeof(uint),
+		.mode		= 0644,
+		.proc_handler	= proc_douintvec,
+	},
+	{
+		.procname	= "sched_deadline_boost_mask",
+		.data		= &sched_deadline_boost_mask,
+		.maxlen		= sizeof(uint),
+		.mode		= 0644,
+		.proc_handler	= proc_douintvec,
+	},
+};
+
+static int __init sched_bore_sysctl_init(void) {
+	register_sysctl_init("kernel", sched_bore_sysctls);
+	return 0;
+}
+late_initcall(sched_bore_sysctl_init);
+#endif // CONFIG_SYSCTL
+#endif // CONFIG_SCHED_BORE
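
The arithmetic in the new bore.c above is compact, so the following stand-alone user-space sketch (illustrative only; fls64() is replaced by a portable loop and the tunable defaults are copied from the initializers in the hunk) traces how a burst length in nanoseconds becomes a penalty and a burst score: the penalty stays zero until the burst exceeds roughly 2^23 ns (about 8.4 ms, set by sched_burst_penalty_offset = 24), then grows with log2 of the burst, scaled by sched_burst_penalty_scale and capped at MAX_BURST_PENALTY, and the top bits (penalty >> 2) become the burst score.

#include <stdio.h>
#include <stdint.h>

#define MAX_BURST_PENALTY (39U << 2)

static unsigned int penalty_offset = 24;   /* sched_burst_penalty_offset default */
static unsigned int penalty_scale  = 1280; /* sched_burst_penalty_scale default  */

/* portable stand-in for the kernel's fls64(): 1-based index of the highest set bit */
static uint32_t fls64_portable(uint64_t v)
{
	uint32_t r = 0;
	while (v) { r++; v >>= 1; }
	return r;
}

/* log2(v) + 1 as 24.8 fixed point, mirroring log2plus1_u64_u32f8() above */
static uint32_t log2plus1_u64_u32f8(uint64_t v)
{
	uint32_t integral = fls64_portable(v);
	uint8_t fractional = v << (64 - integral) >> 55;
	return integral << 8 | fractional;
}

/* mirrors calc_burst_penalty(): subtract the offset, scale, cap */
static uint32_t calc_burst_penalty(uint64_t burst_time)
{
	uint32_t greed     = log2plus1_u64_u32f8(burst_time);
	uint32_t tolerance = penalty_offset << 8;
	uint32_t penalty   = greed > tolerance ? greed - tolerance : 0;
	uint32_t scaled    = penalty * penalty_scale >> 16;
	return scaled < MAX_BURST_PENALTY ? scaled : MAX_BURST_PENALTY;
}

int main(void)
{
	uint64_t ns;
	for (ns = 1000000ULL; ns <= 5000000000ULL; ns *= 8)
		printf("burst %10llu ns -> penalty %3u, burst_score %2u\n",
		       (unsigned long long)ns,
		       calc_burst_penalty(ns), calc_burst_penalty(ns) >> 2);
	return 0;
}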
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 228f7c07da..76c54361fa 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -96,6 +96,8 @@
 #include "../../io_uring/io-wq.h"
 #include "../smpboot.h"
 
+#include <linux/sched/bore.h>
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpu);
 EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpumask);
 
@@ -9946,6 +9948,10 @@ void __init sched_init(void)
 	BUG_ON(&dl_sched_class != &stop_sched_class + 1);
 #endif
 
+#ifdef CONFIG_SCHED_BORE
+	sched_bore_init();
+#endif // CONFIG_SCHED_BORE
+
 	wait_bit_init();
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 4c3d0d9f3d..638a71f0fa 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -167,7 +167,53 @@ static const struct file_operations sched_feat_fops = {
 };
 
 #ifdef CONFIG_SMP
+#ifdef CONFIG_SCHED_BORE
+#define DEFINE_SYSCTL_SCHED_FUNC(name, update_func) \
+static ssize_t sched_##name##_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) \
+{ \
+	char buf[16]; \
+	unsigned int value; \
+\
+	if (cnt > 15) \
+		cnt = 15; \
+\
+	if (copy_from_user(&buf, ubuf, cnt)) \
+		return -EFAULT; \
+	buf[cnt] = '\0'; \
+\
+	if (kstrtouint(buf, 10, &value)) \
+		return -EINVAL; \
+\
+	sysctl_sched_##name = value; \
+	sched_update_##update_func(); \
+\
+	*ppos += cnt; \
+	return cnt; \
+} \
+\
+static int sched_##name##_show(struct seq_file *m, void *v) \
+{ \
+	seq_printf(m, "%d\n", sysctl_sched_##name); \
+	return 0; \
+} \
+\
+static int sched_##name##_open(struct inode *inode, struct file *filp) \
+{ \
+	return single_open(filp, sched_##name##_show, NULL); \
+} \
+\
+static const struct file_operations sched_##name##_fops = { \
+	.open		= sched_##name##_open, \
+	.write		= sched_##name##_write, \
+	.read		= seq_read, \
+	.llseek		= seq_lseek, \
+	.release	= single_release, \
+};
+
+DEFINE_SYSCTL_SCHED_FUNC(min_base_slice, min_base_slice)
 
+#undef DEFINE_SYSCTL_SCHED_FUNC
+#else // !CONFIG_SCHED_BORE
 static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf,
 		size_t cnt, loff_t *ppos)
 {
@@ -213,7 +259,7 @@ static const struct file_operations sched_scaling_fops = {
 	.llseek		= seq_lseek,
 	.release	= single_release,
 };
-
+#endif // CONFIG_SCHED_BORE
 #endif /* SMP */
 
 #ifdef CONFIG_PREEMPT_DYNAMIC
@@ -347,13 +393,20 @@ static __init int sched_init_debug(void)
 	debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops);
 #endif
 
+#ifdef CONFIG_SCHED_BORE
+	debugfs_create_file("min_base_slice_ns", 0644, debugfs_sched, NULL, &sched_min_base_slice_fops);
+	debugfs_create_u32("base_slice_ns", 0444, debugfs_sched, &sysctl_sched_base_slice);
+#else // !CONFIG_SCHED_BORE
 	debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice);
+#endif // CONFIG_SCHED_BORE
 
 	debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms);
 	debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once);
 
 #ifdef CONFIG_SMP
+#if !defined(CONFIG_SCHED_BORE)
 	debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops);
+#endif // CONFIG_SCHED_BORE
 	debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost);
 	debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate);
 
@@ -595,6 +648,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)),
 		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime)));
 
+#ifdef CONFIG_SCHED_BORE
+	SEQ_printf(m, " %2d", p->se.burst_score);
+#endif // CONFIG_SCHED_BORE
 #ifdef CONFIG_NUMA_BALANCING
 	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
 #endif
@@ -1068,6 +1124,9 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
 
 	P(se.load.weight);
 #ifdef CONFIG_SMP
+#ifdef CONFIG_SCHED_BORE
+	P(se.burst_score);
+#endif // CONFIG_SCHED_BORE
 	P(se.avg.load_sum);
 	P(se.avg.runnable_sum);
 	P(se.avg.util_sum);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3b2cfdb8d7..6268ccb187 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -57,6 +57,8 @@
 #include "stats.h"
 #include "autogroup.h"
 
+#include <linux/sched/bore.h>
+
 /*
  * The initial- and re-scaling of tunables is configurable
  *
@@ -66,17 +68,30 @@
  * SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus)
  * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
  *
- * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
+ * BORE : default SCHED_TUNABLESCALING_NONE = *1 constant
+ * EEVDF: default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
 */
+#ifdef CONFIG_SCHED_BORE
+unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
+#else // !CONFIG_SCHED_BORE
 unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG;
+#endif // CONFIG_SCHED_BORE
 
 /*
  * Minimal preemption granularity for CPU-bound tasks:
 *
- * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
+ * BORE : base_slice = minimum multiple of nsecs_per_tick >= min_base_slice
+ * (default min_base_slice = 2000000 constant, units: nanoseconds)
+ * EEVDF: default 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds
 */
+#ifdef CONFIG_SCHED_BORE
+static const unsigned int nsecs_per_tick = 1000000000ULL / HZ;
+unsigned int sysctl_sched_min_base_slice = CONFIG_MIN_BASE_SLICE_NS;
+__read_mostly uint sysctl_sched_base_slice = nsecs_per_tick;
+#else // !CONFIG_SCHED_BORE
 unsigned int sysctl_sched_base_slice = 750000ULL;
 static unsigned int normalized_sysctl_sched_base_slice = 750000ULL;
+#endif // CONFIG_SCHED_BORE
 
 /*
  * After fork, child runs first. If set to 0 (default) then
@@ -210,6 +225,13 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w)
 *
  * This idea comes from the SD scheduler of Con Kolivas:
 */
+#ifdef CONFIG_SCHED_BORE
+static void update_sysctl(void) {
+	sysctl_sched_base_slice = nsecs_per_tick *
+		max(1UL, DIV_ROUND_UP(sysctl_sched_min_base_slice, nsecs_per_tick));
+}
+void sched_update_min_base_slice(void) { update_sysctl(); }
+#else // !CONFIG_SCHED_BORE
 static unsigned int get_update_sysctl_factor(void)
 {
 	unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8);
@@ -240,6 +262,7 @@ static void update_sysctl(void)
 	SET_SYSCTL(sched_base_slice);
 #undef SET_SYSCTL
 }
+#endif // CONFIG_SCHED_BORE
 
 void __init sched_init_granularity(void)
 {
@@ -713,6 +736,9 @@ static s64 entity_lag(u64 avruntime, struct sched_entity *se)
 
 	vlag = avruntime - se->vruntime;
 	limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
+#ifdef CONFIG_SCHED_BORE
+	limit >>= !!sched_bore;
+#endif // CONFIG_SCHED_BORE
 
 	return clamp(vlag, -limit, limit);
 }
@@ -894,6 +920,10 @@ static struct sched_entity *__pick_eevdf(struct cfs_rq *cfs_rq)
 	 * until it gets a new slice. See the HACK in set_next_entity().
 	 */
 	if (sched_feat(RUN_TO_PARITY) && curr && curr->vlag == curr->deadline)
+#ifdef CONFIG_SCHED_BORE
+	if (!(likely(sched_bore) && likely(sched_burst_parity_threshold) &&
+			sched_burst_parity_threshold < cfs_rq->nr_running))
+#endif // CONFIG_SCHED_BORE
 		return curr;
 
 	while (node) {
@@ -1002,6 +1032,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
  * Scheduling class statistics methods:
 */
 #ifdef CONFIG_SMP
+#if !defined(CONFIG_SCHED_BORE)
 int sched_update_scaling(void)
 {
 	unsigned int factor = get_update_sysctl_factor();
@@ -1013,6 +1044,7 @@ int sched_update_scaling(void)
 
 	return 0;
 }
+#endif // CONFIG_SCHED_BORE
 #endif
 #endif
 
@@ -1210,6 +1242,10 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	if (unlikely(delta_exec <= 0))
 		return;
 
+#ifdef CONFIG_SCHED_BORE
+	curr->burst_time += delta_exec;
+	update_burst_penalty(curr);
+#endif // CONFIG_SCHED_BORE
 	curr->vruntime += calc_delta_fair(delta_exec, curr);
 	update_deadline(cfs_rq, curr);
 	update_min_vruntime(cfs_rq);
@@ -3848,7 +3884,7 @@ static void reweight_eevdf(struct sched_entity *se, u64 avruntime,
 		se->deadline = avruntime + vslice;
 }
 
-static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
+void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 			    unsigned long weight)
 {
 	bool curr = cfs_rq->curr == se;
@@ -5191,6 +5227,9 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	 *
 	 * EEVDF: placement strategy #1 / #2
 	 */
+#ifdef CONFIG_SCHED_BORE
+	if (se->vlag)
+#endif // CONFIG_SCHED_BORE
 	if (sched_feat(PLACE_LAG) && cfs_rq->nr_running) {
 		struct sched_entity *curr = cfs_rq->curr;
 		unsigned long load;
@@ -5261,8 +5300,18 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	se->vruntime = vruntime - lag;
 
+	if (sched_feat(PLACE_REL_DEADLINE) && se->rel_deadline) {
+		se->deadline += se->vruntime;
+		se->rel_deadline = 0;
+		return;
+	}
+#ifdef CONFIG_SCHED_BORE
+	else if (likely(sched_bore))
+		vslice >>= !!(flags & sched_deadline_boost_mask);
+	else
+#endif // CONFIG_SCHED_BORE
 	/*
-	 * When joining the competition; the exisiting tasks will be,
+	 * When joining the competition; the existing tasks will be,
 	 * on average, halfway through their slice, as such start tasks
 	 * off with half a slice to ease into the competition.
 	 */
@@ -5370,6 +5419,7 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
 static void
 dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
+	bool sleep = flags & DEQUEUE_SLEEP;
 	int action = UPDATE_TG;
 
 	if (entity_is_task(se) && task_on_rq_migrating(task_of(se)))
@@ -5397,6 +5447,11 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	clear_buddies(cfs_rq, se);
 
 	update_entity_lag(cfs_rq, se);
+	if (sched_feat(PLACE_REL_DEADLINE) && !sleep) {
+		se->deadline -= se->vruntime;
+		se->rel_deadline = 1;
+	}
+
 	if (se != cfs_rq->curr)
 		__dequeue_entity(cfs_rq, se);
 	se->on_rq = 0;
@@ -6833,6 +6888,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	bool was_sched_idle = sched_idle_rq(rq);
 
 	util_est_dequeue(&rq->cfs, p);
+#ifdef CONFIG_SCHED_BORE
+	if (task_sleep) {
+		cfs_rq = cfs_rq_of(se);
+		if (cfs_rq->curr == se)
+			update_curr(cfs_rq);
+		restart_burst(se);
+	}
+#endif // CONFIG_SCHED_BORE
 
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
@@ -8566,16 +8629,25 @@ static void yield_task_fair(struct rq *rq)
 	/*
 	 * Are we the only task in the tree?
 	 */
+#if !defined(CONFIG_SCHED_BORE)
 	if (unlikely(rq->nr_running == 1))
 		return;
 
 	clear_buddies(cfs_rq, se);
+#endif // CONFIG_SCHED_BORE
 
 	update_rq_clock(rq);
 	/*
 	 * Update run-time statistics of the 'current'.
 	 */
 	update_curr(cfs_rq);
+#ifdef CONFIG_SCHED_BORE
+	restart_burst_rescale_deadline(se);
+	if (unlikely(rq->nr_running == 1))
+		return;
+
+	clear_buddies(cfs_rq, se);
+#endif // CONFIG_SCHED_BORE
 	/*
 	 * Tell update_rq_clock() that we've just updated,
 	 * so we don't do microscopic update in schedule()
@@ -12641,6 +12713,9 @@ static void task_fork_fair(struct task_struct *p)
 	curr = cfs_rq->curr;
 	if (curr)
 		update_curr(cfs_rq);
+#ifdef CONFIG_SCHED_BORE
+	update_burst_score(se);
+#endif // CONFIG_SCHED_BORE
 	place_entity(cfs_rq, se, ENQUEUE_INITIAL);
 	rq_unlock(rq, &rf);
 }
@@ -12753,6 +12828,10 @@ static void attach_task_cfs_rq(struct task_struct *p)
 
 static void switched_from_fair(struct rq *rq, struct task_struct *p)
 {
+	p->se.rel_deadline = 0;
+#ifdef CONFIG_SCHED_BORE
+	reset_task_bore(p);
+#endif // CONFIG_SCHED_BORE
 	detach_task_cfs_rq(p);
 }
 
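
The fair.c change above makes reweight_entity() non-static so that bore.c can reweight a task in place whenever its burst score changes; effective_prio() in bore.c simply adds burst_score (0..39) to the task's nice-derived priority index, so each point of burst score costs roughly one step in the weight table, i.e. about one nice level. The rough sketch below is an approximation, not the kernel's table: it uses the ~1.25x-per-nice-step ratio that sched_prio_to_weight[] is built around to show how a burst-score gap shifts the CPU split between an interactive task and a bursty one when both stay runnable (build with e.g. cc sketch.c -lm).

#include <stdio.h>
#include <math.h>

int main(void)
{
	const double nice0_weight = 1024.0; /* the nice-0 weight unit */
	int gap;

	for (gap = 0; gap <= 12; gap += 4) {
		/* a burst-score gap of "gap" pushes the bursty task down "gap" weight steps */
		double w_interactive = nice0_weight;
		double w_bursty = nice0_weight / pow(1.25, gap);
		double share = w_interactive / (w_interactive + w_bursty);
		printf("burst_score gap %2d -> interactive task gets ~%2.0f%% of the CPU\n",
		       gap, share * 100.0);
	}
	return 0;
}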
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index f770168230..87464a97d0 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -6,6 +6,10 @@
 */
 SCHED_FEAT(PLACE_LAG, true)
 SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
+/*
+ * Preserve relative virtual deadline on 'migration'.
+ */
+SCHED_FEAT(PLACE_REL_DEADLINE, true)
 SCHED_FEAT(RUN_TO_PARITY, true)
 
 /*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d48c6a292a..75acf2f6e2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1946,7 +1946,11 @@ static inline void dirty_sched_domain_sysctl(int cpu)
 }
 #endif
 
+#ifdef CONFIG_SCHED_BORE
+extern void sched_update_min_base_slice(void);
+#else // !CONFIG_SCHED_BORE
 extern int sched_update_scaling(void);
+#endif // CONFIG_SCHED_BORE
 
 static inline const struct cpumask *task_user_cpus(struct task_struct *p)
 {
@@ -2532,7 +2536,12 @@ extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags);
 extern const_debug unsigned int sysctl_sched_nr_migrate;
 extern const_debug unsigned int sysctl_sched_migration_cost;
 
+#ifdef CONFIG_SCHED_BORE
+extern unsigned int sysctl_sched_min_base_slice;
+extern __read_mostly uint sysctl_sched_base_slice;
+#else // !CONFIG_SCHED_BORE
 extern unsigned int sysctl_sched_base_slice;
+#endif // CONFIG_SCHED_BORE
 
 #ifdef CONFIG_SCHED_DEBUG
 extern int sysctl_resched_latency_warn_ms;
-- 
2.34.1
