
Contents of /cauldron/kernel/current/SOURCES/0001-linux6.6.69-bore5.7.14.patch



Revision 2136362
Thu Jan 9 17:14:43 2025 UTC by ghibo
File size: 32247 byte(s)
- Update to version 6.6.70.
- BORE 5.7.15.
- Merge stable-queue as of 2025-01-09.
- Disable Patch1020 for l2c-id on AMD CPU (potential problems on AMD FX-8350 CPU, bug#33459).
- Extend usage of %_make_verbose to other supported kernel verbosity levels.
- Use a macro for silent make.
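
For orientation before the patch body: the heart of BORE is the burst-penalty curve added in kernel/sched/bore.c, which maps a task's accumulated burst time onto a 0-156 penalty (and a 0-39 priority offset) via a fixed-point log2. The standalone C sketch below reproduces that arithmetic outside the kernel purely for illustration; the offset (24) and scale (1280) are the patch's sysctl defaults, the kernel's fls64() is approximated with the GCC/Clang builtin __builtin_clzll(), and the zero case is guarded explicitly.

/*
 * Userspace sketch (not kernel code) of the burst-penalty curve that
 * kernel/sched/bore.c introduces in the patch below.
 */
#include <stdio.h>
#include <stdint.h>

#define MAX_BURST_PENALTY (39U << 2)          /* 156: caps the penalty at 39 nice-level steps */

static const uint32_t penalty_offset = 24;    /* default sched_burst_penalty_offset */
static const uint32_t penalty_scale  = 1280;  /* default sched_burst_penalty_scale  */

/* log2(v) + 1 in 24.8 fixed point, mirroring the patch's log2plus1_u64_u32f8() */
static uint32_t log2plus1_u64_u32f8(uint64_t v)
{
        uint32_t integral;
        uint8_t fractional;

        if (!v)
                return 0;
        integral = 64 - __builtin_clzll(v);                /* fls64(v) for v != 0 */
        fractional = (uint8_t)(v << (64 - integral) >> 55);
        return integral << 8 | fractional;
}

/* Mirrors calc_burst_penalty(): logarithmic "greed" minus a tolerance, then scaled and capped */
static uint32_t calc_burst_penalty(uint64_t burst_time_ns)
{
        uint32_t greed = log2plus1_u64_u32f8(burst_time_ns);
        uint32_t tolerance = penalty_offset << 8;
        int32_t penalty = (int32_t)(greed - tolerance);

        if (penalty < 0)
                penalty = 0;
        uint32_t scaled = (uint32_t)penalty * penalty_scale >> 16;
        return scaled < MAX_BURST_PENALTY ? scaled : MAX_BURST_PENALTY;
}

int main(void)
{
        /* burst_score (the priority offset) is burst_penalty >> 2, as in update_burst_score() */
        uint64_t samples[] = { 1000000ULL, 20000000ULL, 500000000ULL, 10000000000ULL };
        for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
                uint32_t p = calc_burst_penalty(samples[i]);
                printf("burst %11llu ns -> penalty %3u, score %2u\n",
                       (unsigned long long)samples[i], p, p >> 2);
        }
        return 0;
}

With the defaults, bursts below roughly 8 ms (2^23 ns) incur no penalty at all, and the score grows logarithmically from there.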

1 From 0fb0d405a0a5c2bc5acd03d7d0d96abf4d2ecab9 Mon Sep 17 00:00:00 2001
2 From: Masahito S <firelzrd@gmail.com>
3 Date: Tue, 24 Dec 2024 05:15:50 +0900
4 Subject: [PATCH] linux6.6.57-bore5.7.14
5
6 ---
7 include/linux/sched.h | 20 +-
8 include/linux/sched/bore.h | 40 ++++
9 init/Kconfig | 17 ++
10 kernel/Kconfig.hz | 17 ++
11 kernel/fork.c | 9 +
12 kernel/sched/Makefile | 1 +
13 kernel/sched/bore.c | 424 +++++++++++++++++++++++++++++++++++++
14 kernel/sched/core.c | 6 +
15 kernel/sched/debug.c | 61 +++++-
16 kernel/sched/fair.c | 87 +++++++-
17 kernel/sched/features.h | 4 +
18 kernel/sched/sched.h | 9 +
19 12 files changed, 689 insertions(+), 6 deletions(-)
20 create mode 100644 include/linux/sched/bore.h
21 create mode 100644 kernel/sched/bore.c
22
23 diff --git a/include/linux/sched.h b/include/linux/sched.h
24 index d4f9d82c6..14a7b2c99 100644
25 --- a/include/linux/sched.h
26 +++ b/include/linux/sched.h
27 @@ -548,6 +548,14 @@ struct sched_statistics {
28 #endif /* CONFIG_SCHEDSTATS */
29 } ____cacheline_aligned;
30
31 +#ifdef CONFIG_SCHED_BORE
32 +struct sched_burst_cache {
33 + u8 score;
34 + u32 count;
35 + u64 timestamp;
36 +};
37 +#endif // CONFIG_SCHED_BORE
38 +
39 struct sched_entity {
40 /* For load-balancing: */
41 struct load_weight load;
42 @@ -556,12 +564,22 @@ struct sched_entity {
43 u64 min_deadline;
44
45 struct list_head group_node;
46 - unsigned int on_rq;
47 + unsigned char on_rq;
48 + unsigned char rel_deadline;
49
50 u64 exec_start;
51 u64 sum_exec_runtime;
52 u64 prev_sum_exec_runtime;
53 u64 vruntime;
54 +#ifdef CONFIG_SCHED_BORE
55 + u64 burst_time;
56 + u8 prev_burst_penalty;
57 + u8 curr_burst_penalty;
58 + u8 burst_penalty;
59 + u8 burst_score;
60 + struct sched_burst_cache child_burst;
61 + struct sched_burst_cache group_burst;
62 +#endif // CONFIG_SCHED_BORE
63 s64 vlag;
64 u64 slice;
65
66 diff --git a/include/linux/sched/bore.h b/include/linux/sched/bore.h
67 new file mode 100644
68 index 000000000..a8e4093ad
69 --- /dev/null
70 +++ b/include/linux/sched/bore.h
71 @@ -0,0 +1,40 @@
72 +
73 +#include <linux/sched.h>
74 +#include <linux/sched/cputime.h>
75 +
76 +#ifndef _LINUX_SCHED_BORE_H
77 +#define _LINUX_SCHED_BORE_H
78 +#define SCHED_BORE_VERSION "5.7.14"
79 +
80 +#ifdef CONFIG_SCHED_BORE
81 +extern u8 __read_mostly sched_bore;
82 +extern u8 __read_mostly sched_burst_exclude_kthreads;
83 +extern u8 __read_mostly sched_burst_smoothness_long;
84 +extern u8 __read_mostly sched_burst_smoothness_short;
85 +extern u8 __read_mostly sched_burst_fork_atavistic;
86 +extern u8 __read_mostly sched_burst_parity_threshold;
87 +extern u8 __read_mostly sched_burst_penalty_offset;
88 +extern uint __read_mostly sched_burst_penalty_scale;
89 +extern uint __read_mostly sched_burst_cache_stop_count;
90 +extern uint __read_mostly sched_burst_cache_lifetime;
91 +extern uint __read_mostly sched_deadline_boost_mask;
92 +
93 +extern void update_burst_score(struct sched_entity *se);
94 +extern void update_burst_penalty(struct sched_entity *se);
95 +
96 +extern void restart_burst(struct sched_entity *se);
97 +extern void restart_burst_rescale_deadline(struct sched_entity *se);
98 +
99 +extern int sched_bore_update_handler(struct ctl_table *table, int write,
100 + void __user *buffer, size_t *lenp, loff_t *ppos);
101 +
102 +extern void sched_clone_bore(
103 + struct task_struct *p, struct task_struct *parent, u64 clone_flags);
104 +
105 +extern void init_task_bore(struct task_struct *p);
106 +extern void sched_bore_init(void);
107 +
108 +extern void reweight_entity(
109 + struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight);
110 +#endif // CONFIG_SCHED_BORE
111 +#endif // _LINUX_SCHED_BORE_H
112 diff --git a/init/Kconfig b/init/Kconfig
113 index 60ed7713b..629dac0eb 100644
114 --- a/init/Kconfig
115 +++ b/init/Kconfig
116 @@ -1276,6 +1276,23 @@ config CHECKPOINT_RESTORE
117
118 If unsure, say N here.
119
120 +config SCHED_BORE
121 + bool "Burst-Oriented Response Enhancer"
122 + default y
123 + help
124 + In Desktop and Mobile computing, one might prefer interactive
125 + tasks to keep responsive no matter what they run in the background.
126 +
127 + Enabling this kernel feature modifies the scheduler to discriminate
128 + tasks by their burst time (runtime since it last went sleeping or
129 + yielding state) and prioritize those that run less bursty.
130 + Such tasks usually include window compositor, widgets backend,
131 + terminal emulator, video playback, games and so on.
132 + With a little impact to scheduling fairness, it may improve
133 + responsiveness especially under heavy background workload.
134 +
135 + If unsure, say Y here.
136 +
137 config SCHED_AUTOGROUP
138 bool "Automatic process group scheduling"
139 select CGROUPS
140 diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
141 index 38ef6d068..253c566b5 100644
142 --- a/kernel/Kconfig.hz
143 +++ b/kernel/Kconfig.hz
144 @@ -55,5 +55,22 @@ config HZ
145 default 300 if HZ_300
146 default 1000 if HZ_1000
147
148 +config MIN_BASE_SLICE_NS
149 + int "Default value for min_base_slice_ns"
150 + default 2000000
151 + help
152 + The BORE Scheduler automatically calculates the optimal base
153 + slice for the configured HZ using the following equation:
154 +
155 + base_slice_ns =
156 + 1000000000/HZ * DIV_ROUNDUP(min_base_slice_ns, 1000000000/HZ)
157 +
158 + This option sets the default lower bound limit of the base slice
159 + to prevent the loss of task throughput due to overscheduling.
160 +
161 + Setting this value too high can cause the system to boot with
162 + an unnecessarily large base slice, resulting in high scheduling
163 + latency and poor system responsiveness.
164 +
165 config SCHED_HRTICK
166 def_bool HIGH_RES_TIMERS
167 diff --git a/kernel/fork.c b/kernel/fork.c
168 index 23efaa2c4..524497cee 100644
169 --- a/kernel/fork.c
170 +++ b/kernel/fork.c
171 @@ -101,12 +101,18 @@
172 #include <linux/iommu.h>
173 #include <linux/tick.h>
174
175 +#ifdef CONFIG_USER_NS
176 +#include <linux/user_namespace.h>
177 +#endif
178 +
179 #include <asm/pgalloc.h>
180 #include <linux/uaccess.h>
181 #include <asm/mmu_context.h>
182 #include <asm/cacheflush.h>
183 #include <asm/tlbflush.h>
184
185 +#include <linux/sched/bore.h>
186 +
187 #include <trace/events/sched.h>
188
189 #define CREATE_TRACE_POINTS
190 @@ -2471,6 +2477,9 @@ __latent_entropy struct task_struct *copy_process(
191 retval = sched_fork(clone_flags, p);
192 if (retval)
193 goto bad_fork_cleanup_policy;
194 +#ifdef CONFIG_SCHED_BORE
195 + sched_clone_bore(p, current, clone_flags);
196 +#endif // CONFIG_SCHED_BORE
197
198 retval = perf_event_init_task(p, clone_flags);
199 if (retval)
200 diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
201 index 976092b7b..293aad675 100644
202 --- a/kernel/sched/Makefile
203 +++ b/kernel/sched/Makefile
204 @@ -32,3 +32,4 @@ obj-y += core.o
205 obj-y += fair.o
206 obj-y += build_policy.o
207 obj-y += build_utility.o
208 +obj-y += bore.o
209 diff --git a/kernel/sched/bore.c b/kernel/sched/bore.c
210 new file mode 100644
211 index 000000000..f262231e7
212 --- /dev/null
213 +++ b/kernel/sched/bore.c
214 @@ -0,0 +1,424 @@
215 +/*
216 + * Burst-Oriented Response Enhancer (BORE) CPU Scheduler
217 + * Copyright (C) 2021-2024 Masahito Suzuki <firelzrd@gmail.com>
218 + */
219 +#include <linux/cpuset.h>
220 +#include <linux/sched/task.h>
221 +#include <linux/sched/bore.h>
222 +#include "sched.h"
223 +
224 +#ifdef CONFIG_SCHED_BORE
225 +u8 __read_mostly sched_bore = 1;
226 +u8 __read_mostly sched_burst_exclude_kthreads = 1;
227 +u8 __read_mostly sched_burst_smoothness_long = 1;
228 +u8 __read_mostly sched_burst_smoothness_short = 0;
229 +u8 __read_mostly sched_burst_fork_atavistic = 2;
230 +u8 __read_mostly sched_burst_parity_threshold = 2;
231 +u8 __read_mostly sched_burst_penalty_offset = 24;
232 +uint __read_mostly sched_burst_penalty_scale = 1280;
233 +uint __read_mostly sched_burst_cache_stop_count = 64;
234 +uint __read_mostly sched_burst_cache_lifetime = 75000000;
235 +uint __read_mostly sched_deadline_boost_mask = ENQUEUE_INITIAL
236 + | ENQUEUE_WAKEUP;
237 +static int __maybe_unused sixty_four = 64;
238 +static int __maybe_unused maxval_u8 = 255;
239 +static int __maybe_unused maxval_12_bits = 4095;
240 +
241 +#define MAX_BURST_PENALTY (39U <<2)
242 +
243 +static inline u32 log2plus1_u64_u32f8(u64 v) {
244 + u32 integral = fls64(v);
245 + u8 fractional = v << (64 - integral) >> 55;
246 + return integral << 8 | fractional;
247 +}
248 +
249 +static inline u32 calc_burst_penalty(u64 burst_time) {
250 + u32 greed, tolerance, penalty, scaled_penalty;
251 +
252 + greed = log2plus1_u64_u32f8(burst_time);
253 + tolerance = sched_burst_penalty_offset << 8;
254 + penalty = max(0, (s32)(greed - tolerance));
255 + scaled_penalty = penalty * sched_burst_penalty_scale >> 16;
256 +
257 + return min(MAX_BURST_PENALTY, scaled_penalty);
258 +}
259 +
260 +static inline u64 __scale_slice(u64 delta, u8 score)
261 +{return mul_u64_u32_shr(delta, sched_prio_to_wmult[score], 22);}
262 +
263 +static inline u64 __unscale_slice(u64 delta, u8 score)
264 +{return mul_u64_u32_shr(delta, sched_prio_to_weight[score], 10);}
265 +
266 +static void reweight_task_by_prio(struct task_struct *p, int prio) {
267 + struct sched_entity *se = &p->se;
268 + unsigned long weight = scale_load(sched_prio_to_weight[prio]);
269 +
270 + reweight_entity(cfs_rq_of(se), se, weight);
271 + se->load.inv_weight = sched_prio_to_wmult[prio];
272 +}
273 +
274 +static inline u8 effective_prio(struct task_struct *p) {
275 + u8 prio = p->static_prio - MAX_RT_PRIO;
276 + if (likely(sched_bore))
277 + prio += p->se.burst_score;
278 + return min(39, prio);
279 +}
280 +
281 +void update_burst_score(struct sched_entity *se) {
282 + if (!entity_is_task(se)) return;
283 + struct task_struct *p = task_of(se);
284 + u8 prev_prio = effective_prio(p);
285 +
286 + u8 burst_score = 0;
287 + if (!((p->flags & PF_KTHREAD) && likely(sched_burst_exclude_kthreads)))
288 + burst_score = se->burst_penalty >> 2;
289 + se->burst_score = burst_score;
290 +
291 + u8 new_prio = effective_prio(p);
292 + if (new_prio != prev_prio)
293 + reweight_task_by_prio(p, new_prio);
294 +}
295 +
296 +void update_burst_penalty(struct sched_entity *se) {
297 + se->curr_burst_penalty = calc_burst_penalty(se->burst_time);
298 + se->burst_penalty = max(se->prev_burst_penalty, se->curr_burst_penalty);
299 + update_burst_score(se);
300 +}
301 +
302 +static inline u32 binary_smooth(u32 new, u32 old) {
303 + int increment = new - old;
304 + return (0 <= increment)?
305 + old + ( increment >> (int)sched_burst_smoothness_long):
306 + old - (-increment >> (int)sched_burst_smoothness_short);
307 +}
308 +
309 +static void revolve_burst_penalty(struct sched_entity *se) {
310 + se->prev_burst_penalty =
311 + binary_smooth(se->curr_burst_penalty, se->prev_burst_penalty);
312 + se->burst_time = 0;
313 + se->curr_burst_penalty = 0;
314 +}
315 +
316 +inline void restart_burst(struct sched_entity *se) {
317 + revolve_burst_penalty(se);
318 + se->burst_penalty = se->prev_burst_penalty;
319 + update_burst_score(se);
320 +}
321 +
322 +void restart_burst_rescale_deadline(struct sched_entity *se) {
323 + s64 vscaled, wremain, vremain = se->deadline - se->vruntime;
324 + struct task_struct *p = task_of(se);
325 + u8 prev_prio = effective_prio(p);
326 + restart_burst(se);
327 + u8 new_prio = effective_prio(p);
328 + if (prev_prio > new_prio) {
329 + wremain = __unscale_slice(abs(vremain), prev_prio);
330 + vscaled = __scale_slice(wremain, new_prio);
331 + if (unlikely(vremain < 0))
332 + vscaled = -vscaled;
333 + se->deadline = se->vruntime + vscaled;
334 + }
335 +}
336 +
337 +static inline bool task_is_bore_eligible(struct task_struct *p)
338 +{return p && p->sched_class == &fair_sched_class && !p->exit_state;}
339 +
340 +static void reset_task_weights_bore(void) {
341 + struct task_struct *task;
342 + struct rq *rq;
343 + struct rq_flags rf;
344 +
345 + write_lock_irq(&tasklist_lock);
346 + for_each_process(task) {
347 + if (!task_is_bore_eligible(task)) continue;
348 + rq = task_rq(task);
349 + rq_pin_lock(rq, &rf);
350 + update_rq_clock(rq);
351 + reweight_task_by_prio(task, effective_prio(task));
352 + rq_unpin_lock(rq, &rf);
353 + }
354 + write_unlock_irq(&tasklist_lock);
355 +}
356 +
357 +int sched_bore_update_handler(struct ctl_table *table, int write,
358 + void __user *buffer, size_t *lenp, loff_t *ppos) {
359 + int ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
360 + if (ret || !write)
361 + return ret;
362 +
363 + reset_task_weights_bore();
364 +
365 + return 0;
366 +}
367 +
368 +#define for_each_child(p, t) \
369 + list_for_each_entry(t, &(p)->children, sibling)
370 +
371 +static u32 count_children_max2(struct task_struct *p) {
372 + u32 cnt = 0;
373 + struct task_struct *child;
374 + for_each_child(p, child) {if (2 <= ++cnt) break;}
375 + return cnt;
376 +}
377 +
378 +static inline bool burst_cache_expired(struct sched_burst_cache *bc, u64 now)
379 +{return (s64)(bc->timestamp + sched_burst_cache_lifetime - now) < 0;}
380 +
381 +static void update_burst_cache(struct sched_burst_cache *bc,
382 + struct task_struct *p, u32 cnt, u32 sum, u64 now) {
383 + u8 avg = cnt ? sum / cnt : 0;
384 + bc->score = max(avg, p->se.burst_penalty);
385 + bc->count = cnt;
386 + bc->timestamp = now;
387 +}
388 +
389 +static inline void update_child_burst_direct(struct task_struct *p, u64 now) {
390 + u32 cnt = 0, sum = 0;
391 + struct task_struct *child;
392 +
393 + for_each_child(p, child) {
394 + if (!task_is_bore_eligible(child)) continue;
395 + cnt++;
396 + sum += child->se.burst_penalty;
397 + }
398 +
399 + update_burst_cache(&p->se.child_burst, p, cnt, sum, now);
400 +}
401 +
402 +static inline u8 inherit_burst_direct(
403 + struct task_struct *p, u64 now, u64 clone_flags) {
404 + struct task_struct *parent = p;
405 +
406 + if (clone_flags & CLONE_PARENT)
407 + parent = parent->real_parent;
408 +
409 + if (burst_cache_expired(&parent->se.child_burst, now))
410 + update_child_burst_direct(parent, now);
411 +
412 + return parent->se.child_burst.score;
413 +}
414 +
415 +static void update_child_burst_topological(
416 + struct task_struct *p, u64 now, u32 depth, u32 *acnt, u32 *asum) {
417 + u32 cnt = 0, dcnt = 0, sum = 0;
418 + struct task_struct *child, *dec;
419 +
420 + for_each_child(p, child) {
421 + dec = child;
422 + while ((dcnt = count_children_max2(dec)) == 1)
423 + dec = list_first_entry(&dec->children, struct task_struct, sibling);
424 +
425 + if (!dcnt || !depth) {
426 + if (!task_is_bore_eligible(dec)) continue;
427 + cnt++;
428 + sum += dec->se.burst_penalty;
429 + continue;
430 + }
431 + if (!burst_cache_expired(&dec->se.child_burst, now)) {
432 + cnt += dec->se.child_burst.count;
433 + sum += (u32)dec->se.child_burst.score * dec->se.child_burst.count;
434 + if (sched_burst_cache_stop_count <= cnt) break;
435 + continue;
436 + }
437 + update_child_burst_topological(dec, now, depth - 1, &cnt, &sum);
438 + }
439 +
440 + update_burst_cache(&p->se.child_burst, p, cnt, sum, now);
441 + *acnt += cnt;
442 + *asum += sum;
443 +}
444 +
445 +static inline u8 inherit_burst_topological(
446 + struct task_struct *p, u64 now, u64 clone_flags) {
447 + struct task_struct *anc = p;
448 + u32 cnt = 0, sum = 0;
449 + u32 base_child_cnt = 0;
450 +
451 + if (clone_flags & CLONE_PARENT) {
452 + anc = anc->real_parent;
453 + base_child_cnt = 1;
454 + }
455 +
456 + for (struct task_struct *next;
457 + anc != (next = anc->real_parent) &&
458 + count_children_max2(anc) <= base_child_cnt;) {
459 + anc = next;
460 + base_child_cnt = 1;
461 + }
462 +
463 + if (burst_cache_expired(&anc->se.child_burst, now))
464 + update_child_burst_topological(
465 + anc, now, sched_burst_fork_atavistic - 1, &cnt, &sum);
466 +
467 + return anc->se.child_burst.score;
468 +}
469 +
470 +static inline void update_tg_burst(struct task_struct *p, u64 now) {
471 + struct task_struct *task;
472 + u32 cnt = 0, sum = 0;
473 +
474 + for_each_thread(p, task) {
475 + if (!task_is_bore_eligible(task)) continue;
476 + cnt++;
477 + sum += task->se.burst_penalty;
478 + }
479 +
480 + update_burst_cache(&p->se.group_burst, p, cnt, sum, now);
481 +}
482 +
483 +static inline u8 inherit_burst_tg(struct task_struct *p, u64 now) {
484 + struct task_struct *parent = rcu_dereference(p->group_leader);
485 + if (burst_cache_expired(&parent->se.group_burst, now))
486 + update_tg_burst(parent, now);
487 +
488 + return parent->se.group_burst.score;
489 +}
490 +
491 +void sched_clone_bore(
492 + struct task_struct *p, struct task_struct *parent, u64 clone_flags) {
493 + struct sched_entity *se = &p->se;
494 + u64 now;
495 + u8 penalty;
496 +
497 + if (!task_is_bore_eligible(p)) return;
498 +
499 + if (clone_flags & CLONE_THREAD) {
500 + rcu_read_lock();
501 + now = jiffies_to_nsecs(jiffies);
502 + penalty = inherit_burst_tg(parent, now);
503 + rcu_read_unlock();
504 + } else {
505 + read_lock(&tasklist_lock);
506 + now = jiffies_to_nsecs(jiffies);
507 + penalty = likely(sched_burst_fork_atavistic) ?
508 + inherit_burst_topological(parent, now, clone_flags):
509 + inherit_burst_direct(parent, now, clone_flags);
510 + read_unlock(&tasklist_lock);
511 + }
512 +
513 + revolve_burst_penalty(se);
514 + se->burst_penalty = se->prev_burst_penalty =
515 + max(se->prev_burst_penalty, penalty);
516 + se->child_burst.timestamp = 0;
517 + se->group_burst.timestamp = 0;
518 +}
519 +
520 +void init_task_bore(struct task_struct *p) {
521 + p->se.burst_time = 0;
522 + p->se.prev_burst_penalty = 0;
523 + p->se.curr_burst_penalty = 0;
524 + p->se.burst_penalty = 0;
525 + p->se.burst_score = 0;
526 + memset(&p->se.child_burst, 0, sizeof(struct sched_burst_cache));
527 + memset(&p->se.group_burst, 0, sizeof(struct sched_burst_cache));
528 +}
529 +
530 +void __init sched_bore_init(void) {
531 + printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification %s by Masahito Suzuki", SCHED_BORE_VERSION);
532 + init_task_bore(&init_task);
533 +}
534 +
535 +#ifdef CONFIG_SYSCTL
536 +static struct ctl_table sched_bore_sysctls[] = {
537 + {
538 + .procname = "sched_bore",
539 + .data = &sched_bore,
540 + .maxlen = sizeof(u8),
541 + .mode = 0644,
542 + .proc_handler = sched_bore_update_handler,
543 + .extra1 = SYSCTL_ZERO,
544 + .extra2 = SYSCTL_ONE,
545 + },
546 + {
547 + .procname = "sched_burst_exclude_kthreads",
548 + .data = &sched_burst_exclude_kthreads,
549 + .maxlen = sizeof(u8),
550 + .mode = 0644,
551 + .proc_handler = proc_dou8vec_minmax,
552 + .extra1 = SYSCTL_ZERO,
553 + .extra2 = SYSCTL_ONE,
554 + },
555 + {
556 + .procname = "sched_burst_smoothness_long",
557 + .data = &sched_burst_smoothness_long,
558 + .maxlen = sizeof(u8),
559 + .mode = 0644,
560 + .proc_handler = proc_dou8vec_minmax,
561 + .extra1 = SYSCTL_ZERO,
562 + .extra2 = SYSCTL_ONE,
563 + },
564 + {
565 + .procname = "sched_burst_smoothness_short",
566 + .data = &sched_burst_smoothness_short,
567 + .maxlen = sizeof(u8),
568 + .mode = 0644,
569 + .proc_handler = proc_dou8vec_minmax,
570 + .extra1 = SYSCTL_ZERO,
571 + .extra2 = SYSCTL_ONE,
572 + },
573 + {
574 + .procname = "sched_burst_fork_atavistic",
575 + .data = &sched_burst_fork_atavistic,
576 + .maxlen = sizeof(u8),
577 + .mode = 0644,
578 + .proc_handler = proc_dou8vec_minmax,
579 + .extra1 = SYSCTL_ZERO,
580 + .extra2 = SYSCTL_THREE,
581 + },
582 + {
583 + .procname = "sched_burst_parity_threshold",
584 + .data = &sched_burst_parity_threshold,
585 + .maxlen = sizeof(u8),
586 + .mode = 0644,
587 + .proc_handler = proc_dou8vec_minmax,
588 + .extra1 = SYSCTL_ZERO,
589 + .extra2 = &maxval_u8,
590 + },
591 + {
592 + .procname = "sched_burst_penalty_offset",
593 + .data = &sched_burst_penalty_offset,
594 + .maxlen = sizeof(u8),
595 + .mode = 0644,
596 + .proc_handler = proc_dou8vec_minmax,
597 + .extra1 = SYSCTL_ZERO,
598 + .extra2 = &sixty_four,
599 + },
600 + {
601 + .procname = "sched_burst_penalty_scale",
602 + .data = &sched_burst_penalty_scale,
603 + .maxlen = sizeof(uint),
604 + .mode = 0644,
605 + .proc_handler = proc_douintvec_minmax,
606 + .extra1 = SYSCTL_ZERO,
607 + .extra2 = &maxval_12_bits,
608 + },
609 + {
610 + .procname = "sched_burst_cache_stop_count",
611 + .data = &sched_burst_cache_stop_count,
612 + .maxlen = sizeof(uint),
613 + .mode = 0644,
614 + .proc_handler = proc_douintvec,
615 + },
616 + {
617 + .procname = "sched_burst_cache_lifetime",
618 + .data = &sched_burst_cache_lifetime,
619 + .maxlen = sizeof(uint),
620 + .mode = 0644,
621 + .proc_handler = proc_douintvec,
622 + },
623 + {
624 + .procname = "sched_deadline_boost_mask",
625 + .data = &sched_deadline_boost_mask,
626 + .maxlen = sizeof(uint),
627 + .mode = 0644,
628 + .proc_handler = proc_douintvec,
629 + },
630 +};
631 +
632 +static int __init sched_bore_sysctl_init(void) {
633 + register_sysctl_init("kernel", sched_bore_sysctls);
634 + return 0;
635 +}
636 +late_initcall(sched_bore_sysctl_init);
637 +#endif // CONFIG_SYSCTL
638 +#endif // CONFIG_SCHED_BORE
639 diff --git a/kernel/sched/core.c b/kernel/sched/core.c
640 index b4b4a6539..67f959bcb 100644
641 --- a/kernel/sched/core.c
642 +++ b/kernel/sched/core.c
643 @@ -96,6 +96,8 @@
644 #include "../../io_uring/io-wq.h"
645 #include "../smpboot.h"
646
647 +#include <linux/sched/bore.h>
648 +
649 EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpu);
650 EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpumask);
651
652 @@ -9944,6 +9946,10 @@ void __init sched_init(void)
653 BUG_ON(&dl_sched_class != &stop_sched_class + 1);
654 #endif
655
656 +#ifdef CONFIG_SCHED_BORE
657 + sched_bore_init();
658 +#endif // CONFIG_SCHED_BORE
659 +
660 wait_bit_init();
661
662 #ifdef CONFIG_FAIR_GROUP_SCHED
663 diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
664 index 4c3d0d9f3..638a71f0f 100644
665 --- a/kernel/sched/debug.c
666 +++ b/kernel/sched/debug.c
667 @@ -167,7 +167,53 @@ static const struct file_operations sched_feat_fops = {
668 };
669
670 #ifdef CONFIG_SMP
671 +#ifdef CONFIG_SCHED_BORE
672 +#define DEFINE_SYSCTL_SCHED_FUNC(name, update_func) \
673 +static ssize_t sched_##name##_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) \
674 +{ \
675 + char buf[16]; \
676 + unsigned int value; \
677 +\
678 + if (cnt > 15) \
679 + cnt = 15; \
680 +\
681 + if (copy_from_user(&buf, ubuf, cnt)) \
682 + return -EFAULT; \
683 + buf[cnt] = '\0'; \
684 +\
685 + if (kstrtouint(buf, 10, &value)) \
686 + return -EINVAL; \
687 +\
688 + sysctl_sched_##name = value; \
689 + sched_update_##update_func(); \
690 +\
691 + *ppos += cnt; \
692 + return cnt; \
693 +} \
694 +\
695 +static int sched_##name##_show(struct seq_file *m, void *v) \
696 +{ \
697 + seq_printf(m, "%d\n", sysctl_sched_##name); \
698 + return 0; \
699 +} \
700 +\
701 +static int sched_##name##_open(struct inode *inode, struct file *filp) \
702 +{ \
703 + return single_open(filp, sched_##name##_show, NULL); \
704 +} \
705 +\
706 +static const struct file_operations sched_##name##_fops = { \
707 + .open = sched_##name##_open, \
708 + .write = sched_##name##_write, \
709 + .read = seq_read, \
710 + .llseek = seq_lseek, \
711 + .release = single_release, \
712 +};
713 +
714 +DEFINE_SYSCTL_SCHED_FUNC(min_base_slice, min_base_slice)
715
716 +#undef DEFINE_SYSCTL_SCHED_FUNC
717 +#else // !CONFIG_SCHED_BORE
718 static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf,
719 size_t cnt, loff_t *ppos)
720 {
721 @@ -213,7 +259,7 @@ static const struct file_operations sched_scaling_fops = {
722 .llseek = seq_lseek,
723 .release = single_release,
724 };
725 -
726 +#endif // CONFIG_SCHED_BORE
727 #endif /* SMP */
728
729 #ifdef CONFIG_PREEMPT_DYNAMIC
730 @@ -347,13 +393,20 @@ static __init int sched_init_debug(void)
731 debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops);
732 #endif
733
734 +#ifdef CONFIG_SCHED_BORE
735 + debugfs_create_file("min_base_slice_ns", 0644, debugfs_sched, NULL, &sched_min_base_slice_fops);
736 + debugfs_create_u32("base_slice_ns", 0444, debugfs_sched, &sysctl_sched_base_slice);
737 +#else // !CONFIG_SCHED_BORE
738 debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice);
739 +#endif // CONFIG_SCHED_BORE
740
741 debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms);
742 debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once);
743
744 #ifdef CONFIG_SMP
745 +#if !defined(CONFIG_SCHED_BORE)
746 debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops);
747 +#endif // CONFIG_SCHED_BORE
748 debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost);
749 debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate);
750
751 @@ -595,6 +648,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
752 SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)),
753 SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime)));
754
755 +#ifdef CONFIG_SCHED_BORE
756 + SEQ_printf(m, " %2d", p->se.burst_score);
757 +#endif // CONFIG_SCHED_BORE
758 #ifdef CONFIG_NUMA_BALANCING
759 SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
760 #endif
761 @@ -1068,6 +1124,9 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
762
763 P(se.load.weight);
764 #ifdef CONFIG_SMP
765 +#ifdef CONFIG_SCHED_BORE
766 + P(se.burst_score);
767 +#endif // CONFIG_SCHED_BORE
768 P(se.avg.load_sum);
769 P(se.avg.runnable_sum);
770 P(se.avg.util_sum);
771 diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
772 index 3b2cfdb8d..7f7745d0e 100644
773 --- a/kernel/sched/fair.c
774 +++ b/kernel/sched/fair.c
775 @@ -57,6 +57,8 @@
776 #include "stats.h"
777 #include "autogroup.h"
778
779 +#include <linux/sched/bore.h>
780 +
781 /*
782 * The initial- and re-scaling of tunables is configurable
783 *
784 @@ -66,17 +68,30 @@
785 * SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus)
786 * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
787 *
788 - * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
789 + * BORE : default SCHED_TUNABLESCALING_NONE = *1 constant
790 + * EEVDF: default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
791 */
792 +#ifdef CONFIG_SCHED_BORE
793 +unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
794 +#else // !CONFIG_SCHED_BORE
795 unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG;
796 +#endif // CONFIG_SCHED_BORE
797
798 /*
799 * Minimal preemption granularity for CPU-bound tasks:
800 *
801 - * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
802 + * BORE : base_slice = minimum multiple of nsecs_per_tick >= min_base_slice
803 + * (default min_base_slice = 2000000 constant, units: nanoseconds)
804 + * EEVDF: default 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds
805 */
806 +#ifdef CONFIG_SCHED_BORE
807 +static const unsigned int nsecs_per_tick = 1000000000ULL / HZ;
808 +unsigned int sysctl_sched_min_base_slice = CONFIG_MIN_BASE_SLICE_NS;
809 +__read_mostly uint sysctl_sched_base_slice = nsecs_per_tick;
810 +#else // !CONFIG_SCHED_BORE
811 unsigned int sysctl_sched_base_slice = 750000ULL;
812 static unsigned int normalized_sysctl_sched_base_slice = 750000ULL;
813 +#endif // CONFIG_SCHED_BORE
814
815 /*
816 * After fork, child runs first. If set to 0 (default) then
817 @@ -210,6 +225,13 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w)
818 *
819 * This idea comes from the SD scheduler of Con Kolivas:
820 */
821 +#ifdef CONFIG_SCHED_BORE
822 +static void update_sysctl(void) {
823 + sysctl_sched_base_slice = nsecs_per_tick *
824 + max(1UL, DIV_ROUND_UP(sysctl_sched_min_base_slice, nsecs_per_tick));
825 +}
826 +void sched_update_min_base_slice(void) { update_sysctl(); }
827 +#else // !CONFIG_SCHED_BORE
828 static unsigned int get_update_sysctl_factor(void)
829 {
830 unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8);
831 @@ -240,6 +262,7 @@ static void update_sysctl(void)
832 SET_SYSCTL(sched_base_slice);
833 #undef SET_SYSCTL
834 }
835 +#endif // CONFIG_SCHED_BORE
836
837 void __init sched_init_granularity(void)
838 {
839 @@ -713,6 +736,9 @@ static s64 entity_lag(u64 avruntime, struct sched_entity *se)
840
841 vlag = avruntime - se->vruntime;
842 limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
843 +#ifdef CONFIG_SCHED_BORE
844 + limit >>= !!sched_bore;
845 +#endif // CONFIG_SCHED_BORE
846
847 return clamp(vlag, -limit, limit);
848 }
849 @@ -894,6 +920,10 @@ static struct sched_entity *__pick_eevdf(struct cfs_rq *cfs_rq)
850 * until it gets a new slice. See the HACK in set_next_entity().
851 */
852 if (sched_feat(RUN_TO_PARITY) && curr && curr->vlag == curr->deadline)
853 +#ifdef CONFIG_SCHED_BORE
854 + if (!(likely(sched_bore) && likely(sched_burst_parity_threshold) &&
855 + sched_burst_parity_threshold < cfs_rq->nr_running))
856 +#endif // CONFIG_SCHED_BORE
857 return curr;
858
859 while (node) {
860 @@ -1002,6 +1032,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
861 * Scheduling class statistics methods:
862 */
863 #ifdef CONFIG_SMP
864 +#if !defined(CONFIG_SCHED_BORE)
865 int sched_update_scaling(void)
866 {
867 unsigned int factor = get_update_sysctl_factor();
868 @@ -1013,6 +1044,7 @@ int sched_update_scaling(void)
869
870 return 0;
871 }
872 +#endif // CONFIG_SCHED_BORE
873 #endif
874 #endif
875
876 @@ -1210,6 +1242,10 @@ static void update_curr(struct cfs_rq *cfs_rq)
877 if (unlikely(delta_exec <= 0))
878 return;
879
880 +#ifdef CONFIG_SCHED_BORE
881 + curr->burst_time += delta_exec;
882 + update_burst_penalty(curr);
883 +#endif // CONFIG_SCHED_BORE
884 curr->vruntime += calc_delta_fair(delta_exec, curr);
885 update_deadline(cfs_rq, curr);
886 update_min_vruntime(cfs_rq);
887 @@ -3848,7 +3884,7 @@ static void reweight_eevdf(struct sched_entity *se, u64 avruntime,
888 se->deadline = avruntime + vslice;
889 }
890
891 -static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
892 +void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
893 unsigned long weight)
894 {
895 bool curr = cfs_rq->curr == se;
896 @@ -5191,6 +5227,9 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
897 *
898 * EEVDF: placement strategy #1 / #2
899 */
900 +#ifdef CONFIG_SCHED_BORE
901 + if (se->vlag)
902 +#endif // CONFIG_SCHED_BORE
903 if (sched_feat(PLACE_LAG) && cfs_rq->nr_running) {
904 struct sched_entity *curr = cfs_rq->curr;
905 unsigned long load;
906 @@ -5261,8 +5300,18 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
907
908 se->vruntime = vruntime - lag;
909
910 + if (sched_feat(PLACE_REL_DEADLINE) && se->rel_deadline) {
911 + se->deadline += se->vruntime;
912 + se->rel_deadline = 0;
913 + return;
914 + }
915 +#ifdef CONFIG_SCHED_BORE
916 + else if (likely(sched_bore))
917 + vslice >>= !!(flags & sched_deadline_boost_mask);
918 + else
919 +#endif // CONFIG_SCHED_BORE
920 /*
921 - * When joining the competition; the exisiting tasks will be,
922 + * When joining the competition; the existing tasks will be,
923 * on average, halfway through their slice, as such start tasks
924 * off with half a slice to ease into the competition.
925 */
926 @@ -5370,6 +5419,7 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
927 static void
928 dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
929 {
930 + bool sleep = flags & DEQUEUE_SLEEP;
931 int action = UPDATE_TG;
932
933 if (entity_is_task(se) && task_on_rq_migrating(task_of(se)))
934 @@ -5397,6 +5447,11 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
935 clear_buddies(cfs_rq, se);
936
937 update_entity_lag(cfs_rq, se);
938 + if (sched_feat(PLACE_REL_DEADLINE) && !sleep) {
939 + se->deadline -= se->vruntime;
940 + se->rel_deadline = 1;
941 + }
942 +
943 if (se != cfs_rq->curr)
944 __dequeue_entity(cfs_rq, se);
945 se->on_rq = 0;
946 @@ -6833,6 +6888,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
947 bool was_sched_idle = sched_idle_rq(rq);
948
949 util_est_dequeue(&rq->cfs, p);
950 +#ifdef CONFIG_SCHED_BORE
951 + if (task_sleep) {
952 + cfs_rq = cfs_rq_of(se);
953 + if (cfs_rq->curr == se)
954 + update_curr(cfs_rq);
955 + restart_burst(se);
956 + }
957 +#endif // CONFIG_SCHED_BORE
958
959 for_each_sched_entity(se) {
960 cfs_rq = cfs_rq_of(se);
961 @@ -8566,16 +8629,25 @@ static void yield_task_fair(struct rq *rq)
962 /*
963 * Are we the only task in the tree?
964 */
965 +#if !defined(CONFIG_SCHED_BORE)
966 if (unlikely(rq->nr_running == 1))
967 return;
968
969 clear_buddies(cfs_rq, se);
970 +#endif // CONFIG_SCHED_BORE
971
972 update_rq_clock(rq);
973 /*
974 * Update run-time statistics of the 'current'.
975 */
976 update_curr(cfs_rq);
977 +#ifdef CONFIG_SCHED_BORE
978 + restart_burst_rescale_deadline(se);
979 + if (unlikely(rq->nr_running == 1))
980 + return;
981 +
982 + clear_buddies(cfs_rq, se);
983 +#endif // CONFIG_SCHED_BORE
984 /*
985 * Tell update_rq_clock() that we've just updated,
986 * so we don't do microscopic update in schedule()
987 @@ -12641,6 +12713,9 @@ static void task_fork_fair(struct task_struct *p)
988 curr = cfs_rq->curr;
989 if (curr)
990 update_curr(cfs_rq);
991 +#ifdef CONFIG_SCHED_BORE
992 + update_burst_score(se);
993 +#endif // CONFIG_SCHED_BORE
994 place_entity(cfs_rq, se, ENQUEUE_INITIAL);
995 rq_unlock(rq, &rf);
996 }
997 @@ -12753,6 +12828,10 @@ static void attach_task_cfs_rq(struct task_struct *p)
998
999 static void switched_from_fair(struct rq *rq, struct task_struct *p)
1000 {
1001 + p->se.rel_deadline = 0;
1002 +#ifdef CONFIG_SCHED_BORE
1003 + init_task_bore(p);
1004 +#endif // CONFIG_SCHED_BORE
1005 detach_task_cfs_rq(p);
1006 }
1007
1008 diff --git a/kernel/sched/features.h b/kernel/sched/features.h
1009 index f77016823..87464a97d 100644
1010 --- a/kernel/sched/features.h
1011 +++ b/kernel/sched/features.h
1012 @@ -6,6 +6,10 @@
1013 */
1014 SCHED_FEAT(PLACE_LAG, true)
1015 SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
1016 +/*
1017 + * Preserve relative virtual deadline on 'migration'.
1018 + */
1019 +SCHED_FEAT(PLACE_REL_DEADLINE, true)
1020 SCHED_FEAT(RUN_TO_PARITY, true)
1021
1022 /*
1023 diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
1024 index d48c6a292..75acf2f6e 100644
1025 --- a/kernel/sched/sched.h
1026 +++ b/kernel/sched/sched.h
1027 @@ -1946,7 +1946,11 @@ static inline void dirty_sched_domain_sysctl(int cpu)
1028 }
1029 #endif
1030
1031 +#ifdef CONFIG_SCHED_BORE
1032 +extern void sched_update_min_base_slice(void);
1033 +#else // !CONFIG_SCHED_BORE
1034 extern int sched_update_scaling(void);
1035 +#endif // CONFIG_SCHED_BORE
1036
1037 static inline const struct cpumask *task_user_cpus(struct task_struct *p)
1038 {
1039 @@ -2532,7 +2536,12 @@ extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags);
1040 extern const_debug unsigned int sysctl_sched_nr_migrate;
1041 extern const_debug unsigned int sysctl_sched_migration_cost;
1042
1043 +#ifdef CONFIG_SCHED_BORE
1044 +extern unsigned int sysctl_sched_min_base_slice;
1045 +extern __read_mostly uint sysctl_sched_base_slice;
1046 +#else // !CONFIG_SCHED_BORE
1047 extern unsigned int sysctl_sched_base_slice;
1048 +#endif // CONFIG_SCHED_BORE
1049
1050 #ifdef CONFIG_SCHED_DEBUG
1051 extern int sysctl_resched_latency_warn_ms;
1052 --
1053 2.41.1
1054
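
A closing note on the MIN_BASE_SLICE_NS help text above: it reduces to a ceiling division against the tick period, i.e. the base slice becomes the smallest whole multiple of 1000000000/HZ that is at least min_base_slice_ns (this is what the BORE update_sysctl() hunk in fair.c computes). The short C sketch below is only a back-of-the-envelope check of that formula; the HZ values and the 2000000 ns default are taken from the patch, and the helper name is invented here.

#include <stdio.h>
#include <stdint.h>

/* Smallest multiple of the tick period >= min_base_slice_ns, clamped to at least one tick */
static uint32_t bore_base_slice(uint32_t hz, uint32_t min_base_slice_ns)
{
        uint32_t nsecs_per_tick = 1000000000u / hz;
        uint32_t ticks = (min_base_slice_ns + nsecs_per_tick - 1) / nsecs_per_tick;

        if (ticks < 1)          /* max(1UL, ...) in the patch */
                ticks = 1;
        return nsecs_per_tick * ticks;
}

int main(void)
{
        uint32_t hz[] = { 100, 250, 300, 1000 };
        for (unsigned i = 0; i < 4; i++)
                printf("HZ=%4u -> base_slice_ns=%u\n",
                       hz[i], bore_base_slice(hz[i], 2000000));  /* CONFIG_MIN_BASE_SLICE_NS default */
        return 0;
}

For HZ=1000 this yields 2 ms exactly; for HZ=250 or HZ=100 the slice rounds up to one full tick (4 ms and 10 ms respectively), which is why the help text warns against setting the minimum too high.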
