
Contents of /cauldron/kernel/current/SOURCES/0001-linux6.6.67-bore5.9.6.patch



Revision 2139948
Sun Jan 19 23:41:27 2025 UTC by ghibo
File size: 32856 bytes
- Cleanup unused patches.
- Update BORE to release 5.9.6.

1 From 81aaea92bcd825975fbed1b7c2f85e259b29b6b8 Mon Sep 17 00:00:00 2001
2 From: Masahito S <firelzrd@gmail.com>
3 Date: Mon, 20 Jan 2025 07:24:54 +0900
4 Subject: [PATCH] linux6.6.67-bore5.9.6
5
6 ---
7 include/linux/sched.h | 21 +-
8 include/linux/sched/bore.h | 40 ++++
9 init/Kconfig | 17 ++
10 kernel/Kconfig.hz | 17 ++
11 kernel/fork.c | 10 +
12 kernel/sched/Makefile | 1 +
13 kernel/sched/bore.c | 443 +++++++++++++++++++++++++++++++++++++
14 kernel/sched/core.c | 6 +
15 kernel/sched/debug.c | 61 ++++-
16 kernel/sched/fair.c | 87 +++++++-
17 kernel/sched/features.h | 4 +
18 kernel/sched/sched.h | 9 +
19 12 files changed, 710 insertions(+), 6 deletions(-)
20 create mode 100644 include/linux/sched/bore.h
21 create mode 100644 kernel/sched/bore.c
22
23 diff --git a/include/linux/sched.h b/include/linux/sched.h
24 index 4809f27b52..ce5c55cbd9 100644
25 --- a/include/linux/sched.h
26 +++ b/include/linux/sched.h
27 @@ -548,6 +548,15 @@ struct sched_statistics {
28 #endif /* CONFIG_SCHEDSTATS */
29 } ____cacheline_aligned;
30
31 +#ifdef CONFIG_SCHED_BORE
32 +struct sched_burst_cache {
33 + u8 score;
34 + u32 count;
35 + u64 timestamp;
36 + spinlock_t lock;
37 +};
38 +#endif // CONFIG_SCHED_BORE
39 +
40 struct sched_entity {
41 /* For load-balancing: */
42 struct load_weight load;
43 @@ -556,12 +565,22 @@ struct sched_entity {
44 u64 min_deadline;
45
46 struct list_head group_node;
47 - unsigned int on_rq;
48 + unsigned char on_rq;
49 + unsigned char rel_deadline;
50
51 u64 exec_start;
52 u64 sum_exec_runtime;
53 u64 prev_sum_exec_runtime;
54 u64 vruntime;
55 +#ifdef CONFIG_SCHED_BORE
56 + u64 burst_time;
57 + u8 prev_burst_penalty;
58 + u8 curr_burst_penalty;
59 + u8 burst_penalty;
60 + u8 burst_score;
61 + struct sched_burst_cache child_burst;
62 + struct sched_burst_cache group_burst;
63 +#endif // CONFIG_SCHED_BORE
64 s64 vlag;
65 u64 slice;
66
67 diff --git a/include/linux/sched/bore.h b/include/linux/sched/bore.h
68 new file mode 100644
69 index 0000000000..32f15d7c1f
70 --- /dev/null
71 +++ b/include/linux/sched/bore.h
72 @@ -0,0 +1,40 @@
73 +
74 +#include <linux/sched.h>
75 +#include <linux/sched/cputime.h>
76 +
77 +#ifndef _LINUX_SCHED_BORE_H
78 +#define _LINUX_SCHED_BORE_H
79 +#define SCHED_BORE_VERSION "5.9.6"
80 +
81 +#ifdef CONFIG_SCHED_BORE
82 +extern u8 __read_mostly sched_bore;
83 +extern u8 __read_mostly sched_burst_exclude_kthreads;
84 +extern u8 __read_mostly sched_burst_smoothness_long;
85 +extern u8 __read_mostly sched_burst_smoothness_short;
86 +extern u8 __read_mostly sched_burst_fork_atavistic;
87 +extern u8 __read_mostly sched_burst_parity_threshold;
88 +extern u8 __read_mostly sched_burst_penalty_offset;
89 +extern uint __read_mostly sched_burst_penalty_scale;
90 +extern uint __read_mostly sched_burst_cache_stop_count;
91 +extern uint __read_mostly sched_burst_cache_lifetime;
92 +extern uint __read_mostly sched_deadline_boost_mask;
93 +
94 +extern void update_burst_score(struct sched_entity *se);
95 +extern void update_burst_penalty(struct sched_entity *se);
96 +
97 +extern void restart_burst(struct sched_entity *se);
98 +extern void restart_burst_rescale_deadline(struct sched_entity *se);
99 +
100 +extern int sched_bore_update_handler(struct ctl_table *table, int write,
101 + void __user *buffer, size_t *lenp, loff_t *ppos);
102 +
103 +extern void sched_clone_bore(
104 + struct task_struct *p, struct task_struct *parent, u64 clone_flags, u64 now);
105 +
106 +extern void reset_task_bore(struct task_struct *p);
107 +extern void sched_bore_init(void);
108 +
109 +extern void reweight_entity(
110 + struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight);
111 +#endif // CONFIG_SCHED_BORE
112 +#endif // _LINUX_SCHED_BORE_H
113 diff --git a/init/Kconfig b/init/Kconfig
114 index 60ed7713b5..629dac0eb4 100644
115 --- a/init/Kconfig
116 +++ b/init/Kconfig
117 @@ -1276,6 +1276,23 @@ config CHECKPOINT_RESTORE
118
119 If unsure, say N here.
120
121 +config SCHED_BORE
122 + bool "Burst-Oriented Response Enhancer"
123 + default y
124 + help
125 + In Desktop and Mobile computing, one might prefer interactive
126 + tasks to keep responsive no matter what they run in the background.
127 +
128 + Enabling this kernel feature modifies the scheduler to discriminate
129 + tasks by their burst time (runtime since it last went sleeping or
130 + yielding state) and prioritize those that run less bursty.
131 + Such tasks usually include window compositor, widgets backend,
132 + terminal emulator, video playback, games and so on.
133 + With a little impact to scheduling fairness, it may improve
134 + responsiveness especially under heavy background workload.
135 +
136 + If unsure, say Y here.
137 +
138 config SCHED_AUTOGROUP
139 bool "Automatic process group scheduling"
140 select CGROUPS
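
For orientation, the help text above maps directly onto the code this patch adds in kernel/sched/bore.c further down: the accumulated burst penalty is shifted down to a burst score and added to the task's nice-derived priority index before the scheduler weight lookup, so burstier tasks end up with a lighter weight. A minimal userspace sketch of that mapping, assuming the kernel's MAX_RT_PRIO of 100 and the clamp to index 39 used by effective_prio() below (the sample values are illustrative only):

#include <stdio.h>
#include <stdint.h>

#define MAX_RT_PRIO 100	/* kernel constant: static_prio 120 == nice 0 */

/* Sketch of effective_prio() from kernel/sched/bore.c: the 0..39 nice index
 * plus the burst score, clamped at 39 (the weakest CFS weight). */
static uint8_t effective_prio_sketch(int static_prio, uint8_t burst_score)
{
	int prio = static_prio - MAX_RT_PRIO + burst_score;
	return prio > 39 ? 39 : (uint8_t)prio;
}

int main(void)
{
	/* A nice-0 task (static_prio 120) with growing burst scores. */
	for (uint8_t score = 0; score <= 12; score += 4)
		printf("burst_score %2u -> priority index %2u\n",
		       score, effective_prio_sketch(120, score));
	return 0;
}

Each extra point of burst score therefore acts like one extra nice level for weight purposes, which is how bursty background work is de-prioritized without touching the task's actual nice value.
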
141 diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
142 index 38ef6d0688..253c566b59 100644
143 --- a/kernel/Kconfig.hz
144 +++ b/kernel/Kconfig.hz
145 @@ -55,5 +55,22 @@ config HZ
146 default 300 if HZ_300
147 default 1000 if HZ_1000
148
149 +config MIN_BASE_SLICE_NS
150 + int "Default value for min_base_slice_ns"
151 + default 2000000
152 + help
153 + The BORE Scheduler automatically calculates the optimal base
154 + slice for the configured HZ using the following equation:
155 +
156 + base_slice_ns =
157 + 1000000000/HZ * DIV_ROUNDUP(min_base_slice_ns, 1000000000/HZ)
158 +
159 + This option sets the default lower bound limit of the base slice
160 + to prevent the loss of task throughput due to overscheduling.
161 +
162 + Setting this value too high can cause the system to boot with
163 + an unnecessarily large base slice, resulting in high scheduling
164 + latency and poor system responsiveness.
165 +
166 config SCHED_HRTICK
167 def_bool HIGH_RES_TIMERS
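
The formula quoted in this help text is the one the patch later installs in kernel/sched/fair.c (update_sysctl() under CONFIG_SCHED_BORE), where the kernel macro is spelled DIV_ROUND_UP. A small standalone C check of how the default min_base_slice_ns of 2000000 rounds for a few common HZ values (the HZ values are just examples):

#include <stdio.h>
#include <stdint.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* base_slice is the smallest whole number of ticks >= min_base_slice. */
static uint64_t base_slice_ns(unsigned int hz, uint64_t min_base_slice)
{
	uint64_t nsecs_per_tick = 1000000000ULL / hz;
	uint64_t ticks = DIV_ROUND_UP(min_base_slice, nsecs_per_tick);
	if (ticks < 1)
		ticks = 1;
	return nsecs_per_tick * ticks;
}

int main(void)
{
	const unsigned int hz[] = { 100, 250, 300, 1000 };
	for (unsigned int i = 0; i < sizeof(hz) / sizeof(hz[0]); i++)
		printf("HZ=%4u -> base_slice_ns=%llu\n", hz[i],
		       (unsigned long long)base_slice_ns(hz[i], 2000000ULL));
	return 0;
}

With the 2 ms default, HZ=1000 yields a 2 ms base slice (two ticks), while HZ=100, 250 and 300 each round up to a single full tick.
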
168 diff --git a/kernel/fork.c b/kernel/fork.c
169 index 23efaa2c42..d8f36a8c2e 100644
170 --- a/kernel/fork.c
171 +++ b/kernel/fork.c
172 @@ -101,12 +101,18 @@
173 #include <linux/iommu.h>
174 #include <linux/tick.h>
175
176 +#ifdef CONFIG_USER_NS
177 +#include <linux/user_namespace.h>
178 +#endif
179 +
180 #include <asm/pgalloc.h>
181 #include <linux/uaccess.h>
182 #include <asm/mmu_context.h>
183 #include <asm/cacheflush.h>
184 #include <asm/tlbflush.h>
185
186 +#include <linux/sched/bore.h>
187 +
188 #include <trace/events/sched.h>
189
190 #define CREATE_TRACE_POINTS
191 @@ -2619,6 +2625,10 @@ __latent_entropy struct task_struct *copy_process(
192 p->start_time = ktime_get_ns();
193 p->start_boottime = ktime_get_boottime_ns();
194
195 +#ifdef CONFIG_SCHED_BORE
196 + if (likely(p->pid))
197 + sched_clone_bore(p, current, clone_flags, p->start_time);
198 +#endif // CONFIG_SCHED_BORE
199 /*
200 * Make it visible to the rest of the system, but dont wake it up yet.
201 * Need tasklist lock for parent etc handling!
202 diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
203 index 976092b7bd..293aad6754 100644
204 --- a/kernel/sched/Makefile
205 +++ b/kernel/sched/Makefile
206 @@ -32,3 +32,4 @@ obj-y += core.o
207 obj-y += fair.o
208 obj-y += build_policy.o
209 obj-y += build_utility.o
210 +obj-y += bore.o
211 diff --git a/kernel/sched/bore.c b/kernel/sched/bore.c
212 new file mode 100644
213 index 0000000000..b63dc1759e
214 --- /dev/null
215 +++ b/kernel/sched/bore.c
216 @@ -0,0 +1,443 @@
217 +/*
218 + * Burst-Oriented Response Enhancer (BORE) CPU Scheduler
219 + * Copyright (C) 2021-2024 Masahito Suzuki <firelzrd@gmail.com>
220 + */
221 +#include <linux/cpuset.h>
222 +#include <linux/sched/task.h>
223 +#include <linux/sched/bore.h>
224 +#include "sched.h"
225 +
226 +#ifdef CONFIG_SCHED_BORE
227 +u8 __read_mostly sched_bore = 1;
228 +u8 __read_mostly sched_burst_exclude_kthreads = 1;
229 +u8 __read_mostly sched_burst_smoothness_long = 1;
230 +u8 __read_mostly sched_burst_smoothness_short = 0;
231 +u8 __read_mostly sched_burst_fork_atavistic = 2;
232 +u8 __read_mostly sched_burst_parity_threshold = 2;
233 +u8 __read_mostly sched_burst_penalty_offset = 24;
234 +uint __read_mostly sched_burst_penalty_scale = 1280;
235 +uint __read_mostly sched_burst_cache_stop_count = 64;
236 +uint __read_mostly sched_burst_cache_lifetime = 75000000;
237 +uint __read_mostly sched_deadline_boost_mask = ENQUEUE_INITIAL
238 + | ENQUEUE_WAKEUP;
239 +static int __maybe_unused sixty_four = 64;
240 +static int __maybe_unused maxval_u8 = 255;
241 +static int __maybe_unused maxval_12_bits = 4095;
242 +
243 +#define MAX_BURST_PENALTY (39U <<2)
244 +
245 +static inline u32 log2plus1_u64_u32f8(u64 v) {
246 + u32 integral = fls64(v);
247 + u8 fractional = v << (64 - integral) >> 55;
248 + return integral << 8 | fractional;
249 +}
250 +
251 +static inline u32 calc_burst_penalty(u64 burst_time) {
252 + u32 greed, tolerance, penalty, scaled_penalty;
253 +
254 + greed = log2plus1_u64_u32f8(burst_time);
255 + tolerance = sched_burst_penalty_offset << 8;
256 + penalty = max(0, (s32)(greed - tolerance));
257 + scaled_penalty = penalty * sched_burst_penalty_scale >> 16;
258 +
259 + return min(MAX_BURST_PENALTY, scaled_penalty);
260 +}
261 +
262 +static inline u64 __scale_slice(u64 delta, u8 score)
263 +{return mul_u64_u32_shr(delta, sched_prio_to_wmult[score], 22);}
264 +
265 +static inline u64 __unscale_slice(u64 delta, u8 score)
266 +{return mul_u64_u32_shr(delta, sched_prio_to_weight[score], 10);}
267 +
268 +static void reweight_task_by_prio(struct task_struct *p, int prio) {
269 + struct sched_entity *se = &p->se;
270 + unsigned long weight = scale_load(sched_prio_to_weight[prio]);
271 +
272 + reweight_entity(cfs_rq_of(se), se, weight);
273 + se->load.inv_weight = sched_prio_to_wmult[prio];
274 +}
275 +
276 +static inline u8 effective_prio(struct task_struct *p) {
277 + u8 prio = p->static_prio - MAX_RT_PRIO;
278 + if (likely(sched_bore))
279 + prio += p->se.burst_score;
280 + return min(39, prio);
281 +}
282 +
283 +void update_burst_score(struct sched_entity *se) {
284 + if (!entity_is_task(se)) return;
285 + struct task_struct *p = task_of(se);
286 + u8 prev_prio = effective_prio(p);
287 +
288 + u8 burst_score = 0;
289 + if (!((p->flags & PF_KTHREAD) && likely(sched_burst_exclude_kthreads)))
290 + burst_score = se->burst_penalty >> 2;
291 + se->burst_score = burst_score;
292 +
293 + u8 new_prio = effective_prio(p);
294 + if (new_prio != prev_prio)
295 + reweight_task_by_prio(p, new_prio);
296 +}
297 +
298 +void update_burst_penalty(struct sched_entity *se) {
299 + se->curr_burst_penalty = calc_burst_penalty(se->burst_time);
300 + se->burst_penalty = max(se->prev_burst_penalty, se->curr_burst_penalty);
301 + update_burst_score(se);
302 +}
303 +
304 +static inline u32 binary_smooth(u32 new, u32 old) {
305 + int increment = new - old;
306 + return (0 <= increment)?
307 + old + ( increment >> (int)sched_burst_smoothness_long):
308 + old - (-increment >> (int)sched_burst_smoothness_short);
309 +}
310 +
311 +static void revolve_burst_penalty(struct sched_entity *se) {
312 + se->prev_burst_penalty =
313 + binary_smooth(se->curr_burst_penalty, se->prev_burst_penalty);
314 + se->burst_time = 0;
315 + se->curr_burst_penalty = 0;
316 +}
317 +
318 +inline void restart_burst(struct sched_entity *se) {
319 + revolve_burst_penalty(se);
320 + se->burst_penalty = se->prev_burst_penalty;
321 + update_burst_score(se);
322 +}
323 +
324 +void restart_burst_rescale_deadline(struct sched_entity *se) {
325 + s64 vscaled, wremain, vremain = se->deadline - se->vruntime;
326 + struct task_struct *p = task_of(se);
327 + u8 prev_prio = effective_prio(p);
328 + restart_burst(se);
329 + u8 new_prio = effective_prio(p);
330 + if (prev_prio > new_prio) {
331 + wremain = __unscale_slice(abs(vremain), prev_prio);
332 + vscaled = __scale_slice(wremain, new_prio);
333 + if (unlikely(vremain < 0))
334 + vscaled = -vscaled;
335 + se->deadline = se->vruntime + vscaled;
336 + }
337 +}
338 +
339 +static inline bool task_is_bore_eligible(struct task_struct *p)
340 +{return p && p->sched_class == &fair_sched_class && !p->exit_state;}
341 +
342 +static void reset_task_weights_bore(void) {
343 + struct task_struct *task;
344 + struct rq *rq;
345 + struct rq_flags rf;
346 +
347 + write_lock_irq(&tasklist_lock);
348 + for_each_process(task) {
349 + if (!task_is_bore_eligible(task)) continue;
350 + rq = task_rq(task);
351 + rq_pin_lock(rq, &rf);
352 + update_rq_clock(rq);
353 + reweight_task_by_prio(task, effective_prio(task));
354 + rq_unpin_lock(rq, &rf);
355 + }
356 + write_unlock_irq(&tasklist_lock);
357 +}
358 +
359 +int sched_bore_update_handler(struct ctl_table *table, int write,
360 + void __user *buffer, size_t *lenp, loff_t *ppos) {
361 + int ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
362 + if (ret || !write)
363 + return ret;
364 +
365 + reset_task_weights_bore();
366 +
367 + return 0;
368 +}
369 +
370 +#define for_each_child(p, t) \
371 + list_for_each_entry(t, &(p)->children, sibling)
372 +
373 +static u32 count_entries_upto2(struct list_head *head) {
374 + struct list_head *next = head->next;
375 + return (next != head) + (next->next != head);
376 +}
377 +
378 +static inline void init_task_burst_cache_lock(struct task_struct *p) {
379 + spin_lock_init(&p->se.child_burst.lock);
380 + spin_lock_init(&p->se.group_burst.lock);
381 +}
382 +
383 +static inline bool burst_cache_expired(struct sched_burst_cache *bc, u64 now)
384 +{return (s64)(bc->timestamp + sched_burst_cache_lifetime - now) < 0;}
385 +
386 +static void update_burst_cache(struct sched_burst_cache *bc,
387 + struct task_struct *p, u32 cnt, u32 sum, u64 now) {
388 + u8 avg = cnt ? sum / cnt : 0;
389 + bc->score = max(avg, p->se.burst_penalty);
390 + bc->count = cnt;
391 + bc->timestamp = now;
392 +}
393 +
394 +static inline void update_child_burst_direct(struct task_struct *p, u64 now) {
395 + u32 cnt = 0, sum = 0;
396 + struct task_struct *child;
397 +
398 + for_each_child(p, child) {
399 + if (!task_is_bore_eligible(child)) continue;
400 + cnt++;
401 + sum += child->se.burst_penalty;
402 + }
403 +
404 + update_burst_cache(&p->se.child_burst, p, cnt, sum, now);
405 +}
406 +
407 +static inline u8 inherit_burst_direct(
408 + struct task_struct *p, u64 now, u64 clone_flags) {
409 + struct task_struct *parent = p;
410 + struct sched_burst_cache *bc;
411 +
412 + if (clone_flags & CLONE_PARENT)
413 + parent = parent->real_parent;
414 +
415 + bc = &parent->se.child_burst;
416 + guard(spinlock)(&bc->lock);
417 + if (burst_cache_expired(bc, now))
418 + update_child_burst_direct(parent, now);
419 +
420 + return bc->score;
421 +}
422 +
423 +static void update_child_burst_topological(
424 + struct task_struct *p, u64 now, u32 depth, u32 *acnt, u32 *asum) {
425 + u32 cnt = 0, dcnt = 0, sum = 0;
426 + struct task_struct *child, *dec;
427 + struct sched_burst_cache *bc __maybe_unused;
428 +
429 + for_each_child(p, child) {
430 + dec = child;
431 + while ((dcnt = count_entries_upto2(&dec->children)) == 1)
432 + dec = list_first_entry(&dec->children, struct task_struct, sibling);
433 +
434 + if (!dcnt || !depth) {
435 + if (!task_is_bore_eligible(dec)) continue;
436 + cnt++;
437 + sum += dec->se.burst_penalty;
438 + continue;
439 + }
440 + bc = &dec->se.child_burst;
441 + spin_lock(&bc->lock);
442 + if (!burst_cache_expired(bc, now)) {
443 + cnt += bc->count;
444 + sum += (u32)bc->score * bc->count;
445 + if (sched_burst_cache_stop_count <= cnt) {
446 + spin_unlock(&bc->lock);
447 + break;
448 + }
449 + spin_unlock(&bc->lock);
450 + continue;
451 + }
452 + update_child_burst_topological(dec, now, depth - 1, &cnt, &sum);
453 + spin_unlock(&bc->lock);
454 + }
455 +
456 + update_burst_cache(&p->se.child_burst, p, cnt, sum, now);
457 + *acnt += cnt;
458 + *asum += sum;
459 +}
460 +
461 +static inline u8 inherit_burst_topological(
462 + struct task_struct *p, u64 now, u64 clone_flags) {
463 + struct task_struct *anc = p;
464 + struct sched_burst_cache *bc;
465 + u32 cnt = 0, sum = 0;
466 + u32 base_child_cnt = 0;
467 +
468 + if (clone_flags & CLONE_PARENT) {
469 + anc = anc->real_parent;
470 + base_child_cnt = 1;
471 + }
472 +
473 + for (struct task_struct *next;
474 + anc != (next = anc->real_parent) &&
475 + count_entries_upto2(&anc->children) <= base_child_cnt;) {
476 + anc = next;
477 + base_child_cnt = 1;
478 + }
479 +
480 + bc = &anc->se.child_burst;
481 + guard(spinlock)(&bc->lock);
482 + if (burst_cache_expired(bc, now))
483 + update_child_burst_topological(
484 + anc, now, sched_burst_fork_atavistic - 1, &cnt, &sum);
485 +
486 + return bc->score;
487 +}
488 +
489 +static inline void update_tg_burst(struct task_struct *p, u64 now) {
490 + struct task_struct *task;
491 + u32 cnt = 0, sum = 0;
492 +
493 + for_each_thread(p, task) {
494 + if (!task_is_bore_eligible(task)) continue;
495 + cnt++;
496 + sum += task->se.burst_penalty;
497 + }
498 +
499 + update_burst_cache(&p->se.group_burst, p, cnt, sum, now);
500 +}
501 +
502 +static inline u8 inherit_burst_tg(struct task_struct *p, u64 now) {
503 + struct task_struct *parent = rcu_dereference(p->group_leader);
504 + struct sched_burst_cache *bc = &parent->se.group_burst;
505 + guard(spinlock)(&bc->lock);
506 + if (burst_cache_expired(bc, now))
507 + update_tg_burst(parent, now);
508 +
509 + return bc->score;
510 +}
511 +
512 +void sched_clone_bore(struct task_struct *p,
513 + struct task_struct *parent, u64 clone_flags, u64 now) {
514 + struct sched_entity *se = &p->se;
515 + u8 penalty;
516 +
517 + init_task_burst_cache_lock(p);
518 +
519 + if (!task_is_bore_eligible(p)) return;
520 +
521 + if (clone_flags & CLONE_THREAD) {
522 + rcu_read_lock();
523 + penalty = inherit_burst_tg(parent, now);
524 + rcu_read_unlock();
525 + } else {
526 + read_lock(&tasklist_lock);
527 + penalty = likely(sched_burst_fork_atavistic) ?
528 + inherit_burst_topological(parent, now, clone_flags):
529 + inherit_burst_direct(parent, now, clone_flags);
530 + read_unlock(&tasklist_lock);
531 + }
532 +
533 + revolve_burst_penalty(se);
534 + se->burst_penalty = se->prev_burst_penalty =
535 + max(se->prev_burst_penalty, penalty);
536 + se->child_burst.timestamp = 0;
537 + se->group_burst.timestamp = 0;
538 +}
539 +
540 +void reset_task_bore(struct task_struct *p) {
541 + p->se.burst_time = 0;
542 + p->se.prev_burst_penalty = 0;
543 + p->se.curr_burst_penalty = 0;
544 + p->se.burst_penalty = 0;
545 + p->se.burst_score = 0;
546 + memset(&p->se.child_burst, 0, sizeof(struct sched_burst_cache));
547 + memset(&p->se.group_burst, 0, sizeof(struct sched_burst_cache));
548 +}
549 +
550 +void __init sched_bore_init(void) {
551 + printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification %s by Masahito Suzuki", SCHED_BORE_VERSION);
552 + reset_task_bore(&init_task);
553 + init_task_burst_cache_lock(&init_task);
554 +}
555 +
556 +#ifdef CONFIG_SYSCTL
557 +static struct ctl_table sched_bore_sysctls[] = {
558 + {
559 + .procname = "sched_bore",
560 + .data = &sched_bore,
561 + .maxlen = sizeof(u8),
562 + .mode = 0644,
563 + .proc_handler = sched_bore_update_handler,
564 + .extra1 = SYSCTL_ZERO,
565 + .extra2 = SYSCTL_ONE,
566 + },
567 + {
568 + .procname = "sched_burst_exclude_kthreads",
569 + .data = &sched_burst_exclude_kthreads,
570 + .maxlen = sizeof(u8),
571 + .mode = 0644,
572 + .proc_handler = proc_dou8vec_minmax,
573 + .extra1 = SYSCTL_ZERO,
574 + .extra2 = SYSCTL_ONE,
575 + },
576 + {
577 + .procname = "sched_burst_smoothness_long",
578 + .data = &sched_burst_smoothness_long,
579 + .maxlen = sizeof(u8),
580 + .mode = 0644,
581 + .proc_handler = proc_dou8vec_minmax,
582 + .extra1 = SYSCTL_ZERO,
583 + .extra2 = SYSCTL_ONE,
584 + },
585 + {
586 + .procname = "sched_burst_smoothness_short",
587 + .data = &sched_burst_smoothness_short,
588 + .maxlen = sizeof(u8),
589 + .mode = 0644,
590 + .proc_handler = proc_dou8vec_minmax,
591 + .extra1 = SYSCTL_ZERO,
592 + .extra2 = SYSCTL_ONE,
593 + },
594 + {
595 + .procname = "sched_burst_fork_atavistic",
596 + .data = &sched_burst_fork_atavistic,
597 + .maxlen = sizeof(u8),
598 + .mode = 0644,
599 + .proc_handler = proc_dou8vec_minmax,
600 + .extra1 = SYSCTL_ZERO,
601 + .extra2 = SYSCTL_THREE,
602 + },
603 + {
604 + .procname = "sched_burst_parity_threshold",
605 + .data = &sched_burst_parity_threshold,
606 + .maxlen = sizeof(u8),
607 + .mode = 0644,
608 + .proc_handler = proc_dou8vec_minmax,
609 + .extra1 = SYSCTL_ZERO,
610 + .extra2 = &maxval_u8,
611 + },
612 + {
613 + .procname = "sched_burst_penalty_offset",
614 + .data = &sched_burst_penalty_offset,
615 + .maxlen = sizeof(u8),
616 + .mode = 0644,
617 + .proc_handler = proc_dou8vec_minmax,
618 + .extra1 = SYSCTL_ZERO,
619 + .extra2 = &sixty_four,
620 + },
621 + {
622 + .procname = "sched_burst_penalty_scale",
623 + .data = &sched_burst_penalty_scale,
624 + .maxlen = sizeof(uint),
625 + .mode = 0644,
626 + .proc_handler = proc_douintvec_minmax,
627 + .extra1 = SYSCTL_ZERO,
628 + .extra2 = &maxval_12_bits,
629 + },
630 + {
631 + .procname = "sched_burst_cache_stop_count",
632 + .data = &sched_burst_cache_stop_count,
633 + .maxlen = sizeof(uint),
634 + .mode = 0644,
635 + .proc_handler = proc_douintvec,
636 + },
637 + {
638 + .procname = "sched_burst_cache_lifetime",
639 + .data = &sched_burst_cache_lifetime,
640 + .maxlen = sizeof(uint),
641 + .mode = 0644,
642 + .proc_handler = proc_douintvec,
643 + },
644 + {
645 + .procname = "sched_deadline_boost_mask",
646 + .data = &sched_deadline_boost_mask,
647 + .maxlen = sizeof(uint),
648 + .mode = 0644,
649 + .proc_handler = proc_douintvec,
650 + },
651 +};
652 +
653 +static int __init sched_bore_sysctl_init(void) {
654 + register_sysctl_init("kernel", sched_bore_sysctls);
655 + return 0;
656 +}
657 +late_initcall(sched_bore_sysctl_init);
658 +#endif // CONFIG_SYSCTL
659 +#endif // CONFIG_SCHED_BORE
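
To make the fixed-point arithmetic in bore.c above easier to follow, here is a rough userspace re-implementation of log2plus1_u64_u32f8(), calc_burst_penalty() and the derived burst score, using the default tunables from this file. fls64() is emulated with __builtin_clzll(), a zero guard is added for the standalone sketch, and the sample burst times are arbitrary:

#include <stdio.h>
#include <stdint.h>

#define MAX_BURST_PENALTY (39U << 2)	/* 156 == 39 << 2: 39 priority steps in quarter-step units */

static const unsigned int penalty_offset = 24;	/* sched_burst_penalty_offset default */
static const unsigned int penalty_scale = 1280;	/* sched_burst_penalty_scale default  */

/* fls64(): 1-based position of the most significant set bit, 0 for 0. */
static unsigned int fls64_emul(uint64_t v)
{
	return v ? 64 - (unsigned int)__builtin_clzll(v) : 0;
}

/* log2(v)+1 in 24.8 fixed point: integer part in the upper bits, the eight
 * bits that follow the leading one as the fraction. */
static uint32_t log2plus1_u64_u32f8(uint64_t v)
{
	if (!v)			/* guard only needed in this sketch */
		return 0;
	uint32_t integral = fls64_emul(v);
	uint8_t fractional = v << (64 - integral) >> 55;
	return integral << 8 | fractional;
}

static uint32_t calc_burst_penalty(uint64_t burst_time)
{
	uint32_t greed = log2plus1_u64_u32f8(burst_time);
	uint32_t tolerance = penalty_offset << 8;
	int32_t penalty = (int32_t)(greed - tolerance);

	if (penalty < 0)
		penalty = 0;
	uint32_t scaled = (uint32_t)penalty * penalty_scale >> 16;
	return scaled < MAX_BURST_PENALTY ? scaled : MAX_BURST_PENALTY;
}

int main(void)
{
	const uint64_t ns[] = { 1000000ULL, 16777216ULL, 100000000ULL,
				1000000000ULL, 10000000000ULL };
	for (unsigned int i = 0; i < sizeof(ns) / sizeof(ns[0]); i++) {
		uint32_t penalty = calc_burst_penalty(ns[i]);
		printf("burst %11llu ns -> penalty %3u -> score +%u\n",
		       (unsigned long long)ns[i], penalty, penalty >> 2);
	}
	return 0;
}

Because the default offset is 24 (2^24 ns, about 16.8 ms), penalties only start to accumulate once a task has run for roughly that long without sleeping; a 1-second burst works out to a burst score of about +8.
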
660 diff --git a/kernel/sched/core.c b/kernel/sched/core.c
661 index 228f7c07da..76c54361fa 100644
662 --- a/kernel/sched/core.c
663 +++ b/kernel/sched/core.c
664 @@ -96,6 +96,8 @@
665 #include "../../io_uring/io-wq.h"
666 #include "../smpboot.h"
667
668 +#include <linux/sched/bore.h>
669 +
670 EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpu);
671 EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpumask);
672
673 @@ -9946,6 +9948,10 @@ void __init sched_init(void)
674 BUG_ON(&dl_sched_class != &stop_sched_class + 1);
675 #endif
676
677 +#ifdef CONFIG_SCHED_BORE
678 + sched_bore_init();
679 +#endif // CONFIG_SCHED_BORE
680 +
681 wait_bit_init();
682
683 #ifdef CONFIG_FAIR_GROUP_SCHED
684 diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
685 index 4c3d0d9f3d..638a71f0fa 100644
686 --- a/kernel/sched/debug.c
687 +++ b/kernel/sched/debug.c
688 @@ -167,7 +167,53 @@ static const struct file_operations sched_feat_fops = {
689 };
690
691 #ifdef CONFIG_SMP
692 +#ifdef CONFIG_SCHED_BORE
693 +#define DEFINE_SYSCTL_SCHED_FUNC(name, update_func) \
694 +static ssize_t sched_##name##_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) \
695 +{ \
696 + char buf[16]; \
697 + unsigned int value; \
698 +\
699 + if (cnt > 15) \
700 + cnt = 15; \
701 +\
702 + if (copy_from_user(&buf, ubuf, cnt)) \
703 + return -EFAULT; \
704 + buf[cnt] = '\0'; \
705 +\
706 + if (kstrtouint(buf, 10, &value)) \
707 + return -EINVAL; \
708 +\
709 + sysctl_sched_##name = value; \
710 + sched_update_##update_func(); \
711 +\
712 + *ppos += cnt; \
713 + return cnt; \
714 +} \
715 +\
716 +static int sched_##name##_show(struct seq_file *m, void *v) \
717 +{ \
718 + seq_printf(m, "%d\n", sysctl_sched_##name); \
719 + return 0; \
720 +} \
721 +\
722 +static int sched_##name##_open(struct inode *inode, struct file *filp) \
723 +{ \
724 + return single_open(filp, sched_##name##_show, NULL); \
725 +} \
726 +\
727 +static const struct file_operations sched_##name##_fops = { \
728 + .open = sched_##name##_open, \
729 + .write = sched_##name##_write, \
730 + .read = seq_read, \
731 + .llseek = seq_lseek, \
732 + .release = single_release, \
733 +};
734 +
735 +DEFINE_SYSCTL_SCHED_FUNC(min_base_slice, min_base_slice)
736
737 +#undef DEFINE_SYSCTL_SCHED_FUNC
738 +#else // !CONFIG_SCHED_BORE
739 static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf,
740 size_t cnt, loff_t *ppos)
741 {
742 @@ -213,7 +259,7 @@ static const struct file_operations sched_scaling_fops = {
743 .llseek = seq_lseek,
744 .release = single_release,
745 };
746 -
747 +#endif // CONFIG_SCHED_BORE
748 #endif /* SMP */
749
750 #ifdef CONFIG_PREEMPT_DYNAMIC
751 @@ -347,13 +393,20 @@ static __init int sched_init_debug(void)
752 debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops);
753 #endif
754
755 +#ifdef CONFIG_SCHED_BORE
756 + debugfs_create_file("min_base_slice_ns", 0644, debugfs_sched, NULL, &sched_min_base_slice_fops);
757 + debugfs_create_u32("base_slice_ns", 0444, debugfs_sched, &sysctl_sched_base_slice);
758 +#else // !CONFIG_SCHED_BORE
759 debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice);
760 +#endif // CONFIG_SCHED_BORE
761
762 debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms);
763 debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once);
764
765 #ifdef CONFIG_SMP
766 +#if !defined(CONFIG_SCHED_BORE)
767 debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops);
768 +#endif // CONFIG_SCHED_BORE
769 debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost);
770 debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate);
771
772 @@ -595,6 +648,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
773 SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)),
774 SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime)));
775
776 +#ifdef CONFIG_SCHED_BORE
777 + SEQ_printf(m, " %2d", p->se.burst_score);
778 +#endif // CONFIG_SCHED_BORE
779 #ifdef CONFIG_NUMA_BALANCING
780 SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
781 #endif
782 @@ -1068,6 +1124,9 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
783
784 P(se.load.weight);
785 #ifdef CONFIG_SMP
786 +#ifdef CONFIG_SCHED_BORE
787 + P(se.burst_score);
788 +#endif // CONFIG_SCHED_BORE
789 P(se.avg.load_sum);
790 P(se.avg.runnable_sum);
791 P(se.avg.util_sum);
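
The macro above generates the file operations for a writable min_base_slice_ns entry in the scheduler's debugfs directory (registered further down in sched_init_debug(), alongside a now read-only base_slice_ns). Assuming debugfs is mounted at the usual /sys/kernel/debug, a small root-only C helper to change the knob and read back the recomputed slice could look like this; the paths and error handling are illustrative:

#include <stdio.h>

/* Assumed debugfs mount point; adjust if debugfs lives elsewhere. */
#define SCHED_DEBUGFS "/sys/kernel/debug/sched/"

static int write_knob(const char *name, const char *value)
{
	char path[128];
	snprintf(path, sizeof(path), SCHED_DEBUGFS "%s", name);
	FILE *f = fopen(path, "w");
	if (!f) {
		perror(path);
		return -1;
	}
	fputs(value, f);
	return fclose(f);
}

static void read_knob(const char *name)
{
	char path[128], buf[64];
	snprintf(path, sizeof(path), SCHED_DEBUGFS "%s", name);
	FILE *f = fopen(path, "r");
	if (!f) {
		perror(path);
		return;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("%s = %s", name, buf);
	fclose(f);
}

int main(void)
{
	/* Raise the lower bound to 4 ms; the write handler above then calls
	 * sched_update_min_base_slice() to re-derive base_slice_ns. */
	write_knob("min_base_slice_ns", "4000000");
	read_knob("min_base_slice_ns");
	read_knob("base_slice_ns");
	return 0;
}
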
792 diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
793 index 3b2cfdb8d7..6268ccb187 100644
794 --- a/kernel/sched/fair.c
795 +++ b/kernel/sched/fair.c
796 @@ -57,6 +57,8 @@
797 #include "stats.h"
798 #include "autogroup.h"
799
800 +#include <linux/sched/bore.h>
801 +
802 /*
803 * The initial- and re-scaling of tunables is configurable
804 *
805 @@ -66,17 +68,30 @@
806 * SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus)
807 * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
808 *
809 - * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
810 + * BORE : default SCHED_TUNABLESCALING_NONE = *1 constant
811 + * EEVDF: default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
812 */
813 +#ifdef CONFIG_SCHED_BORE
814 +unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
815 +#else // !CONFIG_SCHED_BORE
816 unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG;
817 +#endif // CONFIG_SCHED_BORE
818
819 /*
820 * Minimal preemption granularity for CPU-bound tasks:
821 *
822 - * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
823 + * BORE : base_slice = minimum multiple of nsecs_per_tick >= min_base_slice
824 + * (default min_base_slice = 2000000 constant, units: nanoseconds)
825 + * EEVDF: default 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds
826 */
827 +#ifdef CONFIG_SCHED_BORE
828 +static const unsigned int nsecs_per_tick = 1000000000ULL / HZ;
829 +unsigned int sysctl_sched_min_base_slice = CONFIG_MIN_BASE_SLICE_NS;
830 +__read_mostly uint sysctl_sched_base_slice = nsecs_per_tick;
831 +#else // !CONFIG_SCHED_BORE
832 unsigned int sysctl_sched_base_slice = 750000ULL;
833 static unsigned int normalized_sysctl_sched_base_slice = 750000ULL;
834 +#endif // CONFIG_SCHED_BORE
835
836 /*
837 * After fork, child runs first. If set to 0 (default) then
838 @@ -210,6 +225,13 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w)
839 *
840 * This idea comes from the SD scheduler of Con Kolivas:
841 */
842 +#ifdef CONFIG_SCHED_BORE
843 +static void update_sysctl(void) {
844 + sysctl_sched_base_slice = nsecs_per_tick *
845 + max(1UL, DIV_ROUND_UP(sysctl_sched_min_base_slice, nsecs_per_tick));
846 +}
847 +void sched_update_min_base_slice(void) { update_sysctl(); }
848 +#else // !CONFIG_SCHED_BORE
849 static unsigned int get_update_sysctl_factor(void)
850 {
851 unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8);
852 @@ -240,6 +262,7 @@ static void update_sysctl(void)
853 SET_SYSCTL(sched_base_slice);
854 #undef SET_SYSCTL
855 }
856 +#endif // CONFIG_SCHED_BORE
857
858 void __init sched_init_granularity(void)
859 {
860 @@ -713,6 +736,9 @@ static s64 entity_lag(u64 avruntime, struct sched_entity *se)
861
862 vlag = avruntime - se->vruntime;
863 limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
864 +#ifdef CONFIG_SCHED_BORE
865 + limit >>= !!sched_bore;
866 +#endif // CONFIG_SCHED_BORE
867
868 return clamp(vlag, -limit, limit);
869 }
870 @@ -894,6 +920,10 @@ static struct sched_entity *__pick_eevdf(struct cfs_rq *cfs_rq)
871 * until it gets a new slice. See the HACK in set_next_entity().
872 */
873 if (sched_feat(RUN_TO_PARITY) && curr && curr->vlag == curr->deadline)
874 +#ifdef CONFIG_SCHED_BORE
875 + if (!(likely(sched_bore) && likely(sched_burst_parity_threshold) &&
876 + sched_burst_parity_threshold < cfs_rq->nr_running))
877 +#endif // CONFIG_SCHED_BORE
878 return curr;
879
880 while (node) {
881 @@ -1002,6 +1032,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
882 * Scheduling class statistics methods:
883 */
884 #ifdef CONFIG_SMP
885 +#if !defined(CONFIG_SCHED_BORE)
886 int sched_update_scaling(void)
887 {
888 unsigned int factor = get_update_sysctl_factor();
889 @@ -1013,6 +1044,7 @@ int sched_update_scaling(void)
890
891 return 0;
892 }
893 +#endif // CONFIG_SCHED_BORE
894 #endif
895 #endif
896
897 @@ -1210,6 +1242,10 @@ static void update_curr(struct cfs_rq *cfs_rq)
898 if (unlikely(delta_exec <= 0))
899 return;
900
901 +#ifdef CONFIG_SCHED_BORE
902 + curr->burst_time += delta_exec;
903 + update_burst_penalty(curr);
904 +#endif // CONFIG_SCHED_BORE
905 curr->vruntime += calc_delta_fair(delta_exec, curr);
906 update_deadline(cfs_rq, curr);
907 update_min_vruntime(cfs_rq);
908 @@ -3848,7 +3884,7 @@ static void reweight_eevdf(struct sched_entity *se, u64 avruntime,
909 se->deadline = avruntime + vslice;
910 }
911
912 -static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
913 +void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
914 unsigned long weight)
915 {
916 bool curr = cfs_rq->curr == se;
917 @@ -5191,6 +5227,9 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
918 *
919 * EEVDF: placement strategy #1 / #2
920 */
921 +#ifdef CONFIG_SCHED_BORE
922 + if (se->vlag)
923 +#endif // CONFIG_SCHED_BORE
924 if (sched_feat(PLACE_LAG) && cfs_rq->nr_running) {
925 struct sched_entity *curr = cfs_rq->curr;
926 unsigned long load;
927 @@ -5261,8 +5300,18 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
928
929 se->vruntime = vruntime - lag;
930
931 + if (sched_feat(PLACE_REL_DEADLINE) && se->rel_deadline) {
932 + se->deadline += se->vruntime;
933 + se->rel_deadline = 0;
934 + return;
935 + }
936 +#ifdef CONFIG_SCHED_BORE
937 + else if (likely(sched_bore))
938 + vslice >>= !!(flags & sched_deadline_boost_mask);
939 + else
940 +#endif // CONFIG_SCHED_BORE
941 /*
942 - * When joining the competition; the exisiting tasks will be,
943 + * When joining the competition; the existing tasks will be,
944 * on average, halfway through their slice, as such start tasks
945 * off with half a slice to ease into the competition.
946 */
947 @@ -5370,6 +5419,7 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
948 static void
949 dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
950 {
951 + bool sleep = flags & DEQUEUE_SLEEP;
952 int action = UPDATE_TG;
953
954 if (entity_is_task(se) && task_on_rq_migrating(task_of(se)))
955 @@ -5397,6 +5447,11 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
956 clear_buddies(cfs_rq, se);
957
958 update_entity_lag(cfs_rq, se);
959 + if (sched_feat(PLACE_REL_DEADLINE) && !sleep) {
960 + se->deadline -= se->vruntime;
961 + se->rel_deadline = 1;
962 + }
963 +
964 if (se != cfs_rq->curr)
965 __dequeue_entity(cfs_rq, se);
966 se->on_rq = 0;
967 @@ -6833,6 +6888,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
968 bool was_sched_idle = sched_idle_rq(rq);
969
970 util_est_dequeue(&rq->cfs, p);
971 +#ifdef CONFIG_SCHED_BORE
972 + if (task_sleep) {
973 + cfs_rq = cfs_rq_of(se);
974 + if (cfs_rq->curr == se)
975 + update_curr(cfs_rq);
976 + restart_burst(se);
977 + }
978 +#endif // CONFIG_SCHED_BORE
979
980 for_each_sched_entity(se) {
981 cfs_rq = cfs_rq_of(se);
982 @@ -8566,16 +8629,25 @@ static void yield_task_fair(struct rq *rq)
983 /*
984 * Are we the only task in the tree?
985 */
986 +#if !defined(CONFIG_SCHED_BORE)
987 if (unlikely(rq->nr_running == 1))
988 return;
989
990 clear_buddies(cfs_rq, se);
991 +#endif // CONFIG_SCHED_BORE
992
993 update_rq_clock(rq);
994 /*
995 * Update run-time statistics of the 'current'.
996 */
997 update_curr(cfs_rq);
998 +#ifdef CONFIG_SCHED_BORE
999 + restart_burst_rescale_deadline(se);
1000 + if (unlikely(rq->nr_running == 1))
1001 + return;
1002 +
1003 + clear_buddies(cfs_rq, se);
1004 +#endif // CONFIG_SCHED_BORE
1005 /*
1006 * Tell update_rq_clock() that we've just updated,
1007 * so we don't do microscopic update in schedule()
1008 @@ -12641,6 +12713,9 @@ static void task_fork_fair(struct task_struct *p)
1009 curr = cfs_rq->curr;
1010 if (curr)
1011 update_curr(cfs_rq);
1012 +#ifdef CONFIG_SCHED_BORE
1013 + update_burst_score(se);
1014 +#endif // CONFIG_SCHED_BORE
1015 place_entity(cfs_rq, se, ENQUEUE_INITIAL);
1016 rq_unlock(rq, &rf);
1017 }
1018 @@ -12753,6 +12828,10 @@ static void attach_task_cfs_rq(struct task_struct *p)
1019
1020 static void switched_from_fair(struct rq *rq, struct task_struct *p)
1021 {
1022 + p->se.rel_deadline = 0;
1023 +#ifdef CONFIG_SCHED_BORE
1024 + reset_task_bore(p);
1025 +#endif // CONFIG_SCHED_BORE
1026 detach_task_cfs_rq(p);
1027 }
1028
1029 diff --git a/kernel/sched/features.h b/kernel/sched/features.h
1030 index f770168230..87464a97d0 100644
1031 --- a/kernel/sched/features.h
1032 +++ b/kernel/sched/features.h
1033 @@ -6,6 +6,10 @@
1034 */
1035 SCHED_FEAT(PLACE_LAG, true)
1036 SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
1037 +/*
1038 + * Preserve relative virtual deadline on 'migration'.
1039 + */
1040 +SCHED_FEAT(PLACE_REL_DEADLINE, true)
1041 SCHED_FEAT(RUN_TO_PARITY, true)
1042
1043 /*
1044 diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
1045 index d48c6a292a..75acf2f6e2 100644
1046 --- a/kernel/sched/sched.h
1047 +++ b/kernel/sched/sched.h
1048 @@ -1946,7 +1946,11 @@ static inline void dirty_sched_domain_sysctl(int cpu)
1049 }
1050 #endif
1051
1052 +#ifdef CONFIG_SCHED_BORE
1053 +extern void sched_update_min_base_slice(void);
1054 +#else // !CONFIG_SCHED_BORE
1055 extern int sched_update_scaling(void);
1056 +#endif // CONFIG_SCHED_BORE
1057
1058 static inline const struct cpumask *task_user_cpus(struct task_struct *p)
1059 {
1060 @@ -2532,7 +2536,12 @@ extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags);
1061 extern const_debug unsigned int sysctl_sched_nr_migrate;
1062 extern const_debug unsigned int sysctl_sched_migration_cost;
1063
1064 +#ifdef CONFIG_SCHED_BORE
1065 +extern unsigned int sysctl_sched_min_base_slice;
1066 +extern __read_mostly uint sysctl_sched_base_slice;
1067 +#else // !CONFIG_SCHED_BORE
1068 extern unsigned int sysctl_sched_base_slice;
1069 +#endif // CONFIG_SCHED_BORE
1070
1071 #ifdef CONFIG_SCHED_DEBUG
1072 extern int sysctl_resched_latency_warn_ms;
1073 --
1074 2.34.1
1075
