From c5214e13ad60bd0022bab45cbac2c9db6bc1e0d4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 30 May 2023 13:20:46 +0200
Subject: [PATCH] sched/fair: Multi-LLC select_idle_sibling()

Tejun reported that when he targets workqueues towards a specific LLC
on his Zen2 machine with 3 cores / LLC and 4 LLCs in total, he gets
significant idle time.

This is, of course, because select_idle_sibling() will not consider
anything outside of the local LLC, and since all these tasks are
short-running, the periodic idle load balancer is ineffective.

And while it is good to keep work cache-local, it is better to not
have significant idle time. Therefore, have select_idle_sibling() try
other LLCs inside the same node when the local one comes up empty.

Reported-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
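Notes:

select_idle_node() only runs once the local-LLC scan comes up empty,
and it bails out before crossing a node boundary (as soon as the
parent domain is SD_NUMA). Since the behaviour is guarded by a
scheduler feature flag, it can be flipped at runtime for comparison;
a minimal sketch, assuming CONFIG_SCHED_DEBUG is enabled and debugfs
is mounted at /sys/kernel/debug:

  # disable the sibling-LLC scan
  echo NO_SIS_NODE > /sys/kernel/debug/sched/features

  # restore the default
  echo SIS_NODE > /sys/kernel/debug/sched/features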
 kernel/sched/fair.c     | 37 +++++++++++++++++++++++++++++++++++++
 kernel/sched/features.h |  1 +
 2 files changed, 38 insertions(+)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 48b6f0ca13ac..cd80e30b9d67 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7027,6 +7027,37 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
 	return idle_cpu;
 }
 
+/*
+ * For the multiple-LLC per node case, make sure to try the other LLCs if the
+ * local LLC comes up empty.
+ */
+static int
+select_idle_node(struct task_struct *p, struct sched_domain *sd, int target)
+{
+	struct sched_domain *parent = sd->parent;
+	struct sched_group *sg;
+
+	/* Make sure to not cross nodes. */
+	if (!parent || parent->flags & SD_NUMA)
+		return -1;
+
+	sg = parent->groups;
+	do {
+		int cpu = cpumask_first(sched_group_span(sg));
+
+		if (!cpus_share_cache(cpu, target)) {
+			int i = select_idle_cpu(p, per_cpu(sd_llc, cpu),
+						test_idle_cores(cpu), cpu);
+			if ((unsigned)i < nr_cpumask_bits)
+				return i;
+		}
+
+		sg = sg->next;
+	} while (sg != parent->groups);
+
+	return -1;
+}
+
 /*
  * Scan the asym_capacity domain for idle CPUs; pick the first idle one on which
  * the task fits. If no CPU is big enough, but there are idle ones, try to
@@ -7199,6 +7230,12 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	if ((unsigned)i < nr_cpumask_bits)
 		return i;
 
+	if (sched_feat(SIS_NODE)) {
+		i = select_idle_node(p, sd, target);
+		if ((unsigned)i < nr_cpumask_bits)
+			return i;
+	}
+
 	return target;
 }
 
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index ee7f23c76bd3..9e390eb82e38 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -62,6 +62,7 @@ SCHED_FEAT(TTWU_QUEUE, true)
  */
 SCHED_FEAT(SIS_PROP, false)
 SCHED_FEAT(SIS_UTIL, true)
+SCHED_FEAT(SIS_NODE, true)
 
 /*
  * Issue a WARN when we do multiple update_rq_clock() calls
-- 
2.41.0