1 |
From 587eb88b2177476bb5d004b0515332d7c1f3205b Mon Sep 17 00:00:00 2001 |
2 |
From: Sasha Levin <sashal@kernel.org> |
3 |
Date: Wed, 3 Aug 2022 10:49:23 +0300 |
4 |
Subject: net/mlx5: Avoid false positive lockdep warning by adding |
5 |
lock_class_key |
6 |
|
7 |
From: Moshe Shemesh <moshe@nvidia.com> |
8 |
|
9 |
[ Upstream commit d59b73a66e5e0682442b6d7b4965364e57078b80 ] |
10 |
|
11 |
Add a lock_class_key per mlx5 device to avoid a false positive |
12 |
"possible circular locking dependency" warning by lockdep, on flows |
13 |
which lock more than one mlx5 device, such as adding SF. |
14 |
|
15 |
kernel log: |
16 |
====================================================== |
17 |
WARNING: possible circular locking dependency detected |
18 |
5.19.0-rc8+ #2 Not tainted |
19 |
------------------------------------------------------ |
20 |
kworker/u20:0/8 is trying to acquire lock: |
21 |
ffff88812dfe0d98 (&dev->intf_state_mutex){+.+.}-{3:3}, at: mlx5_init_one+0x2e/0x490 [mlx5_core] |
22 |
|
23 |
but task is already holding lock: |
24 |
ffff888101aa7898 (&(&notifier->n_head)->rwsem){++++}-{3:3}, at: blocking_notifier_call_chain+0x5a/0x130 |
25 |
|
26 |
which lock already depends on the new lock. |
27 |
|
28 |
the existing dependency chain (in reverse order) is: |
29 |
|
30 |
-> #1 (&(&notifier->n_head)->rwsem){++++}-{3:3}: |
31 |
down_write+0x90/0x150 |
32 |
blocking_notifier_chain_register+0x53/0xa0 |
33 |
mlx5_sf_table_init+0x369/0x4a0 [mlx5_core] |
34 |
mlx5_init_one+0x261/0x490 [mlx5_core] |
35 |
probe_one+0x430/0x680 [mlx5_core] |
36 |
local_pci_probe+0xd6/0x170 |
37 |
work_for_cpu_fn+0x4e/0xa0 |
38 |
process_one_work+0x7c2/0x1340 |
39 |
worker_thread+0x6f6/0xec0 |
40 |
kthread+0x28f/0x330 |
41 |
ret_from_fork+0x1f/0x30 |
42 |
|
43 |
-> #0 (&dev->intf_state_mutex){+.+.}-{3:3}: |
44 |
__lock_acquire+0x2fc7/0x6720 |
45 |
lock_acquire+0x1c1/0x550 |
46 |
__mutex_lock+0x12c/0x14b0 |
47 |
mlx5_init_one+0x2e/0x490 [mlx5_core] |
48 |
mlx5_sf_dev_probe+0x29c/0x370 [mlx5_core] |
49 |
auxiliary_bus_probe+0x9d/0xe0 |
50 |
really_probe+0x1e0/0xaa0 |
51 |
__driver_probe_device+0x219/0x480 |
52 |
driver_probe_device+0x49/0x130 |
53 |
__device_attach_driver+0x1b8/0x280 |
54 |
bus_for_each_drv+0x123/0x1a0 |
55 |
__device_attach+0x1a3/0x460 |
56 |
bus_probe_device+0x1a2/0x260 |
57 |
device_add+0x9b1/0x1b40 |
58 |
__auxiliary_device_add+0x88/0xc0 |
59 |
mlx5_sf_dev_state_change_handler+0x67e/0x9d0 [mlx5_core] |
60 |
blocking_notifier_call_chain+0xd5/0x130 |
61 |
mlx5_vhca_state_work_handler+0x2b0/0x3f0 [mlx5_core] |
62 |
process_one_work+0x7c2/0x1340 |
63 |
worker_thread+0x59d/0xec0 |
64 |
kthread+0x28f/0x330 |
65 |
ret_from_fork+0x1f/0x30 |
66 |
|
67 |
other info that might help us debug this: |
68 |
|
69 |
Possible unsafe locking scenario: |
70 |
|
71 |
CPU0 CPU1 |
72 |
---- ---- |
73 |
lock(&(&notifier->n_head)->rwsem); |
74 |
lock(&dev->intf_state_mutex); |
75 |
lock(&(&notifier->n_head)->rwsem); |
76 |
lock(&dev->intf_state_mutex); |
77 |
|
78 |
*** DEADLOCK *** |
79 |
|
80 |
4 locks held by kworker/u20:0/8: |
81 |
#0: ffff888150612938 ((wq_completion)mlx5_events){+.+.}-{0:0}, at: process_one_work+0x6e2/0x1340 |
82 |
#1: ffff888100cafdb8 ((work_completion)(&work->work)#3){+.+.}-{0:0}, at: process_one_work+0x70f/0x1340 |
83 |
#2: ffff888101aa7898 (&(&notifier->n_head)->rwsem){++++}-{3:3}, at: blocking_notifier_call_chain+0x5a/0x130 |
84 |
#3: ffff88813682d0e8 (&dev->mutex){....}-{3:3}, at:__device_attach+0x76/0x460 |
85 |
|
86 |
stack backtrace: |
87 |
CPU: 6 PID: 8 Comm: kworker/u20:0 Not tainted 5.19.0-rc8+ |
88 |
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 |
89 |
Workqueue: mlx5_events mlx5_vhca_state_work_handler [mlx5_core] |
90 |
Call Trace: |
91 |
<TASK> |
92 |
dump_stack_lvl+0x57/0x7d |
93 |
check_noncircular+0x278/0x300 |
94 |
? print_circular_bug+0x460/0x460 |
95 |
? lock_chain_count+0x20/0x20 |
96 |
? register_lock_class+0x1880/0x1880 |
97 |
__lock_acquire+0x2fc7/0x6720 |
98 |
? register_lock_class+0x1880/0x1880 |
99 |
? register_lock_class+0x1880/0x1880 |
100 |
lock_acquire+0x1c1/0x550 |
101 |
? mlx5_init_one+0x2e/0x490 [mlx5_core] |
102 |
? lockdep_hardirqs_on_prepare+0x400/0x400 |
103 |
__mutex_lock+0x12c/0x14b0 |
104 |
? mlx5_init_one+0x2e/0x490 [mlx5_core] |
105 |
? mlx5_init_one+0x2e/0x490 [mlx5_core] |
106 |
? _raw_read_unlock+0x1f/0x30 |
107 |
? mutex_lock_io_nested+0x1320/0x1320 |
108 |
? __ioremap_caller.constprop.0+0x306/0x490 |
109 |
? mlx5_sf_dev_probe+0x269/0x370 [mlx5_core] |
110 |
? iounmap+0x160/0x160 |
111 |
mlx5_init_one+0x2e/0x490 [mlx5_core] |
112 |
mlx5_sf_dev_probe+0x29c/0x370 [mlx5_core] |
113 |
? mlx5_sf_dev_remove+0x130/0x130 [mlx5_core] |
114 |
auxiliary_bus_probe+0x9d/0xe0 |
115 |
really_probe+0x1e0/0xaa0 |
116 |
__driver_probe_device+0x219/0x480 |
117 |
? auxiliary_match_id+0xe9/0x140 |
118 |
driver_probe_device+0x49/0x130 |
119 |
__device_attach_driver+0x1b8/0x280 |
120 |
? driver_allows_async_probing+0x140/0x140 |
121 |
bus_for_each_drv+0x123/0x1a0 |
122 |
? bus_for_each_dev+0x1a0/0x1a0 |
123 |
? lockdep_hardirqs_on_prepare+0x286/0x400 |
124 |
? trace_hardirqs_on+0x2d/0x100 |
125 |
__device_attach+0x1a3/0x460 |
126 |
? device_driver_attach+0x1e0/0x1e0 |
127 |
? kobject_uevent_env+0x22d/0xf10 |
128 |
bus_probe_device+0x1a2/0x260 |
129 |
device_add+0x9b1/0x1b40 |
130 |
? dev_set_name+0xab/0xe0 |
131 |
? __fw_devlink_link_to_suppliers+0x260/0x260 |
132 |
? memset+0x20/0x40 |
133 |
? lockdep_init_map_type+0x21a/0x7d0 |
134 |
__auxiliary_device_add+0x88/0xc0 |
135 |
? auxiliary_device_init+0x86/0xa0 |
136 |
mlx5_sf_dev_state_change_handler+0x67e/0x9d0 [mlx5_core] |
137 |
blocking_notifier_call_chain+0xd5/0x130 |
138 |
mlx5_vhca_state_work_handler+0x2b0/0x3f0 [mlx5_core] |
139 |
? mlx5_vhca_event_arm+0x100/0x100 [mlx5_core] |
140 |
? lock_downgrade+0x6e0/0x6e0 |
141 |
? lockdep_hardirqs_on_prepare+0x286/0x400 |
142 |
process_one_work+0x7c2/0x1340 |
143 |
? lockdep_hardirqs_on_prepare+0x400/0x400 |
144 |
? pwq_dec_nr_in_flight+0x230/0x230 |
145 |
? rwlock_bug.part.0+0x90/0x90 |
146 |
worker_thread+0x59d/0xec0 |
147 |
? process_one_work+0x1340/0x1340 |
148 |
kthread+0x28f/0x330 |
149 |
? kthread_complete_and_exit+0x20/0x20 |
150 |
ret_from_fork+0x1f/0x30 |
151 |
</TASK> |
152 |
|
153 |
Fixes: 6a3273217469 ("net/mlx5: SF, Port function state change support") |
154 |
Signed-off-by: Moshe Shemesh <moshe@nvidia.com> |
155 |
Reviewed-by: Shay Drory <shayd@nvidia.com> |
156 |
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com> |
157 |
Signed-off-by: Sasha Levin <sashal@kernel.org> |
158 |
--- |
159 |
drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 ++++ |
160 |
include/linux/mlx5/driver.h | 1 + |
161 |
2 files changed, 5 insertions(+) |
162 |
|
163 |
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c |
164 |
index ba2e5232b90be..616207c3b187a 100644 |
165 |
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c |
166 |
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c |
167 |
@@ -1472,7 +1472,9 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) |
168 |
memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile)); |
169 |
INIT_LIST_HEAD(&priv->ctx_list); |
170 |
spin_lock_init(&priv->ctx_lock); |
171 |
+ lockdep_register_key(&dev->lock_key); |
172 |
mutex_init(&dev->intf_state_mutex); |
173 |
+ lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key); |
174 |
|
175 |
mutex_init(&priv->bfregs.reg_head.lock); |
176 |
mutex_init(&priv->bfregs.wc_head.lock); |
177 |
@@ -1527,6 +1529,7 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) |
178 |
mutex_destroy(&priv->bfregs.wc_head.lock); |
179 |
mutex_destroy(&priv->bfregs.reg_head.lock); |
180 |
mutex_destroy(&dev->intf_state_mutex); |
181 |
+ lockdep_unregister_key(&dev->lock_key); |
182 |
return err; |
183 |
} |
184 |
|
185 |
@@ -1545,6 +1548,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev) |
186 |
mutex_destroy(&priv->bfregs.wc_head.lock); |
187 |
mutex_destroy(&priv->bfregs.reg_head.lock); |
188 |
mutex_destroy(&dev->intf_state_mutex); |
189 |
+ lockdep_unregister_key(&dev->lock_key); |
190 |
} |
191 |
|
192 |
static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id) |
193 |
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h |
194 |
index 5040cd774c5a3..b0b4ac92354a2 100644 |
195 |
--- a/include/linux/mlx5/driver.h |
196 |
+++ b/include/linux/mlx5/driver.h |
197 |
@@ -773,6 +773,7 @@ struct mlx5_core_dev { |
198 |
enum mlx5_device_state state; |
199 |
/* sync interface state */ |
200 |
struct mutex intf_state_mutex; |
201 |
+ struct lock_class_key lock_key; |
202 |
unsigned long intf_state; |
203 |
struct mlx5_priv priv; |
204 |
struct mlx5_profile profile; |
205 |
-- |
206 |
2.35.1 |
207 |
|