Contents of /cauldron/kernel/current/PATCHES/patches/0032-nvmet-rdma-use-a-private-workqueue-for-delete.patch



Revision 1329221
Fri Nov 9 22:05:45 2018 UTC by tmb
File size: 5519 byte(s)
add fixes from Sasha's autosel queue
From 8650705bf855559a304486957e6a144458af520f Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 27 Sep 2018 11:00:31 -0700
Subject: [PATCH 032/145] nvmet-rdma: use a private workqueue for delete

[ Upstream commit 2acf70ade79d26b97611a8df52eb22aa33814cd4 ]

Queue deletion is done asynchronously when the last reference on the queue
is dropped. Thus, in order to make sure we don't over-allocate under a
connect/disconnect storm, we let queue deletion complete before making
forward progress.

However, given that we flush the system_wq from rdma_cm context, which
itself runs from a workqueue context, we can get a circular locking complaint
[1]. Fix that by using a private workqueue for queue deletion.

[1]:
======================================================
WARNING: possible circular locking dependency detected
4.19.0-rc4-dbg+ #3 Not tainted
------------------------------------------------------
kworker/5:0/39 is trying to acquire lock:
00000000a10b6db9 (&id_priv->handler_mutex){+.+.}, at: rdma_destroy_id+0x6f/0x440 [rdma_cm]

but task is already holding lock:
00000000331b4e2c ((work_completion)(&queue->release_work)){+.+.}, at: process_one_work+0x3ed/0xa20

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #3 ((work_completion)(&queue->release_work)){+.+.}:
   process_one_work+0x474/0xa20
   worker_thread+0x63/0x5a0
   kthread+0x1cf/0x1f0
   ret_from_fork+0x24/0x30

-> #2 ((wq_completion)"events"){+.+.}:
   flush_workqueue+0xf3/0x970
   nvmet_rdma_cm_handler+0x133d/0x1734 [nvmet_rdma]
   cma_ib_req_handler+0x72f/0xf90 [rdma_cm]
   cm_process_work+0x2e/0x110 [ib_cm]
   cm_req_handler+0x135b/0x1c30 [ib_cm]
   cm_work_handler+0x2b7/0x38cd [ib_cm]
   process_one_work+0x4ae/0xa20
   nvmet_rdma:nvmet_rdma_cm_handler: nvmet_rdma: disconnected (10): status 0 id 0000000040357082
   worker_thread+0x63/0x5a0
   kthread+0x1cf/0x1f0
   ret_from_fork+0x24/0x30
   nvme nvme0: Reconnecting in 10 seconds...

-> #1 (&id_priv->handler_mutex/1){+.+.}:
   __mutex_lock+0xfe/0xbe0
   mutex_lock_nested+0x1b/0x20
   cma_ib_req_handler+0x6aa/0xf90 [rdma_cm]
   cm_process_work+0x2e/0x110 [ib_cm]
   cm_req_handler+0x135b/0x1c30 [ib_cm]
   cm_work_handler+0x2b7/0x38cd [ib_cm]
   process_one_work+0x4ae/0xa20
   worker_thread+0x63/0x5a0
   kthread+0x1cf/0x1f0
   ret_from_fork+0x24/0x30

-> #0 (&id_priv->handler_mutex){+.+.}:
   lock_acquire+0xc5/0x200
   __mutex_lock+0xfe/0xbe0
   mutex_lock_nested+0x1b/0x20
   rdma_destroy_id+0x6f/0x440 [rdma_cm]
   nvmet_rdma_release_queue_work+0x8e/0x1b0 [nvmet_rdma]
   process_one_work+0x4ae/0xa20
   worker_thread+0x63/0x5a0
   kthread+0x1cf/0x1f0
   ret_from_fork+0x24/0x30

Fixes: 777dc82395de ("nvmet-rdma: occasionally flush ongoing controller teardown")
Reported-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Tested-by: Bart Van Assche <bvanassche@acm.org>

Signed-off-by: Christoph Hellwig <hch@lst.de>

Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/nvme/target/rdma.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index bfc4da660bb4..5becca88ccbe 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -122,6 +122,7 @@ struct nvmet_rdma_device {
 	int			inline_page_count;
 };
 
+struct workqueue_struct *nvmet_rdma_delete_wq;
 static bool nvmet_rdma_use_srq;
 module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444);
 MODULE_PARM_DESC(use_srq, "Use shared receive queue.");
@@ -1267,12 +1268,12 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
 
 	if (queue->host_qid == 0) {
 		/* Let inflight controller teardown complete */
-		flush_scheduled_work();
+		flush_workqueue(nvmet_rdma_delete_wq);
 	}
 
 	ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
 	if (ret) {
-		schedule_work(&queue->release_work);
+		queue_work(nvmet_rdma_delete_wq, &queue->release_work);
 		/* Destroying rdma_cm id is not needed here */
 		return 0;
 	}
@@ -1337,7 +1338,7 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
 
 	if (disconnect) {
 		rdma_disconnect(queue->cm_id);
-		schedule_work(&queue->release_work);
+		queue_work(nvmet_rdma_delete_wq, &queue->release_work);
 	}
 }
 
@@ -1367,7 +1368,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
 	mutex_unlock(&nvmet_rdma_queue_mutex);
 
 	pr_err("failed to connect queue %d\n", queue->idx);
-	schedule_work(&queue->release_work);
+	queue_work(nvmet_rdma_delete_wq, &queue->release_work);
 }
 
 /**
@@ -1649,8 +1650,17 @@ static int __init nvmet_rdma_init(void)
 	if (ret)
 		goto err_ib_client;
 
+	nvmet_rdma_delete_wq = alloc_workqueue("nvmet-rdma-delete-wq",
+			WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+	if (!nvmet_rdma_delete_wq) {
+		ret = -ENOMEM;
+		goto err_unreg_transport;
+	}
+
 	return 0;
 
+err_unreg_transport:
+	nvmet_unregister_transport(&nvmet_rdma_ops);
 err_ib_client:
 	ib_unregister_client(&nvmet_rdma_ib_client);
 	return ret;
@@ -1658,6 +1668,7 @@ static int __init nvmet_rdma_init(void)
 
 static void __exit nvmet_rdma_exit(void)
 {
+	destroy_workqueue(nvmet_rdma_delete_wq);
 	nvmet_unregister_transport(&nvmet_rdma_ops);
 	ib_unregister_client(&nvmet_rdma_ib_client);
 	WARN_ON_ONCE(!list_empty(&nvmet_rdma_queue_list));
-- 
2.19.1

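Illustrative sketch (not part of the patch): the fix above follows a common kernel pattern, namely giving a subsystem its own workqueue for deferred teardown so that callers can flush only that queue instead of the shared system workqueue. The following minimal, hypothetical module sketch shows the same pattern in isolation; the names (demo_delete_wq, demo_queue, demo_*) are invented for illustration and do not exist in the driver.

/* Minimal sketch of a private workqueue for deferred teardown.
 * All demo_* identifiers are illustrative, not from nvmet-rdma. */
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/slab.h>

static struct workqueue_struct *demo_delete_wq;

struct demo_queue {
	struct work_struct release_work;
	/* ... resources to tear down ... */
};

static void demo_release_work_fn(struct work_struct *w)
{
	struct demo_queue *q = container_of(w, struct demo_queue, release_work);

	/* Release the queue's resources asynchronously. */
	kfree(q);
}

/* Called when the last reference on a queue is dropped. */
static void demo_schedule_release(struct demo_queue *q)
{
	INIT_WORK(&q->release_work, demo_release_work_fn);
	queue_work(demo_delete_wq, &q->release_work);
}

static int __init demo_init(void)
{
	struct demo_queue *q;

	/* WQ_MEM_RECLAIM because teardown may run under memory pressure. */
	demo_delete_wq = alloc_workqueue("demo-delete-wq",
			WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
	if (!demo_delete_wq)
		return -ENOMEM;

	/* Exercise the pattern once: queue a release, then wait for all
	 * pending releases on *this* workqueue only, without touching the
	 * system workqueue. */
	q = kzalloc(sizeof(*q), GFP_KERNEL);
	if (q)
		demo_schedule_release(q);
	flush_workqueue(demo_delete_wq);

	return 0;
}

static void __exit demo_exit(void)
{
	destroy_workqueue(demo_delete_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

In the patch itself this role is played by nvmet_rdma_delete_wq: release_work is queued there instead of on the system workqueue, so nvmet_rdma_queue_connect() flushes only pending queue deletions, and the rdma_cm handler path no longer depends on the system workqueue completion, which breaks the lockdep cycle quoted above.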
