1 |
From 7172d5ee8205f7e25a6d127e4fea0ff8287fd7be Mon Sep 17 00:00:00 2001 |
2 |
From: Michal Hocko <mhocko@suse.com> |
3 |
Date: Thu, 18 Oct 2018 10:56:17 +0200 |
4 |
Subject: [PATCH 085/145] cgroup, netclassid: add a preemption point to |
5 |
write_classid |
6 |
|
7 |
[ Upstream commit a90e90b7d55e789c71d85b946ffb5c1ab2f137ca ] |
8 |
|
9 |
We have seen a customer complaining about soft lockups on !PREEMPT |
10 |
kernel config with a 4.4-based kernel: |
11 |
|
12 |
[1072141.435366] NMI watchdog: BUG: soft lockup - CPU#21 stuck for 22s! [systemd:1] |
13 |
[1072141.444090] Modules linked in: mpt3sas raid_class binfmt_misc af_packet 8021q garp mrp stp llc xfs libcrc32c bonding iscsi_ibft iscsi_boot_sysfs msr ext4 crc16 jbd2 mbcache cdc_ether usbnet mii joydev hid_generic usbhid intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel ipmi_ssif mgag200 i2c_algo_bit ttm ipmi_devintf drbg ixgbe drm_kms_helper vxlan ansi_cprng ip6_udp_tunnel drm aesni_intel udp_tunnel aes_x86_64 iTCO_wdt syscopyarea ptp xhci_pci lrw iTCO_vendor_support pps_core gf128mul ehci_pci glue_helper sysfillrect mdio pcspkr sb_edac ablk_helper cryptd ehci_hcd sysimgblt xhci_hcd fb_sys_fops edac_core mei_me lpc_ich ses usbcore enclosure dca mfd_core ipmi_si mei i2c_i801 scsi_transport_sas usb_common ipmi_msghandler shpchp fjes wmi processor button acpi_pad btrfs xor raid6_pq sd_mod crc32c_intel megaraid_sas sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua scsi_mod md_mod autofs4 |
14 |
[1072141.444146] Supported: Yes |
15 |
[1072141.444149] CPU: 21 PID: 1 Comm: systemd Not tainted 4.4.121-92.80-default #1 |
16 |
[1072141.444150] Hardware name: LENOVO Lenovo System x3650 M5 -[5462P4U]- -[5462P4U]-/01GR451, BIOS -[TCE136H-2.70]- 06/13/2018 |
17 |
[1072141.444151] task: ffff880191bd0040 ti: ffff880191bd4000 task.ti: ffff880191bd4000 |
18 |
[1072141.444153] RIP: 0010:[<ffffffff815229f9>] [<ffffffff815229f9>] update_classid_sock+0x29/0x40 |
19 |
[1072141.444157] RSP: 0018:ffff880191bd7d58 EFLAGS: 00000286 |
20 |
[1072141.444158] RAX: ffff883b177cb7c0 RBX: 0000000000000000 RCX: 0000000000000000 |
21 |
[1072141.444159] RDX: 00000000000009c7 RSI: ffff880191bd7d5c RDI: ffff8822e29bb200 |
22 |
[1072141.444160] RBP: ffff883a72230980 R08: 0000000000000101 R09: 0000000000000000 |
23 |
[1072141.444161] R10: 0000000000000008 R11: f000000000000000 R12: ffffffff815229d0 |
24 |
[1072141.444162] R13: 0000000000000000 R14: ffff881fd0a47ac0 R15: ffff880191bd7f28 |
25 |
[1072141.444163] FS: 00007f3e2f1eb8c0(0000) GS:ffff882000340000(0000) knlGS:0000000000000000 |
26 |
[1072141.444164] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 |
27 |
[1072141.444165] CR2: 00007f3e2f200000 CR3: 0000001ffea4e000 CR4: 00000000001606f0 |
28 |
[1072141.444166] Stack: |
29 |
[1072141.444166] ffffffa800000246 00000000000009c7 ffffffff8121d583 ffff8818312a05c0 |
30 |
[1072141.444168] ffff8818312a1100 ffff880197c3b280 ffff881861422858 ffffffffffffffea |
31 |
[1072141.444170] ffffffff81522b1c ffffffff81d0ca20 ffff8817fa17b950 ffff883fdd8121e0 |
32 |
[1072141.444171] Call Trace: |
33 |
[1072141.444179] [<ffffffff8121d583>] iterate_fd+0x53/0x80 |
34 |
[1072141.444182] [<ffffffff81522b1c>] write_classid+0x4c/0x80 |
35 |
[1072141.444187] [<ffffffff8111328b>] cgroup_file_write+0x9b/0x100 |
36 |
[1072141.444193] [<ffffffff81278bcb>] kernfs_fop_write+0x11b/0x150 |
37 |
[1072141.444198] [<ffffffff81201566>] __vfs_write+0x26/0x100 |
38 |
[1072141.444201] [<ffffffff81201bed>] vfs_write+0x9d/0x190 |
39 |
[1072141.444203] [<ffffffff812028c2>] SyS_write+0x42/0xa0 |
40 |
[1072141.444207] [<ffffffff815f58c3>] entry_SYSCALL_64_fastpath+0x1e/0xca |
41 |
[1072141.445490] DWARF2 unwinder stuck at entry_SYSCALL_64_fastpath+0x1e/0xca |
42 |
|
43 |
If a cgroup has many tasks with many open file descriptors then we would |
44 |
end up in a large loop without any rescheduling point throughout the |
45 |
operation. Add cond_resched once per task. |
46 |
|
47 |
Signed-off-by: Michal Hocko <mhocko@suse.com> |
48 |
Signed-off-by: Tejun Heo <tj@kernel.org> |
49 |
Signed-off-by: Sasha Levin <sashal@kernel.org> |
50 |
--- |
51 |
net/core/netclassid_cgroup.c | 1 + |
52 |
1 file changed, 1 insertion(+) |
53 |
|
54 |
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c |
55 |
index 5e4f04004a49..7bf833598615 100644 |
56 |
--- a/net/core/netclassid_cgroup.c |
57 |
+++ b/net/core/netclassid_cgroup.c |
58 |
@@ -106,6 +106,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, |
59 |
iterate_fd(p->files, 0, update_classid_sock, |
60 |
(void *)(unsigned long)cs->classid); |
61 |
task_unlock(p); |
62 |
+ cond_resched(); |
63 |
} |
64 |
css_task_iter_end(&it); |
65 |
|
66 |
-- |
67 |
2.19.1 |
68 |
|