1 |
From: Jeremi Piotrowski <jpiotrowski@linux.microsoft.com> |
2 |
Subject: [PATCH 5.15] ext4: fix deadlock due to mbcache entry corruption |
3 |
Date: Thu, 5 Jan 2023 06:01:59 -0800 |
4 |
|
5 |
From: Jan Kara <jack@suse.cz> |
6 |
|
7 |
commit a44e84a9b7764c72896f7241a0ec9ac7e7ef38dd upstream. |
8 |
|
9 |
When manipulating xattr blocks, we can deadlock infinitely looping |
10 |
inside ext4_xattr_block_set() where we constantly keep finding xattr |
11 |
block for reuse in mbcache but we are unable to reuse it because its |
12 |
reference count is too big. This happens because cache entry for the |
13 |
xattr block is marked as reusable (e_reusable set) although its |
14 |
reference count is too big. When this inconsistency happens, this |
15 |
inconsistent state is kept indefinitely and so ext4_xattr_block_set() |
16 |
keeps retrying indefinitely. |
17 |
|
18 |
The inconsistent state is caused by non-atomic update of e_reusable bit. |
19 |
e_reusable is part of a bitfield and e_reusable update can race with |
20 |
update of e_referenced bit in the same bitfield resulting in loss of one |
21 |
of the updates. Fix the problem by using atomic bitops instead. |
22 |
|
23 |
This bug has been around for many years, but it became *much* easier |
24 |
to hit after commit 65f8b80053a1 ("ext4: fix race when reusing xattr |
25 |
blocks"). |
26 |
|
27 |
A special backport to 5.15 was necessary due to changes in reference counting. |
28 |
|
29 |
Cc: stable@vger.kernel.org # 5.15 |
30 |
Fixes: 6048c64b2609 ("mbcache: add reusable flag to cache entries") |
31 |
Fixes: 65f8b80053a1 ("ext4: fix race when reusing xattr blocks") |
32 |
Reported-and-tested-by: Jeremi Piotrowski <jpiotrowski@linux.microsoft.com> |
33 |
Reported-by: Thilo Fromm <t-lo@linux.microsoft.com> |
34 |
Link: https://lore.kernel.org/r/c77bf00f-4618-7149-56f1-b8d1664b9d07@linux.microsoft.com/ |
35 |
Signed-off-by: Jan Kara <jack@suse.cz> |
36 |
Reviewed-by: Andreas Dilger <adilger@dilger.ca> |
37 |
Link: https://lore.kernel.org/r/20221123193950.16758-1-jack@suse.cz |
38 |
Signed-off-by: Theodore Ts'o <tytso@mit.edu> |
39 |
Signed-off-by: Jeremi Piotrowski <jpiotrowski@linux.microsoft.com> |
40 |
--- |
41 |
fs/ext4/xattr.c | 4 ++-- |
42 |
fs/mbcache.c | 14 ++++++++------ |
43 |
include/linux/mbcache.h | 9 +++++++-- |
44 |
3 files changed, 17 insertions(+), 10 deletions(-) |
45 |
|
46 |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c |
47 |
index 533216e80fa2..22700812a4d3 100644 |
48 |
--- a/fs/ext4/xattr.c |
49 |
+++ b/fs/ext4/xattr.c |
50 |
@@ -1281,7 +1281,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, |
51 |
ce = mb_cache_entry_get(ea_block_cache, hash, |
52 |
bh->b_blocknr); |
53 |
if (ce) { |
54 |
- ce->e_reusable = 1; |
55 |
+ set_bit(MBE_REUSABLE_B, &ce->e_flags); |
56 |
mb_cache_entry_put(ea_block_cache, ce); |
57 |
} |
58 |
} |
59 |
@@ -2042,7 +2042,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, |
60 |
} |
61 |
BHDR(new_bh)->h_refcount = cpu_to_le32(ref); |
62 |
if (ref == EXT4_XATTR_REFCOUNT_MAX) |
63 |
- ce->e_reusable = 0; |
64 |
+ clear_bit(MBE_REUSABLE_B, &ce->e_flags); |
65 |
ea_bdebug(new_bh, "reusing; refcount now=%d", |
66 |
ref); |
67 |
ext4_xattr_block_csum_set(inode, new_bh); |
68 |
diff --git a/fs/mbcache.c b/fs/mbcache.c |
69 |
index 2010bc80a3f2..ac07b50ea3df 100644 |
70 |
--- a/fs/mbcache.c |
71 |
+++ b/fs/mbcache.c |
72 |
@@ -94,8 +94,9 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, |
73 |
atomic_set(&entry->e_refcnt, 1); |
74 |
entry->e_key = key; |
75 |
entry->e_value = value; |
76 |
- entry->e_reusable = reusable; |
77 |
- entry->e_referenced = 0; |
78 |
+ entry->e_flags = 0; |
79 |
+ if (reusable) |
80 |
+ set_bit(MBE_REUSABLE_B, &entry->e_flags); |
81 |
head = mb_cache_entry_head(cache, key); |
82 |
hlist_bl_lock(head); |
83 |
hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { |
84 |
@@ -155,7 +156,8 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache, |
85 |
while (node) { |
86 |
entry = hlist_bl_entry(node, struct mb_cache_entry, |
87 |
e_hash_list); |
88 |
- if (entry->e_key == key && entry->e_reusable) { |
89 |
+ if (entry->e_key == key && |
90 |
+ test_bit(MBE_REUSABLE_B, &entry->e_flags)) { |
91 |
atomic_inc(&entry->e_refcnt); |
92 |
goto out; |
93 |
} |
94 |
@@ -325,7 +327,7 @@ EXPORT_SYMBOL(mb_cache_entry_delete_or_get); |
95 |
void mb_cache_entry_touch(struct mb_cache *cache, |
96 |
struct mb_cache_entry *entry) |
97 |
{ |
98 |
- entry->e_referenced = 1; |
99 |
+ set_bit(MBE_REFERENCED_B, &entry->e_flags); |
100 |
} |
101 |
EXPORT_SYMBOL(mb_cache_entry_touch); |
102 |
|
103 |
@@ -350,8 +352,8 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache, |
104 |
while (nr_to_scan-- && !list_empty(&cache->c_list)) { |
105 |
entry = list_first_entry(&cache->c_list, |
106 |
struct mb_cache_entry, e_list); |
107 |
- if (entry->e_referenced || atomic_read(&entry->e_refcnt) > 2) { |
108 |
- entry->e_referenced = 0; |
109 |
+ if (test_bit(MBE_REFERENCED_B, &entry->e_flags) || atomic_read(&entry->e_refcnt) > 2) { |
110 |
+ clear_bit(MBE_REFERENCED_B, &entry->e_flags); |
111 |
list_move_tail(&entry->e_list, &cache->c_list); |
112 |
continue; |
113 |
} |
114 |
diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h |
115 |
index 8eca7f25c432..62927f7e2588 100644 |
116 |
--- a/include/linux/mbcache.h |
117 |
+++ b/include/linux/mbcache.h |
118 |
@@ -10,6 +10,12 @@ |
119 |
|
120 |
struct mb_cache; |
121 |
|
122 |
+/* Cache entry flags */ |
123 |
+enum { |
124 |
+ MBE_REFERENCED_B = 0, |
125 |
+ MBE_REUSABLE_B |
126 |
+}; |
127 |
+ |
128 |
struct mb_cache_entry { |
129 |
/* List of entries in cache - protected by cache->c_list_lock */ |
130 |
struct list_head e_list; |
131 |
@@ -18,8 +24,7 @@ struct mb_cache_entry { |
132 |
atomic_t e_refcnt; |
133 |
/* Key in hash - stable during lifetime of the entry */ |
134 |
u32 e_key; |
135 |
- u32 e_referenced:1; |
136 |
- u32 e_reusable:1; |
137 |
+ unsigned long e_flags; |
138 |
/* User provided value - stable during lifetime of the entry */ |
139 |
u64 e_value; |
140 |
}; |
141 |
-- |
142 |
2.25.1 |