1 |
From e06618ca8031838f7df9a3d4bb5e4fee382734bb Mon Sep 17 00:00:00 2001 |
2 |
From: Ian Romanick <ian.d.romanick@intel.com> |
3 |
Date: Wed, 20 Jun 2018 17:18:30 -0700 |
4 |
Subject: [PATCH 23/78] i965/vec4/dce: Don't narrow the write mask if the flags |
5 |
are used |
6 |
|
7 |
In an instruction sequence like |
8 |
|
9 |
cmp(8).ge.f0.0 vgrf17:D, vgrf2.xxxx:D, vgrf9.xxxx:D |
10 |
(+f0.0) sel(8) vgrf1:UD, vgrf8.xyzw:UD, vgrf1.xyzw:UD |
11 |
|
12 |
The other fields of vgrf17 may be unused, but the CMP still needs to |
13 |
generate the other flag bits. |
14 |
|
15 |
To my surprise, nothing in shader-db or any test suite appears to hit |
16 |
this. However, I have a change to brw_vec4_cmod_propagation that |
17 |
creates cases where this can happen. This fix prevents a couple dozen |
18 |
regressions in that patch. |
19 |
|
20 |
Signed-off-by: Ian Romanick <ian.d.romanick@intel.com> |
21 |
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> |
22 |
Fixes: 5df88c20 ("i965/vec4: Rewrite dead code elimination to use live in/out.") |
23 |
(cherry picked from commit 440c051340669e809511c05370d6d703c70f6d0e) |
24 |
--- |
25 |
src/intel/Makefile.compiler.am | 5 + |
26 |
.../compiler/brw_vec4_dead_code_eliminate.cpp | 47 ++++- |
27 |
src/intel/compiler/meson.build | 3 +- |
28 |
.../test_vec4_dead_code_eliminate.cpp | 163 ++++++++++++++++++ |
29 |
4 files changed, 208 insertions(+), 10 deletions(-) |
30 |
create mode 100644 src/intel/compiler/test_vec4_dead_code_eliminate.cpp |
31 |
|
32 |
diff --git a/src/intel/Makefile.compiler.am b/src/intel/Makefile.compiler.am |
33 |
index cd7e6882fb..7c33e35816 100644 |
34 |
--- a/src/intel/Makefile.compiler.am |
35 |
+++ b/src/intel/Makefile.compiler.am |
36 |
@@ -64,6 +64,7 @@ COMPILER_TESTS = \ |
37 |
compiler/test_vf_float_conversions \ |
38 |
compiler/test_vec4_cmod_propagation \ |
39 |
compiler/test_vec4_copy_propagation \ |
40 |
+ compiler/test_vec4_dead_code_eliminate \ |
41 |
compiler/test_vec4_register_coalesce |
42 |
|
43 |
TESTS += $(COMPILER_TESTS) |
44 |
@@ -97,6 +98,10 @@ compiler_test_vec4_cmod_propagation_SOURCES = \ |
45 |
compiler/test_vec4_cmod_propagation.cpp |
46 |
compiler_test_vec4_cmod_propagation_LDADD = $(TEST_LIBS) |
47 |
|
48 |
+compiler_test_vec4_dead_code_eliminate_SOURCES = \ |
49 |
+ compiler/test_vec4_dead_code_eliminate.cpp |
50 |
+compiler_test_vec4_dead_code_eliminate_LDADD = $(TEST_LIBS) |
51 |
+ |
52 |
# Strictly speaking this is neither a C++ test nor using gtest - we can address |
53 |
# address that at a later point. Until then, this allows us a to simplify things. |
54 |
compiler_test_eu_compact_SOURCES = \ |
55 |
diff --git a/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp b/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp |
56 |
index c09a3d7ebe..99e4c9caca 100644 |
57 |
--- a/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp |
58 |
+++ b/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp |
59 |
@@ -81,17 +81,46 @@ vec4_visitor::dead_code_eliminate() |
60 |
result_live[3] = result; |
61 |
} |
62 |
|
63 |
- for (int c = 0; c < 4; c++) { |
64 |
- if (!result_live[c] && inst->dst.writemask & (1 << c)) { |
65 |
- inst->dst.writemask &= ~(1 << c); |
66 |
+ if (inst->writes_flag()) { |
67 |
+ /* Independently calculate the usage of the flag components and |
68 |
+ * the destination value components. |
69 |
+ */ |
70 |
+ uint8_t flag_mask = inst->dst.writemask; |
71 |
+ uint8_t dest_mask = inst->dst.writemask; |
72 |
+ |
73 |
+ for (int c = 0; c < 4; c++) { |
74 |
+ if (!result_live[c] && dest_mask & (1 << c)) |
75 |
+ dest_mask &= ~(1 << c); |
76 |
+ |
77 |
+ if (!BITSET_TEST(flag_live, c)) |
78 |
+ flag_mask &= ~(1 << c); |
79 |
+ } |
80 |
+ |
81 |
+ if (inst->dst.writemask != (flag_mask | dest_mask)) { |
82 |
progress = true; |
83 |
+ inst->dst.writemask = flag_mask | dest_mask; |
84 |
+ } |
85 |
|
86 |
- if (inst->dst.writemask == 0) { |
87 |
- if (inst->writes_accumulator || inst->writes_flag()) { |
88 |
- inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type)); |
89 |
- } else { |
90 |
- inst->opcode = BRW_OPCODE_NOP; |
91 |
- break; |
92 |
+ /* If none of the destination components are read, replace the |
93 |
+ * destination register with the NULL register. |
94 |
+ */ |
95 |
+ if (dest_mask == 0) { |
96 |
+ progress = true; |
97 |
+ inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type)); |
98 |
+ } |
99 |
+ } else { |
100 |
+ for (int c = 0; c < 4; c++) { |
101 |
+ if (!result_live[c] && inst->dst.writemask & (1 << c)) { |
102 |
+ inst->dst.writemask &= ~(1 << c); |
103 |
+ progress = true; |
104 |
+ |
105 |
+ if (inst->dst.writemask == 0) { |
106 |
+ if (inst->writes_accumulator) { |
107 |
+ inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type)); |
108 |
+ } else { |
109 |
+ inst->opcode = BRW_OPCODE_NOP; |
110 |
+ break; |
111 |
+ } |
112 |
} |
113 |
} |
114 |
} |
115 |
diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.build |
116 |
index 3cdeb6214a..f2854be779 100644 |
117 |
--- a/src/intel/compiler/meson.build |
118 |
+++ b/src/intel/compiler/meson.build |
119 |
@@ -145,7 +145,8 @@ if with_tests |
120 |
foreach t : ['fs_cmod_propagation', 'fs_copy_propagation', |
121 |
'fs_saturate_propagation', 'vf_float_conversions', |
122 |
'vec4_register_coalesce', 'vec4_copy_propagation', |
123 |
- 'vec4_cmod_propagation', 'eu_compact', 'eu_validate'] |
124 |
+ 'vec4_cmod_propagation', 'vec4_dead_code_eliminate', |
125 |
+ 'eu_compact', 'eu_validate'] |
126 |
test( |
127 |
t, |
128 |
executable( |
129 |
diff --git a/src/intel/compiler/test_vec4_dead_code_eliminate.cpp b/src/intel/compiler/test_vec4_dead_code_eliminate.cpp |
130 |
new file mode 100644 |
131 |
index 0000000000..25739c2895 |
132 |
--- /dev/null |
133 |
+++ b/src/intel/compiler/test_vec4_dead_code_eliminate.cpp |
134 |
@@ -0,0 +1,163 @@ |
135 |
+/* |
136 |
+ * Copyright © 2018 Intel Corporation |
137 |
+ * |
138 |
+ * Permission is hereby granted, free of charge, to any person obtaining a |
139 |
+ * copy of this software and associated documentation files (the "Software"), |
140 |
+ * to deal in the Software without restriction, including without limitation |
141 |
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
142 |
+ * and/or sell copies of the Software, and to permit persons to whom the |
143 |
+ * Software is furnished to do so, subject to the following conditions: |
144 |
+ * |
145 |
+ * The above copyright notice and this permission notice (including the next |
146 |
+ * paragraph) shall be included in all copies or substantial portions of the |
147 |
+ * Software. |
148 |
+ * |
149 |
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
150 |
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
151 |
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
152 |
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
153 |
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
154 |
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
155 |
+ * IN THE SOFTWARE. |
156 |
+ */ |
157 |
+ |
158 |
+#include <gtest/gtest.h> |
159 |
+#include "brw_vec4.h" |
160 |
+#include "program/program.h" |
161 |
+ |
162 |
+using namespace brw; |
163 |
+ |
164 |
+class dead_code_eliminate_test : public ::testing::Test { |
165 |
+ virtual void SetUp(); |
166 |
+ |
167 |
+public: |
168 |
+ struct brw_compiler *compiler; |
169 |
+ struct gen_device_info *devinfo; |
170 |
+ struct gl_context *ctx; |
171 |
+ struct gl_shader_program *shader_prog; |
172 |
+ struct brw_vue_prog_data *prog_data; |
173 |
+ vec4_visitor *v; |
174 |
+}; |
175 |
+ |
176 |
+class dead_code_eliminate_vec4_visitor : public vec4_visitor |
177 |
+{ |
178 |
+public: |
179 |
+ dead_code_eliminate_vec4_visitor(struct brw_compiler *compiler, |
180 |
+ nir_shader *shader, |
181 |
+ struct brw_vue_prog_data *prog_data) |
182 |
+ : vec4_visitor(compiler, NULL, NULL, prog_data, shader, NULL, |
183 |
+ false /* no_spills */, -1) |
184 |
+ { |
185 |
+ prog_data->dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; |
186 |
+ } |
187 |
+ |
188 |
+protected: |
189 |
+ virtual dst_reg *make_reg_for_system_value(int /* location */) |
190 |
+ { |
191 |
+ unreachable("Not reached"); |
192 |
+ } |
193 |
+ |
194 |
+ virtual void setup_payload() |
195 |
+ { |
196 |
+ unreachable("Not reached"); |
197 |
+ } |
198 |
+ |
199 |
+ virtual void emit_prolog() |
200 |
+ { |
201 |
+ unreachable("Not reached"); |
202 |
+ } |
203 |
+ |
204 |
+ virtual void emit_thread_end() |
205 |
+ { |
206 |
+ unreachable("Not reached"); |
207 |
+ } |
208 |
+ |
209 |
+ virtual void emit_urb_write_header(int /* mrf */) |
210 |
+ { |
211 |
+ unreachable("Not reached"); |
212 |
+ } |
213 |
+ |
214 |
+ virtual vec4_instruction *emit_urb_write_opcode(bool /* complete */) |
215 |
+ { |
216 |
+ unreachable("Not reached"); |
217 |
+ } |
218 |
+}; |
219 |
+ |
220 |
+ |
221 |
+void dead_code_eliminate_test::SetUp() |
222 |
+{ |
223 |
+ ctx = (struct gl_context *)calloc(1, sizeof(*ctx)); |
224 |
+ compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler)); |
225 |
+ devinfo = (struct gen_device_info *)calloc(1, sizeof(*devinfo)); |
226 |
+ prog_data = (struct brw_vue_prog_data *)calloc(1, sizeof(*prog_data)); |
227 |
+ compiler->devinfo = devinfo; |
228 |
+ |
229 |
+ nir_shader *shader = |
230 |
+ nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL, NULL); |
231 |
+ |
232 |
+ v = new dead_code_eliminate_vec4_visitor(compiler, shader, prog_data); |
233 |
+ |
234 |
+ devinfo->gen = 4; |
235 |
+} |
236 |
+ |
237 |
+static void |
238 |
+dead_code_eliminate(vec4_visitor *v) |
239 |
+{ |
240 |
+ bool print = false; |
241 |
+ |
242 |
+ if (print) { |
243 |
+ fprintf(stderr, "instructions before:\n"); |
244 |
+ v->dump_instructions(); |
245 |
+ } |
246 |
+ |
247 |
+ v->calculate_cfg(); |
248 |
+ v->dead_code_eliminate(); |
249 |
+ |
250 |
+ if (print) { |
251 |
+ fprintf(stderr, "instructions after:\n"); |
252 |
+ v->dump_instructions(); |
253 |
+ } |
254 |
+} |
255 |
+ |
256 |
+TEST_F(dead_code_eliminate_test, some_dead_channels_all_flags_used) |
257 |
+{ |
258 |
+ const vec4_builder bld = vec4_builder(v).at_end(); |
259 |
+ src_reg r1 = src_reg(v, glsl_type::vec4_type); |
260 |
+ src_reg r2 = src_reg(v, glsl_type::vec4_type); |
261 |
+ src_reg r3 = src_reg(v, glsl_type::vec4_type); |
262 |
+ src_reg r4 = src_reg(v, glsl_type::vec4_type); |
263 |
+ src_reg r5 = src_reg(v, glsl_type::vec4_type); |
264 |
+ src_reg r6 = src_reg(v, glsl_type::vec4_type); |
265 |
+ |
266 |
+ /* Sequence like the following should not be modified by DCE. |
267 |
+ * |
268 |
+ * cmp.l.f0(8) g4<1>F g2<4,4,1>.wF g1<4,4,1>.xF |
269 |
+ * mov(8) g5<1>.xF g4<4,4,1>.xF |
270 |
+ * (+f0.x) sel(8) g6<1>UD g3<4>UD g6<4>UD |
271 |
+ */ |
272 |
+ vec4_instruction *test_cmp = |
273 |
+ bld.CMP(dst_reg(r4), r2, r1, BRW_CONDITIONAL_L); |
274 |
+ |
275 |
+ test_cmp->src[0].swizzle = BRW_SWIZZLE_WWWW; |
276 |
+ test_cmp->src[1].swizzle = BRW_SWIZZLE_XXXX; |
277 |
+ |
278 |
+ vec4_instruction *test_mov = |
279 |
+ bld.MOV(dst_reg(r5), r4); |
280 |
+ |
281 |
+ test_mov->dst.writemask = WRITEMASK_X; |
282 |
+ test_mov->src[0].swizzle = BRW_SWIZZLE_XXXX; |
283 |
+ |
284 |
+ vec4_instruction *test_sel = |
285 |
+ bld.SEL(dst_reg(r6), r3, r6); |
286 |
+ |
287 |
+ set_predicate(BRW_PREDICATE_NORMAL, test_sel); |
288 |
+ |
289 |
+ /* The scratch write is here just to make r5 and r6 be live so that the |
290 |
+ * whole program doesn't get eliminated by DCE. |
291 |
+ */ |
292 |
+ v->emit(v->SCRATCH_WRITE(dst_reg(r4), r6, r5)); |
293 |
+ |
294 |
+ dead_code_eliminate(v); |
295 |
+ |
296 |
+ EXPECT_EQ(test_cmp->dst.writemask, WRITEMASK_XYZW); |
297 |
+} |
298 |
-- |
299 |
2.20.1 |
300 |
|