/[packages]/cauldron/mesa/current/SOURCES/0023-i965-vec4-dce-Don-t-narrow-the-write-mask-if-the-fla.patch
ViewVC logotype

Contents of /cauldron/mesa/current/SOURCES/0023-i965-vec4-dce-Don-t-narrow-the-write-mask-if-the-fla.patch

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1355920 - (show annotations) (download)
Sun Jan 13 14:22:00 2019 UTC (5 years, 3 months ago) by tmb
File size: 10948 byte(s)
add current fixes from upstream staging/18.3 branch
1 From e06618ca8031838f7df9a3d4bb5e4fee382734bb Mon Sep 17 00:00:00 2001
2 From: Ian Romanick <ian.d.romanick@intel.com>
3 Date: Wed, 20 Jun 2018 17:18:30 -0700
4 Subject: [PATCH 23/78] i965/vec4/dce: Don't narrow the write mask if the flags
5 are used
6
7 In an instruction sequence like
8
9 cmp(8).ge.f0.0 vgrf17:D, vgrf2.xxxx:D, vgrf9.xxxx:D
10 (+f0.0) sel(8) vgrf1:UD, vgrf8.xyzw:UD, vgrf1.xyzw:UD
11
12 The other fields of vgrf17 may be unused, but the CMP still needs to
13 generate the other flag bits.
14
15 To my surprise, nothing in shader-db or any test suite appears to hit
16 this. However, I have a change to brw_vec4_cmod_propagation that
17 creates cases where this can happen. This fix prevents a couple dozen
18 regressions in that patch.
19
20 Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
21 Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
22 Fixes: 5df88c20 ("i965/vec4: Rewrite dead code elimination to use live in/out.")
23 (cherry picked from commit 440c051340669e809511c05370d6d703c70f6d0e)
24 ---
25 src/intel/Makefile.compiler.am | 5 +
26 .../compiler/brw_vec4_dead_code_eliminate.cpp | 47 ++++-
27 src/intel/compiler/meson.build | 3 +-
28 .../test_vec4_dead_code_eliminate.cpp | 163 ++++++++++++++++++
29 4 files changed, 208 insertions(+), 10 deletions(-)
30 create mode 100644 src/intel/compiler/test_vec4_dead_code_eliminate.cpp
31
32 diff --git a/src/intel/Makefile.compiler.am b/src/intel/Makefile.compiler.am
33 index cd7e6882fb..7c33e35816 100644
34 --- a/src/intel/Makefile.compiler.am
35 +++ b/src/intel/Makefile.compiler.am
36 @@ -64,6 +64,7 @@ COMPILER_TESTS = \
37 compiler/test_vf_float_conversions \
38 compiler/test_vec4_cmod_propagation \
39 compiler/test_vec4_copy_propagation \
40 + compiler/test_vec4_dead_code_eliminate \
41 compiler/test_vec4_register_coalesce
42
43 TESTS += $(COMPILER_TESTS)
44 @@ -97,6 +98,10 @@ compiler_test_vec4_cmod_propagation_SOURCES = \
45 compiler/test_vec4_cmod_propagation.cpp
46 compiler_test_vec4_cmod_propagation_LDADD = $(TEST_LIBS)
47
48 +compiler_test_vec4_dead_code_eliminate_SOURCES = \
49 + compiler/test_vec4_dead_code_eliminate.cpp
50 +compiler_test_vec4_dead_code_eliminate_LDADD = $(TEST_LIBS)
51 +
52 # Strictly speaking this is neither a C++ test nor using gtest - we can address
53 # address that at a later point. Until then, this allows us a to simplify things.
54 compiler_test_eu_compact_SOURCES = \
55 diff --git a/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp b/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp
56 index c09a3d7ebe..99e4c9caca 100644
57 --- a/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp
58 +++ b/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp
59 @@ -81,17 +81,46 @@ vec4_visitor::dead_code_eliminate()
60 result_live[3] = result;
61 }
62
63 - for (int c = 0; c < 4; c++) {
64 - if (!result_live[c] && inst->dst.writemask & (1 << c)) {
65 - inst->dst.writemask &= ~(1 << c);
66 + if (inst->writes_flag()) {
67 + /* Independently calculate the usage of the flag components and
68 + * the destination value components.
69 + */
70 + uint8_t flag_mask = inst->dst.writemask;
71 + uint8_t dest_mask = inst->dst.writemask;
72 +
73 + for (int c = 0; c < 4; c++) {
74 + if (!result_live[c] && dest_mask & (1 << c))
75 + dest_mask &= ~(1 << c);
76 +
77 + if (!BITSET_TEST(flag_live, c))
78 + flag_mask &= ~(1 << c);
79 + }
80 +
81 + if (inst->dst.writemask != (flag_mask | dest_mask)) {
82 progress = true;
83 + inst->dst.writemask = flag_mask | dest_mask;
84 + }
85
86 - if (inst->dst.writemask == 0) {
87 - if (inst->writes_accumulator || inst->writes_flag()) {
88 - inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
89 - } else {
90 - inst->opcode = BRW_OPCODE_NOP;
91 - break;
92 + /* If none of the destination components are read, replace the
93 + * destination register with the NULL register.
94 + */
95 + if (dest_mask == 0) {
96 + progress = true;
97 + inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
98 + }
99 + } else {
100 + for (int c = 0; c < 4; c++) {
101 + if (!result_live[c] && inst->dst.writemask & (1 << c)) {
102 + inst->dst.writemask &= ~(1 << c);
103 + progress = true;
104 +
105 + if (inst->dst.writemask == 0) {
106 + if (inst->writes_accumulator) {
107 + inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
108 + } else {
109 + inst->opcode = BRW_OPCODE_NOP;
110 + break;
111 + }
112 }
113 }
114 }
115 diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.build
116 index 3cdeb6214a..f2854be779 100644
117 --- a/src/intel/compiler/meson.build
118 +++ b/src/intel/compiler/meson.build
119 @@ -145,7 +145,8 @@ if with_tests
120 foreach t : ['fs_cmod_propagation', 'fs_copy_propagation',
121 'fs_saturate_propagation', 'vf_float_conversions',
122 'vec4_register_coalesce', 'vec4_copy_propagation',
123 - 'vec4_cmod_propagation', 'eu_compact', 'eu_validate']
124 + 'vec4_cmod_propagation', 'vec4_dead_code_eliminate',
125 + 'eu_compact', 'eu_validate']
126 test(
127 t,
128 executable(
129 diff --git a/src/intel/compiler/test_vec4_dead_code_eliminate.cpp b/src/intel/compiler/test_vec4_dead_code_eliminate.cpp
130 new file mode 100644
131 index 0000000000..25739c2895
132 --- /dev/null
133 +++ b/src/intel/compiler/test_vec4_dead_code_eliminate.cpp
134 @@ -0,0 +1,163 @@
135 +/*
136 + * Copyright © 2018 Intel Corporation
137 + *
138 + * Permission is hereby granted, free of charge, to any person obtaining a
139 + * copy of this software and associated documentation files (the "Software"),
140 + * to deal in the Software without restriction, including without limitation
141 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
142 + * and/or sell copies of the Software, and to permit persons to whom the
143 + * Software is furnished to do so, subject to the following conditions:
144 + *
145 + * The above copyright notice and this permission notice (including the next
146 + * paragraph) shall be included in all copies or substantial portions of the
147 + * Software.
148 + *
149 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
150 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
151 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
152 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
153 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
154 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
155 + * IN THE SOFTWARE.
156 + */
157 +
158 +#include <gtest/gtest.h>
159 +#include "brw_vec4.h"
160 +#include "program/program.h"
161 +
162 +using namespace brw;
163 +
164 +class dead_code_eliminate_test : public ::testing::Test {
165 + virtual void SetUp();
166 +
167 +public:
168 + struct brw_compiler *compiler;
169 + struct gen_device_info *devinfo;
170 + struct gl_context *ctx;
171 + struct gl_shader_program *shader_prog;
172 + struct brw_vue_prog_data *prog_data;
173 + vec4_visitor *v;
174 +};
175 +
176 +class dead_code_eliminate_vec4_visitor : public vec4_visitor
177 +{
178 +public:
179 + dead_code_eliminate_vec4_visitor(struct brw_compiler *compiler,
180 + nir_shader *shader,
181 + struct brw_vue_prog_data *prog_data)
182 + : vec4_visitor(compiler, NULL, NULL, prog_data, shader, NULL,
183 + false /* no_spills */, -1)
184 + {
185 + prog_data->dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
186 + }
187 +
188 +protected:
189 + virtual dst_reg *make_reg_for_system_value(int /* location */)
190 + {
191 + unreachable("Not reached");
192 + }
193 +
194 + virtual void setup_payload()
195 + {
196 + unreachable("Not reached");
197 + }
198 +
199 + virtual void emit_prolog()
200 + {
201 + unreachable("Not reached");
202 + }
203 +
204 + virtual void emit_thread_end()
205 + {
206 + unreachable("Not reached");
207 + }
208 +
209 + virtual void emit_urb_write_header(int /* mrf */)
210 + {
211 + unreachable("Not reached");
212 + }
213 +
214 + virtual vec4_instruction *emit_urb_write_opcode(bool /* complete */)
215 + {
216 + unreachable("Not reached");
217 + }
218 +};
219 +
220 +
221 +void dead_code_eliminate_test::SetUp()
222 +{
223 + ctx = (struct gl_context *)calloc(1, sizeof(*ctx));
224 + compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
225 + devinfo = (struct gen_device_info *)calloc(1, sizeof(*devinfo));
226 + prog_data = (struct brw_vue_prog_data *)calloc(1, sizeof(*prog_data));
227 + compiler->devinfo = devinfo;
228 +
229 + nir_shader *shader =
230 + nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL, NULL);
231 +
232 + v = new dead_code_eliminate_vec4_visitor(compiler, shader, prog_data);
233 +
234 + devinfo->gen = 4;
235 +}
236 +
237 +static void
238 +dead_code_eliminate(vec4_visitor *v)
239 +{
240 + bool print = false;
241 +
242 + if (print) {
243 + fprintf(stderr, "instructions before:\n");
244 + v->dump_instructions();
245 + }
246 +
247 + v->calculate_cfg();
248 + v->dead_code_eliminate();
249 +
250 + if (print) {
251 + fprintf(stderr, "instructions after:\n");
252 + v->dump_instructions();
253 + }
254 +}
255 +
256 +TEST_F(dead_code_eliminate_test, some_dead_channels_all_flags_used)
257 +{
258 + const vec4_builder bld = vec4_builder(v).at_end();
259 + src_reg r1 = src_reg(v, glsl_type::vec4_type);
260 + src_reg r2 = src_reg(v, glsl_type::vec4_type);
261 + src_reg r3 = src_reg(v, glsl_type::vec4_type);
262 + src_reg r4 = src_reg(v, glsl_type::vec4_type);
263 + src_reg r5 = src_reg(v, glsl_type::vec4_type);
264 + src_reg r6 = src_reg(v, glsl_type::vec4_type);
265 +
266 + /* Sequence like the following should not be modified by DCE.
267 + *
268 + * cmp.l.f0(8) g4<1>F g2<4,4,1>.wF g1<4,4,1>.xF
269 + * mov(8) g5<1>.xF g4<4,4,1>.xF
270 + * (+f0.x) sel(8) g6<1>UD g3<4>UD g6<4>UD
271 + */
272 + vec4_instruction *test_cmp =
273 + bld.CMP(dst_reg(r4), r2, r1, BRW_CONDITIONAL_L);
274 +
275 + test_cmp->src[0].swizzle = BRW_SWIZZLE_WWWW;
276 + test_cmp->src[1].swizzle = BRW_SWIZZLE_XXXX;
277 +
278 + vec4_instruction *test_mov =
279 + bld.MOV(dst_reg(r5), r4);
280 +
281 + test_mov->dst.writemask = WRITEMASK_X;
282 + test_mov->src[0].swizzle = BRW_SWIZZLE_XXXX;
283 +
284 + vec4_instruction *test_sel =
285 + bld.SEL(dst_reg(r6), r3, r6);
286 +
287 + set_predicate(BRW_PREDICATE_NORMAL, test_sel);
288 +
289 + /* The scratch write is here just to make r5 and r6 be live so that the
290 + * whole program doesn't get eliminated by DCE.
291 + */
292 + v->emit(v->SCRATCH_WRITE(dst_reg(r4), r6, r5));
293 +
294 + dead_code_eliminate(v);
295 +
296 + EXPECT_EQ(test_cmp->dst.writemask, WRITEMASK_XYZW);
297 +}
298 --
299 2.20.1
300

  ViewVC Help
Powered by ViewVC 1.1.30