1 |
From 79c865f2310df6b843702bc47d3393d125801701 Mon Sep 17 00:00:00 2001 |
2 |
From: Jason Ekstrand <jason.ekstrand@intel.com> |
3 |
Date: Wed, 17 Oct 2018 11:34:32 -0500 |
4 |
Subject: [PATCH 75/78] intel/peephole_ffma: Fix swizzle propagation |
5 |
|
6 |
The num_components value passed into get_mul_for_src is used to only |
7 |
compose the parts of the swizzle that we know will be used so we don't |
8 |
compose invalid swizzle components. However, we had a bug where we |
9 |
passed the number of components of the add all the way through. For the |
10 |
given source, we need the number of components read from that source. |
11 |
In the case where we have a narrow add, say 2 components, that is |
12 |
sourced from a chain of wider instructions, we may not compose all the |
13 |
swizzles. All we really need to do is pass through the right number of |
14 |
components at each level. |
15 |
|
16 |
Fixes: 2231cf0ba3a "nir: Fix output swizzle in get_mul_for_src" |
17 |
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> |
18 |
(cherry picked from commit 1ede463b6e66eb0a6df5250261810b6985c35eb9) |
19 |
--- |
20 |
src/intel/compiler/brw_nir_opt_peephole_ffma.c | 11 +++++++---- |
21 |
1 file changed, 7 insertions(+), 4 deletions(-) |
22 |
|
23 |
diff --git a/src/intel/compiler/brw_nir_opt_peephole_ffma.c b/src/intel/compiler/brw_nir_opt_peephole_ffma.c |
24 |
index cc225e1847..7271bdbca4 100644 |
25 |
--- a/src/intel/compiler/brw_nir_opt_peephole_ffma.c |
26 |
+++ b/src/intel/compiler/brw_nir_opt_peephole_ffma.c |
27 |
@@ -68,7 +68,7 @@ are_all_uses_fadd(nir_ssa_def *def) |
28 |
} |
29 |
|
30 |
static nir_alu_instr * |
31 |
-get_mul_for_src(nir_alu_src *src, int num_components, |
32 |
+get_mul_for_src(nir_alu_src *src, unsigned num_components, |
33 |
uint8_t swizzle[4], bool *negate, bool *abs) |
34 |
{ |
35 |
uint8_t swizzle_tmp[4]; |
36 |
@@ -93,16 +93,19 @@ get_mul_for_src(nir_alu_src *src, int num_components, |
37 |
switch (alu->op) { |
38 |
case nir_op_imov: |
39 |
case nir_op_fmov: |
40 |
- alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs); |
41 |
+ alu = get_mul_for_src(&alu->src[0], alu->dest.dest.ssa.num_components, |
42 |
+ swizzle, negate, abs); |
43 |
break; |
44 |
|
45 |
case nir_op_fneg: |
46 |
- alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs); |
47 |
+ alu = get_mul_for_src(&alu->src[0], alu->dest.dest.ssa.num_components, |
48 |
+ swizzle, negate, abs); |
49 |
*negate = !*negate; |
50 |
break; |
51 |
|
52 |
case nir_op_fabs: |
53 |
- alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs); |
54 |
+ alu = get_mul_for_src(&alu->src[0], alu->dest.dest.ssa.num_components, |
55 |
+ swizzle, negate, abs); |
56 |
*negate = false; |
57 |
*abs = true; |
58 |
break; |
59 |
-- |
60 |
2.20.1 |
61 |
|