1 |
From 78d28da2092206d745f5070aa8b7263ad30124ad Mon Sep 17 00:00:00 2001 |
2 |
From: Francisco Jerez <currojerez@riseup.net> |
3 |
Date: Fri, 7 Dec 2018 14:26:23 -0800 |
4 |
Subject: [PATCH 73/78] intel/fs: Respect CHV/BXT regioning restrictions in |
5 |
copy propagation pass. |
6 |
|
7 |
Currently the visitor attempts to enforce the regioning restrictions |
8 |
that apply to double-precision instructions on CHV/BXT at NIR-to-i965 |
9 |
translation time. It is possible though for the copy propagation pass |
10 |
to violate these restrictions if a strided move is propagated into one |
11 |
of the affected instructions. I've only reproduced this issue on a |
12 |
future platform but it could affect CHV/BXT too under the right |
13 |
conditions. |
14 |
|
15 |
Cc: mesa-stable@lists.freedesktop.org |
16 |
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com> |
17 |
(cherry picked from commit c301f447ea8449804208e414f189c0571e4339a8) |
18 |
--- |
19 |
.../compiler/brw_fs_copy_propagation.cpp | 10 +++++++ |
20 |
src/intel/compiler/brw_ir_fs.h | 28 +++++++++++++++++++ |
21 |
2 files changed, 38 insertions(+) |
22 |
|
23 |
diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp |
24 |
index ab34b63748..a76e0f3a6b 100644 |
25 |
--- a/src/intel/compiler/brw_fs_copy_propagation.cpp |
26 |
+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp |
27 |
@@ -315,6 +315,16 @@ can_take_stride(fs_inst *inst, unsigned arg, unsigned stride, |
28 |
if (stride > 4) |
29 |
return false; |
30 |
|
31 |
+ /* Bail if the channels of the source need to be aligned to the byte offset |
32 |
+ * of the corresponding channel of the destination, and the provided stride |
33 |
+ * would break this restriction. |
34 |
+ */ |
35 |
+ if (has_dst_aligned_region_restriction(devinfo, inst) && |
36 |
+ !(type_sz(inst->src[arg].type) * stride == |
37 |
+ type_sz(inst->dst.type) * inst->dst.stride || |
38 |
+ stride == 0)) |
39 |
+ return false; |
40 |
+ |
41 |
/* 3-source instructions can only be Align16, which restricts what strides |
42 |
* they can take. They can only take a stride of 1 (the usual case), or 0 |
43 |
* with a special "repctrl" bit. But the repctrl bit doesn't work for |
44 |
diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h |
45 |
index 07e7224e0f..95b069a2e0 100644 |
46 |
--- a/src/intel/compiler/brw_ir_fs.h |
47 |
+++ b/src/intel/compiler/brw_ir_fs.h |
48 |
@@ -486,4 +486,32 @@ get_exec_type_size(const fs_inst *inst) |
49 |
return type_sz(get_exec_type(inst)); |
50 |
} |
51 |
|
52 |
+/** |
53 |
+ * Return whether the following regioning restriction applies to the specified |
54 |
+ * instruction. From the Cherryview PRM Vol 7. "Register Region |
55 |
+ * Restrictions": |
56 |
+ * |
57 |
+ * "When source or destination datatype is 64b or operation is integer DWord |
58 |
+ * multiply, regioning in Align1 must follow these rules: |
59 |
+ * |
60 |
+ * 1. Source and Destination horizontal stride must be aligned to the same qword. |
61 |
+ * 2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride. |
62 |
+ * 3. Source and Destination offset must be the same, except the case of |
63 |
+ * scalar source." |
64 |
+ */ |
65 |
+static inline bool |
66 |
+has_dst_aligned_region_restriction(const gen_device_info *devinfo, |
67 |
+ const fs_inst *inst) |
68 |
+{ |
69 |
+ const brw_reg_type exec_type = get_exec_type(inst); |
70 |
+ const bool is_int_multiply = !brw_reg_type_is_floating_point(exec_type) && |
71 |
+ (inst->opcode == BRW_OPCODE_MUL || inst->opcode == BRW_OPCODE_MAD); |
72 |
+ |
73 |
+ if (type_sz(inst->dst.type) > 4 || type_sz(exec_type) > 4 || |
74 |
+ (type_sz(exec_type) == 4 && is_int_multiply)) |
75 |
+ return devinfo->is_cherryview || gen_device_info_is_9lp(devinfo); |
76 |
+ else |
77 |
+ return false; |
78 |
+} |
79 |
+ |
80 |
#endif |
81 |
-- |
82 |
2.20.1 |
83 |
|