1 |
From 32807e38c3574bde659ef6ddf434e57c0d7cffe8 Mon Sep 17 00:00:00 2001 |
2 |
From: Ian Romanick <ian.d.romanick@intel.com> |
3 |
Date: Wed, 16 Mar 2022 12:56:54 -0700 |
4 |
Subject: [PATCH 22/29] intel/fs: Force destination types on DP4A instructions |
5 |
|
6 |
Most of the time, the destination type doesn't matter. On the versions |
7 |
with _sat, if the destination type is incorrect, the saturating clamp |
8 |
will not be applied correctly. |
9 |
|
10 |
Fixes the following CTS tests: |
11 |
|
12 |
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.all_packed_ss_v4i8_out32 |
13 |
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.all_packed_su_v4i8_out32 |
14 |
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.all_packed_us_v4i8_out32 |
15 |
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.all_packed_uu_v4i8_out32 |
16 |
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.all_ss_v4i8_out32 |
17 |
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.all_su_v4i8_out32 |
18 |
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.all_us_v4i8_out32 |
19 |
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.all_uu_v4i8_out32 |
20 |
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.limits_packed_ss_v4i8_out32 |
21 |
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.limits_packed_su_v4i8_out32 |
22 |
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.limits_packed_us_v4i8_out32 |
23 |
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.limits_packed_uu_v4i8_out32 |
24 |
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.limits_ss_v4i8_out32 |
25 |
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.limits_su_v4i8_out32 |
26 |
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.limits_us_v4i8_out32 |
27 |
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.limits_uu_v4i8_out32 |
28 |
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.small_packed_ss_v4i8_out32 |
29 |
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.small_packed_su_v4i8_out32 |
30 |
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.small_packed_us_v4i8_out32 |
31 |
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.small_packed_uu_v4i8_out32 |
32 |
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.small_ss_v4i8_out32 |
33 |
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.small_su_v4i8_out32 |
34 |
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.small_us_v4i8_out32 |
35 |
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.small_uu_v4i8_out32 |
36 |
|
37 |
v2: Update anv-tgl-fails.txt. |
38 |
|
39 |
Reviewed-by: Ivan Briano <ivan.briano@intel.com> |
40 |
Fixes: 0f809dbf404 ("intel/compiler: Basic support for DP4A instruction") |
41 |
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15417> |
42 |
(cherry picked from commit 19330eeb1d65c631ce11a0be1cb13437c6c28491) |
43 |
--- |
44 |
.pick_status.json | 2 +- |
45 |
src/intel/compiler/brw_fs_nir.cpp | 6 +++--- |
46 |
2 files changed, 4 insertions(+), 4 deletions(-) |
47 |
|
48 |
diff --git a/.pick_status.json b/.pick_status.json |
49 |
index 283e3642175..03a0930b88a 100644 |
50 |
--- a/.pick_status.json |
51 |
+++ b/.pick_status.json |
52 |
@@ -490,7 +490,7 @@ |
53 |
"description": "intel/fs: Force destination types on DP4A instructions", |
54 |
"nominated": true, |
55 |
"nomination_type": 1, |
56 |
- "resolution": 0, |
57 |
+ "resolution": 1, |
58 |
"main_sha": null, |
59 |
"because_sha": "0f809dbf4048cbd89c5cf28dbb9ab38cc726fe2a" |
60 |
}, |
61 |
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp |
62 |
index 9376537bd44..e364c373210 100644 |
63 |
--- a/src/intel/compiler/brw_fs_nir.cpp |
64 |
+++ b/src/intel/compiler/brw_fs_nir.cpp |
65 |
@@ -1886,7 +1886,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr, |
66 |
|
67 |
case nir_op_sdot_4x8_iadd: |
68 |
case nir_op_sdot_4x8_iadd_sat: |
69 |
- inst = bld.DP4A(result, |
70 |
+ inst = bld.DP4A(retype(result, BRW_REGISTER_TYPE_D), |
71 |
retype(op[2], BRW_REGISTER_TYPE_D), |
72 |
retype(op[0], BRW_REGISTER_TYPE_D), |
73 |
retype(op[1], BRW_REGISTER_TYPE_D)); |
74 |
@@ -1897,7 +1897,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr, |
75 |
|
76 |
case nir_op_udot_4x8_uadd: |
77 |
case nir_op_udot_4x8_uadd_sat: |
78 |
- inst = bld.DP4A(result, |
79 |
+ inst = bld.DP4A(retype(result, BRW_REGISTER_TYPE_UD), |
80 |
retype(op[2], BRW_REGISTER_TYPE_UD), |
81 |
retype(op[0], BRW_REGISTER_TYPE_UD), |
82 |
retype(op[1], BRW_REGISTER_TYPE_UD)); |
83 |
@@ -1908,7 +1908,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr, |
84 |
|
85 |
case nir_op_sudot_4x8_iadd: |
86 |
case nir_op_sudot_4x8_iadd_sat: |
87 |
- inst = bld.DP4A(result, |
88 |
+ inst = bld.DP4A(retype(result, BRW_REGISTER_TYPE_D), |
89 |
retype(op[2], BRW_REGISTER_TYPE_D), |
90 |
retype(op[0], BRW_REGISTER_TYPE_D), |
91 |
retype(op[1], BRW_REGISTER_TYPE_UD)); |
92 |
-- |
93 |
2.30.2 |
94 |
|