blob: 9a7a61ca5bed71162ba16d6e60181f293c0996df [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15#include "PixelRoutine.hpp"
16
Nicolas Capens68a82382018-10-02 13:16:55 -040017#include "Constants.hpp"
Ben Claytonbc1c0672019-12-17 20:37:37 +000018#include "SamplerCore.hpp"
Nicolas Capens1d8c8db2018-11-05 16:30:42 -050019#include "Device/Primitive.hpp"
Ben Claytonbc1c0672019-12-17 20:37:37 +000020#include "Device/QuadRasterizer.hpp"
21#include "Device/Renderer.hpp"
Ben Clayton25e06e02020-02-07 11:19:08 +000022#include "System/Debug.hpp"
Chris Forbesc2968062019-03-19 16:48:03 -070023#include "Vulkan/VkPipelineLayout.hpp"
24
Nicolas Capens157ba262019-12-10 17:49:14 -050025namespace sw {
26
27PixelRoutine::PixelRoutine(
Ben Claytonbc1c0672019-12-17 20:37:37 +000028 const PixelProcessor::State &state,
29 vk::PipelineLayout const *pipelineLayout,
30 SpirvShader const *spirvShader,
31 const vk::DescriptorSet::Bindings &descriptorSets)
32 : QuadRasterizer(state, spirvShader)
33 , routine(pipelineLayout)
34 , descriptorSets(descriptorSets)
Nicolas Capens68a82382018-10-02 13:16:55 -040035{
Nicolas Capens81bc9d92019-12-16 15:05:57 -050036 if(spirvShader)
Nicolas Capens68a82382018-10-02 13:16:55 -040037 {
Nicolas Capens157ba262019-12-10 17:49:14 -050038 spirvShader->emitProlog(&routine);
Alexis Hetu8af8b402019-05-28 14:48:19 -040039
Nicolas Capens157ba262019-12-10 17:49:14 -050040 // Clearing inputs to 0 is not demanded by the spec,
41 // but it makes the undefined behavior deterministic.
42 for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i++)
43 {
44 routine.inputs[i] = Float4(0.0f);
45 }
46 }
47}
48
49PixelRoutine::~PixelRoutine()
50{
51}
52
53void PixelRoutine::quad(Pointer<Byte> cBuffer[RENDERTARGETS], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y)
54{
55 // TODO: consider shader which modifies sample mask in general
56 const bool earlyDepthTest = !spirvShader || (spirvShader->getModes().EarlyFragmentTests && !spirvShader->getModes().DepthReplacing && !state.alphaToCoverage);
57
Ben Claytonbc1c0672019-12-17 20:37:37 +000058 Int zMask[4]; // Depth mask
59 Int sMask[4]; // Stencil mask
Nicolas Capens157ba262019-12-10 17:49:14 -050060
Nicolas Capens11cb8912020-01-14 00:24:05 -050061 for(unsigned int q = 0; q < state.multiSampleCount; q++)
Nicolas Capens157ba262019-12-10 17:49:14 -050062 {
63 zMask[q] = cMask[q];
64 sMask[q] = cMask[q];
65 }
66
Nicolas Capens11cb8912020-01-14 00:24:05 -050067 for(unsigned int q = 0; q < state.multiSampleCount; q++)
Nicolas Capens157ba262019-12-10 17:49:14 -050068 {
69 stencilTest(sBuffer, q, x, sMask[q], cMask[q]);
70 }
71
72 Float4 f;
73 Float4 rhwCentroid;
74
Ben Claytonbc1c0672019-12-17 20:37:37 +000075 Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(primitive + OFFSET(Primitive, xQuad), 16);
Nicolas Capens157ba262019-12-10 17:49:14 -050076
77 if(interpolateZ())
78 {
Nicolas Capens11cb8912020-01-14 00:24:05 -050079 for(unsigned int q = 0; q < state.multiSampleCount; q++)
Nicolas Capens157ba262019-12-10 17:49:14 -050080 {
81 Float4 x = xxxx;
82
Nicolas Capensc514ab02020-01-20 14:42:53 -050083 if(state.enableMultiSampling)
Nicolas Capens68a82382018-10-02 13:16:55 -040084 {
Ben Claytonbc1c0672019-12-17 20:37:37 +000085 x -= *Pointer<Float4>(constants + OFFSET(Constants, X) + q * sizeof(float4));
Nicolas Capens68a82382018-10-02 13:16:55 -040086 }
Nicolas Capens157ba262019-12-10 17:49:14 -050087
Alexis Hetu81e08df2020-04-14 15:57:03 -040088 z[q] = interpolate(x, Dz[q], z[q], primitive + OFFSET(Primitive, z), false, false, true);
Nicolas Capens68a82382018-10-02 13:16:55 -040089 }
90 }
91
Nicolas Capens157ba262019-12-10 17:49:14 -050092 Bool depthPass = false;
93
94 if(earlyDepthTest)
Nicolas Capens68a82382018-10-02 13:16:55 -040095 {
Nicolas Capens11cb8912020-01-14 00:24:05 -050096 for(unsigned int q = 0; q < state.multiSampleCount; q++)
Nicolas Capens157ba262019-12-10 17:49:14 -050097 {
98 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
99 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400100 }
101
Nicolas Capens157ba262019-12-10 17:49:14 -0500102 If(depthPass || Bool(!earlyDepthTest))
Nicolas Capens68a82382018-10-02 13:16:55 -0400103 {
Ben Claytonbc1c0672019-12-17 20:37:37 +0000104 Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive, yQuad), 16);
Nicolas Capens68a82382018-10-02 13:16:55 -0400105
Nicolas Capens157ba262019-12-10 17:49:14 -0500106 // Centroid locations
107 Float4 XXXX = Float4(0.0f);
108 Float4 YYYY = Float4(0.0f);
Nicolas Capens68a82382018-10-02 13:16:55 -0400109
Nicolas Capens157ba262019-12-10 17:49:14 -0500110 if(state.centroid)
Nicolas Capens68a82382018-10-02 13:16:55 -0400111 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500112 Float4 WWWW(1.0e-9f);
Nicolas Capens68a82382018-10-02 13:16:55 -0400113
Nicolas Capens11cb8912020-01-14 00:24:05 -0500114 for(unsigned int q = 0; q < state.multiSampleCount; q++)
Nicolas Capens68a82382018-10-02 13:16:55 -0400115 {
Ben Claytonbc1c0672019-12-17 20:37:37 +0000116 XXXX += *Pointer<Float4>(constants + OFFSET(Constants, sampleX[q]) + 16 * cMask[q]);
117 YYYY += *Pointer<Float4>(constants + OFFSET(Constants, sampleY[q]) + 16 * cMask[q]);
118 WWWW += *Pointer<Float4>(constants + OFFSET(Constants, weight) + 16 * cMask[q]);
Nicolas Capens68a82382018-10-02 13:16:55 -0400119 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500120
121 WWWW = Rcp_pp(WWWW);
122 XXXX *= WWWW;
123 YYYY *= WWWW;
124
125 XXXX += xxxx;
126 YYYY += yyyy;
Nicolas Capens68a82382018-10-02 13:16:55 -0400127 }
128
Nicolas Capens157ba262019-12-10 17:49:14 -0500129 if(interpolateW())
Nicolas Capens68a82382018-10-02 13:16:55 -0400130 {
Ben Claytonbc1c0672019-12-17 20:37:37 +0000131 w = interpolate(xxxx, Dw, rhw, primitive + OFFSET(Primitive, w), false, false, false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500132 rhw = reciprocal(w, false, false, true);
Nicolas Capens68a82382018-10-02 13:16:55 -0400133
134 if(state.centroid)
135 {
Ben Claytonbc1c0672019-12-17 20:37:37 +0000136 rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive, w), false, false));
Nicolas Capens68a82382018-10-02 13:16:55 -0400137 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500138 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400139
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500140 if(spirvShader)
Nicolas Capens157ba262019-12-10 17:49:14 -0500141 {
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500142 for(int interpolant = 0; interpolant < MAX_INTERFACE_COMPONENTS; interpolant++)
Nicolas Capens68a82382018-10-02 13:16:55 -0400143 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500144 auto const &input = spirvShader->inputs[interpolant];
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500145 if(input.Type != SpirvShader::ATTRIBTYPE_UNUSED)
Nicolas Capens68a82382018-10-02 13:16:55 -0400146 {
Nicolas Capensc514ab02020-01-20 14:42:53 -0500147 if(input.Centroid && state.enableMultiSampling)
Nicolas Capens157ba262019-12-10 17:49:14 -0500148 {
149 routine.inputs[interpolant] =
Ben Claytonbc1c0672019-12-17 20:37:37 +0000150 interpolateCentroid(XXXX, YYYY, rhwCentroid,
151 primitive + OFFSET(Primitive, V[interpolant]),
152 input.Flat, !input.NoPerspective);
Nicolas Capens157ba262019-12-10 17:49:14 -0500153 }
154 else
155 {
156 routine.inputs[interpolant] =
Ben Claytonbc1c0672019-12-17 20:37:37 +0000157 interpolate(xxxx, Dv[interpolant], rhw,
158 primitive + OFFSET(Primitive, V[interpolant]),
159 input.Flat, !input.NoPerspective, false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500160 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400161 }
162 }
163
Nicolas Capens157ba262019-12-10 17:49:14 -0500164 setBuiltins(x, y, z, w, cMask);
165
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500166 for(uint32_t i = 0; i < state.numClipDistances; i++)
Nicolas Capens68a82382018-10-02 13:16:55 -0400167 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500168 auto distance = interpolate(xxxx, DclipDistance[i], rhw,
Ben Claytonbc1c0672019-12-17 20:37:37 +0000169 primitive + OFFSET(Primitive, clipDistance[i]),
170 false, true, false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500171
172 auto clipMask = SignMask(CmpGE(distance, SIMD::Float(0)));
Nicolas Capens11cb8912020-01-14 00:24:05 -0500173 for(auto ms = 0u; ms < state.multiSampleCount; ms++)
Nicolas Capens68a82382018-10-02 13:16:55 -0400174 {
Nicolas Capensc514ab02020-01-20 14:42:53 -0500175 // FIXME(b/148105887): Fragments discarded by clipping do not exist at
176 // all -- they should not be counted in queries or have their Z/S effects
177 // performed when early fragment tests are enabled.
Nicolas Capens157ba262019-12-10 17:49:14 -0500178 cMask[ms] &= clipMask;
Nicolas Capens68a82382018-10-02 13:16:55 -0400179 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400180
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500181 if(spirvShader->getUsedCapabilities().ClipDistance)
Ben Clayton9ad035b2019-08-09 23:44:09 +0100182 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500183 auto it = spirvShader->inputBuiltins.find(spv::BuiltInClipDistance);
Ben Clayton9ad035b2019-08-09 23:44:09 +0100184 if(it != spirvShader->inputBuiltins.end())
185 {
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500186 if(i < it->second.SizeInComponents)
Ben Clayton9ad035b2019-08-09 23:44:09 +0100187 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500188 routine.getVariable(it->second.Id)[it->second.FirstComponent + i] = distance;
Ben Clayton9ad035b2019-08-09 23:44:09 +0100189 }
190 }
191 }
Chris Forbes6cf65f62019-05-15 13:00:40 -0700192 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400193
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500194 if(spirvShader->getUsedCapabilities().CullDistance)
Nicolas Capens68a82382018-10-02 13:16:55 -0400195 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500196 auto it = spirvShader->inputBuiltins.find(spv::BuiltInCullDistance);
197 if(it != spirvShader->inputBuiltins.end())
198 {
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500199 for(uint32_t i = 0; i < state.numCullDistances; i++)
Nicolas Capens157ba262019-12-10 17:49:14 -0500200 {
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500201 if(i < it->second.SizeInComponents)
Nicolas Capens157ba262019-12-10 17:49:14 -0500202 {
203 routine.getVariable(it->second.Id)[it->second.FirstComponent + i] =
Ben Claytonbc1c0672019-12-17 20:37:37 +0000204 interpolate(xxxx, DcullDistance[i], rhw,
205 primitive + OFFSET(Primitive, cullDistance[i]),
206 false, true, false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500207 }
208 }
209 }
Chris Forbes71a1e012019-04-22 14:18:34 -0700210 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500211 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400212
Nicolas Capens157ba262019-12-10 17:49:14 -0500213 Bool alphaPass = true;
Nicolas Capens68a82382018-10-02 13:16:55 -0400214
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500215 if(spirvShader)
Nicolas Capens157ba262019-12-10 17:49:14 -0500216 {
217 bool earlyFragTests = (spirvShader && spirvShader->getModes().EarlyFragmentTests);
218 applyShader(cMask, earlyFragTests ? sMask : cMask, earlyDepthTest ? zMask : cMask);
219 }
220
221 alphaPass = alphaTest(cMask);
222
223 if((spirvShader && spirvShader->getModes().ContainsKill) || state.alphaToCoverage)
224 {
Nicolas Capens11cb8912020-01-14 00:24:05 -0500225 for(unsigned int q = 0; q < state.multiSampleCount; q++)
Nicolas Capens157ba262019-12-10 17:49:14 -0500226 {
227 zMask[q] &= cMask[q];
228 sMask[q] &= cMask[q];
229 }
230 }
231
232 If(alphaPass)
233 {
234 if(!earlyDepthTest)
Chris Forbes71a1e012019-04-22 14:18:34 -0700235 {
Nicolas Capens11cb8912020-01-14 00:24:05 -0500236 for(unsigned int q = 0; q < state.multiSampleCount; q++)
Nicolas Capens68a82382018-10-02 13:16:55 -0400237 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500238 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
Nicolas Capens68a82382018-10-02 13:16:55 -0400239 }
240 }
241
Nicolas Capens157ba262019-12-10 17:49:14 -0500242 If(depthPass || Bool(earlyDepthTest))
Nicolas Capens68a82382018-10-02 13:16:55 -0400243 {
Nicolas Capens11cb8912020-01-14 00:24:05 -0500244 for(unsigned int q = 0; q < state.multiSampleCount; q++)
Nicolas Capens68a82382018-10-02 13:16:55 -0400245 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500246 if(state.multiSampleMask & (1 << q))
Nicolas Capens68a82382018-10-02 13:16:55 -0400247 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500248 writeDepth(zBuffer, q, x, z[q], zMask[q]);
Nicolas Capens68a82382018-10-02 13:16:55 -0400249
Nicolas Capens157ba262019-12-10 17:49:14 -0500250 if(state.occlusionEnabled)
Nicolas Capens68a82382018-10-02 13:16:55 -0400251 {
Ben Claytonbc1c0672019-12-17 20:37:37 +0000252 occlusion += *Pointer<UInt>(constants + OFFSET(Constants, occlusionCount) + 4 * (zMask[q] & sMask[q]));
Nicolas Capens68a82382018-10-02 13:16:55 -0400253 }
254 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400255 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400256
Nicolas Capens157ba262019-12-10 17:49:14 -0500257 rasterOperation(cBuffer, x, sMask, zMask, cMask);
Nicolas Capens68a82382018-10-02 13:16:55 -0400258 }
259 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400260 }
261
Nicolas Capens11cb8912020-01-14 00:24:05 -0500262 for(unsigned int q = 0; q < state.multiSampleCount; q++)
Nicolas Capens68a82382018-10-02 13:16:55 -0400263 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500264 if(state.multiSampleMask & (1 << q))
Nicolas Capens68a82382018-10-02 13:16:55 -0400265 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500266 writeStencil(sBuffer, q, x, sMask[q], zMask[q], cMask[q]);
Nicolas Capens68a82382018-10-02 13:16:55 -0400267 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400268 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500269}
Nicolas Capens68a82382018-10-02 13:16:55 -0400270
Nicolas Capens157ba262019-12-10 17:49:14 -0500271Float4 PixelRoutine::interpolateCentroid(const Float4 &x, const Float4 &y, const Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
272{
Ben Claytonbc1c0672019-12-17 20:37:37 +0000273 Float4 interpolant = *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation, C), 16);
Nicolas Capens157ba262019-12-10 17:49:14 -0500274
275 if(!flat)
Nicolas Capens68a82382018-10-02 13:16:55 -0400276 {
Ben Claytonbc1c0672019-12-17 20:37:37 +0000277 interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation, A), 16) +
278 y * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation, B), 16);
Nicolas Capens157ba262019-12-10 17:49:14 -0500279
280 if(perspective)
Nicolas Capens68a82382018-10-02 13:16:55 -0400281 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500282 interpolant *= rhw;
Nicolas Capens68a82382018-10-02 13:16:55 -0400283 }
284 }
285
Nicolas Capens157ba262019-12-10 17:49:14 -0500286 return interpolant;
287}
288
289void PixelRoutine::stencilTest(const Pointer<Byte> &sBuffer, int q, const Int &x, Int &sMask, const Int &cMask)
290{
291 if(!state.stencilActive)
Nicolas Capens68a82382018-10-02 13:16:55 -0400292 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500293 return;
Nicolas Capens68a82382018-10-02 13:16:55 -0400294 }
295
Nicolas Capens157ba262019-12-10 17:49:14 -0500296 // (StencilRef & StencilMask) CompFunc (StencilBufferValue & StencilMask)
297
298 Pointer<Byte> buffer = sBuffer + x;
299
300 if(q > 0)
Chris Forbesbea47512019-03-12 14:50:55 -0700301 {
Ben Claytonbc1c0672019-12-17 20:37:37 +0000302 buffer += q * *Pointer<Int>(data + OFFSET(DrawData, stencilSliceB));
Chris Forbesbea47512019-03-12 14:50:55 -0700303 }
304
Nicolas Capens157ba262019-12-10 17:49:14 -0500305 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, stencilPitchB));
306 Byte8 value = *Pointer<Byte8>(buffer) & Byte8(-1, -1, 0, 0, 0, 0, 0, 0);
307 value = value | (*Pointer<Byte8>(buffer + pitch - 2) & Byte8(0, 0, -1, -1, 0, 0, 0, 0));
308 Byte8 valueBack = value;
Chris Forbesbea47512019-03-12 14:50:55 -0700309
Nicolas Capens157ba262019-12-10 17:49:14 -0500310 if(state.frontStencil.compareMask != 0xff)
311 {
Ben Claytonbc1c0672019-12-17 20:37:37 +0000312 value &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[0].testMaskQ));
Chris Forbesbea47512019-03-12 14:50:55 -0700313 }
314
Nicolas Capens157ba262019-12-10 17:49:14 -0500315 stencilTest(value, state.frontStencil.compareOp, false);
316
317 if(state.backStencil.compareMask != 0xff)
Nicolas Capens68a82382018-10-02 13:16:55 -0400318 {
Ben Claytonbc1c0672019-12-17 20:37:37 +0000319 valueBack &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[1].testMaskQ));
Nicolas Capens68a82382018-10-02 13:16:55 -0400320 }
321
Nicolas Capens157ba262019-12-10 17:49:14 -0500322 stencilTest(valueBack, state.backStencil.compareOp, true);
323
Ben Claytonbc1c0672019-12-17 20:37:37 +0000324 value &= *Pointer<Byte8>(primitive + OFFSET(Primitive, clockwiseMask));
325 valueBack &= *Pointer<Byte8>(primitive + OFFSET(Primitive, invClockwiseMask));
Nicolas Capens157ba262019-12-10 17:49:14 -0500326 value |= valueBack;
327
328 sMask = SignMask(value) & cMask;
329}
330
331void PixelRoutine::stencilTest(Byte8 &value, VkCompareOp stencilCompareMode, bool isBack)
332{
333 Byte8 equal;
334
335 switch(stencilCompareMode)
Nicolas Capens68a82382018-10-02 13:16:55 -0400336 {
Ben Claytonbc1c0672019-12-17 20:37:37 +0000337 case VK_COMPARE_OP_ALWAYS:
338 value = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
339 break;
340 case VK_COMPARE_OP_NEVER:
341 value = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
342 break;
343 case VK_COMPARE_OP_LESS: // a < b ~ b > a
344 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
345 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedSignedQ)));
346 break;
347 case VK_COMPARE_OP_EQUAL:
348 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedQ)));
349 break;
350 case VK_COMPARE_OP_NOT_EQUAL: // a != b ~ !(a == b)
351 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedQ)));
352 value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
353 break;
354 case VK_COMPARE_OP_LESS_OR_EQUAL: // a <= b ~ (b > a) || (a == b)
355 equal = value;
356 equal = CmpEQ(equal, *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedQ)));
357 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
358 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedSignedQ)));
359 value |= equal;
360 break;
361 case VK_COMPARE_OP_GREATER: // a > b
362 equal = *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedSignedQ));
363 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
364 equal = CmpGT(As<SByte8>(equal), As<SByte8>(value));
365 value = equal;
366 break;
367 case VK_COMPARE_OP_GREATER_OR_EQUAL: // a >= b ~ !(a < b) ~ !(b > a)
368 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
369 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData, stencil[isBack].referenceMaskedSignedQ)));
370 value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
371 break;
372 default:
Nicolas Capens865f8892020-01-21 14:27:10 -0500373 UNSUPPORTED("VkCompareOp: %d", int(stencilCompareMode));
Nicolas Capens157ba262019-12-10 17:49:14 -0500374 }
375}
Nicolas Capens68a82382018-10-02 13:16:55 -0400376
Nicolas Capens157ba262019-12-10 17:49:14 -0500377Bool PixelRoutine::depthTest32F(const Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &sMask, Int &zMask, const Int &cMask)
378{
379 Float4 Z = z;
Nicolas Capens68a82382018-10-02 13:16:55 -0400380
Nicolas Capens157ba262019-12-10 17:49:14 -0500381 if(spirvShader && spirvShader->getModes().DepthReplacing)
382 {
383 Z = oDepth;
384 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400385
Nicolas Capens157ba262019-12-10 17:49:14 -0500386 Pointer<Byte> buffer = zBuffer + 4 * x;
Ben Claytonbc1c0672019-12-17 20:37:37 +0000387 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
Nicolas Capens68a82382018-10-02 13:16:55 -0400388
Nicolas Capens157ba262019-12-10 17:49:14 -0500389 if(q > 0)
390 {
Ben Claytonbc1c0672019-12-17 20:37:37 +0000391 buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB));
Nicolas Capens157ba262019-12-10 17:49:14 -0500392 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400393
Nicolas Capens157ba262019-12-10 17:49:14 -0500394 Float4 zValue;
Nicolas Capens68a82382018-10-02 13:16:55 -0400395
Nicolas Capens157ba262019-12-10 17:49:14 -0500396 if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
397 {
Alexis Hetu0d0ed6f2019-11-20 10:40:53 -0500398 // FIXME: Properly optimizes?
Nicolas Capens157ba262019-12-10 17:49:14 -0500399 zValue.xy = *Pointer<Float4>(buffer);
400 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
Nicolas Capens68a82382018-10-02 13:16:55 -0400401 }
402
Nicolas Capens157ba262019-12-10 17:49:14 -0500403 Int4 zTest;
404
405 switch(state.depthCompareMode)
Chris Forbesbea47512019-03-12 14:50:55 -0700406 {
Ben Claytonbc1c0672019-12-17 20:37:37 +0000407 case VK_COMPARE_OP_ALWAYS:
408 // Optimized
409 break;
410 case VK_COMPARE_OP_NEVER:
411 // Optimized
412 break;
413 case VK_COMPARE_OP_EQUAL:
414 zTest = CmpEQ(zValue, Z);
415 break;
416 case VK_COMPARE_OP_NOT_EQUAL:
417 zTest = CmpNEQ(zValue, Z);
418 break;
419 case VK_COMPARE_OP_LESS:
420 zTest = CmpNLE(zValue, Z);
421 break;
422 case VK_COMPARE_OP_GREATER_OR_EQUAL:
423 zTest = CmpLE(zValue, Z);
424 break;
425 case VK_COMPARE_OP_LESS_OR_EQUAL:
426 zTest = CmpNLT(zValue, Z);
427 break;
428 case VK_COMPARE_OP_GREATER:
429 zTest = CmpLT(zValue, Z);
430 break;
431 default:
Nicolas Capens865f8892020-01-21 14:27:10 -0500432 UNSUPPORTED("VkCompareOp: %d", int(state.depthCompareMode));
Nicolas Capens157ba262019-12-10 17:49:14 -0500433 }
Chris Forbesbea47512019-03-12 14:50:55 -0700434
Nicolas Capens157ba262019-12-10 17:49:14 -0500435 switch(state.depthCompareMode)
436 {
Ben Claytonbc1c0672019-12-17 20:37:37 +0000437 case VK_COMPARE_OP_ALWAYS:
438 zMask = cMask;
439 break;
440 case VK_COMPARE_OP_NEVER:
441 zMask = 0x0;
442 break;
443 default:
444 zMask = SignMask(zTest) & cMask;
445 break;
Nicolas Capens157ba262019-12-10 17:49:14 -0500446 }
Chris Forbesbea47512019-03-12 14:50:55 -0700447
Nicolas Capens157ba262019-12-10 17:49:14 -0500448 if(state.stencilActive)
449 {
450 zMask &= sMask;
451 }
Chris Forbesbea47512019-03-12 14:50:55 -0700452
Nicolas Capens157ba262019-12-10 17:49:14 -0500453 return zMask != 0;
454}
Chris Forbesbea47512019-03-12 14:50:55 -0700455
Nicolas Capens157ba262019-12-10 17:49:14 -0500456Bool PixelRoutine::depthTest16(const Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &sMask, Int &zMask, const Int &cMask)
457{
458 Short4 Z = convertFixed16(z, true);
Chris Forbesbea47512019-03-12 14:50:55 -0700459
Nicolas Capens157ba262019-12-10 17:49:14 -0500460 if(spirvShader && spirvShader->getModes().DepthReplacing)
461 {
462 Z = convertFixed16(oDepth, true);
463 }
Chris Forbesbea47512019-03-12 14:50:55 -0700464
Nicolas Capens157ba262019-12-10 17:49:14 -0500465 Pointer<Byte> buffer = zBuffer + 2 * x;
466 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
Chris Forbesbea47512019-03-12 14:50:55 -0700467
Nicolas Capens157ba262019-12-10 17:49:14 -0500468 if(q > 0)
469 {
Ben Claytonbc1c0672019-12-17 20:37:37 +0000470 buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB));
Nicolas Capens157ba262019-12-10 17:49:14 -0500471 }
472
473 Short4 zValue;
474
475 if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
476 {
Alexis Hetu0d0ed6f2019-11-20 10:40:53 -0500477 // FIXME: Properly optimizes?
Nicolas Capens157ba262019-12-10 17:49:14 -0500478 zValue = *Pointer<Short4>(buffer) & Short4(-1, -1, 0, 0);
479 zValue = zValue | (*Pointer<Short4>(buffer + pitch - 4) & Short4(0, 0, -1, -1));
Chris Forbesbea47512019-03-12 14:50:55 -0700480 }
481
Nicolas Capens157ba262019-12-10 17:49:14 -0500482 Int4 zTest;
483
484 // Bias values to make unsigned compares out of Reactor's (due SSE's) signed compares only
485 zValue = zValue - Short4(0x8000u);
486 Z = Z - Short4(0x8000u);
487
488 switch(state.depthCompareMode)
Chris Forbesbea47512019-03-12 14:50:55 -0700489 {
Ben Claytonbc1c0672019-12-17 20:37:37 +0000490 case VK_COMPARE_OP_ALWAYS:
491 // Optimized
492 break;
493 case VK_COMPARE_OP_NEVER:
494 // Optimized
495 break;
496 case VK_COMPARE_OP_EQUAL:
497 zTest = Int4(CmpEQ(zValue, Z));
498 break;
499 case VK_COMPARE_OP_NOT_EQUAL:
500 zTest = ~Int4(CmpEQ(zValue, Z));
501 break;
502 case VK_COMPARE_OP_LESS:
503 zTest = Int4(CmpGT(zValue, Z));
504 break;
505 case VK_COMPARE_OP_GREATER_OR_EQUAL:
506 zTest = ~Int4(CmpGT(zValue, Z));
507 break;
508 case VK_COMPARE_OP_LESS_OR_EQUAL:
509 zTest = ~Int4(CmpGT(Z, zValue));
510 break;
511 case VK_COMPARE_OP_GREATER:
512 zTest = Int4(CmpGT(Z, zValue));
513 break;
514 default:
Nicolas Capens865f8892020-01-21 14:27:10 -0500515 UNSUPPORTED("VkCompareOp: %d", int(state.depthCompareMode));
Nicolas Capens157ba262019-12-10 17:49:14 -0500516 }
517
518 switch(state.depthCompareMode)
519 {
Ben Claytonbc1c0672019-12-17 20:37:37 +0000520 case VK_COMPARE_OP_ALWAYS:
521 zMask = cMask;
522 break;
523 case VK_COMPARE_OP_NEVER:
524 zMask = 0x0;
525 break;
526 default:
527 zMask = SignMask(zTest) & cMask;
528 break;
Nicolas Capens157ba262019-12-10 17:49:14 -0500529 }
530
531 if(state.stencilActive)
532 {
533 zMask &= sMask;
534 }
535
536 return zMask != 0;
537}
538
539Bool PixelRoutine::depthTest(const Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &sMask, Int &zMask, const Int &cMask)
540{
541 if(!state.depthTestActive)
542 {
543 return true;
544 }
545
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500546 if(state.depthFormat == VK_FORMAT_D16_UNORM)
Nicolas Capens157ba262019-12-10 17:49:14 -0500547 return depthTest16(zBuffer, q, x, z, sMask, zMask, cMask);
548 else
549 return depthTest32F(zBuffer, q, x, z, sMask, zMask, cMask);
550}
551
552void PixelRoutine::alphaToCoverage(Int cMask[4], const Float4 &alpha)
553{
Ben Claytonbc1c0672019-12-17 20:37:37 +0000554 Int4 coverage0 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData, a2c0)));
555 Int4 coverage1 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData, a2c1)));
556 Int4 coverage2 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData, a2c2)));
557 Int4 coverage3 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData, a2c3)));
Nicolas Capens157ba262019-12-10 17:49:14 -0500558
559 Int aMask0 = SignMask(coverage0);
560 Int aMask1 = SignMask(coverage1);
561 Int aMask2 = SignMask(coverage2);
562 Int aMask3 = SignMask(coverage3);
563
564 cMask[0] &= aMask0;
565 cMask[1] &= aMask1;
566 cMask[2] &= aMask2;
567 cMask[3] &= aMask3;
568}
569
570void PixelRoutine::writeDepth32F(Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &zMask)
571{
572 Float4 Z = z;
573
574 if(spirvShader && spirvShader->getModes().DepthReplacing)
575 {
576 Z = oDepth;
577 }
578
579 Pointer<Byte> buffer = zBuffer + 4 * x;
Ben Claytonbc1c0672019-12-17 20:37:37 +0000580 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
Nicolas Capens157ba262019-12-10 17:49:14 -0500581
582 if(q > 0)
583 {
Ben Claytonbc1c0672019-12-17 20:37:37 +0000584 buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB));
Nicolas Capens157ba262019-12-10 17:49:14 -0500585 }
586
587 Float4 zValue;
588
589 if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
590 {
591 // FIXME: Properly optimizes?
592 zValue.xy = *Pointer<Float4>(buffer);
593 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
594 }
595
Ben Claytonbc1c0672019-12-17 20:37:37 +0000596 Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + zMask * 16, 16));
597 zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + zMask * 16, 16));
Nicolas Capens157ba262019-12-10 17:49:14 -0500598 Z = As<Float4>(As<Int4>(Z) | As<Int4>(zValue));
599
600 // FIXME: Properly optimizes?
601 *Pointer<Float2>(buffer) = Float2(Z.xy);
602 *Pointer<Float2>(buffer + pitch) = Float2(Z.zw);
603}
604
605void PixelRoutine::writeDepth16(Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &zMask)
606{
607 Short4 Z = As<Short4>(convertFixed16(z, true));
608
609 if(spirvShader && spirvShader->getModes().DepthReplacing)
610 {
611 Z = As<Short4>(convertFixed16(oDepth, true));
612 }
613
614 Pointer<Byte> buffer = zBuffer + 2 * x;
Ben Claytonbc1c0672019-12-17 20:37:37 +0000615 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
Nicolas Capens157ba262019-12-10 17:49:14 -0500616
617 if(q > 0)
618 {
Ben Claytonbc1c0672019-12-17 20:37:37 +0000619 buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB));
Nicolas Capens157ba262019-12-10 17:49:14 -0500620 }
621
622 Short4 zValue;
623
624 if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
625 {
626 // FIXME: Properly optimizes?
627 zValue = *Pointer<Short4>(buffer) & Short4(-1, -1, 0, 0);
628 zValue = zValue | (*Pointer<Short4>(buffer + pitch - 4) & Short4(0, 0, -1, -1));
629 }
630
Ben Claytonbc1c0672019-12-17 20:37:37 +0000631 Z = Z & *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q) + zMask * 8, 8);
632 zValue = zValue & *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q) + zMask * 8, 8);
Nicolas Capens157ba262019-12-10 17:49:14 -0500633 Z = Z | zValue;
634
635 // FIXME: Properly optimizes?
636 *Pointer<Short>(buffer) = Extract(Z, 0);
Ben Claytonbc1c0672019-12-17 20:37:37 +0000637 *Pointer<Short>(buffer + 2) = Extract(Z, 1);
638 *Pointer<Short>(buffer + pitch) = Extract(Z, 2);
639 *Pointer<Short>(buffer + pitch + 2) = Extract(Z, 3);
Nicolas Capens157ba262019-12-10 17:49:14 -0500640}
641
642void PixelRoutine::writeDepth(Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &zMask)
643{
644 if(!state.depthWriteEnable)
645 {
646 return;
647 }
648
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500649 if(state.depthFormat == VK_FORMAT_D16_UNORM)
Nicolas Capens157ba262019-12-10 17:49:14 -0500650 writeDepth16(zBuffer, q, x, z, zMask);
651 else
652 writeDepth32F(zBuffer, q, x, z, zMask);
653}
654
655void PixelRoutine::writeStencil(Pointer<Byte> &sBuffer, int q, const Int &x, const Int &sMask, const Int &zMask, const Int &cMask)
656{
657 if(!state.stencilActive)
658 {
659 return;
660 }
661
662 if(state.frontStencil.passOp == VK_STENCIL_OP_KEEP && state.frontStencil.depthFailOp == VK_STENCIL_OP_KEEP && state.frontStencil.failOp == VK_STENCIL_OP_KEEP)
663 {
664 if(state.backStencil.passOp == VK_STENCIL_OP_KEEP && state.backStencil.depthFailOp == VK_STENCIL_OP_KEEP && state.backStencil.failOp == VK_STENCIL_OP_KEEP)
Chris Forbesbea47512019-03-12 14:50:55 -0700665 {
666 return;
667 }
Chris Forbesbea47512019-03-12 14:50:55 -0700668 }
669
Nicolas Capens157ba262019-12-10 17:49:14 -0500670 if((state.frontStencil.writeMask == 0) && (state.backStencil.writeMask == 0))
Nicolas Capens68a82382018-10-02 13:16:55 -0400671 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500672 return;
Nicolas Capens68a82382018-10-02 13:16:55 -0400673 }
674
Nicolas Capens157ba262019-12-10 17:49:14 -0500675 Pointer<Byte> buffer = sBuffer + x;
676
677 if(q > 0)
Nicolas Capens68a82382018-10-02 13:16:55 -0400678 {
Ben Claytonbc1c0672019-12-17 20:37:37 +0000679 buffer += q * *Pointer<Int>(data + OFFSET(DrawData, stencilSliceB));
Nicolas Capens68a82382018-10-02 13:16:55 -0400680 }
681
Nicolas Capens157ba262019-12-10 17:49:14 -0500682 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, stencilPitchB));
683 Byte8 bufferValue = *Pointer<Byte8>(buffer) & Byte8(-1, -1, 0, 0, 0, 0, 0, 0);
684 bufferValue = bufferValue | (*Pointer<Byte8>(buffer + pitch - 2) & Byte8(0, 0, -1, -1, 0, 0, 0, 0));
685 Byte8 newValue;
686 stencilOperation(newValue, bufferValue, state.frontStencil, false, zMask, sMask);
687
Ben Claytonbc1c0672019-12-17 20:37:37 +0000688 if((state.frontStencil.writeMask & 0xFF) != 0xFF) // Assume 8-bit stencil buffer
Nicolas Capens68a82382018-10-02 13:16:55 -0400689 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500690 Byte8 maskedValue = bufferValue;
Ben Claytonbc1c0672019-12-17 20:37:37 +0000691 newValue &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[0].writeMaskQ));
692 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[0].invWriteMaskQ));
Nicolas Capens157ba262019-12-10 17:49:14 -0500693 newValue |= maskedValue;
Nicolas Capens68a82382018-10-02 13:16:55 -0400694 }
695
Nicolas Capens157ba262019-12-10 17:49:14 -0500696 Byte8 newValueBack;
697
698 stencilOperation(newValueBack, bufferValue, state.backStencil, true, zMask, sMask);
699
Ben Claytonbc1c0672019-12-17 20:37:37 +0000700 if((state.backStencil.writeMask & 0xFF) != 0xFF) // Assume 8-bit stencil buffer
Nicolas Capens68a82382018-10-02 13:16:55 -0400701 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500702 Byte8 maskedValue = bufferValue;
Ben Claytonbc1c0672019-12-17 20:37:37 +0000703 newValueBack &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[1].writeMaskQ));
704 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData, stencil[1].invWriteMaskQ));
Nicolas Capens157ba262019-12-10 17:49:14 -0500705 newValueBack |= maskedValue;
Nicolas Capens68a82382018-10-02 13:16:55 -0400706 }
707
Ben Claytonbc1c0672019-12-17 20:37:37 +0000708 newValue &= *Pointer<Byte8>(primitive + OFFSET(Primitive, clockwiseMask));
709 newValueBack &= *Pointer<Byte8>(primitive + OFFSET(Primitive, invClockwiseMask));
Nicolas Capens157ba262019-12-10 17:49:14 -0500710 newValue |= newValueBack;
711
Ben Claytonbc1c0672019-12-17 20:37:37 +0000712 newValue &= *Pointer<Byte8>(constants + OFFSET(Constants, maskB4Q) + 8 * cMask);
713 bufferValue &= *Pointer<Byte8>(constants + OFFSET(Constants, invMaskB4Q) + 8 * cMask);
Nicolas Capens157ba262019-12-10 17:49:14 -0500714 newValue |= bufferValue;
715
716 *Pointer<Short>(buffer) = Extract(As<Short4>(newValue), 0);
717 *Pointer<Short>(buffer + pitch) = Extract(As<Short4>(newValue), 1);
718}
719
720void PixelRoutine::stencilOperation(Byte8 &newValue, const Byte8 &bufferValue, const PixelProcessor::States::StencilOpState &ops, bool isBack, const Int &zMask, const Int &sMask)
721{
722 Byte8 &pass = newValue;
723 Byte8 fail;
724 Byte8 zFail;
725
726 stencilOperation(pass, bufferValue, ops.passOp, isBack);
727
728 if(ops.depthFailOp != ops.passOp)
Nicolas Capens68a82382018-10-02 13:16:55 -0400729 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500730 stencilOperation(zFail, bufferValue, ops.depthFailOp, isBack);
Nicolas Capens68a82382018-10-02 13:16:55 -0400731 }
732
Nicolas Capens157ba262019-12-10 17:49:14 -0500733 if(ops.failOp != ops.passOp || ops.failOp != ops.depthFailOp)
Nicolas Capens68a82382018-10-02 13:16:55 -0400734 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500735 stencilOperation(fail, bufferValue, ops.failOp, isBack);
Nicolas Capens68a82382018-10-02 13:16:55 -0400736 }
737
Nicolas Capens157ba262019-12-10 17:49:14 -0500738 if(ops.failOp != ops.passOp || ops.failOp != ops.depthFailOp)
Nicolas Capens68a82382018-10-02 13:16:55 -0400739 {
Ben Claytonbc1c0672019-12-17 20:37:37 +0000740 if(state.depthTestActive && ops.depthFailOp != ops.passOp) // zMask valid and values not the same
Nicolas Capens68a82382018-10-02 13:16:55 -0400741 {
Ben Claytonbc1c0672019-12-17 20:37:37 +0000742 pass &= *Pointer<Byte8>(constants + OFFSET(Constants, maskB4Q) + 8 * zMask);
743 zFail &= *Pointer<Byte8>(constants + OFFSET(Constants, invMaskB4Q) + 8 * zMask);
Nicolas Capens157ba262019-12-10 17:49:14 -0500744 pass |= zFail;
Nicolas Capens68a82382018-10-02 13:16:55 -0400745 }
746
Ben Claytonbc1c0672019-12-17 20:37:37 +0000747 pass &= *Pointer<Byte8>(constants + OFFSET(Constants, maskB4Q) + 8 * sMask);
748 fail &= *Pointer<Byte8>(constants + OFFSET(Constants, invMaskB4Q) + 8 * sMask);
Nicolas Capens157ba262019-12-10 17:49:14 -0500749 pass |= fail;
750 }
751}
752
Alexis Hetu1ee36c92020-02-20 14:07:26 -0500753Byte8 PixelRoutine::stencilReplaceRef(bool isBack)
754{
755 if(spirvShader)
756 {
757 auto it = spirvShader->outputBuiltins.find(spv::BuiltInFragStencilRefEXT);
758 if(it != spirvShader->outputBuiltins.end())
759 {
760 UInt4 sRef = As<UInt4>(routine.getVariable(it->second.Id)[it->second.FirstComponent]) & UInt4(0xff);
761 // TODO (b/148295813): Could be done with a single pshufb instruction. Optimize the
762 // following line by either adding a rr::Shuffle() variant to do
763 // it explicitly or adding a Byte4(Int4) constructor would work.
764 sRef.x = rr::UInt(sRef.x) | (rr::UInt(sRef.y) << 8) | (rr::UInt(sRef.z) << 16) | (rr::UInt(sRef.w) << 24);
765
766 UInt2 sRefDuplicated;
767 sRefDuplicated = Insert(sRefDuplicated, sRef.x, 0);
768 sRefDuplicated = Insert(sRefDuplicated, sRef.x, 1);
769 return As<Byte8>(sRefDuplicated);
770 }
771 }
772
773 return *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceQ));
774}
775
Nicolas Capens157ba262019-12-10 17:49:14 -0500776void PixelRoutine::stencilOperation(Byte8 &output, const Byte8 &bufferValue, VkStencilOp operation, bool isBack)
777{
778 switch(operation)
779 {
Ben Claytonbc1c0672019-12-17 20:37:37 +0000780 case VK_STENCIL_OP_KEEP:
781 output = bufferValue;
782 break;
783 case VK_STENCIL_OP_ZERO:
784 output = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
785 break;
786 case VK_STENCIL_OP_REPLACE:
Alexis Hetu1ee36c92020-02-20 14:07:26 -0500787 output = stencilReplaceRef(isBack);
Ben Claytonbc1c0672019-12-17 20:37:37 +0000788 break;
789 case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
790 output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
791 break;
792 case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
793 output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
794 break;
795 case VK_STENCIL_OP_INVERT:
796 output = bufferValue ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
797 break;
798 case VK_STENCIL_OP_INCREMENT_AND_WRAP:
799 output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1);
800 break;
801 case VK_STENCIL_OP_DECREMENT_AND_WRAP:
802 output = bufferValue - Byte8(1, 1, 1, 1, 1, 1, 1, 1);
803 break;
804 default:
Nicolas Capens865f8892020-01-21 14:27:10 -0500805 UNSUPPORTED("VkStencilOp: %d", int(operation));
Nicolas Capens157ba262019-12-10 17:49:14 -0500806 }
807}
808
809void PixelRoutine::blendFactor(Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, VkBlendFactor blendFactorActive)
810{
811 switch(blendFactorActive)
812 {
Ben Claytonbc1c0672019-12-17 20:37:37 +0000813 case VK_BLEND_FACTOR_ZERO:
814 // Optimized
815 break;
816 case VK_BLEND_FACTOR_ONE:
817 // Optimized
818 break;
819 case VK_BLEND_FACTOR_SRC_COLOR:
820 blendFactor.x = current.x;
821 blendFactor.y = current.y;
822 blendFactor.z = current.z;
823 break;
824 case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
825 blendFactor.x = Short4(0xFFFFu) - current.x;
826 blendFactor.y = Short4(0xFFFFu) - current.y;
827 blendFactor.z = Short4(0xFFFFu) - current.z;
828 break;
829 case VK_BLEND_FACTOR_DST_COLOR:
830 blendFactor.x = pixel.x;
831 blendFactor.y = pixel.y;
832 blendFactor.z = pixel.z;
833 break;
834 case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
835 blendFactor.x = Short4(0xFFFFu) - pixel.x;
836 blendFactor.y = Short4(0xFFFFu) - pixel.y;
837 blendFactor.z = Short4(0xFFFFu) - pixel.z;
838 break;
839 case VK_BLEND_FACTOR_SRC_ALPHA:
840 blendFactor.x = current.w;
841 blendFactor.y = current.w;
842 blendFactor.z = current.w;
843 break;
844 case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
845 blendFactor.x = Short4(0xFFFFu) - current.w;
846 blendFactor.y = Short4(0xFFFFu) - current.w;
847 blendFactor.z = Short4(0xFFFFu) - current.w;
848 break;
849 case VK_BLEND_FACTOR_DST_ALPHA:
850 blendFactor.x = pixel.w;
851 blendFactor.y = pixel.w;
852 blendFactor.z = pixel.w;
853 break;
854 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
855 blendFactor.x = Short4(0xFFFFu) - pixel.w;
856 blendFactor.y = Short4(0xFFFFu) - pixel.w;
857 blendFactor.z = Short4(0xFFFFu) - pixel.w;
858 break;
859 case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
860 blendFactor.x = Short4(0xFFFFu) - pixel.w;
861 blendFactor.x = Min(As<UShort4>(blendFactor.x), As<UShort4>(current.w));
862 blendFactor.y = blendFactor.x;
863 blendFactor.z = blendFactor.x;
864 break;
865 case VK_BLEND_FACTOR_CONSTANT_COLOR:
866 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[0]));
867 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[1]));
868 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[2]));
869 break;
870 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
871 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[0]));
872 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[1]));
873 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[2]));
874 break;
875 case VK_BLEND_FACTOR_CONSTANT_ALPHA:
876 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[3]));
877 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[3]));
878 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[3]));
879 break;
880 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
881 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[3]));
882 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[3]));
883 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[3]));
884 break;
885 default:
Nicolas Capens865f8892020-01-21 14:27:10 -0500886 UNSUPPORTED("VkBlendFactor: %d", int(blendFactorActive));
Nicolas Capens157ba262019-12-10 17:49:14 -0500887 }
888}
889
890void PixelRoutine::blendFactorAlpha(Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, VkBlendFactor blendFactorAlphaActive)
891{
892 switch(blendFactorAlphaActive)
893 {
Ben Claytonbc1c0672019-12-17 20:37:37 +0000894 case VK_BLEND_FACTOR_ZERO:
895 // Optimized
896 break;
897 case VK_BLEND_FACTOR_ONE:
898 // Optimized
899 break;
900 case VK_BLEND_FACTOR_SRC_COLOR:
901 blendFactor.w = current.w;
902 break;
903 case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
904 blendFactor.w = Short4(0xFFFFu) - current.w;
905 break;
906 case VK_BLEND_FACTOR_DST_COLOR:
907 blendFactor.w = pixel.w;
908 break;
909 case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
910 blendFactor.w = Short4(0xFFFFu) - pixel.w;
911 break;
912 case VK_BLEND_FACTOR_SRC_ALPHA:
913 blendFactor.w = current.w;
914 break;
915 case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
916 blendFactor.w = Short4(0xFFFFu) - current.w;
917 break;
918 case VK_BLEND_FACTOR_DST_ALPHA:
919 blendFactor.w = pixel.w;
920 break;
921 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
922 blendFactor.w = Short4(0xFFFFu) - pixel.w;
923 break;
924 case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
925 blendFactor.w = Short4(0xFFFFu);
926 break;
927 case VK_BLEND_FACTOR_CONSTANT_COLOR:
928 case VK_BLEND_FACTOR_CONSTANT_ALPHA:
929 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[3]));
930 break;
931 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
932 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
933 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[3]));
934 break;
935 default:
Nicolas Capens865f8892020-01-21 14:27:10 -0500936 UNSUPPORTED("VkBlendFactor: %d", int(blendFactorAlphaActive));
Nicolas Capens157ba262019-12-10 17:49:14 -0500937 }
938}
939
940bool PixelRoutine::isSRGB(int index) const
941{
942 return vk::Format(state.targetFormat[index]).isSRGBformat();
943}
944
945void PixelRoutine::readPixel(int index, const Pointer<Byte> &cBuffer, const Int &x, Vector4s &pixel)
946{
947 Short4 c01;
948 Short4 c23;
Alexis Hetu24c49dd2019-12-13 16:32:43 -0500949 Pointer<Byte> buffer = cBuffer;
Nicolas Capens157ba262019-12-10 17:49:14 -0500950 Pointer<Byte> buffer2;
951
Alexis Hetuf97fb9d2019-12-17 14:40:19 -0500952 Int pitchB = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
953
Nicolas Capens157ba262019-12-10 17:49:14 -0500954 switch(state.targetFormat[index])
955 {
Ben Claytonbc1c0672019-12-17 20:37:37 +0000956 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
957 buffer += 2 * x;
Alexis Hetuf97fb9d2019-12-17 14:40:19 -0500958 buffer2 = buffer + pitchB;
Ben Claytonbc1c0672019-12-17 20:37:37 +0000959 c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
Nicolas Capens157ba262019-12-10 17:49:14 -0500960
Ben Claytonbc1c0672019-12-17 20:37:37 +0000961 pixel.x = (c01 & Short4(0x7C00u)) << 1;
962 pixel.y = (c01 & Short4(0x03E0u)) << 6;
963 pixel.z = (c01 & Short4(0x001Fu)) << 11;
964 pixel.w = (c01 & Short4(0x8000u)) >> 15;
Nicolas Capens157ba262019-12-10 17:49:14 -0500965
Ben Claytonbc1c0672019-12-17 20:37:37 +0000966 // Expand to 16 bit range
967 pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 5);
968 pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 10);
969 pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 5);
970 pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 10);
971 pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 5);
972 pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 10);
973 break;
974 case VK_FORMAT_R5G6B5_UNORM_PACK16:
975 buffer += 2 * x;
Alexis Hetuf97fb9d2019-12-17 14:40:19 -0500976 buffer2 = buffer + pitchB;
Ben Claytonbc1c0672019-12-17 20:37:37 +0000977 c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
Nicolas Capens157ba262019-12-10 17:49:14 -0500978
Ben Claytonbc1c0672019-12-17 20:37:37 +0000979 pixel.x = c01 & Short4(0xF800u);
980 pixel.y = (c01 & Short4(0x07E0u)) << 5;
981 pixel.z = (c01 & Short4(0x001Fu)) << 11;
982 pixel.w = Short4(0xFFFFu);
Nicolas Capens157ba262019-12-10 17:49:14 -0500983
Ben Claytonbc1c0672019-12-17 20:37:37 +0000984 // Expand to 16 bit range
985 pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 5);
986 pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 10);
987 pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 6);
988 pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 12);
989 pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 5);
990 pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 10);
991 break;
992 case VK_FORMAT_B8G8R8A8_UNORM:
993 case VK_FORMAT_B8G8R8A8_SRGB:
994 buffer += 4 * x;
995 c01 = *Pointer<Short4>(buffer);
Alexis Hetuf97fb9d2019-12-17 14:40:19 -0500996 buffer += pitchB;
Ben Claytonbc1c0672019-12-17 20:37:37 +0000997 c23 = *Pointer<Short4>(buffer);
998 pixel.z = c01;
999 pixel.y = c01;
1000 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1001 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1002 pixel.x = pixel.z;
1003 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1004 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1005 pixel.y = pixel.z;
1006 pixel.w = pixel.x;
1007 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
1008 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1009 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1010 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1011 break;
1012 case VK_FORMAT_R8G8B8A8_UNORM:
1013 case VK_FORMAT_R8G8B8A8_SRGB:
1014 buffer += 4 * x;
1015 c01 = *Pointer<Short4>(buffer);
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001016 buffer += pitchB;
Ben Claytonbc1c0672019-12-17 20:37:37 +00001017 c23 = *Pointer<Short4>(buffer);
1018 pixel.z = c01;
1019 pixel.y = c01;
1020 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1021 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1022 pixel.x = pixel.z;
1023 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1024 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1025 pixel.y = pixel.z;
1026 pixel.w = pixel.x;
1027 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1028 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1029 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1030 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1031 break;
1032 case VK_FORMAT_R8_UNORM:
1033 buffer += 1 * x;
1034 pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 0);
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001035 buffer += pitchB;
Ben Claytonbc1c0672019-12-17 20:37:37 +00001036 pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 1);
1037 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
1038 pixel.y = Short4(0x0000);
1039 pixel.z = Short4(0x0000);
1040 pixel.w = Short4(0xFFFFu);
1041 break;
1042 case VK_FORMAT_R8G8_UNORM:
1043 buffer += 2 * x;
1044 c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 0));
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001045 buffer += pitchB;
Ben Claytonbc1c0672019-12-17 20:37:37 +00001046 c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 1));
1047 pixel.x = (c01 & Short4(0x00FFu)) | (c01 << 8);
1048 pixel.y = (c01 & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c01) >> 8);
1049 pixel.z = Short4(0x0000u);
1050 pixel.w = Short4(0xFFFFu);
1051 break;
1052 case VK_FORMAT_R16G16B16A16_UNORM:
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001053 buffer += 8 * x;
1054 pixel.x = *Pointer<Short4>(buffer + 0);
1055 pixel.y = *Pointer<Short4>(buffer + 8);
1056 buffer += pitchB;
1057 pixel.z = *Pointer<Short4>(buffer + 0);
1058 pixel.w = *Pointer<Short4>(buffer + 8);
Ben Claytonbc1c0672019-12-17 20:37:37 +00001059 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
1060 break;
1061 case VK_FORMAT_R16G16_UNORM:
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001062 buffer += 4 * x;
1063 pixel.x = *Pointer<Short4>(buffer);
1064 buffer += pitchB;
1065 pixel.y = *Pointer<Short4>(buffer);
Ben Claytonbc1c0672019-12-17 20:37:37 +00001066 pixel.z = pixel.x;
1067 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y));
1068 pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y));
1069 pixel.y = pixel.z;
1070 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.z));
1071 pixel.y = As<Short4>(UnpackHigh(pixel.y, pixel.z));
1072 pixel.z = Short4(0xFFFFu);
1073 pixel.w = Short4(0xFFFFu);
1074 break;
1075 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
1076 {
1077 Int4 v = Int4(0);
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001078 buffer += 4 * x;
1079 v = Insert(v, *Pointer<Int>(buffer + 0), 0);
1080 v = Insert(v, *Pointer<Int>(buffer + 4), 1);
1081 buffer += pitchB;
1082 v = Insert(v, *Pointer<Int>(buffer + 0), 2);
1083 v = Insert(v, *Pointer<Int>(buffer + 4), 3);
Nicolas Capens157ba262019-12-10 17:49:14 -05001084
Nicolas Capens0405ba02020-01-16 01:19:21 -05001085 pixel = a2b10g10r10Unpack(v);
Ben Claytonbc1c0672019-12-17 20:37:37 +00001086 }
1087 break;
Alexis Hetub8a61bf2020-01-09 15:26:34 -05001088 case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
1089 {
1090 Int4 v = Int4(0);
1091 v = Insert(v, *Pointer<Int>(buffer + 4 * x), 0);
1092 v = Insert(v, *Pointer<Int>(buffer + 4 * x + 4), 1);
1093 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1094 v = Insert(v, *Pointer<Int>(buffer + 4 * x), 2);
1095 v = Insert(v, *Pointer<Int>(buffer + 4 * x + 4), 3);
1096
Nicolas Capens0405ba02020-01-16 01:19:21 -05001097 pixel = a2r10g10b10Unpack(v);
Alexis Hetub8a61bf2020-01-09 15:26:34 -05001098 }
1099 break;
Ben Claytonbc1c0672019-12-17 20:37:37 +00001100 default:
Nicolas Capensae100792020-06-11 13:48:40 -04001101 UNSUPPORTED("VkFormat %d", int(state.targetFormat[index]));
Nicolas Capens68a82382018-10-02 13:16:55 -04001102 }
1103
Nicolas Capens157ba262019-12-10 17:49:14 -05001104 if(isSRGB(index))
Nicolas Capens68a82382018-10-02 13:16:55 -04001105 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001106 sRGBtoLinear16_12_16(pixel);
1107 }
1108}
Nicolas Capens68a82382018-10-02 13:16:55 -04001109
Nicolas Capens157ba262019-12-10 17:49:14 -05001110void PixelRoutine::alphaBlend(int index, const Pointer<Byte> &cBuffer, Vector4s &current, const Int &x)
1111{
Nicolas Capensae100792020-06-11 13:48:40 -04001112 if(!state.blendState[index].alphaBlendEnable)
Nicolas Capens157ba262019-12-10 17:49:14 -05001113 {
1114 return;
Nicolas Capens68a82382018-10-02 13:16:55 -04001115 }
1116
Nicolas Capensae100792020-06-11 13:48:40 -04001117 ASSERT(state.targetFormat[index].supportsColorAttachmentBlend());
1118
Nicolas Capens157ba262019-12-10 17:49:14 -05001119 Vector4s pixel;
1120 readPixel(index, cBuffer, x, pixel);
1121
1122 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
1123 Vector4s sourceFactor;
1124 Vector4s destFactor;
1125
1126 blendFactor(sourceFactor, current, pixel, state.blendState[index].sourceBlendFactor);
1127 blendFactor(destFactor, current, pixel, state.blendState[index].destBlendFactor);
1128
1129 if(state.blendState[index].sourceBlendFactor != VK_BLEND_FACTOR_ONE && state.blendState[index].sourceBlendFactor != VK_BLEND_FACTOR_ZERO)
Nicolas Capens68a82382018-10-02 13:16:55 -04001130 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001131 current.x = MulHigh(As<UShort4>(current.x), As<UShort4>(sourceFactor.x));
1132 current.y = MulHigh(As<UShort4>(current.y), As<UShort4>(sourceFactor.y));
1133 current.z = MulHigh(As<UShort4>(current.z), As<UShort4>(sourceFactor.z));
1134 }
1135
1136 if(state.blendState[index].destBlendFactor != VK_BLEND_FACTOR_ONE && state.blendState[index].destBlendFactor != VK_BLEND_FACTOR_ZERO)
1137 {
1138 pixel.x = MulHigh(As<UShort4>(pixel.x), As<UShort4>(destFactor.x));
1139 pixel.y = MulHigh(As<UShort4>(pixel.y), As<UShort4>(destFactor.y));
1140 pixel.z = MulHigh(As<UShort4>(pixel.z), As<UShort4>(destFactor.z));
1141 }
1142
1143 switch(state.blendState[index].blendOperation)
1144 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00001145 case VK_BLEND_OP_ADD:
1146 current.x = AddSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
1147 current.y = AddSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
1148 current.z = AddSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
1149 break;
1150 case VK_BLEND_OP_SUBTRACT:
1151 current.x = SubSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
1152 current.y = SubSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
1153 current.z = SubSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
1154 break;
1155 case VK_BLEND_OP_REVERSE_SUBTRACT:
1156 current.x = SubSat(As<UShort4>(pixel.x), As<UShort4>(current.x));
1157 current.y = SubSat(As<UShort4>(pixel.y), As<UShort4>(current.y));
1158 current.z = SubSat(As<UShort4>(pixel.z), As<UShort4>(current.z));
1159 break;
1160 case VK_BLEND_OP_MIN:
1161 current.x = Min(As<UShort4>(current.x), As<UShort4>(pixel.x));
1162 current.y = Min(As<UShort4>(current.y), As<UShort4>(pixel.y));
1163 current.z = Min(As<UShort4>(current.z), As<UShort4>(pixel.z));
1164 break;
1165 case VK_BLEND_OP_MAX:
1166 current.x = Max(As<UShort4>(current.x), As<UShort4>(pixel.x));
1167 current.y = Max(As<UShort4>(current.y), As<UShort4>(pixel.y));
1168 current.z = Max(As<UShort4>(current.z), As<UShort4>(pixel.z));
1169 break;
1170 case VK_BLEND_OP_SRC_EXT:
1171 // No operation
1172 break;
1173 case VK_BLEND_OP_DST_EXT:
1174 current.x = pixel.x;
1175 current.y = pixel.y;
1176 current.z = pixel.z;
1177 break;
1178 case VK_BLEND_OP_ZERO_EXT:
1179 current.x = Short4(0x0000);
1180 current.y = Short4(0x0000);
1181 current.z = Short4(0x0000);
1182 break;
1183 default:
Nicolas Capens865f8892020-01-21 14:27:10 -05001184 UNSUPPORTED("VkBlendOp: %d", int(state.blendState[index].blendOperation));
Nicolas Capens157ba262019-12-10 17:49:14 -05001185 }
1186
1187 blendFactorAlpha(sourceFactor, current, pixel, state.blendState[index].sourceBlendFactorAlpha);
1188 blendFactorAlpha(destFactor, current, pixel, state.blendState[index].destBlendFactorAlpha);
1189
1190 if(state.blendState[index].sourceBlendFactorAlpha != VK_BLEND_FACTOR_ONE && state.blendState[index].sourceBlendFactorAlpha != VK_BLEND_FACTOR_ZERO)
1191 {
1192 current.w = MulHigh(As<UShort4>(current.w), As<UShort4>(sourceFactor.w));
1193 }
1194
1195 if(state.blendState[index].destBlendFactorAlpha != VK_BLEND_FACTOR_ONE && state.blendState[index].destBlendFactorAlpha != VK_BLEND_FACTOR_ZERO)
1196 {
1197 pixel.w = MulHigh(As<UShort4>(pixel.w), As<UShort4>(destFactor.w));
1198 }
1199
1200 switch(state.blendState[index].blendOperationAlpha)
1201 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00001202 case VK_BLEND_OP_ADD:
1203 current.w = AddSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
1204 break;
1205 case VK_BLEND_OP_SUBTRACT:
1206 current.w = SubSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
1207 break;
1208 case VK_BLEND_OP_REVERSE_SUBTRACT:
1209 current.w = SubSat(As<UShort4>(pixel.w), As<UShort4>(current.w));
1210 break;
1211 case VK_BLEND_OP_MIN:
1212 current.w = Min(As<UShort4>(current.w), As<UShort4>(pixel.w));
1213 break;
1214 case VK_BLEND_OP_MAX:
1215 current.w = Max(As<UShort4>(current.w), As<UShort4>(pixel.w));
1216 break;
1217 case VK_BLEND_OP_SRC_EXT:
1218 // No operation
1219 break;
1220 case VK_BLEND_OP_DST_EXT:
1221 current.w = pixel.w;
1222 break;
1223 case VK_BLEND_OP_ZERO_EXT:
1224 current.w = Short4(0x0000);
1225 break;
1226 default:
Nicolas Capens865f8892020-01-21 14:27:10 -05001227 UNSUPPORTED("VkBlendOp: %d", int(state.blendState[index].blendOperationAlpha));
Nicolas Capens157ba262019-12-10 17:49:14 -05001228 }
1229}
1230
1231void PixelRoutine::writeColor(int index, const Pointer<Byte> &cBuffer, const Int &x, Vector4s &current, const Int &sMask, const Int &zMask, const Int &cMask)
1232{
1233 if(isSRGB(index))
1234 {
1235 linearToSRGB16_12_16(current);
1236 }
1237
1238 switch(state.targetFormat[index])
1239 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00001240 case VK_FORMAT_B8G8R8A8_UNORM:
1241 case VK_FORMAT_B8G8R8A8_SRGB:
1242 case VK_FORMAT_R8G8B8A8_UNORM:
1243 case VK_FORMAT_R8G8B8A8_SRGB:
1244 case VK_FORMAT_R8G8_UNORM:
1245 case VK_FORMAT_R8_UNORM:
1246 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
1247 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
1248 current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080);
1249 current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080);
1250 current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080);
1251 current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 8) + Short4(0x0080);
1252 break;
Alexis Hetub8a61bf2020-01-09 15:26:34 -05001253 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
1254 case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
1255 current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 10) + Short4(0x0020);
1256 current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 10) + Short4(0x0020);
1257 current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 10) + Short4(0x0020);
1258 current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 2) + Short4(0x2000);
1259 break;
Alexis Hetuf324fe12020-01-20 16:12:29 -05001260 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
1261 current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 5) + Short4(0x0400);
1262 current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 5) + Short4(0x0400);
1263 current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 5) + Short4(0x0400);
1264 current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 1) + Short4(0x4000);
1265 break;
1266 case VK_FORMAT_R5G6B5_UNORM_PACK16:
1267 current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 5) + Short4(0x0400);
1268 current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 6) + Short4(0x0200);
1269 current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 5) + Short4(0x0400);
1270 break;
Ben Claytonbc1c0672019-12-17 20:37:37 +00001271 default:
1272 break;
Nicolas Capens157ba262019-12-10 17:49:14 -05001273 }
1274
1275 int rgbaWriteMask = state.colorWriteActive(index);
1276 int bgraWriteMask = (rgbaWriteMask & 0x0000000A) | (rgbaWriteMask & 0x00000001) << 2 | (rgbaWriteMask & 0x00000004) >> 2;
1277
1278 switch(state.targetFormat[index])
1279 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00001280 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
Nicolas Capens68a82382018-10-02 13:16:55 -04001281 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001282 current.w = current.w & Short4(0x8000u);
1283 current.x = As<UShort4>(current.x & Short4(0xF800)) >> 1;
1284 current.y = As<UShort4>(current.y & Short4(0xF800)) >> 6;
1285 current.z = As<UShort4>(current.z & Short4(0xF800)) >> 11;
1286
1287 current.x = current.x | current.y | current.z | current.w;
Nicolas Capens68a82382018-10-02 13:16:55 -04001288 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001289 break;
Ben Claytonbc1c0672019-12-17 20:37:37 +00001290 case VK_FORMAT_R5G6B5_UNORM_PACK16:
Nicolas Capens68a82382018-10-02 13:16:55 -04001291 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001292 current.x = current.x & Short4(0xF800u);
1293 current.y = As<UShort4>(current.y & Short4(0xFC00u)) >> 5;
1294 current.z = As<UShort4>(current.z) >> 11;
1295
1296 current.x = current.x | current.y | current.z;
Nicolas Capens68a82382018-10-02 13:16:55 -04001297 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001298 break;
Ben Claytonbc1c0672019-12-17 20:37:37 +00001299 case VK_FORMAT_B8G8R8A8_UNORM:
1300 case VK_FORMAT_B8G8R8A8_SRGB:
1301 if(rgbaWriteMask == 0x7)
1302 {
1303 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1304 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1305 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1306
1307 current.z = As<Short4>(PackUnsigned(current.z, current.x));
1308 current.y = As<Short4>(PackUnsigned(current.y, current.y));
1309
1310 current.x = current.z;
1311 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1312 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1313 current.y = current.z;
1314 current.z = As<Short4>(UnpackLow(current.z, current.x));
1315 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1316 }
1317 else
1318 {
1319 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1320 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1321 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1322 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
1323
1324 current.z = As<Short4>(PackUnsigned(current.z, current.x));
1325 current.y = As<Short4>(PackUnsigned(current.y, current.w));
1326
1327 current.x = current.z;
1328 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1329 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1330 current.y = current.z;
1331 current.z = As<Short4>(UnpackLow(current.z, current.x));
1332 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1333 }
1334 break;
1335 case VK_FORMAT_R8G8B8A8_UNORM:
1336 case VK_FORMAT_R8G8B8A8_SRGB:
1337 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
1338 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
1339 if(rgbaWriteMask == 0x7)
1340 {
1341 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1342 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1343 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1344
1345 current.z = As<Short4>(PackUnsigned(current.x, current.z));
1346 current.y = As<Short4>(PackUnsigned(current.y, current.y));
1347
1348 current.x = current.z;
1349 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1350 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1351 current.y = current.z;
1352 current.z = As<Short4>(UnpackLow(current.z, current.x));
1353 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1354 }
1355 else
1356 {
1357 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1358 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1359 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1360 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
1361
1362 current.z = As<Short4>(PackUnsigned(current.x, current.z));
1363 current.y = As<Short4>(PackUnsigned(current.y, current.w));
1364
1365 current.x = current.z;
1366 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1367 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1368 current.y = current.z;
1369 current.z = As<Short4>(UnpackLow(current.z, current.x));
1370 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1371 }
1372 break;
1373 case VK_FORMAT_R8G8_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -04001374 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1375 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
Ben Claytonbc1c0672019-12-17 20:37:37 +00001376 current.x = As<Short4>(PackUnsigned(current.x, current.x));
Nicolas Capens68a82382018-10-02 13:16:55 -04001377 current.y = As<Short4>(PackUnsigned(current.y, current.y));
Ben Claytonbc1c0672019-12-17 20:37:37 +00001378 current.x = UnpackLow(As<Byte8>(current.x), As<Byte8>(current.y));
1379 break;
1380 case VK_FORMAT_R8_UNORM:
Nicolas Capens157ba262019-12-10 17:49:14 -05001381 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
Ben Claytonbc1c0672019-12-17 20:37:37 +00001382 current.x = As<Short4>(PackUnsigned(current.x, current.x));
1383 break;
1384 case VK_FORMAT_R16G16_UNORM:
1385 current.z = current.x;
1386 current.x = As<Short4>(UnpackLow(current.x, current.y));
1387 current.z = As<Short4>(UnpackHigh(current.z, current.y));
Nicolas Capens157ba262019-12-10 17:49:14 -05001388 current.y = current.z;
Ben Claytonbc1c0672019-12-17 20:37:37 +00001389 break;
1390 case VK_FORMAT_R16G16B16A16_UNORM:
1391 transpose4x4(current.x, current.y, current.z, current.w);
1392 break;
1393 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
Nicolas Capens68a82382018-10-02 13:16:55 -04001394 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00001395 auto r = (Int4(current.x) >> 6) & Int4(0x3ff);
1396 auto g = (Int4(current.y) >> 6) & Int4(0x3ff);
1397 auto b = (Int4(current.z) >> 6) & Int4(0x3ff);
1398 auto a = (Int4(current.w) >> 14) & Int4(0x3);
1399 Int4 packed = (a << 30) | (b << 20) | (g << 10) | r;
1400 auto c02 = As<Int2>(Int4(packed.xzzz)); // TODO: auto c02 = packed.xz;
1401 auto c13 = As<Int2>(Int4(packed.ywww)); // TODO: auto c13 = packed.yw;
1402 current.x = UnpackLow(c02, c13);
1403 current.y = UnpackHigh(c02, c13);
1404 break;
Nicolas Capens68a82382018-10-02 13:16:55 -04001405 }
Alexis Hetub8a61bf2020-01-09 15:26:34 -05001406 case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
1407 {
1408 auto r = (Int4(current.x) >> 6) & Int4(0x3ff);
1409 auto g = (Int4(current.y) >> 6) & Int4(0x3ff);
1410 auto b = (Int4(current.z) >> 6) & Int4(0x3ff);
1411 auto a = (Int4(current.w) >> 14) & Int4(0x3);
1412 Int4 packed = (a << 30) | (r << 20) | (g << 10) | b;
1413 auto c02 = As<Int2>(Int4(packed.xzzz)); // TODO: auto c02 = packed.xz;
1414 auto c13 = As<Int2>(Int4(packed.ywww)); // TODO: auto c13 = packed.yw;
1415 current.x = UnpackLow(c02, c13);
1416 current.y = UnpackHigh(c02, c13);
1417 break;
1418 }
Ben Claytonbc1c0672019-12-17 20:37:37 +00001419 default:
Nicolas Capens865f8892020-01-21 14:27:10 -05001420 UNSUPPORTED("VkFormat: %d", int(state.targetFormat[index]));
Nicolas Capens157ba262019-12-10 17:49:14 -05001421 }
1422
1423 Short4 c01 = current.z;
1424 Short4 c23 = current.y;
1425
Ben Claytonbc1c0672019-12-17 20:37:37 +00001426 Int xMask; // Combination of all masks
Nicolas Capens157ba262019-12-10 17:49:14 -05001427
1428 if(state.depthTestActive)
1429 {
1430 xMask = zMask;
1431 }
1432 else
1433 {
1434 xMask = cMask;
1435 }
1436
1437 if(state.stencilActive)
1438 {
1439 xMask &= sMask;
1440 }
1441
Alexis Hetu24c49dd2019-12-13 16:32:43 -05001442 Pointer<Byte> buffer = cBuffer;
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001443 Int pitchB = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Alexis Hetu24c49dd2019-12-13 16:32:43 -05001444
Nicolas Capens157ba262019-12-10 17:49:14 -05001445 switch(state.targetFormat[index])
1446 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00001447 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
Nicolas Capens157ba262019-12-10 17:49:14 -05001448 {
Alexis Hetu24c49dd2019-12-13 16:32:43 -05001449 buffer += 2 * x;
Nicolas Capens157ba262019-12-10 17:49:14 -05001450 Int value = *Pointer<Int>(buffer);
1451
Ben Claytonbc1c0672019-12-17 20:37:37 +00001452 Int channelMask = *Pointer<Int>(constants + OFFSET(Constants, mask5551Q[bgraWriteMask & 0xF][0]));
Nicolas Capens157ba262019-12-10 17:49:14 -05001453
1454 Int c01 = Extract(As<Int2>(current.x), 0);
Ben Claytonbc1c0672019-12-17 20:37:37 +00001455 Int mask01 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][0]) + xMask * 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05001456 if(bgraWriteMask != 0x0000000F)
Chris Forbesa3ff8782019-08-30 15:33:28 -07001457 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001458 mask01 &= channelMask;
Chris Forbesa3ff8782019-08-30 15:33:28 -07001459 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001460 *Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01);
1461
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001462 buffer += pitchB;
Nicolas Capens157ba262019-12-10 17:49:14 -05001463 value = *Pointer<Int>(buffer);
1464
1465 Int c23 = Extract(As<Int2>(current.x), 1);
Ben Claytonbc1c0672019-12-17 20:37:37 +00001466 Int mask23 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][2]) + xMask * 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05001467 if(bgraWriteMask != 0x0000000F)
Nicolas Capens68a82382018-10-02 13:16:55 -04001468 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001469 mask23 &= channelMask;
Nicolas Capens68a82382018-10-02 13:16:55 -04001470 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001471 *Pointer<Int>(buffer) = (c23 & mask23) | (value & ~mask23);
1472 }
1473 break;
Ben Claytonbc1c0672019-12-17 20:37:37 +00001474 case VK_FORMAT_R5G6B5_UNORM_PACK16:
Nicolas Capens157ba262019-12-10 17:49:14 -05001475 {
Alexis Hetu24c49dd2019-12-13 16:32:43 -05001476 buffer += 2 * x;
Nicolas Capens157ba262019-12-10 17:49:14 -05001477 Int value = *Pointer<Int>(buffer);
1478
Ben Claytonbc1c0672019-12-17 20:37:37 +00001479 Int channelMask = *Pointer<Int>(constants + OFFSET(Constants, mask565Q[bgraWriteMask & 0x7][0]));
Nicolas Capens157ba262019-12-10 17:49:14 -05001480
1481 Int c01 = Extract(As<Int2>(current.x), 0);
Ben Claytonbc1c0672019-12-17 20:37:37 +00001482 Int mask01 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][0]) + xMask * 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05001483 if((bgraWriteMask & 0x00000007) != 0x00000007)
Nicolas Capens68a82382018-10-02 13:16:55 -04001484 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001485 mask01 &= channelMask;
1486 }
1487 *Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01);
1488
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001489 buffer += pitchB;
Nicolas Capens157ba262019-12-10 17:49:14 -05001490 value = *Pointer<Int>(buffer);
1491
1492 Int c23 = Extract(As<Int2>(current.x), 1);
Ben Claytonbc1c0672019-12-17 20:37:37 +00001493 Int mask23 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][2]) + xMask * 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05001494 if((bgraWriteMask & 0x00000007) != 0x00000007)
1495 {
1496 mask23 &= channelMask;
1497 }
1498 *Pointer<Int>(buffer) = (c23 & mask23) | (value & ~mask23);
1499 }
1500 break;
Ben Claytonbc1c0672019-12-17 20:37:37 +00001501 case VK_FORMAT_B8G8R8A8_UNORM:
1502 case VK_FORMAT_B8G8R8A8_SRGB:
Nicolas Capens157ba262019-12-10 17:49:14 -05001503 {
Alexis Hetu24c49dd2019-12-13 16:32:43 -05001504 buffer += x * 4;
Nicolas Capens157ba262019-12-10 17:49:14 -05001505 Short4 value = *Pointer<Short4>(buffer);
Ben Claytonbc1c0672019-12-17 20:37:37 +00001506 Short4 channelMask = *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q[bgraWriteMask][0]));
Nicolas Capens157ba262019-12-10 17:49:14 -05001507
Ben Claytonbc1c0672019-12-17 20:37:37 +00001508 Short4 mask01 = *Pointer<Short4>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05001509 if(bgraWriteMask != 0x0000000F)
1510 {
1511 mask01 &= channelMask;
1512 }
1513 *Pointer<Short4>(buffer) = (c01 & mask01) | (value & ~mask01);
1514
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001515 buffer += pitchB;
Nicolas Capens157ba262019-12-10 17:49:14 -05001516 value = *Pointer<Short4>(buffer);
1517
Ben Claytonbc1c0672019-12-17 20:37:37 +00001518 Short4 mask23 = *Pointer<Short4>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05001519 if(bgraWriteMask != 0x0000000F)
1520 {
1521 mask23 &= channelMask;
1522 }
1523 *Pointer<Short4>(buffer) = (c23 & mask23) | (value & ~mask23);
1524 }
1525 break;
Ben Claytonbc1c0672019-12-17 20:37:37 +00001526 case VK_FORMAT_R8G8B8A8_UNORM:
1527 case VK_FORMAT_R8G8B8A8_SRGB:
1528 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
1529 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
Nicolas Capens157ba262019-12-10 17:49:14 -05001530 {
Alexis Hetu24c49dd2019-12-13 16:32:43 -05001531 buffer += x * 4;
Nicolas Capens157ba262019-12-10 17:49:14 -05001532 Short4 value = *Pointer<Short4>(buffer);
Ben Claytonbc1c0672019-12-17 20:37:37 +00001533 Short4 channelMask = *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
Nicolas Capens157ba262019-12-10 17:49:14 -05001534
Ben Claytonbc1c0672019-12-17 20:37:37 +00001535 Short4 mask01 = *Pointer<Short4>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05001536 if(rgbaWriteMask != 0x0000000F)
1537 {
1538 mask01 &= channelMask;
1539 }
1540 *Pointer<Short4>(buffer) = (c01 & mask01) | (value & ~mask01);
1541
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001542 buffer += pitchB;
Nicolas Capens157ba262019-12-10 17:49:14 -05001543 value = *Pointer<Short4>(buffer);
1544
Ben Claytonbc1c0672019-12-17 20:37:37 +00001545 Short4 mask23 = *Pointer<Short4>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05001546 if(rgbaWriteMask != 0x0000000F)
1547 {
1548 mask23 &= channelMask;
1549 }
1550 *Pointer<Short4>(buffer) = (c23 & mask23) | (value & ~mask23);
1551 }
1552 break;
Ben Claytonbc1c0672019-12-17 20:37:37 +00001553 case VK_FORMAT_R8G8_UNORM:
1554 if((rgbaWriteMask & 0x00000003) != 0x0)
Nicolas Capens157ba262019-12-10 17:49:14 -05001555 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00001556 buffer += 2 * x;
1557 Int2 value;
1558 value = Insert(value, *Pointer<Int>(buffer), 0);
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001559 value = Insert(value, *Pointer<Int>(buffer + pitchB), 1);
Ben Claytonbc1c0672019-12-17 20:37:37 +00001560
1561 Int2 packedCol = As<Int2>(current.x);
1562
1563 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
1564 if((rgbaWriteMask & 0x3) != 0x3)
1565 {
1566 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
1567 UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
1568 mergedMask &= rgbaMask;
1569 }
1570
1571 packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask));
1572
1573 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001574 *Pointer<UInt>(buffer + pitchB) = As<UInt>(Extract(packedCol, 1));
Nicolas Capens157ba262019-12-10 17:49:14 -05001575 }
Ben Claytonbc1c0672019-12-17 20:37:37 +00001576 break;
1577 case VK_FORMAT_R8_UNORM:
1578 if(rgbaWriteMask & 0x00000001)
1579 {
1580 buffer += 1 * x;
1581 Short4 value;
1582 value = Insert(value, *Pointer<Short>(buffer), 0);
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001583 value = Insert(value, *Pointer<Short>(buffer + pitchB), 1);
Nicolas Capens157ba262019-12-10 17:49:14 -05001584
Ben Claytonbc1c0672019-12-17 20:37:37 +00001585 current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask);
1586 value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask);
1587 current.x |= value;
Nicolas Capens157ba262019-12-10 17:49:14 -05001588
Ben Claytonbc1c0672019-12-17 20:37:37 +00001589 *Pointer<Short>(buffer) = Extract(current.x, 0);
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001590 *Pointer<Short>(buffer + pitchB) = Extract(current.x, 1);
Ben Claytonbc1c0672019-12-17 20:37:37 +00001591 }
1592 break;
1593 case VK_FORMAT_R16G16_UNORM:
Nicolas Capens157ba262019-12-10 17:49:14 -05001594 {
Alexis Hetu24c49dd2019-12-13 16:32:43 -05001595 buffer += 4 * x;
Nicolas Capens157ba262019-12-10 17:49:14 -05001596
1597 Short4 value = *Pointer<Short4>(buffer);
1598
1599 if((rgbaWriteMask & 0x00000003) != 0x00000003)
1600 {
1601 Short4 masked = value;
Ben Claytonbc1c0672019-12-17 20:37:37 +00001602 current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskW01Q[rgbaWriteMask & 0x3][0]));
1603 masked &= *Pointer<Short4>(constants + OFFSET(Constants, maskW01Q[~rgbaWriteMask & 0x3][0]));
Nicolas Capens157ba262019-12-10 17:49:14 -05001604 current.x |= masked;
1605 }
1606
Ben Claytonbc1c0672019-12-17 20:37:37 +00001607 current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
1608 value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskD01Q) + xMask * 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05001609 current.x |= value;
1610 *Pointer<Short4>(buffer) = current.x;
1611
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001612 buffer += pitchB;
Nicolas Capens157ba262019-12-10 17:49:14 -05001613
1614 value = *Pointer<Short4>(buffer);
1615
1616 if((rgbaWriteMask & 0x00000003) != 0x00000003)
1617 {
1618 Short4 masked = value;
Ben Claytonbc1c0672019-12-17 20:37:37 +00001619 current.y &= *Pointer<Short4>(constants + OFFSET(Constants, maskW01Q[rgbaWriteMask & 0x3][0]));
1620 masked &= *Pointer<Short4>(constants + OFFSET(Constants, maskW01Q[~rgbaWriteMask & 0x3][0]));
Nicolas Capens157ba262019-12-10 17:49:14 -05001621 current.y |= masked;
1622 }
1623
Ben Claytonbc1c0672019-12-17 20:37:37 +00001624 current.y &= *Pointer<Short4>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
1625 value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskD23Q) + xMask * 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05001626 current.y |= value;
1627 *Pointer<Short4>(buffer) = current.y;
1628 }
1629 break;
Ben Claytonbc1c0672019-12-17 20:37:37 +00001630 case VK_FORMAT_R16G16B16A16_UNORM:
Nicolas Capens157ba262019-12-10 17:49:14 -05001631 {
Alexis Hetu24c49dd2019-12-13 16:32:43 -05001632 buffer += 8 * x;
Nicolas Capens157ba262019-12-10 17:49:14 -05001633
1634 {
Nicolas Capens68a82382018-10-02 13:16:55 -04001635 Short4 value = *Pointer<Short4>(buffer);
1636
Chris Forbes30d3c8a2019-09-08 15:50:05 +01001637 if(rgbaWriteMask != 0x0000000F)
Nicolas Capens68a82382018-10-02 13:16:55 -04001638 {
Nicolas Capens68a82382018-10-02 13:16:55 -04001639 Short4 masked = value;
Ben Claytonbc1c0672019-12-17 20:37:37 +00001640 current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
1641 masked &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q[rgbaWriteMask][0]));
Nicolas Capens68a82382018-10-02 13:16:55 -04001642 current.x |= masked;
1643 }
1644
Ben Claytonbc1c0672019-12-17 20:37:37 +00001645 current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskQ0Q) + xMask * 8);
1646 value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskQ0Q) + xMask * 8);
Nicolas Capens68a82382018-10-02 13:16:55 -04001647 current.x |= value;
1648 *Pointer<Short4>(buffer) = current.x;
Nicolas Capens157ba262019-12-10 17:49:14 -05001649 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001650
Nicolas Capens157ba262019-12-10 17:49:14 -05001651 {
1652 Short4 value = *Pointer<Short4>(buffer + 8);
Nicolas Capens68a82382018-10-02 13:16:55 -04001653
Nicolas Capens157ba262019-12-10 17:49:14 -05001654 if(rgbaWriteMask != 0x0000000F)
Nicolas Capens68a82382018-10-02 13:16:55 -04001655 {
1656 Short4 masked = value;
Ben Claytonbc1c0672019-12-17 20:37:37 +00001657 current.y &= *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
1658 masked &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q[rgbaWriteMask][0]));
Nicolas Capens68a82382018-10-02 13:16:55 -04001659 current.y |= masked;
1660 }
1661
Ben Claytonbc1c0672019-12-17 20:37:37 +00001662 current.y &= *Pointer<Short4>(constants + OFFSET(Constants, maskQ1Q) + xMask * 8);
1663 value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskQ1Q) + xMask * 8);
Nicolas Capens68a82382018-10-02 13:16:55 -04001664 current.y |= value;
Nicolas Capens157ba262019-12-10 17:49:14 -05001665 *Pointer<Short4>(buffer + 8) = current.y;
Nicolas Capens68a82382018-10-02 13:16:55 -04001666 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001667
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001668 buffer += pitchB;
Nicolas Capens157ba262019-12-10 17:49:14 -05001669
Nicolas Capens68a82382018-10-02 13:16:55 -04001670 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001671 Short4 value = *Pointer<Short4>(buffer);
Nicolas Capens68a82382018-10-02 13:16:55 -04001672
Nicolas Capens157ba262019-12-10 17:49:14 -05001673 if(rgbaWriteMask != 0x0000000F)
Nicolas Capens68a82382018-10-02 13:16:55 -04001674 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001675 Short4 masked = value;
Ben Claytonbc1c0672019-12-17 20:37:37 +00001676 current.z &= *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
1677 masked &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q[rgbaWriteMask][0]));
Nicolas Capens157ba262019-12-10 17:49:14 -05001678 current.z |= masked;
Nicolas Capens68a82382018-10-02 13:16:55 -04001679 }
1680
Ben Claytonbc1c0672019-12-17 20:37:37 +00001681 current.z &= *Pointer<Short4>(constants + OFFSET(Constants, maskQ2Q) + xMask * 8);
1682 value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskQ2Q) + xMask * 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05001683 current.z |= value;
1684 *Pointer<Short4>(buffer) = current.z;
Nicolas Capens68a82382018-10-02 13:16:55 -04001685 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001686
Ben Clayton3d7b7ea2019-05-14 16:49:58 +01001687 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001688 Short4 value = *Pointer<Short4>(buffer + 8);
Ben Clayton3d7b7ea2019-05-14 16:49:58 +01001689
Nicolas Capens157ba262019-12-10 17:49:14 -05001690 if(rgbaWriteMask != 0x0000000F)
Ben Clayton3d7b7ea2019-05-14 16:49:58 +01001691 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001692 Short4 masked = value;
Ben Claytonbc1c0672019-12-17 20:37:37 +00001693 current.w &= *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
1694 masked &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q[rgbaWriteMask][0]));
Nicolas Capens157ba262019-12-10 17:49:14 -05001695 current.w |= masked;
Ben Clayton3d7b7ea2019-05-14 16:49:58 +01001696 }
Ben Clayton3d7b7ea2019-05-14 16:49:58 +01001697
Ben Claytonbc1c0672019-12-17 20:37:37 +00001698 current.w &= *Pointer<Short4>(constants + OFFSET(Constants, maskQ3Q) + xMask * 8);
1699 value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskQ3Q) + xMask * 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05001700 current.w |= value;
1701 *Pointer<Short4>(buffer + 8) = current.w;
Ben Clayton3d7b7ea2019-05-14 16:49:58 +01001702 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001703 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001704 break;
Alexis Hetub8a61bf2020-01-09 15:26:34 -05001705 case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
1706 rgbaWriteMask = bgraWriteMask;
1707 // [[fallthrough]]
Nicolas Capens157ba262019-12-10 17:49:14 -05001708 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
1709 {
Alexis Hetu24c49dd2019-12-13 16:32:43 -05001710 buffer += 4 * x;
Nicolas Capens157ba262019-12-10 17:49:14 -05001711
Nicolas Capens157ba262019-12-10 17:49:14 -05001712 Int2 value = *Pointer<Int2>(buffer, 16);
1713 Int2 mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
Nicolas Capens81bc9d92019-12-16 15:05:57 -05001714 if(rgbaWriteMask != 0xF)
Nicolas Capens157ba262019-12-10 17:49:14 -05001715 {
1716 mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
1717 }
1718 *Pointer<Int2>(buffer) = (As<Int2>(current.x) & mergedMask) | (value & ~mergedMask);
1719
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001720 buffer += pitchB;
Nicolas Capens157ba262019-12-10 17:49:14 -05001721
1722 value = *Pointer<Int2>(buffer, 16);
1723 mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
Nicolas Capens81bc9d92019-12-16 15:05:57 -05001724 if(rgbaWriteMask != 0xF)
Nicolas Capens157ba262019-12-10 17:49:14 -05001725 {
1726 mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
1727 }
1728 *Pointer<Int2>(buffer) = (As<Int2>(current.y) & mergedMask) | (value & ~mergedMask);
1729 }
1730 break;
Ben Claytonbc1c0672019-12-17 20:37:37 +00001731 default:
Nicolas Capens865f8892020-01-21 14:27:10 -05001732 UNSUPPORTED("VkFormat: %d", int(state.targetFormat[index]));
Nicolas Capens157ba262019-12-10 17:49:14 -05001733 }
1734}
1735
1736void PixelRoutine::blendFactor(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, VkBlendFactor blendFactorActive)
1737{
1738 switch(blendFactorActive)
1739 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00001740 case VK_BLEND_FACTOR_ZERO:
1741 blendFactor.x = Float4(0);
1742 blendFactor.y = Float4(0);
1743 blendFactor.z = Float4(0);
1744 break;
1745 case VK_BLEND_FACTOR_ONE:
1746 blendFactor.x = Float4(1);
1747 blendFactor.y = Float4(1);
1748 blendFactor.z = Float4(1);
1749 break;
1750 case VK_BLEND_FACTOR_SRC_COLOR:
1751 blendFactor.x = oC.x;
1752 blendFactor.y = oC.y;
1753 blendFactor.z = oC.z;
1754 break;
1755 case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
1756 blendFactor.x = Float4(1.0f) - oC.x;
1757 blendFactor.y = Float4(1.0f) - oC.y;
1758 blendFactor.z = Float4(1.0f) - oC.z;
1759 break;
1760 case VK_BLEND_FACTOR_DST_COLOR:
1761 blendFactor.x = pixel.x;
1762 blendFactor.y = pixel.y;
1763 blendFactor.z = pixel.z;
1764 break;
1765 case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
1766 blendFactor.x = Float4(1.0f) - pixel.x;
1767 blendFactor.y = Float4(1.0f) - pixel.y;
1768 blendFactor.z = Float4(1.0f) - pixel.z;
1769 break;
1770 case VK_BLEND_FACTOR_SRC_ALPHA:
1771 blendFactor.x = oC.w;
1772 blendFactor.y = oC.w;
1773 blendFactor.z = oC.w;
1774 break;
1775 case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
1776 blendFactor.x = Float4(1.0f) - oC.w;
1777 blendFactor.y = Float4(1.0f) - oC.w;
1778 blendFactor.z = Float4(1.0f) - oC.w;
1779 break;
1780 case VK_BLEND_FACTOR_DST_ALPHA:
1781 blendFactor.x = pixel.w;
1782 blendFactor.y = pixel.w;
1783 blendFactor.z = pixel.w;
1784 break;
1785 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
1786 blendFactor.x = Float4(1.0f) - pixel.w;
1787 blendFactor.y = Float4(1.0f) - pixel.w;
1788 blendFactor.z = Float4(1.0f) - pixel.w;
1789 break;
1790 case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
1791 blendFactor.x = Float4(1.0f) - pixel.w;
1792 blendFactor.x = Min(blendFactor.x, oC.w);
1793 blendFactor.y = blendFactor.x;
1794 blendFactor.z = blendFactor.x;
1795 break;
1796 case VK_BLEND_FACTOR_CONSTANT_COLOR:
1797 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[0]));
1798 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[1]));
1799 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[2]));
1800 break;
1801 case VK_BLEND_FACTOR_CONSTANT_ALPHA:
1802 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[3]));
1803 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[3]));
1804 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[3]));
1805 break;
1806 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
1807 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[0]));
1808 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[1]));
1809 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[2]));
1810 break;
1811 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
1812 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[3]));
1813 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[3]));
1814 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[3]));
1815 break;
Nicolas Capens157ba262019-12-10 17:49:14 -05001816
Ben Claytonbc1c0672019-12-17 20:37:37 +00001817 default:
Nicolas Capens865f8892020-01-21 14:27:10 -05001818 UNSUPPORTED("VkBlendFactor: %d", int(blendFactorActive));
Nicolas Capens157ba262019-12-10 17:49:14 -05001819 }
1820}
1821
1822void PixelRoutine::blendFactorAlpha(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, VkBlendFactor blendFactorAlphaActive)
1823{
1824 switch(blendFactorAlphaActive)
1825 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00001826 case VK_BLEND_FACTOR_ZERO:
1827 blendFactor.w = Float4(0);
1828 break;
1829 case VK_BLEND_FACTOR_ONE:
1830 blendFactor.w = Float4(1);
1831 break;
1832 case VK_BLEND_FACTOR_SRC_COLOR:
1833 blendFactor.w = oC.w;
1834 break;
1835 case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
1836 blendFactor.w = Float4(1.0f) - oC.w;
1837 break;
1838 case VK_BLEND_FACTOR_DST_COLOR:
1839 blendFactor.w = pixel.w;
1840 break;
1841 case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
1842 blendFactor.w = Float4(1.0f) - pixel.w;
1843 break;
1844 case VK_BLEND_FACTOR_SRC_ALPHA:
1845 blendFactor.w = oC.w;
1846 break;
1847 case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
1848 blendFactor.w = Float4(1.0f) - oC.w;
1849 break;
1850 case VK_BLEND_FACTOR_DST_ALPHA:
1851 blendFactor.w = pixel.w;
1852 break;
1853 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
1854 blendFactor.w = Float4(1.0f) - pixel.w;
1855 break;
1856 case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
1857 blendFactor.w = Float4(1.0f);
1858 break;
1859 case VK_BLEND_FACTOR_CONSTANT_COLOR:
1860 case VK_BLEND_FACTOR_CONSTANT_ALPHA:
1861 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData, factor.blendConstant4F[3]));
1862 break;
1863 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
1864 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
1865 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData, factor.invBlendConstant4F[3]));
1866 break;
1867 default:
Nicolas Capens865f8892020-01-21 14:27:10 -05001868 UNSUPPORTED("VkBlendFactor: %d", int(blendFactorAlphaActive));
Nicolas Capens157ba262019-12-10 17:49:14 -05001869 }
1870}
1871
1872void PixelRoutine::alphaBlend(int index, const Pointer<Byte> &cBuffer, Vector4f &oC, const Int &x)
1873{
Nicolas Capensae100792020-06-11 13:48:40 -04001874 if(!state.blendState[index].alphaBlendEnable)
Nicolas Capens157ba262019-12-10 17:49:14 -05001875 {
1876 return;
Nicolas Capens68a82382018-10-02 13:16:55 -04001877 }
1878
Nicolas Capensae100792020-06-11 13:48:40 -04001879 vk::Format format = state.targetFormat[index];
1880 ASSERT(format.supportsColorAttachmentBlend());
1881
Alexis Hetu24c49dd2019-12-13 16:32:43 -05001882 Pointer<Byte> buffer = cBuffer;
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001883 Int pitchB = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Ben Clayton8ab40532019-05-10 16:23:13 +01001884
Nicolas Capens157ba262019-12-10 17:49:14 -05001885 // pixel holds four texel color values.
1886 // Note: Despite the type being Vector4f, the colors may be stored as
1887 // integers. Half-floats are stored as full 32-bit floats.
1888 // Non-float and non-fixed point formats are not alpha blended.
1889 Vector4f pixel;
1890
1891 Vector4s color;
1892 Short4 c01;
1893 Short4 c23;
1894
1895 Float4 one;
Nicolas Capens157ba262019-12-10 17:49:14 -05001896 if(format.isFloatFormat())
1897 {
1898 one = Float4(1.0f);
1899 }
Nicolas Capens9d9f30d2020-01-12 03:26:18 -05001900 else if(format.isUnnormalizedInteger())
Nicolas Capens157ba262019-12-10 17:49:14 -05001901 {
1902 one = As<Float4>(format.isUnsignedComponent(0) ? Int4(0xFFFFFFFF) : Int4(0x7FFFFFFF));
Nicolas Capens68a82382018-10-02 13:16:55 -04001903 }
1904
Nicolas Capens157ba262019-12-10 17:49:14 -05001905 switch(state.targetFormat[index])
Nicolas Capens68a82382018-10-02 13:16:55 -04001906 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00001907 case VK_FORMAT_R32_SINT:
1908 case VK_FORMAT_R32_UINT:
1909 case VK_FORMAT_R32_SFLOAT:
1910 // FIXME: movlps
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001911 buffer += 4 * x;
1912 pixel.x.x = *Pointer<Float>(buffer + 0);
1913 pixel.x.y = *Pointer<Float>(buffer + 4);
1914 buffer += pitchB;
Ben Claytonbc1c0672019-12-17 20:37:37 +00001915 // FIXME: movhps
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001916 pixel.x.z = *Pointer<Float>(buffer + 0);
1917 pixel.x.w = *Pointer<Float>(buffer + 4);
Ben Claytonbc1c0672019-12-17 20:37:37 +00001918 pixel.y = pixel.z = pixel.w = one;
1919 break;
1920 case VK_FORMAT_R32G32_SINT:
1921 case VK_FORMAT_R32G32_UINT:
1922 case VK_FORMAT_R32G32_SFLOAT:
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001923 buffer += 8 * x;
1924 pixel.x = *Pointer<Float4>(buffer, 16);
1925 buffer += pitchB;
1926 pixel.y = *Pointer<Float4>(buffer, 16);
Ben Claytonbc1c0672019-12-17 20:37:37 +00001927 pixel.z = pixel.x;
1928 pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x0202);
1929 pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0x1313);
1930 pixel.y = pixel.z;
1931 pixel.z = pixel.w = one;
1932 break;
1933 case VK_FORMAT_R32G32B32A32_SFLOAT:
1934 case VK_FORMAT_R32G32B32A32_SINT:
1935 case VK_FORMAT_R32G32B32A32_UINT:
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001936 buffer += 16 * x;
1937 pixel.x = *Pointer<Float4>(buffer + 0, 16);
1938 pixel.y = *Pointer<Float4>(buffer + 16, 16);
1939 buffer += pitchB;
1940 pixel.z = *Pointer<Float4>(buffer + 0, 16);
1941 pixel.w = *Pointer<Float4>(buffer + 16, 16);
Ben Claytonbc1c0672019-12-17 20:37:37 +00001942 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
1943 break;
1944 case VK_FORMAT_R16_SFLOAT:
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001945 buffer += 2 * x;
1946 pixel.x.x = Float(*Pointer<Half>(buffer + 0));
1947 pixel.x.y = Float(*Pointer<Half>(buffer + 2));
1948 buffer += pitchB;
1949 pixel.x.z = Float(*Pointer<Half>(buffer + 0));
1950 pixel.x.w = Float(*Pointer<Half>(buffer + 2));
Ben Claytonbc1c0672019-12-17 20:37:37 +00001951 pixel.y = pixel.z = pixel.w = one;
1952 break;
1953 case VK_FORMAT_R16G16_SFLOAT:
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001954 buffer += 4 * x;
1955 pixel.x.x = Float(*Pointer<Half>(buffer + 0));
1956 pixel.y.x = Float(*Pointer<Half>(buffer + 2));
1957 pixel.x.y = Float(*Pointer<Half>(buffer + 4));
1958 pixel.y.y = Float(*Pointer<Half>(buffer + 6));
1959 buffer += pitchB;
1960 pixel.x.z = Float(*Pointer<Half>(buffer + 0));
1961 pixel.y.z = Float(*Pointer<Half>(buffer + 2));
1962 pixel.x.w = Float(*Pointer<Half>(buffer + 4));
1963 pixel.y.w = Float(*Pointer<Half>(buffer + 6));
Ben Claytonbc1c0672019-12-17 20:37:37 +00001964 pixel.z = pixel.w = one;
1965 break;
1966 case VK_FORMAT_R16G16B16A16_SFLOAT:
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001967 buffer += 8 * x;
1968 pixel.x.x = Float(*Pointer<Half>(buffer + 0x0));
1969 pixel.y.x = Float(*Pointer<Half>(buffer + 0x2));
1970 pixel.z.x = Float(*Pointer<Half>(buffer + 0x4));
1971 pixel.w.x = Float(*Pointer<Half>(buffer + 0x6));
1972 pixel.x.y = Float(*Pointer<Half>(buffer + 0x8));
1973 pixel.y.y = Float(*Pointer<Half>(buffer + 0xa));
1974 pixel.z.y = Float(*Pointer<Half>(buffer + 0xc));
1975 pixel.w.y = Float(*Pointer<Half>(buffer + 0xe));
1976 buffer += pitchB;
1977 pixel.x.z = Float(*Pointer<Half>(buffer + 0x0));
1978 pixel.y.z = Float(*Pointer<Half>(buffer + 0x2));
1979 pixel.z.z = Float(*Pointer<Half>(buffer + 0x4));
1980 pixel.w.z = Float(*Pointer<Half>(buffer + 0x6));
1981 pixel.x.w = Float(*Pointer<Half>(buffer + 0x8));
1982 pixel.y.w = Float(*Pointer<Half>(buffer + 0xa));
1983 pixel.z.w = Float(*Pointer<Half>(buffer + 0xc));
1984 pixel.w.w = Float(*Pointer<Half>(buffer + 0xe));
Ben Claytonbc1c0672019-12-17 20:37:37 +00001985 break;
1986 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05001987 buffer += 4 * x;
1988 pixel.x = r11g11b10Unpack(*Pointer<UInt>(buffer + 0));
1989 pixel.y = r11g11b10Unpack(*Pointer<UInt>(buffer + 4));
1990 buffer += pitchB;
1991 pixel.z = r11g11b10Unpack(*Pointer<UInt>(buffer + 0));
1992 pixel.w = r11g11b10Unpack(*Pointer<UInt>(buffer + 4));
1993 transpose4x3(pixel.x, pixel.y, pixel.z, pixel.w);
1994 pixel.w = one;
Ben Claytonbc1c0672019-12-17 20:37:37 +00001995 break;
1996 default:
Nicolas Capens865f8892020-01-21 14:27:10 -05001997 UNSUPPORTED("VkFormat: %d", int(state.targetFormat[index]));
Nicolas Capens68a82382018-10-02 13:16:55 -04001998 }
1999
Nicolas Capens157ba262019-12-10 17:49:14 -05002000 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
2001 Vector4f sourceFactor;
2002 Vector4f destFactor;
2003
2004 blendFactor(sourceFactor, oC, pixel, state.blendState[index].sourceBlendFactor);
2005 blendFactor(destFactor, oC, pixel, state.blendState[index].destBlendFactor);
2006
2007 oC.x *= sourceFactor.x;
2008 oC.y *= sourceFactor.y;
2009 oC.z *= sourceFactor.z;
2010
2011 pixel.x *= destFactor.x;
2012 pixel.y *= destFactor.y;
2013 pixel.z *= destFactor.z;
2014
2015 switch(state.blendState[index].blendOperation)
Nicolas Capens68a82382018-10-02 13:16:55 -04002016 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00002017 case VK_BLEND_OP_ADD:
2018 oC.x += pixel.x;
2019 oC.y += pixel.y;
2020 oC.z += pixel.z;
2021 break;
2022 case VK_BLEND_OP_SUBTRACT:
2023 oC.x -= pixel.x;
2024 oC.y -= pixel.y;
2025 oC.z -= pixel.z;
2026 break;
2027 case VK_BLEND_OP_REVERSE_SUBTRACT:
2028 oC.x = pixel.x - oC.x;
2029 oC.y = pixel.y - oC.y;
2030 oC.z = pixel.z - oC.z;
2031 break;
2032 case VK_BLEND_OP_MIN:
2033 oC.x = Min(oC.x, pixel.x);
2034 oC.y = Min(oC.y, pixel.y);
2035 oC.z = Min(oC.z, pixel.z);
2036 break;
2037 case VK_BLEND_OP_MAX:
2038 oC.x = Max(oC.x, pixel.x);
2039 oC.y = Max(oC.y, pixel.y);
2040 oC.z = Max(oC.z, pixel.z);
2041 break;
2042 case VK_BLEND_OP_SRC_EXT:
2043 // No operation
2044 break;
2045 case VK_BLEND_OP_DST_EXT:
2046 oC.x = pixel.x;
2047 oC.y = pixel.y;
2048 oC.z = pixel.z;
2049 break;
2050 case VK_BLEND_OP_ZERO_EXT:
2051 oC.x = Float4(0.0f);
2052 oC.y = Float4(0.0f);
2053 oC.z = Float4(0.0f);
2054 break;
2055 default:
Nicolas Capens865f8892020-01-21 14:27:10 -05002056 UNSUPPORTED("VkBlendOp: %d", int(state.blendState[index].blendOperation));
Nicolas Capens157ba262019-12-10 17:49:14 -05002057 }
2058
2059 blendFactorAlpha(sourceFactor, oC, pixel, state.blendState[index].sourceBlendFactorAlpha);
2060 blendFactorAlpha(destFactor, oC, pixel, state.blendState[index].destBlendFactorAlpha);
2061
2062 oC.w *= sourceFactor.w;
2063 pixel.w *= destFactor.w;
2064
2065 switch(state.blendState[index].blendOperationAlpha)
2066 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00002067 case VK_BLEND_OP_ADD:
2068 oC.w += pixel.w;
2069 break;
2070 case VK_BLEND_OP_SUBTRACT:
2071 oC.w -= pixel.w;
2072 break;
2073 case VK_BLEND_OP_REVERSE_SUBTRACT:
2074 pixel.w -= oC.w;
2075 oC.w = pixel.w;
2076 break;
2077 case VK_BLEND_OP_MIN:
2078 oC.w = Min(oC.w, pixel.w);
2079 break;
2080 case VK_BLEND_OP_MAX:
2081 oC.w = Max(oC.w, pixel.w);
2082 break;
2083 case VK_BLEND_OP_SRC_EXT:
2084 // No operation
2085 break;
2086 case VK_BLEND_OP_DST_EXT:
2087 oC.w = pixel.w;
2088 break;
2089 case VK_BLEND_OP_ZERO_EXT:
2090 oC.w = Float4(0.0f);
2091 break;
2092 default:
Nicolas Capens865f8892020-01-21 14:27:10 -05002093 UNSUPPORTED("VkBlendOp: %d", int(state.blendState[index].blendOperationAlpha));
Nicolas Capens157ba262019-12-10 17:49:14 -05002094 }
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05002095
2096 if(format.isUnsignedComponent(0)) { oC.x = Max(oC.x, Float4(0.0f)); }
2097 if(format.isUnsignedComponent(1)) { oC.y = Max(oC.y, Float4(0.0f)); }
2098 if(format.isUnsignedComponent(2)) { oC.z = Max(oC.z, Float4(0.0f)); }
2099 if(format.isUnsignedComponent(3)) { oC.w = Max(oC.w, Float4(0.0f)); }
Nicolas Capens157ba262019-12-10 17:49:14 -05002100}
2101
2102void PixelRoutine::writeColor(int index, const Pointer<Byte> &cBuffer, const Int &x, Vector4f &oC, const Int &sMask, const Int &zMask, const Int &cMask)
2103{
2104 switch(state.targetFormat[index])
2105 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00002106 case VK_FORMAT_R16_SFLOAT:
2107 case VK_FORMAT_R32_SFLOAT:
2108 case VK_FORMAT_R32_SINT:
2109 case VK_FORMAT_R32_UINT:
2110 case VK_FORMAT_R16_SINT:
2111 case VK_FORMAT_R16_UINT:
2112 case VK_FORMAT_R8_SINT:
2113 case VK_FORMAT_R8_UINT:
2114 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
Alexis Hetub8a61bf2020-01-09 15:26:34 -05002115 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
Ben Claytonbc1c0672019-12-17 20:37:37 +00002116 break;
2117 case VK_FORMAT_R16G16_SFLOAT:
2118 case VK_FORMAT_R32G32_SFLOAT:
2119 case VK_FORMAT_R32G32_SINT:
2120 case VK_FORMAT_R32G32_UINT:
2121 case VK_FORMAT_R16G16_SINT:
2122 case VK_FORMAT_R16G16_UINT:
2123 case VK_FORMAT_R8G8_SINT:
2124 case VK_FORMAT_R8G8_UINT:
2125 oC.z = oC.x;
2126 oC.x = UnpackLow(oC.x, oC.y);
2127 oC.z = UnpackHigh(oC.z, oC.y);
2128 oC.y = oC.z;
2129 break;
2130 case VK_FORMAT_R16G16B16A16_SFLOAT:
2131 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
2132 case VK_FORMAT_R32G32B32A32_SFLOAT:
2133 case VK_FORMAT_R32G32B32A32_SINT:
2134 case VK_FORMAT_R32G32B32A32_UINT:
2135 case VK_FORMAT_R16G16B16A16_SINT:
2136 case VK_FORMAT_R16G16B16A16_UINT:
2137 case VK_FORMAT_R8G8B8A8_SINT:
2138 case VK_FORMAT_R8G8B8A8_UINT:
2139 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
2140 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
2141 transpose4x4(oC.x, oC.y, oC.z, oC.w);
2142 break;
2143 default:
Nicolas Capens865f8892020-01-21 14:27:10 -05002144 UNSUPPORTED("VkFormat: %d", int(state.targetFormat[index]));
Nicolas Capens157ba262019-12-10 17:49:14 -05002145 }
2146
2147 int rgbaWriteMask = state.colorWriteActive(index);
Alexis Hetub8a61bf2020-01-09 15:26:34 -05002148 int bgraWriteMask = (rgbaWriteMask & 0x0000000A) | (rgbaWriteMask & 0x00000001) << 2 | (rgbaWriteMask & 0x00000004) >> 2;
Nicolas Capens157ba262019-12-10 17:49:14 -05002149
Ben Claytonbc1c0672019-12-17 20:37:37 +00002150 Int xMask; // Combination of all masks
Nicolas Capens157ba262019-12-10 17:49:14 -05002151
2152 if(state.depthTestActive)
2153 {
2154 xMask = zMask;
2155 }
2156 else
2157 {
2158 xMask = cMask;
2159 }
2160
2161 if(state.stencilActive)
2162 {
2163 xMask &= sMask;
2164 }
2165
2166 auto targetFormat = state.targetFormat[index];
2167
Alexis Hetu24c49dd2019-12-13 16:32:43 -05002168 Pointer<Byte> buffer = cBuffer;
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05002169 Int pitchB = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Nicolas Capens157ba262019-12-10 17:49:14 -05002170 Float4 value;
2171
2172 switch(targetFormat)
2173 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00002174 case VK_FORMAT_R32_SFLOAT:
2175 case VK_FORMAT_R32_SINT:
2176 case VK_FORMAT_R32_UINT:
2177 if(rgbaWriteMask & 0x00000001)
Nicolas Capens68a82382018-10-02 13:16:55 -04002178 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00002179 buffer += 4 * x;
2180
2181 // FIXME: movlps
2182 value.x = *Pointer<Float>(buffer + 0);
2183 value.y = *Pointer<Float>(buffer + 4);
2184
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05002185 buffer += pitchB;
Ben Claytonbc1c0672019-12-17 20:37:37 +00002186
2187 // FIXME: movhps
2188 value.z = *Pointer<Float>(buffer + 0);
2189 value.w = *Pointer<Float>(buffer + 4);
2190
2191 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
2192 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
2193 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2194
2195 // FIXME: movhps
2196 *Pointer<Float>(buffer + 0) = oC.x.z;
2197 *Pointer<Float>(buffer + 4) = oC.x.w;
Ben Clayton8ab40532019-05-10 16:23:13 +01002198
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05002199 buffer -= pitchB;
Ben Clayton8ab40532019-05-10 16:23:13 +01002200
Ben Claytonbc1c0672019-12-17 20:37:37 +00002201 // FIXME: movlps
2202 *Pointer<Float>(buffer + 0) = oC.x.x;
2203 *Pointer<Float>(buffer + 4) = oC.x.y;
Ben Clayton8ab40532019-05-10 16:23:13 +01002204 }
Ben Claytonbc1c0672019-12-17 20:37:37 +00002205 break;
2206 case VK_FORMAT_R16_SFLOAT:
2207 if(rgbaWriteMask & 0x00000001)
Nicolas Capens68a82382018-10-02 13:16:55 -04002208 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00002209 buffer += 2 * x;
2210
2211 value = Insert(value, Float(*Pointer<Half>(buffer + 0)), 0);
2212 value = Insert(value, Float(*Pointer<Half>(buffer + 2)), 1);
2213
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05002214 buffer += pitchB;
Ben Claytonbc1c0672019-12-17 20:37:37 +00002215
2216 value = Insert(value, Float(*Pointer<Half>(buffer + 0)), 2);
2217 value = Insert(value, Float(*Pointer<Half>(buffer + 2)), 3);
2218
2219 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
2220 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
2221 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2222
2223 *Pointer<Half>(buffer + 0) = Half(oC.x.z);
2224 *Pointer<Half>(buffer + 2) = Half(oC.x.w);
Nicolas Capens68a82382018-10-02 13:16:55 -04002225
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05002226 buffer -= pitchB;
Nicolas Capens157ba262019-12-10 17:49:14 -05002227
Ben Claytonbc1c0672019-12-17 20:37:37 +00002228 *Pointer<Half>(buffer + 0) = Half(oC.x.x);
2229 *Pointer<Half>(buffer + 2) = Half(oC.x.y);
Nicolas Capens68a82382018-10-02 13:16:55 -04002230 }
Ben Claytonbc1c0672019-12-17 20:37:37 +00002231 break;
2232 case VK_FORMAT_R16_SINT:
2233 case VK_FORMAT_R16_UINT:
2234 if(rgbaWriteMask & 0x00000001)
Nicolas Capens157ba262019-12-10 17:49:14 -05002235 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00002236 buffer += 2 * x;
2237
2238 UShort4 xyzw;
2239 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 0));
2240
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05002241 buffer += pitchB;
Ben Claytonbc1c0672019-12-17 20:37:37 +00002242
2243 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 1));
2244 value = As<Float4>(Int4(xyzw));
2245
2246 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
2247 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
2248 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2249
2250 if(targetFormat == VK_FORMAT_R16_SINT)
2251 {
2252 Float component = oC.x.z;
2253 *Pointer<Short>(buffer + 0) = Short(As<Int>(component));
2254 component = oC.x.w;
2255 *Pointer<Short>(buffer + 2) = Short(As<Int>(component));
2256
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05002257 buffer -= pitchB;
Ben Claytonbc1c0672019-12-17 20:37:37 +00002258
2259 component = oC.x.x;
2260 *Pointer<Short>(buffer + 0) = Short(As<Int>(component));
2261 component = oC.x.y;
2262 *Pointer<Short>(buffer + 2) = Short(As<Int>(component));
2263 }
2264 else // VK_FORMAT_R16_UINT
2265 {
2266 Float component = oC.x.z;
2267 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
2268 component = oC.x.w;
2269 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
2270
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05002271 buffer -= pitchB;
Ben Claytonbc1c0672019-12-17 20:37:37 +00002272
2273 component = oC.x.x;
2274 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
2275 component = oC.x.y;
2276 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
2277 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002278 }
Ben Claytonbc1c0672019-12-17 20:37:37 +00002279 break;
2280 case VK_FORMAT_R8_SINT:
2281 case VK_FORMAT_R8_UINT:
2282 if(rgbaWriteMask & 0x00000001)
Nicolas Capens157ba262019-12-10 17:49:14 -05002283 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00002284 buffer += x;
2285
2286 UInt xyzw, packedCol;
2287
2288 xyzw = UInt(*Pointer<UShort>(buffer)) & 0xFFFF;
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05002289 buffer += pitchB;
Ben Claytonbc1c0672019-12-17 20:37:37 +00002290 xyzw |= UInt(*Pointer<UShort>(buffer)) << 16;
2291
2292 Short4 tmpCol = Short4(As<Int4>(oC.x));
2293 if(targetFormat == VK_FORMAT_R8_SINT)
2294 {
2295 tmpCol = As<Short4>(PackSigned(tmpCol, tmpCol));
2296 }
2297 else
2298 {
2299 tmpCol = As<Short4>(PackUnsigned(tmpCol, tmpCol));
2300 }
2301 packedCol = Extract(As<Int2>(tmpCol), 0);
2302
2303 packedCol = (packedCol & *Pointer<UInt>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask)) |
2304 (xyzw & *Pointer<UInt>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask));
2305
2306 *Pointer<UShort>(buffer) = UShort(packedCol >> 16);
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05002307 buffer -= pitchB;
Ben Claytonbc1c0672019-12-17 20:37:37 +00002308 *Pointer<UShort>(buffer) = UShort(packedCol);
Nicolas Capens157ba262019-12-10 17:49:14 -05002309 }
Ben Claytonbc1c0672019-12-17 20:37:37 +00002310 break;
2311 case VK_FORMAT_R32G32_SFLOAT:
2312 case VK_FORMAT_R32G32_SINT:
2313 case VK_FORMAT_R32G32_UINT:
2314 buffer += 8 * x;
Nicolas Capens157ba262019-12-10 17:49:14 -05002315
Ben Claytonbc1c0672019-12-17 20:37:37 +00002316 value = *Pointer<Float4>(buffer);
Nicolas Capens157ba262019-12-10 17:49:14 -05002317
Ben Claytonbc1c0672019-12-17 20:37:37 +00002318 if((rgbaWriteMask & 0x00000003) != 0x00000003)
Nicolas Capens68a82382018-10-02 13:16:55 -04002319 {
2320 Float4 masked = value;
Ben Claytonbc1c0672019-12-17 20:37:37 +00002321 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD01X[rgbaWriteMask & 0x3][0])));
2322 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, maskD01X[~rgbaWriteMask & 0x3][0])));
Nicolas Capens68a82382018-10-02 13:16:55 -04002323 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
2324 }
2325
Ben Claytonbc1c0672019-12-17 20:37:37 +00002326 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16, 16));
2327 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskQ01X) + xMask * 16, 16));
Nicolas Capens68a82382018-10-02 13:16:55 -04002328 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
Ben Claytonbc1c0672019-12-17 20:37:37 +00002329 *Pointer<Float4>(buffer) = oC.x;
Nicolas Capens68a82382018-10-02 13:16:55 -04002330
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05002331 buffer += pitchB;
Nicolas Capens68a82382018-10-02 13:16:55 -04002332
Ben Claytonbc1c0672019-12-17 20:37:37 +00002333 value = *Pointer<Float4>(buffer);
2334
2335 if((rgbaWriteMask & 0x00000003) != 0x00000003)
Nicolas Capens68a82382018-10-02 13:16:55 -04002336 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00002337 Float4 masked;
2338
2339 masked = value;
2340 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants, maskD01X[rgbaWriteMask & 0x3][0])));
2341 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, maskD01X[~rgbaWriteMask & 0x3][0])));
Nicolas Capens68a82382018-10-02 13:16:55 -04002342 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
2343 }
2344
Ben Claytonbc1c0672019-12-17 20:37:37 +00002345 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16, 16));
2346 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskQ23X) + xMask * 16, 16));
Nicolas Capens68a82382018-10-02 13:16:55 -04002347 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
Ben Claytonbc1c0672019-12-17 20:37:37 +00002348 *Pointer<Float4>(buffer) = oC.y;
2349 break;
2350 case VK_FORMAT_R16G16_SFLOAT:
2351 if((rgbaWriteMask & 0x00000003) != 0x0)
Nicolas Capens157ba262019-12-10 17:49:14 -05002352 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00002353 buffer += 4 * x;
2354
2355 UInt2 rgbaMask;
2356 UInt2 packedCol;
2357 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.y))) << 16) | UInt(As<UShort>(Half(oC.x.x))), 0);
2358 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.w))) << 16) | UInt(As<UShort>(Half(oC.x.z))), 1);
2359
2360 UShort4 value = *Pointer<UShort4>(buffer);
2361 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
2362 if((rgbaWriteMask & 0x3) != 0x3)
2363 {
2364 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0]));
2365 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
2366 mergedMask &= rgbaMask;
2367 }
2368 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2369
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05002370 buffer += pitchB;
Ben Claytonbc1c0672019-12-17 20:37:37 +00002371
2372 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.y))) << 16) | UInt(As<UShort>(Half(oC.y.x))), 0);
2373 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.w))) << 16) | UInt(As<UShort>(Half(oC.y.z))), 1);
2374 value = *Pointer<UShort4>(buffer);
2375 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
2376 if((rgbaWriteMask & 0x3) != 0x3)
2377 {
2378 mergedMask &= rgbaMask;
2379 }
2380 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2381 }
2382 break;
2383 case VK_FORMAT_R16G16_SINT:
2384 case VK_FORMAT_R16G16_UINT:
2385 if((rgbaWriteMask & 0x00000003) != 0x0)
2386 {
2387 buffer += 4 * x;
2388
2389 UInt2 rgbaMask;
2390 UShort4 packedCol = UShort4(As<Int4>(oC.x));
2391 UShort4 value = *Pointer<UShort4>(buffer);
2392 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
2393 if((rgbaWriteMask & 0x3) != 0x3)
2394 {
2395 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0]));
2396 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
2397 mergedMask &= rgbaMask;
2398 }
2399 *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2400
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05002401 buffer += pitchB;
Ben Claytonbc1c0672019-12-17 20:37:37 +00002402
2403 packedCol = UShort4(As<Int4>(oC.y));
2404 value = *Pointer<UShort4>(buffer);
2405 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
2406 if((rgbaWriteMask & 0x3) != 0x3)
2407 {
2408 mergedMask &= rgbaMask;
2409 }
2410 *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2411 }
2412 break;
2413 case VK_FORMAT_R8G8_SINT:
2414 case VK_FORMAT_R8G8_UINT:
2415 if((rgbaWriteMask & 0x00000003) != 0x0)
2416 {
2417 buffer += 2 * x;
2418
2419 Int2 xyzw, packedCol;
2420
2421 xyzw = Insert(xyzw, *Pointer<Int>(buffer), 0);
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05002422 buffer += pitchB;
Ben Claytonbc1c0672019-12-17 20:37:37 +00002423 xyzw = Insert(xyzw, *Pointer<Int>(buffer), 1);
2424
2425 if(targetFormat == VK_FORMAT_R8G8_SINT)
2426 {
2427 packedCol = As<Int2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
2428 }
2429 else
2430 {
2431 packedCol = As<Int2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
2432 }
2433
2434 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
2435 if((rgbaWriteMask & 0x3) != 0x3)
2436 {
2437 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
2438 UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
2439 mergedMask &= rgbaMask;
2440 }
2441
2442 packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(xyzw) & ~mergedMask));
2443
2444 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 1));
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05002445 buffer -= pitchB;
Ben Claytonbc1c0672019-12-17 20:37:37 +00002446 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
2447 }
2448 break;
2449 case VK_FORMAT_R32G32B32A32_SFLOAT:
2450 case VK_FORMAT_R32G32B32A32_SINT:
2451 case VK_FORMAT_R32G32B32A32_UINT:
2452 buffer += 16 * x;
2453
2454 {
2455 value = *Pointer<Float4>(buffer, 16);
2456
2457 if(rgbaWriteMask != 0x0000000F)
2458 {
2459 Float4 masked = value;
2460 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X[rgbaWriteMask][0])));
2461 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X[rgbaWriteMask][0])));
2462 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
2463 }
2464
2465 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskX0X) + xMask * 16, 16));
2466 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskX0X) + xMask * 16, 16));
2467 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2468 *Pointer<Float4>(buffer, 16) = oC.x;
Nicolas Capens157ba262019-12-10 17:49:14 -05002469 }
Nicolas Capens68a82382018-10-02 13:16:55 -04002470
Nicolas Capens157ba262019-12-10 17:49:14 -05002471 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00002472 value = *Pointer<Float4>(buffer + 16, 16);
Nicolas Capens68a82382018-10-02 13:16:55 -04002473
Ben Claytonbc1c0672019-12-17 20:37:37 +00002474 if(rgbaWriteMask != 0x0000000F)
2475 {
2476 Float4 masked = value;
2477 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X[rgbaWriteMask][0])));
2478 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X[rgbaWriteMask][0])));
2479 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
2480 }
Nicolas Capens68a82382018-10-02 13:16:55 -04002481
Ben Claytonbc1c0672019-12-17 20:37:37 +00002482 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants, maskX1X) + xMask * 16, 16));
2483 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskX1X) + xMask * 16, 16));
2484 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
2485 *Pointer<Float4>(buffer + 16, 16) = oC.y;
Nicolas Capens157ba262019-12-10 17:49:14 -05002486 }
Nicolas Capens68a82382018-10-02 13:16:55 -04002487
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05002488 buffer += pitchB;
Nicolas Capens68a82382018-10-02 13:16:55 -04002489
Nicolas Capens157ba262019-12-10 17:49:14 -05002490 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00002491 value = *Pointer<Float4>(buffer, 16);
2492
2493 if(rgbaWriteMask != 0x0000000F)
2494 {
2495 Float4 masked = value;
2496 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X[rgbaWriteMask][0])));
2497 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X[rgbaWriteMask][0])));
2498 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(masked));
2499 }
2500
2501 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants, maskX2X) + xMask * 16, 16));
2502 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskX2X) + xMask * 16, 16));
2503 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(value));
2504 *Pointer<Float4>(buffer, 16) = oC.z;
Nicolas Capens157ba262019-12-10 17:49:14 -05002505 }
Alexis Hetu24c49dd2019-12-13 16:32:43 -05002506
Nicolas Capens157ba262019-12-10 17:49:14 -05002507 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00002508 value = *Pointer<Float4>(buffer + 16, 16);
2509
2510 if(rgbaWriteMask != 0x0000000F)
2511 {
2512 Float4 masked = value;
2513 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X[rgbaWriteMask][0])));
2514 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X[rgbaWriteMask][0])));
2515 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(masked));
2516 }
2517
2518 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants, maskX3X) + xMask * 16, 16));
2519 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskX3X) + xMask * 16, 16));
2520 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(value));
2521 *Pointer<Float4>(buffer + 16, 16) = oC.w;
Nicolas Capens157ba262019-12-10 17:49:14 -05002522 }
Ben Claytonbc1c0672019-12-17 20:37:37 +00002523 break;
2524 case VK_FORMAT_R16G16B16A16_SFLOAT:
2525 if((rgbaWriteMask & 0x0000000F) != 0x0)
Nicolas Capens157ba262019-12-10 17:49:14 -05002526 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00002527 buffer += 8 * x;
Nicolas Capens68a82382018-10-02 13:16:55 -04002528
Ben Claytonbc1c0672019-12-17 20:37:37 +00002529 UInt4 rgbaMask;
2530 UInt4 value = *Pointer<UInt4>(buffer);
2531 UInt4 packedCol;
2532 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.y))) << 16) | UInt(As<UShort>(Half(oC.x.x))), 0);
2533 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.x.w))) << 16) | UInt(As<UShort>(Half(oC.x.z))), 1);
2534 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.y))) << 16) | UInt(As<UShort>(Half(oC.y.x))), 2);
2535 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.w))) << 16) | UInt(As<UShort>(Half(oC.y.z))), 3);
2536 UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16);
2537 if((rgbaWriteMask & 0xF) != 0xF)
2538 {
2539 UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
2540 rgbaMask = UInt4(tmpMask, tmpMask);
2541 mergedMask &= rgbaMask;
2542 }
2543 *Pointer<UInt4>(buffer) = (packedCol & mergedMask) | (As<UInt4>(value) & ~mergedMask);
Nicolas Capens157ba262019-12-10 17:49:14 -05002544
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05002545 buffer += pitchB;
Nicolas Capens157ba262019-12-10 17:49:14 -05002546
Ben Claytonbc1c0672019-12-17 20:37:37 +00002547 value = *Pointer<UInt4>(buffer);
2548 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.z.y))) << 16) | UInt(As<UShort>(Half(oC.z.x))), 0);
2549 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.z.w))) << 16) | UInt(As<UShort>(Half(oC.z.z))), 1);
2550 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.w.y))) << 16) | UInt(As<UShort>(Half(oC.w.x))), 2);
2551 packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.w.w))) << 16) | UInt(As<UShort>(Half(oC.w.z))), 3);
2552 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16);
2553 if((rgbaWriteMask & 0xF) != 0xF)
2554 {
2555 mergedMask &= rgbaMask;
2556 }
2557 *Pointer<UInt4>(buffer) = (packedCol & mergedMask) | (As<UInt4>(value) & ~mergedMask);
Nicolas Capens157ba262019-12-10 17:49:14 -05002558 }
Ben Claytonbc1c0672019-12-17 20:37:37 +00002559 break;
2560 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
2561 if((rgbaWriteMask & 0x7) != 0x0)
Nicolas Capens157ba262019-12-10 17:49:14 -05002562 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00002563 buffer += 4 * x;
Nicolas Capens157ba262019-12-10 17:49:14 -05002564
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05002565 UInt4 packedCol;
2566 packedCol = Insert(packedCol, r11g11b10Pack(oC.x), 0);
2567 packedCol = Insert(packedCol, r11g11b10Pack(oC.y), 1);
2568 packedCol = Insert(packedCol, r11g11b10Pack(oC.z), 2);
2569 packedCol = Insert(packedCol, r11g11b10Pack(oC.w), 3);
Nicolas Capens157ba262019-12-10 17:49:14 -05002570
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05002571 UInt4 value;
2572 value = Insert(value, *Pointer<UInt>(buffer + 0), 0);
2573 value = Insert(value, *Pointer<UInt>(buffer + 4), 1);
2574 buffer += pitchB;
2575 value = Insert(value, *Pointer<UInt>(buffer + 0), 2);
2576 value = Insert(value, *Pointer<UInt>(buffer + 4), 3);
2577
2578 UInt4 mask = *Pointer<UInt4>(constants + OFFSET(Constants, maskD4X[0][0]) + xMask * 16, 16);
2579 if((rgbaWriteMask & 0x7) != 0x7)
2580 {
2581 mask &= *Pointer<UInt4>(constants + OFFSET(Constants, mask11X[rgbaWriteMask & 0x7][0]), 16);
2582 }
2583 value = (packedCol & mask) | (value & ~mask);
2584
2585 *Pointer<UInt>(buffer + 0) = value.z;
2586 *Pointer<UInt>(buffer + 4) = value.w;
2587 buffer -= pitchB;
2588 *Pointer<UInt>(buffer + 0) = value.x;
2589 *Pointer<UInt>(buffer + 4) = value.y;
Nicolas Capens157ba262019-12-10 17:49:14 -05002590 }
Ben Claytonbc1c0672019-12-17 20:37:37 +00002591 break;
2592 case VK_FORMAT_R16G16B16A16_SINT:
2593 case VK_FORMAT_R16G16B16A16_UINT:
2594 if((rgbaWriteMask & 0x0000000F) != 0x0)
Nicolas Capens157ba262019-12-10 17:49:14 -05002595 {
Ben Claytonbc1c0672019-12-17 20:37:37 +00002596 buffer += 8 * x;
Nicolas Capens157ba262019-12-10 17:49:14 -05002597
Ben Claytonbc1c0672019-12-17 20:37:37 +00002598 UInt4 rgbaMask;
2599 UShort8 value = *Pointer<UShort8>(buffer);
2600 UShort8 packedCol = UShort8(UShort4(As<Int4>(oC.x)), UShort4(As<Int4>(oC.y)));
2601 UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16);
2602 if((rgbaWriteMask & 0xF) != 0xF)
2603 {
2604 UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
2605 rgbaMask = UInt4(tmpMask, tmpMask);
2606 mergedMask &= rgbaMask;
2607 }
2608 *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
Nicolas Capens157ba262019-12-10 17:49:14 -05002609
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05002610 buffer += pitchB;
Nicolas Capens157ba262019-12-10 17:49:14 -05002611
Ben Claytonbc1c0672019-12-17 20:37:37 +00002612 value = *Pointer<UShort8>(buffer);
2613 packedCol = UShort8(UShort4(As<Int4>(oC.z)), UShort4(As<Int4>(oC.w)));
2614 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16);
2615 if((rgbaWriteMask & 0xF) != 0xF)
2616 {
2617 mergedMask &= rgbaMask;
2618 }
2619 *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
Nicolas Capens157ba262019-12-10 17:49:14 -05002620 }
Ben Claytonbc1c0672019-12-17 20:37:37 +00002621 break;
2622 case VK_FORMAT_R8G8B8A8_SINT:
2623 case VK_FORMAT_R8G8B8A8_UINT:
2624 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
2625 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
2626 if((rgbaWriteMask & 0x0000000F) != 0x0)
2627 {
2628 UInt2 value, packedCol, mergedMask;
2629
2630 buffer += 4 * x;
2631
2632 bool isSigned = targetFormat == VK_FORMAT_R8G8B8A8_SINT || targetFormat == VK_FORMAT_A8B8G8R8_SINT_PACK32;
2633
2634 if(isSigned)
2635 {
2636 packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
2637 }
2638 else
2639 {
2640 packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
2641 }
2642 value = *Pointer<UInt2>(buffer, 16);
2643 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
2644 if(rgbaWriteMask != 0xF)
2645 {
2646 mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
2647 }
2648 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
2649
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05002650 buffer += pitchB;
Ben Claytonbc1c0672019-12-17 20:37:37 +00002651
2652 if(isSigned)
2653 {
2654 packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
2655 }
2656 else
2657 {
2658 packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
2659 }
2660 value = *Pointer<UInt2>(buffer, 16);
2661 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
2662 if(rgbaWriteMask != 0xF)
2663 {
2664 mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
2665 }
2666 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
2667 }
2668 break;
2669 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
2670 if((rgbaWriteMask & 0x0000000F) != 0x0)
2671 {
2672 Int2 mergedMask, packedCol, value;
2673 Int4 packed = ((As<Int4>(oC.w) & Int4(0x3)) << 30) |
2674 ((As<Int4>(oC.z) & Int4(0x3ff)) << 20) |
2675 ((As<Int4>(oC.y) & Int4(0x3ff)) << 10) |
2676 ((As<Int4>(oC.x) & Int4(0x3ff)));
2677
2678 buffer += 4 * x;
2679 value = *Pointer<Int2>(buffer, 16);
2680 mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
2681 if(rgbaWriteMask != 0xF)
2682 {
2683 mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
2684 }
2685 *Pointer<Int2>(buffer) = (As<Int2>(packed) & mergedMask) | (value & ~mergedMask);
2686
Alexis Hetuf97fb9d2019-12-17 14:40:19 -05002687 buffer += pitchB;
Ben Claytonbc1c0672019-12-17 20:37:37 +00002688
2689 value = *Pointer<Int2>(buffer, 16);
2690 mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
2691 if(rgbaWriteMask != 0xF)
2692 {
2693 mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
2694 }
2695 *Pointer<Int2>(buffer) = (As<Int2>(Int4(packed.zwww)) & mergedMask) | (value & ~mergedMask);
2696 }
2697 break;
Alexis Hetub8a61bf2020-01-09 15:26:34 -05002698 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
2699 if((bgraWriteMask & 0x0000000F) != 0x0)
2700 {
2701 Int2 mergedMask, packedCol, value;
2702 Int4 packed = ((As<Int4>(oC.w) & Int4(0x3)) << 30) |
2703 ((As<Int4>(oC.x) & Int4(0x3ff)) << 20) |
2704 ((As<Int4>(oC.y) & Int4(0x3ff)) << 10) |
2705 ((As<Int4>(oC.z) & Int4(0x3ff)));
2706
2707 buffer += 4 * x;
2708 value = *Pointer<Int2>(buffer, 16);
2709 mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
2710 if(bgraWriteMask != 0xF)
2711 {
2712 mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[bgraWriteMask][0]));
2713 }
2714 *Pointer<Int2>(buffer) = (As<Int2>(packed) & mergedMask) | (value & ~mergedMask);
2715
2716 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2717
2718 value = *Pointer<Int2>(buffer, 16);
2719 mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
2720 if(bgraWriteMask != 0xF)
2721 {
2722 mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[bgraWriteMask][0]));
2723 }
2724 *Pointer<Int2>(buffer) = (As<Int2>(Int4(packed.zwww)) & mergedMask) | (value & ~mergedMask);
2725 }
2726 break;
Ben Claytonbc1c0672019-12-17 20:37:37 +00002727 default:
Nicolas Capens865f8892020-01-21 14:27:10 -05002728 UNSUPPORTED("VkFormat: %d", int(targetFormat));
Nicolas Capens68a82382018-10-02 13:16:55 -04002729 }
Nicolas Capens68a82382018-10-02 13:16:55 -04002730}
Nicolas Capens157ba262019-12-10 17:49:14 -05002731
2732UShort4 PixelRoutine::convertFixed16(const Float4 &cf, bool saturate)
2733{
2734 return UShort4(cf * Float4(0xFFFF), saturate);
2735}
2736
2737void PixelRoutine::sRGBtoLinear16_12_16(Vector4s &c)
2738{
Ben Claytonbc1c0672019-12-17 20:37:37 +00002739 Pointer<Byte> LUT = constants + OFFSET(Constants, sRGBtoLinear12_16);
Nicolas Capens157ba262019-12-10 17:49:14 -05002740
Nicolas Capens2883de92020-01-27 14:58:14 -05002741 c.x = AddSat(As<UShort4>(c.x), UShort4(0x0007)) >> 4;
2742 c.y = AddSat(As<UShort4>(c.y), UShort4(0x0007)) >> 4;
2743 c.z = AddSat(As<UShort4>(c.z), UShort4(0x0007)) >> 4;
Nicolas Capens157ba262019-12-10 17:49:14 -05002744
2745 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
2746 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
2747 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
2748 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
2749
2750 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
2751 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
2752 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
2753 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
2754
2755 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
2756 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
2757 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
2758 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
2759}
2760
2761void PixelRoutine::linearToSRGB16_12_16(Vector4s &c)
2762{
Nicolas Capens2883de92020-01-27 14:58:14 -05002763 c.x = AddSat(As<UShort4>(c.x), UShort4(0x0007)) >> 4;
2764 c.y = AddSat(As<UShort4>(c.y), UShort4(0x0007)) >> 4;
2765 c.z = AddSat(As<UShort4>(c.z), UShort4(0x0007)) >> 4;
Nicolas Capens157ba262019-12-10 17:49:14 -05002766
2767 linearToSRGB12_16(c);
2768}
2769
2770void PixelRoutine::linearToSRGB12_16(Vector4s &c)
2771{
Ben Claytonbc1c0672019-12-17 20:37:37 +00002772 Pointer<Byte> LUT = constants + OFFSET(Constants, linearToSRGB12_16);
Nicolas Capens157ba262019-12-10 17:49:14 -05002773
2774 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
2775 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
2776 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
2777 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
2778
2779 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
2780 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
2781 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
2782 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
2783
2784 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
2785 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
2786 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
2787 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
2788}
2789
Ben Claytonbc1c0672019-12-17 20:37:37 +00002790Float4 PixelRoutine::sRGBtoLinear(const Float4 &x) // Approximates x^2.2
Nicolas Capens157ba262019-12-10 17:49:14 -05002791{
2792 Float4 linear = x * x;
2793 linear = linear * Float4(0.73f) + linear * x * Float4(0.27f);
2794
2795 return Min(Max(linear, Float4(0.0f)), Float4(1.0f));
2796}
2797
2798} // namespace sw