blob: bb7a3ad1f827a9ebab5698e365d90779dd1aba04 [file] [log] [blame]
Nicolas Capens68a82382018-10-02 13:16:55 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "VertexRoutine.hpp"
16
Nicolas Capens68a82382018-10-02 13:16:55 -040017#include "Constants.hpp"
Nicolas Capens8bcd1742019-06-12 11:41:14 -040018#include "SpirvShader.hpp"
Nicolas Capens1d8c8db2018-11-05 16:30:42 -050019#include "Device/Renderer.hpp"
Ben Claytonbc1c0672019-12-17 20:37:37 +000020#include "Device/Vertex.hpp"
Ben Clayton25e06e02020-02-07 11:19:08 +000021#include "System/Debug.hpp"
Nicolas Capens8bcd1742019-06-12 11:41:14 -040022#include "System/Half.hpp"
Nicolas Capens68a82382018-10-02 13:16:55 -040023
Nicolas Capens157ba262019-12-10 17:49:14 -050024namespace sw {
25
// Constructs the vertex routine generator for the given fixed-function state,
// pipeline layout, and SPIR-V vertex shader.
// NOTE(review): member-initializer order must match the declaration order in
// VertexRoutine.hpp (not visible here) to avoid -Wreorder surprises.
VertexRoutine::VertexRoutine(
    const VertexProcessor::State &state,
    vk::PipelineLayout const *pipelineLayout,
    SpirvShader const *spirvShader)
    : routine(pipelineLayout)
    , state(state)
    , spirvShader(spirvShader)
{
	// Let the shader emit its prolog into 'routine' before any routine code
	// is generated (see SpirvShader::emitProlog).
	spirvShader->emitProlog(&routine);
}
36
37VertexRoutine::~VertexRoutine()
38{
39}
40
// Emits the Reactor code for the whole vertex routine: iterate over the batch
// of vertex indices, shade a SIMD group on each cache miss, and copy each
// resulting vertex from the vertex cache into the 'vertex' output buffer.
void VertexRoutine::generate()
{
	// The vertex cache (tags + vertex storage) lives inside the VertexTask.
	Pointer<Byte> cache = task + OFFSET(VertexTask, vertexCache);
	Pointer<Byte> vertexCache = cache + OFFSET(VertexCache, vertex);
	Pointer<UInt> tagCache = Pointer<UInt>(cache + OFFSET(VertexCache, tag));

	UInt vertexCount = *Pointer<UInt>(task + OFFSET(VertexTask, vertexCount));

	constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, constants));

	// Check the cache one vertex index at a time. If a hit occurs, copy from the cache to the 'vertex' output buffer.
	// On a cache miss, process a SIMD width of consecutive indices from the input batch. They're written to the cache
	// in reverse order to guarantee that the first one doesn't get evicted and can be written out.

	Do
	{
		UInt index = *batch;
		// Direct-mapped cache: low bits of the index select the slot.
		UInt cacheIndex = index & VertexCache::TAG_MASK;

		If(tagCache[cacheIndex] != index)
		{
			// Cache miss: run the vertex shader for a SIMD group starting at
			// this batch position, then store the results (and tags) into the
			// cache. writeCache() stores in reverse order — see comment above.
			readInput(batch);
			program(batch, vertexCount);
			computeClipFlags();
			computeCullMask();

			writeCache(vertexCache, tagCache, batch);
		}

		Pointer<Byte> cacheEntry = vertexCache + cacheIndex * UInt((int)sizeof(Vertex));

		// For points, vertexCount is 1 per primitive, so duplicate vertex for all 3 vertices of the primitive
		for(int i = 0; i < (state.isPoint ? 3 : 1); i++)
		{
			writeVertex(vertex, cacheEntry);
			vertex += sizeof(Vertex);
		}

		// Advance to the next 32-bit index in the batch.
		batch = Pointer<UInt>(Pointer<Byte>(batch) + sizeof(uint32_t));
		vertexCount--;
	}
	Until(vertexCount == 0);

	Return();
}
86
87void VertexRoutine::readInput(Pointer<UInt> &batch)
88{
89 for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i += 4)
Nicolas Capens68a82382018-10-02 13:16:55 -040090 {
Nicolas Capens157ba262019-12-10 17:49:14 -050091 if(spirvShader->inputs[i + 0].Type != SpirvShader::ATTRIBTYPE_UNUSED ||
92 spirvShader->inputs[i + 1].Type != SpirvShader::ATTRIBTYPE_UNUSED ||
93 spirvShader->inputs[i + 2].Type != SpirvShader::ATTRIBTYPE_UNUSED ||
94 spirvShader->inputs[i + 3].Type != SpirvShader::ATTRIBTYPE_UNUSED)
Nicolas Capens68a82382018-10-02 13:16:55 -040095 {
Ben Claytonbc1c0672019-12-17 20:37:37 +000096 Pointer<Byte> input = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, input) + sizeof(void *) * (i / 4));
Nicolas Capens157ba262019-12-10 17:49:14 -050097 UInt stride = *Pointer<UInt>(data + OFFSET(DrawData, stride) + sizeof(uint32_t) * (i / 4));
98 Int baseVertex = *Pointer<Int>(data + OFFSET(DrawData, baseVertex));
99 UInt robustnessSize(0);
100 if(state.robustBufferAccess)
Nicolas Capens68a82382018-10-02 13:16:55 -0400101 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500102 robustnessSize = *Pointer<UInt>(data + OFFSET(DrawData, robustnessSize) + sizeof(uint32_t) * (i / 4));
Nicolas Capens68a82382018-10-02 13:16:55 -0400103 }
104
Nicolas Capens157ba262019-12-10 17:49:14 -0500105 auto value = readStream(input, stride, state.input[i / 4], batch, state.robustBufferAccess, robustnessSize, baseVertex);
106 routine.inputs[i + 0] = value.x;
107 routine.inputs[i + 1] = value.y;
108 routine.inputs[i + 2] = value.z;
109 routine.inputs[i + 3] = value.w;
Ben Clayton9ad035b2019-08-09 23:44:09 +0100110 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400111 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400112}
Nicolas Capens157ba262019-12-10 17:49:14 -0500113
114void VertexRoutine::computeClipFlags()
115{
116 auto it = spirvShader->outputBuiltins.find(spv::BuiltInPosition);
117 assert(it != spirvShader->outputBuiltins.end());
118 assert(it->second.SizeInComponents == 4);
119 auto &pos = routine.getVariable(it->second.Id);
120 auto posX = pos[it->second.FirstComponent + 0];
121 auto posY = pos[it->second.FirstComponent + 1];
122 auto posZ = pos[it->second.FirstComponent + 2];
123 auto posW = pos[it->second.FirstComponent + 3];
124
125 Int4 maxX = CmpLT(posW, posX);
126 Int4 maxY = CmpLT(posW, posY);
127 Int4 maxZ = CmpLT(posW, posZ);
128 Int4 minX = CmpNLE(-posW, posX);
129 Int4 minY = CmpNLE(-posW, posY);
130 Int4 minZ = CmpNLE(Float4(0.0f), posZ);
131
Ben Claytonbc1c0672019-12-17 20:37:37 +0000132 clipFlags = Pointer<Int>(constants + OFFSET(Constants, maxX))[SignMask(maxX)];
133 clipFlags |= Pointer<Int>(constants + OFFSET(Constants, maxY))[SignMask(maxY)];
134 clipFlags |= Pointer<Int>(constants + OFFSET(Constants, maxZ))[SignMask(maxZ)];
135 clipFlags |= Pointer<Int>(constants + OFFSET(Constants, minX))[SignMask(minX)];
136 clipFlags |= Pointer<Int>(constants + OFFSET(Constants, minY))[SignMask(minY)];
137 clipFlags |= Pointer<Int>(constants + OFFSET(Constants, minZ))[SignMask(minZ)];
Nicolas Capens157ba262019-12-10 17:49:14 -0500138
Ben Claytonbc1c0672019-12-17 20:37:37 +0000139 Int4 finiteX = CmpLE(Abs(posX), *Pointer<Float4>(constants + OFFSET(Constants, maxPos)));
140 Int4 finiteY = CmpLE(Abs(posY), *Pointer<Float4>(constants + OFFSET(Constants, maxPos)));
141 Int4 finiteZ = CmpLE(Abs(posZ), *Pointer<Float4>(constants + OFFSET(Constants, maxPos)));
Nicolas Capens157ba262019-12-10 17:49:14 -0500142
143 Int4 finiteXYZ = finiteX & finiteY & finiteZ;
Ben Claytonbc1c0672019-12-17 20:37:37 +0000144 clipFlags |= Pointer<Int>(constants + OFFSET(Constants, fini))[SignMask(finiteXYZ)];
Nicolas Capens157ba262019-12-10 17:49:14 -0500145}
146
147void VertexRoutine::computeCullMask()
148{
149 cullMask = Int(15);
150
151 auto it = spirvShader->outputBuiltins.find(spv::BuiltInCullDistance);
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500152 if(it != spirvShader->outputBuiltins.end())
Nicolas Capens157ba262019-12-10 17:49:14 -0500153 {
154 auto count = spirvShader->getNumOutputCullDistances();
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500155 for(uint32_t i = 0; i < count; i++)
Nicolas Capens157ba262019-12-10 17:49:14 -0500156 {
157 auto const &distance = routine.getVariable(it->second.Id)[it->second.FirstComponent + i];
158 auto mask = SignMask(CmpGE(distance, SIMD::Float(0)));
159 cullMask &= mask;
160 }
161 }
162}
163
// Gathers one vertex attribute for four consecutive vertices from a vertex
// buffer and converts it to a transposed Vector4f (one Float4 per component,
// one lane per vertex).
//
// buffer             - base pointer of the bound vertex buffer
// stride             - byte stride between vertices
// stream             - attribute description (format, attribType)
// batch              - pointer to four consecutive 32-bit vertex indices
// robustBufferAccess - when true, out-of-range fetches read zeros instead
// robustnessSize     - size in bytes of the valid bound range
// baseVertex         - added to each index before addressing
//
// Returns the four fetched attribute values; missing components are filled
// with the Vulkan defaults (0, 0, 0, 1).
Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, Pointer<UInt> &batch,
                                   bool robustBufferAccess, UInt &robustnessSize, Int baseVertex)
{
	Vector4f v;
	// Because of the following rule in the Vulkan spec, we do not care if a very large negative
	// baseVertex would overflow all the way back into a valid region of the index buffer:
	// "Out-of-bounds buffer loads will return any of the following values :
	//  - Values from anywhere within the memory range(s) bound to the buffer (possibly including
	//    bytes of memory past the end of the buffer, up to the end of the bound range)."
	UInt4 offsets = (*Pointer<UInt4>(As<Pointer<UInt4>>(batch)) + As<UInt4>(Int4(baseVertex))) * UInt4(stride);

	// One source pointer per vertex of the SIMD group.
	Pointer<Byte> source0 = buffer + offsets.x;
	Pointer<Byte> source1 = buffer + offsets.y;
	Pointer<Byte> source2 = buffer + offsets.z;
	Pointer<Byte> source3 = buffer + offsets.w;

	vk::Format format(stream.format);

	UInt4 zero(0);
	if(robustBufferAccess)
	{
		// Redirect any fetch whose end would pass the bound range to a local
		// zero word, so out-of-bounds reads yield zeros.
		// TODO(b/141124876): Optimize for wide-vector gather operations.
		UInt4 limits = offsets + UInt4(format.bytes());
		Pointer<Byte> zeroSource = As<Pointer<Byte>>(&zero);
		source0 = IfThenElse(limits.x <= robustnessSize, source0, zeroSource);
		source1 = IfThenElse(limits.y <= robustnessSize, source1, zeroSource);
		source2 = IfThenElse(limits.z <= robustnessSize, source2, zeroSource);
		source3 = IfThenElse(limits.w <= robustnessSize, source3, zeroSource);
	}

	int componentCount = format.componentCount();
	bool normalized = !format.isUnnormalizedInteger();
	bool isNativeFloatAttrib = (stream.attribType == SpirvShader::ATTRIBTYPE_FLOAT) || normalized;
	bool bgra = false;  // set by B8G8R8A8 / A2R10G10B10 cases; triggers the R/B swap below

	// Decode per vertex-input format. Each case loads four vertices' worth of
	// data, transposes to structure-of-arrays, and rescales where needed.
	switch(stream.format)
	{
		case VK_FORMAT_R32_SFLOAT:
		case VK_FORMAT_R32G32_SFLOAT:
		case VK_FORMAT_R32G32B32_SFLOAT:
		case VK_FORMAT_R32G32B32A32_SFLOAT:
		{
			if(componentCount == 0)
			{
				// Null stream, all default components
			}
			else
			{
				if(componentCount == 1)
				{
					// Scalar loads avoid reading past a 4-byte attribute.
					v.x.x = *Pointer<Float>(source0);
					v.x.y = *Pointer<Float>(source1);
					v.x.z = *Pointer<Float>(source2);
					v.x.w = *Pointer<Float>(source3);
				}
				else
				{
					v.x = *Pointer<Float4>(source0);
					v.y = *Pointer<Float4>(source1);
					v.z = *Pointer<Float4>(source2);
					v.w = *Pointer<Float4>(source3);

					transpose4xN(v.x, v.y, v.z, v.w, componentCount);
				}
			}
		}
		break;
		case VK_FORMAT_B8G8R8A8_UNORM:
			bgra = true;
			// [[fallthrough]]
		case VK_FORMAT_R8_UNORM:
		case VK_FORMAT_R8G8_UNORM:
		case VK_FORMAT_R8G8B8A8_UNORM:
		case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
			// Unsigned bytes -> float, then normalize to [0, 1] via 1/255.
			v.x = Float4(*Pointer<Byte4>(source0));
			v.y = Float4(*Pointer<Byte4>(source1));
			v.z = Float4(*Pointer<Byte4>(source2));
			v.w = Float4(*Pointer<Byte4>(source3));

			transpose4xN(v.x, v.y, v.z, v.w, componentCount);

			if(componentCount >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
			if(componentCount >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
			if(componentCount >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
			if(componentCount >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
			break;
		case VK_FORMAT_R8_UINT:
		case VK_FORMAT_R8G8_UINT:
		case VK_FORMAT_R8G8B8A8_UINT:
		case VK_FORMAT_A8B8G8R8_UINT_PACK32:
			// Zero-extend to 32-bit and reinterpret as float lanes (bit pattern preserved).
			v.x = As<Float4>(Int4(*Pointer<Byte4>(source0)));
			v.y = As<Float4>(Int4(*Pointer<Byte4>(source1)));
			v.z = As<Float4>(Int4(*Pointer<Byte4>(source2)));
			v.w = As<Float4>(Int4(*Pointer<Byte4>(source3)));

			transpose4xN(v.x, v.y, v.z, v.w, componentCount);
			break;
		case VK_FORMAT_R8_SNORM:
		case VK_FORMAT_R8G8_SNORM:
		case VK_FORMAT_R8G8B8A8_SNORM:
		case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
			// Signed bytes -> float, scale by 1/127 and clamp to -1.0 (since
			// -128 * 1/127 would undershoot the SNORM minimum).
			v.x = Float4(*Pointer<SByte4>(source0));
			v.y = Float4(*Pointer<SByte4>(source1));
			v.z = Float4(*Pointer<SByte4>(source2));
			v.w = Float4(*Pointer<SByte4>(source3));

			transpose4xN(v.x, v.y, v.z, v.w, componentCount);

			if(componentCount >= 1) v.x = Max(v.x * *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte)), Float4(-1.0f));
			if(componentCount >= 2) v.y = Max(v.y * *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte)), Float4(-1.0f));
			if(componentCount >= 3) v.z = Max(v.z * *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte)), Float4(-1.0f));
			if(componentCount >= 4) v.w = Max(v.w * *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte)), Float4(-1.0f));
			break;
		case VK_FORMAT_R8_SINT:
		case VK_FORMAT_R8G8_SINT:
		case VK_FORMAT_R8G8B8A8_SINT:
		case VK_FORMAT_A8B8G8R8_SINT_PACK32:
			// Sign-extend to 32-bit and reinterpret as float lanes.
			v.x = As<Float4>(Int4(*Pointer<SByte4>(source0)));
			v.y = As<Float4>(Int4(*Pointer<SByte4>(source1)));
			v.z = As<Float4>(Int4(*Pointer<SByte4>(source2)));
			v.w = As<Float4>(Int4(*Pointer<SByte4>(source3)));

			transpose4xN(v.x, v.y, v.z, v.w, componentCount);
			break;
		case VK_FORMAT_R16_SNORM:
		case VK_FORMAT_R16G16_SNORM:
		case VK_FORMAT_R16G16B16A16_SNORM:
			// Signed shorts -> float, scale by 1/32767, clamp to -1.0.
			v.x = Float4(*Pointer<Short4>(source0));
			v.y = Float4(*Pointer<Short4>(source1));
			v.z = Float4(*Pointer<Short4>(source2));
			v.w = Float4(*Pointer<Short4>(source3));

			transpose4xN(v.x, v.y, v.z, v.w, componentCount);

			if(componentCount >= 1) v.x = Max(v.x * *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort)), Float4(-1.0f));
			if(componentCount >= 2) v.y = Max(v.y * *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort)), Float4(-1.0f));
			if(componentCount >= 3) v.z = Max(v.z * *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort)), Float4(-1.0f));
			if(componentCount >= 4) v.w = Max(v.w * *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort)), Float4(-1.0f));
			break;
		case VK_FORMAT_R16_SINT:
		case VK_FORMAT_R16G16_SINT:
		case VK_FORMAT_R16G16B16A16_SINT:
			// Sign-extend to 32-bit and reinterpret as float lanes.
			v.x = As<Float4>(Int4(*Pointer<Short4>(source0)));
			v.y = As<Float4>(Int4(*Pointer<Short4>(source1)));
			v.z = As<Float4>(Int4(*Pointer<Short4>(source2)));
			v.w = As<Float4>(Int4(*Pointer<Short4>(source3)));

			transpose4xN(v.x, v.y, v.z, v.w, componentCount);
			break;
		case VK_FORMAT_R16_UNORM:
		case VK_FORMAT_R16G16_UNORM:
		case VK_FORMAT_R16G16B16A16_UNORM:
			// Unsigned shorts -> float, normalize to [0, 1] via 1/65535.
			v.x = Float4(*Pointer<UShort4>(source0));
			v.y = Float4(*Pointer<UShort4>(source1));
			v.z = Float4(*Pointer<UShort4>(source2));
			v.w = Float4(*Pointer<UShort4>(source3));

			transpose4xN(v.x, v.y, v.z, v.w, componentCount);

			if(componentCount >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort));
			if(componentCount >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort));
			if(componentCount >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort));
			if(componentCount >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort));
			break;
		case VK_FORMAT_R16_UINT:
		case VK_FORMAT_R16G16_UINT:
		case VK_FORMAT_R16G16B16A16_UINT:
			// Zero-extend to 32-bit and reinterpret as float lanes.
			v.x = As<Float4>(Int4(*Pointer<UShort4>(source0)));
			v.y = As<Float4>(Int4(*Pointer<UShort4>(source1)));
			v.z = As<Float4>(Int4(*Pointer<UShort4>(source2)));
			v.w = As<Float4>(Int4(*Pointer<UShort4>(source3)));

			transpose4xN(v.x, v.y, v.z, v.w, componentCount);
			break;
		case VK_FORMAT_R32_SINT:
		case VK_FORMAT_R32G32_SINT:
		case VK_FORMAT_R32G32B32_SINT:
		case VK_FORMAT_R32G32B32A32_SINT:
			// 32-bit integers are loaded as raw bits; the shader interprets them.
			v.x = *Pointer<Float4>(source0);
			v.y = *Pointer<Float4>(source1);
			v.z = *Pointer<Float4>(source2);
			v.w = *Pointer<Float4>(source3);

			transpose4xN(v.x, v.y, v.z, v.w, componentCount);
			break;
		case VK_FORMAT_R32_UINT:
		case VK_FORMAT_R32G32_UINT:
		case VK_FORMAT_R32G32B32_UINT:
		case VK_FORMAT_R32G32B32A32_UINT:
			// Same raw-bits treatment as the signed 32-bit case above.
			v.x = *Pointer<Float4>(source0);
			v.y = *Pointer<Float4>(source1);
			v.z = *Pointer<Float4>(source2);
			v.w = *Pointer<Float4>(source3);

			transpose4xN(v.x, v.y, v.z, v.w, componentCount);
			break;
		case VK_FORMAT_R16_SFLOAT:
		case VK_FORMAT_R16G16_SFLOAT:
		case VK_FORMAT_R16G16B16A16_SFLOAT:
		{
			// Half floats are converted through the 65536-entry half2float
			// lookup table in the constants block (indexed by the raw 16 bits).
			if(componentCount >= 1)
			{
				UShort x0 = *Pointer<UShort>(source0 + 0);
				UShort x1 = *Pointer<UShort>(source1 + 0);
				UShort x2 = *Pointer<UShort>(source2 + 0);
				UShort x3 = *Pointer<UShort>(source3 + 0);

				v.x.x = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(x0) * 4);
				v.x.y = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(x1) * 4);
				v.x.z = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(x2) * 4);
				v.x.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(x3) * 4);
			}

			if(componentCount >= 2)
			{
				UShort y0 = *Pointer<UShort>(source0 + 2);
				UShort y1 = *Pointer<UShort>(source1 + 2);
				UShort y2 = *Pointer<UShort>(source2 + 2);
				UShort y3 = *Pointer<UShort>(source3 + 2);

				v.y.x = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(y0) * 4);
				v.y.y = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(y1) * 4);
				v.y.z = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(y2) * 4);
				v.y.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(y3) * 4);
			}

			if(componentCount >= 3)
			{
				UShort z0 = *Pointer<UShort>(source0 + 4);
				UShort z1 = *Pointer<UShort>(source1 + 4);
				UShort z2 = *Pointer<UShort>(source2 + 4);
				UShort z3 = *Pointer<UShort>(source3 + 4);

				v.z.x = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(z0) * 4);
				v.z.y = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(z1) * 4);
				v.z.z = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(z2) * 4);
				v.z.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(z3) * 4);
			}

			if(componentCount >= 4)
			{
				UShort w0 = *Pointer<UShort>(source0 + 6);
				UShort w1 = *Pointer<UShort>(source1 + 6);
				UShort w2 = *Pointer<UShort>(source2 + 6);
				UShort w3 = *Pointer<UShort>(source3 + 6);

				v.w.x = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(w0) * 4);
				v.w.y = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(w1) * 4);
				v.w.z = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(w2) * 4);
				v.w.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(w3) * 4);
			}
		}
		break;
		case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
			bgra = true;
			// [[fallthrough]]
		case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
		{
			// Unpack 10:10:10:2 signed fields via shift pairs (arithmetic
			// right shift performs the sign extension), then SNORM-scale.
			Int4 src;
			src = Insert(src, *Pointer<Int>(source0), 0);
			src = Insert(src, *Pointer<Int>(source1), 1);
			src = Insert(src, *Pointer<Int>(source2), 2);
			src = Insert(src, *Pointer<Int>(source3), 3);
			v.x = Float4((src << 22) >> 22);
			v.y = Float4((src << 12) >> 22);
			v.z = Float4((src << 02) >> 22);
			v.w = Float4(src >> 30);

			v.x = Max(v.x * Float4(1.0f / 0x1FF), Float4(-1.0f));
			v.y = Max(v.y * Float4(1.0f / 0x1FF), Float4(-1.0f));
			v.z = Max(v.z * Float4(1.0f / 0x1FF), Float4(-1.0f));
			v.w = Max(v.w, Float4(-1.0f));
		}
		break;
		case VK_FORMAT_A2R10G10B10_SINT_PACK32:
			bgra = true;
			// [[fallthrough]]
		case VK_FORMAT_A2B10G10R10_SINT_PACK32:
		{
			// Sign-extended 10:10:10:2 integer fields, kept as raw bits.
			Int4 src;
			src = Insert(src, *Pointer<Int>(source0), 0);
			src = Insert(src, *Pointer<Int>(source1), 1);
			src = Insert(src, *Pointer<Int>(source2), 2);
			src = Insert(src, *Pointer<Int>(source3), 3);
			v.x = As<Float4>((src << 22) >> 22);
			v.y = As<Float4>((src << 12) >> 22);
			v.z = As<Float4>((src << 02) >> 22);
			v.w = As<Float4>(src >> 30);
		}
		break;
		case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
			bgra = true;
			// [[fallthrough]]
		case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
		{
			// Unsigned 10:10:10:2 fields normalized to [0, 1].
			Int4 src;
			src = Insert(src, *Pointer<Int>(source0), 0);
			src = Insert(src, *Pointer<Int>(source1), 1);
			src = Insert(src, *Pointer<Int>(source2), 2);
			src = Insert(src, *Pointer<Int>(source3), 3);

			v.x = Float4(src & Int4(0x3FF));
			v.y = Float4((src >> 10) & Int4(0x3FF));
			v.z = Float4((src >> 20) & Int4(0x3FF));
			v.w = Float4((src >> 30) & Int4(0x3));

			v.x *= Float4(1.0f / 0x3FF);
			v.y *= Float4(1.0f / 0x3FF);
			v.z *= Float4(1.0f / 0x3FF);
			v.w *= Float4(1.0f / 0x3);
		}
		break;
		case VK_FORMAT_A2R10G10B10_UINT_PACK32:
			bgra = true;
			// [[fallthrough]]
		case VK_FORMAT_A2B10G10R10_UINT_PACK32:
		{
			// Unsigned 10:10:10:2 integer fields, kept as raw bits.
			Int4 src;
			src = Insert(src, *Pointer<Int>(source0), 0);
			src = Insert(src, *Pointer<Int>(source1), 1);
			src = Insert(src, *Pointer<Int>(source2), 2);
			src = Insert(src, *Pointer<Int>(source3), 3);

			v.x = As<Float4>(src & Int4(0x3FF));
			v.y = As<Float4>((src >> 10) & Int4(0x3FF));
			v.z = As<Float4>((src >> 20) & Int4(0x3FF));
			v.w = As<Float4>((src >> 30) & Int4(0x3));
		}
		break;
		default:
			UNSUPPORTED("stream.format %d", int(stream.format));
	}

	if(bgra)
	{
		// Swap red and blue
		Float4 t = v.x;
		v.x = v.z;
		v.z = t;
	}

	// Fill components the format doesn't supply with the Vulkan defaults:
	// 0 for x/y/z and 1 for w (float 1.0 or integer 1 depending on type).
	if(componentCount < 1) v.x = Float4(0.0f);
	if(componentCount < 2) v.y = Float4(0.0f);
	if(componentCount < 3) v.z = Float4(0.0f);
	if(componentCount < 4) v.w = isNativeFloatAttrib ? As<Float4>(Float4(1.0f)) : As<Float4>(Int4(1));

	return v;
}
512
513void VertexRoutine::writeCache(Pointer<Byte> &vertexCache, Pointer<UInt> &tagCache, Pointer<UInt> &batch)
514{
515 UInt index0 = batch[0];
516 UInt index1 = batch[1];
517 UInt index2 = batch[2];
518 UInt index3 = batch[3];
519
520 UInt cacheIndex0 = index0 & VertexCache::TAG_MASK;
521 UInt cacheIndex1 = index1 & VertexCache::TAG_MASK;
522 UInt cacheIndex2 = index2 & VertexCache::TAG_MASK;
523 UInt cacheIndex3 = index3 & VertexCache::TAG_MASK;
524
525 // We processed a SIMD group of vertices, with the first one being the one that missed the cache tag check.
526 // Write them out in reverse order here and below to ensure the first one is now guaranteed to be in the cache.
527 tagCache[cacheIndex3] = index3;
528 tagCache[cacheIndex2] = index2;
529 tagCache[cacheIndex1] = index1;
530 tagCache[cacheIndex0] = index0;
531
532 auto it = spirvShader->outputBuiltins.find(spv::BuiltInPosition);
533 assert(it != spirvShader->outputBuiltins.end());
534 assert(it->second.SizeInComponents == 4);
535 auto &position = routine.getVariable(it->second.Id);
536
537 Vector4f pos;
538 pos.x = position[it->second.FirstComponent + 0];
539 pos.y = position[it->second.FirstComponent + 1];
540 pos.z = position[it->second.FirstComponent + 2];
541 pos.w = position[it->second.FirstComponent + 3];
542
543 // Projection and viewport transform.
544 Float4 w = As<Float4>(As<Int4>(pos.w) | (As<Int4>(CmpEQ(pos.w, Float4(0.0f))) & As<Int4>(Float4(1.0f))));
545 Float4 rhw = Float4(1.0f) / w;
546
547 Vector4f proj;
Ben Claytonbc1c0672019-12-17 20:37:37 +0000548 proj.x = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData, X0xF)) + pos.x * rhw * *Pointer<Float4>(data + OFFSET(DrawData, WxF))));
549 proj.y = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData, Y0xF)) + pos.y * rhw * *Pointer<Float4>(data + OFFSET(DrawData, HxF))));
Nicolas Capens157ba262019-12-10 17:49:14 -0500550 proj.z = pos.z * rhw;
551 proj.w = rhw;
552
553 transpose4x4(pos.x, pos.y, pos.z, pos.w);
554
Ben Claytonbc1c0672019-12-17 20:37:37 +0000555 *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, position), 16) = pos.w;
556 *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, position), 16) = pos.z;
557 *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, position), 16) = pos.y;
558 *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, position), 16) = pos.x;
Nicolas Capens157ba262019-12-10 17:49:14 -0500559
560 it = spirvShader->outputBuiltins.find(spv::BuiltInPointSize);
561 if(it != spirvShader->outputBuiltins.end())
562 {
563 ASSERT(it->second.SizeInComponents == 1);
564 auto psize = routine.getVariable(it->second.Id)[it->second.FirstComponent];
565
Ben Claytonbc1c0672019-12-17 20:37:37 +0000566 *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, pointSize)) = Extract(psize, 3);
567 *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, pointSize)) = Extract(psize, 2);
568 *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, pointSize)) = Extract(psize, 1);
569 *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, pointSize)) = Extract(psize, 0);
Nicolas Capens157ba262019-12-10 17:49:14 -0500570 }
571
572 it = spirvShader->outputBuiltins.find(spv::BuiltInClipDistance);
573 if(it != spirvShader->outputBuiltins.end())
574 {
575 auto count = spirvShader->getNumOutputClipDistances();
576 for(unsigned int i = 0; i < count; i++)
577 {
578 auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + i];
Ben Claytonbc1c0672019-12-17 20:37:37 +0000579 *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, clipDistance[i])) = Extract(dist, 3);
580 *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, clipDistance[i])) = Extract(dist, 2);
581 *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, clipDistance[i])) = Extract(dist, 1);
582 *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, clipDistance[i])) = Extract(dist, 0);
Nicolas Capens157ba262019-12-10 17:49:14 -0500583 }
584 }
585
586 it = spirvShader->outputBuiltins.find(spv::BuiltInCullDistance);
587 if(it != spirvShader->outputBuiltins.end())
588 {
589 auto count = spirvShader->getNumOutputCullDistances();
590 for(unsigned int i = 0; i < count; i++)
591 {
592 auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + i];
Ben Claytonbc1c0672019-12-17 20:37:37 +0000593 *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, cullDistance[i])) = Extract(dist, 3);
594 *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, cullDistance[i])) = Extract(dist, 2);
595 *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, cullDistance[i])) = Extract(dist, 1);
596 *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, cullDistance[i])) = Extract(dist, 0);
Nicolas Capens157ba262019-12-10 17:49:14 -0500597 }
598 }
599
Ben Claytonbc1c0672019-12-17 20:37:37 +0000600 *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, clipFlags)) = (clipFlags >> 24) & 0x0000000FF;
601 *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, clipFlags)) = (clipFlags >> 16) & 0x0000000FF;
602 *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, clipFlags)) = (clipFlags >> 8) & 0x0000000FF;
603 *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, clipFlags)) = (clipFlags >> 0) & 0x0000000FF;
Nicolas Capens157ba262019-12-10 17:49:14 -0500604
Ben Claytonbc1c0672019-12-17 20:37:37 +0000605 *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, cullMask)) = -((cullMask >> 3) & 1);
606 *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, cullMask)) = -((cullMask >> 2) & 1);
607 *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, cullMask)) = -((cullMask >> 1) & 1);
608 *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, cullMask)) = -((cullMask >> 0) & 1);
Nicolas Capens157ba262019-12-10 17:49:14 -0500609
610 transpose4x4(proj.x, proj.y, proj.z, proj.w);
611
Ben Claytonbc1c0672019-12-17 20:37:37 +0000612 *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, projected), 16) = proj.w;
613 *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, projected), 16) = proj.z;
614 *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, projected), 16) = proj.y;
615 *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, projected), 16) = proj.x;
Nicolas Capens157ba262019-12-10 17:49:14 -0500616
617 for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i += 4)
618 {
619 if(spirvShader->outputs[i + 0].Type != SpirvShader::ATTRIBTYPE_UNUSED ||
620 spirvShader->outputs[i + 1].Type != SpirvShader::ATTRIBTYPE_UNUSED ||
621 spirvShader->outputs[i + 2].Type != SpirvShader::ATTRIBTYPE_UNUSED ||
622 spirvShader->outputs[i + 3].Type != SpirvShader::ATTRIBTYPE_UNUSED)
623 {
624 Vector4f v;
625 v.x = routine.outputs[i + 0];
626 v.y = routine.outputs[i + 1];
627 v.z = routine.outputs[i + 2];
628 v.w = routine.outputs[i + 3];
629
630 transpose4x4(v.x, v.y, v.z, v.w);
631
Ben Claytonbc1c0672019-12-17 20:37:37 +0000632 *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, v[i]), 16) = v.w;
633 *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, v[i]), 16) = v.z;
634 *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, v[i]), 16) = v.y;
635 *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, v[i]), 16) = v.x;
Nicolas Capens157ba262019-12-10 17:49:14 -0500636 }
637 }
638}
639
640void VertexRoutine::writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cacheEntry)
641{
Ben Claytonbc1c0672019-12-17 20:37:37 +0000642 *Pointer<Int4>(vertex + OFFSET(Vertex, position)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex, position));
643 *Pointer<Int>(vertex + OFFSET(Vertex, pointSize)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, pointSize));
Nicolas Capens157ba262019-12-10 17:49:14 -0500644
Ben Claytonbc1c0672019-12-17 20:37:37 +0000645 *Pointer<Int>(vertex + OFFSET(Vertex, clipFlags)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, clipFlags));
646 *Pointer<Int>(vertex + OFFSET(Vertex, cullMask)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, cullMask));
647 *Pointer<Int4>(vertex + OFFSET(Vertex, projected)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex, projected));
Nicolas Capens157ba262019-12-10 17:49:14 -0500648
649 for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i++)
650 {
651 if(spirvShader->outputs[i].Type != SpirvShader::ATTRIBTYPE_UNUSED)
652 {
653 *Pointer<Int>(vertex + OFFSET(Vertex, v[i]), 4) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, v[i]), 4);
654 }
655 }
656 for(unsigned int i = 0; i < spirvShader->getNumOutputClipDistances(); i++)
657 {
658 *Pointer<Float>(vertex + OFFSET(Vertex, clipDistance[i]), 4) = *Pointer<Float>(cacheEntry + OFFSET(Vertex, clipDistance[i]), 4);
659 }
660 for(unsigned int i = 0; i < spirvShader->getNumOutputCullDistances(); i++)
661 {
662 *Pointer<Float>(vertex + OFFSET(Vertex, cullDistance[i]), 4) = *Pointer<Float>(cacheEntry + OFFSET(Vertex, cullDistance[i]), 4);
663 }
664}
665
666} // namespace sw