Nicolas Capens | 68a8238 | 2018-10-02 13:16:55 -0400 | [diff] [blame] | 1 | // Copyright 2016 The SwiftShader Authors. All Rights Reserved. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | #include "VertexRoutine.hpp" |
| 16 | |
Nicolas Capens | 68a8238 | 2018-10-02 13:16:55 -0400 | [diff] [blame] | 17 | #include "Constants.hpp" |
Nicolas Capens | 8bcd174 | 2019-06-12 11:41:14 -0400 | [diff] [blame] | 18 | #include "SpirvShader.hpp" |
Nicolas Capens | 1d8c8db | 2018-11-05 16:30:42 -0500 | [diff] [blame] | 19 | #include "Device/Renderer.hpp" |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 20 | #include "Device/Vertex.hpp" |
Ben Clayton | 25e06e0 | 2020-02-07 11:19:08 +0000 | [diff] [blame] | 21 | #include "System/Debug.hpp" |
Nicolas Capens | 8bcd174 | 2019-06-12 11:41:14 -0400 | [diff] [blame] | 22 | #include "System/Half.hpp" |
Nicolas Capens | 68a8238 | 2018-10-02 13:16:55 -0400 | [diff] [blame] | 23 | |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 24 | namespace sw { |
| 25 | |
| 26 | VertexRoutine::VertexRoutine( |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 27 | const VertexProcessor::State &state, |
| 28 | vk::PipelineLayout const *pipelineLayout, |
| 29 | SpirvShader const *spirvShader) |
| 30 | : routine(pipelineLayout) |
| 31 | , state(state) |
| 32 | , spirvShader(spirvShader) |
Nicolas Capens | 68a8238 | 2018-10-02 13:16:55 -0400 | [diff] [blame] | 33 | { |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 34 | spirvShader->emitProlog(&routine); |
| 35 | } |
| 36 | |
| 37 | VertexRoutine::~VertexRoutine() |
| 38 | { |
| 39 | } |
| 40 | |
| 41 | void VertexRoutine::generate() |
| 42 | { |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 43 | Pointer<Byte> cache = task + OFFSET(VertexTask, vertexCache); |
| 44 | Pointer<Byte> vertexCache = cache + OFFSET(VertexCache, vertex); |
| 45 | Pointer<UInt> tagCache = Pointer<UInt>(cache + OFFSET(VertexCache, tag)); |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 46 | |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 47 | UInt vertexCount = *Pointer<UInt>(task + OFFSET(VertexTask, vertexCount)); |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 48 | |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 49 | constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, constants)); |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 50 | |
| 51 | // Check the cache one vertex index at a time. If a hit occurs, copy from the cache to the 'vertex' output buffer. |
| 52 | // On a cache miss, process a SIMD width of consecutive indices from the input batch. They're written to the cache |
| 53 | // in reverse order to guarantee that the first one doesn't get evicted and can be written out. |
| 54 | |
| 55 | Do |
Nicolas Capens | 68a8238 | 2018-10-02 13:16:55 -0400 | [diff] [blame] | 56 | { |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 57 | UInt index = *batch; |
| 58 | UInt cacheIndex = index & VertexCache::TAG_MASK; |
| 59 | |
| 60 | If(tagCache[cacheIndex] != index) |
| 61 | { |
| 62 | readInput(batch); |
| 63 | program(batch, vertexCount); |
| 64 | computeClipFlags(); |
| 65 | computeCullMask(); |
| 66 | |
| 67 | writeCache(vertexCache, tagCache, batch); |
| 68 | } |
| 69 | |
| 70 | Pointer<Byte> cacheEntry = vertexCache + cacheIndex * UInt((int)sizeof(Vertex)); |
| 71 | |
| 72 | // For points, vertexCount is 1 per primitive, so duplicate vertex for all 3 vertices of the primitive |
| 73 | for(int i = 0; i < (state.isPoint ? 3 : 1); i++) |
| 74 | { |
| 75 | writeVertex(vertex, cacheEntry); |
| 76 | vertex += sizeof(Vertex); |
| 77 | } |
| 78 | |
| 79 | batch = Pointer<UInt>(Pointer<Byte>(batch) + sizeof(uint32_t)); |
| 80 | vertexCount--; |
Nicolas Capens | 68a8238 | 2018-10-02 13:16:55 -0400 | [diff] [blame] | 81 | } |
Ben Clayton | 71af593 | 2019-12-11 10:05:24 +0000 | [diff] [blame] | 82 | Until(vertexCount == 0); |
Nicolas Capens | 68a8238 | 2018-10-02 13:16:55 -0400 | [diff] [blame] | 83 | |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 84 | Return(); |
| 85 | } |
| 86 | |
| 87 | void VertexRoutine::readInput(Pointer<UInt> &batch) |
| 88 | { |
| 89 | for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i += 4) |
Nicolas Capens | 68a8238 | 2018-10-02 13:16:55 -0400 | [diff] [blame] | 90 | { |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 91 | if(spirvShader->inputs[i + 0].Type != SpirvShader::ATTRIBTYPE_UNUSED || |
| 92 | spirvShader->inputs[i + 1].Type != SpirvShader::ATTRIBTYPE_UNUSED || |
| 93 | spirvShader->inputs[i + 2].Type != SpirvShader::ATTRIBTYPE_UNUSED || |
| 94 | spirvShader->inputs[i + 3].Type != SpirvShader::ATTRIBTYPE_UNUSED) |
Nicolas Capens | 68a8238 | 2018-10-02 13:16:55 -0400 | [diff] [blame] | 95 | { |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 96 | Pointer<Byte> input = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, input) + sizeof(void *) * (i / 4)); |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 97 | UInt stride = *Pointer<UInt>(data + OFFSET(DrawData, stride) + sizeof(uint32_t) * (i / 4)); |
| 98 | Int baseVertex = *Pointer<Int>(data + OFFSET(DrawData, baseVertex)); |
| 99 | UInt robustnessSize(0); |
| 100 | if(state.robustBufferAccess) |
Nicolas Capens | 68a8238 | 2018-10-02 13:16:55 -0400 | [diff] [blame] | 101 | { |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 102 | robustnessSize = *Pointer<UInt>(data + OFFSET(DrawData, robustnessSize) + sizeof(uint32_t) * (i / 4)); |
Nicolas Capens | 68a8238 | 2018-10-02 13:16:55 -0400 | [diff] [blame] | 103 | } |
| 104 | |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 105 | auto value = readStream(input, stride, state.input[i / 4], batch, state.robustBufferAccess, robustnessSize, baseVertex); |
| 106 | routine.inputs[i + 0] = value.x; |
| 107 | routine.inputs[i + 1] = value.y; |
| 108 | routine.inputs[i + 2] = value.z; |
| 109 | routine.inputs[i + 3] = value.w; |
Ben Clayton | 9ad035b | 2019-08-09 23:44:09 +0100 | [diff] [blame] | 110 | } |
Nicolas Capens | 68a8238 | 2018-10-02 13:16:55 -0400 | [diff] [blame] | 111 | } |
Nicolas Capens | 68a8238 | 2018-10-02 13:16:55 -0400 | [diff] [blame] | 112 | } |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 113 | |
| 114 | void VertexRoutine::computeClipFlags() |
| 115 | { |
| 116 | auto it = spirvShader->outputBuiltins.find(spv::BuiltInPosition); |
| 117 | assert(it != spirvShader->outputBuiltins.end()); |
| 118 | assert(it->second.SizeInComponents == 4); |
| 119 | auto &pos = routine.getVariable(it->second.Id); |
| 120 | auto posX = pos[it->second.FirstComponent + 0]; |
| 121 | auto posY = pos[it->second.FirstComponent + 1]; |
| 122 | auto posZ = pos[it->second.FirstComponent + 2]; |
| 123 | auto posW = pos[it->second.FirstComponent + 3]; |
| 124 | |
| 125 | Int4 maxX = CmpLT(posW, posX); |
| 126 | Int4 maxY = CmpLT(posW, posY); |
| 127 | Int4 maxZ = CmpLT(posW, posZ); |
| 128 | Int4 minX = CmpNLE(-posW, posX); |
| 129 | Int4 minY = CmpNLE(-posW, posY); |
| 130 | Int4 minZ = CmpNLE(Float4(0.0f), posZ); |
| 131 | |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 132 | clipFlags = Pointer<Int>(constants + OFFSET(Constants, maxX))[SignMask(maxX)]; |
| 133 | clipFlags |= Pointer<Int>(constants + OFFSET(Constants, maxY))[SignMask(maxY)]; |
| 134 | clipFlags |= Pointer<Int>(constants + OFFSET(Constants, maxZ))[SignMask(maxZ)]; |
| 135 | clipFlags |= Pointer<Int>(constants + OFFSET(Constants, minX))[SignMask(minX)]; |
| 136 | clipFlags |= Pointer<Int>(constants + OFFSET(Constants, minY))[SignMask(minY)]; |
| 137 | clipFlags |= Pointer<Int>(constants + OFFSET(Constants, minZ))[SignMask(minZ)]; |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 138 | |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 139 | Int4 finiteX = CmpLE(Abs(posX), *Pointer<Float4>(constants + OFFSET(Constants, maxPos))); |
| 140 | Int4 finiteY = CmpLE(Abs(posY), *Pointer<Float4>(constants + OFFSET(Constants, maxPos))); |
| 141 | Int4 finiteZ = CmpLE(Abs(posZ), *Pointer<Float4>(constants + OFFSET(Constants, maxPos))); |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 142 | |
| 143 | Int4 finiteXYZ = finiteX & finiteY & finiteZ; |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 144 | clipFlags |= Pointer<Int>(constants + OFFSET(Constants, fini))[SignMask(finiteXYZ)]; |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 145 | } |
| 146 | |
| 147 | void VertexRoutine::computeCullMask() |
| 148 | { |
| 149 | cullMask = Int(15); |
| 150 | |
| 151 | auto it = spirvShader->outputBuiltins.find(spv::BuiltInCullDistance); |
Nicolas Capens | 81bc9d9 | 2019-12-16 15:05:57 -0500 | [diff] [blame] | 152 | if(it != spirvShader->outputBuiltins.end()) |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 153 | { |
| 154 | auto count = spirvShader->getNumOutputCullDistances(); |
Nicolas Capens | 81bc9d9 | 2019-12-16 15:05:57 -0500 | [diff] [blame] | 155 | for(uint32_t i = 0; i < count; i++) |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 156 | { |
| 157 | auto const &distance = routine.getVariable(it->second.Id)[it->second.FirstComponent + i]; |
| 158 | auto mask = SignMask(CmpGE(distance, SIMD::Float(0))); |
| 159 | cullMask &= mask; |
| 160 | } |
| 161 | } |
| 162 | } |
| 163 | |
| 164 | Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, Pointer<UInt> &batch, |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 165 | bool robustBufferAccess, UInt &robustnessSize, Int baseVertex) |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 166 | { |
| 167 | Vector4f v; |
| 168 | // Because of the following rule in the Vulkan spec, we do not care if a very large negative |
| 169 | // baseVertex would overflow all the way back into a valid region of the index buffer: |
| 170 | // "Out-of-bounds buffer loads will return any of the following values : |
| 171 | // - Values from anywhere within the memory range(s) bound to the buffer (possibly including |
| 172 | // bytes of memory past the end of the buffer, up to the end of the bound range)." |
| 173 | UInt4 offsets = (*Pointer<UInt4>(As<Pointer<UInt4>>(batch)) + As<UInt4>(Int4(baseVertex))) * UInt4(stride); |
| 174 | |
| 175 | Pointer<Byte> source0 = buffer + offsets.x; |
| 176 | Pointer<Byte> source1 = buffer + offsets.y; |
| 177 | Pointer<Byte> source2 = buffer + offsets.z; |
| 178 | Pointer<Byte> source3 = buffer + offsets.w; |
| 179 | |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 180 | vk::Format format(stream.format); |
| 181 | |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 182 | UInt4 zero(0); |
Nicolas Capens | 81bc9d9 | 2019-12-16 15:05:57 -0500 | [diff] [blame] | 183 | if(robustBufferAccess) |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 184 | { |
| 185 | // TODO(b/141124876): Optimize for wide-vector gather operations. |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 186 | UInt4 limits = offsets + UInt4(format.bytes()); |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 187 | Pointer<Byte> zeroSource = As<Pointer<Byte>>(&zero); |
| 188 | source0 = IfThenElse(limits.x <= robustnessSize, source0, zeroSource); |
| 189 | source1 = IfThenElse(limits.y <= robustnessSize, source1, zeroSource); |
| 190 | source2 = IfThenElse(limits.z <= robustnessSize, source2, zeroSource); |
| 191 | source3 = IfThenElse(limits.w <= robustnessSize, source3, zeroSource); |
| 192 | } |
| 193 | |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 194 | int componentCount = format.componentCount(); |
| 195 | bool normalized = !format.isUnnormalizedInteger(); |
| 196 | bool isNativeFloatAttrib = (stream.attribType == SpirvShader::ATTRIBTYPE_FLOAT) || normalized; |
| 197 | bool bgra = false; |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 198 | |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 199 | switch(stream.format) |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 200 | { |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 201 | case VK_FORMAT_R32_SFLOAT: |
| 202 | case VK_FORMAT_R32G32_SFLOAT: |
| 203 | case VK_FORMAT_R32G32B32_SFLOAT: |
| 204 | case VK_FORMAT_R32G32B32A32_SFLOAT: |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 205 | { |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 206 | if(componentCount == 0) |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 207 | { |
| 208 | // Null stream, all default components |
| 209 | } |
| 210 | else |
| 211 | { |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 212 | if(componentCount == 1) |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 213 | { |
| 214 | v.x.x = *Pointer<Float>(source0); |
| 215 | v.x.y = *Pointer<Float>(source1); |
| 216 | v.x.z = *Pointer<Float>(source2); |
| 217 | v.x.w = *Pointer<Float>(source3); |
| 218 | } |
| 219 | else |
| 220 | { |
| 221 | v.x = *Pointer<Float4>(source0); |
| 222 | v.y = *Pointer<Float4>(source1); |
| 223 | v.z = *Pointer<Float4>(source2); |
| 224 | v.w = *Pointer<Float4>(source3); |
| 225 | |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 226 | transpose4xN(v.x, v.y, v.z, v.w, componentCount); |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 227 | } |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 228 | } |
| 229 | } |
| 230 | break; |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 231 | case VK_FORMAT_B8G8R8A8_UNORM: |
| 232 | bgra = true; |
Chris Forbes | 12e857a | 2020-03-23 09:43:48 -0700 | [diff] [blame] | 233 | // [[fallthrough]] |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 234 | case VK_FORMAT_R8_UNORM: |
| 235 | case VK_FORMAT_R8G8_UNORM: |
| 236 | case VK_FORMAT_R8G8B8A8_UNORM: |
Chris Forbes | 12e857a | 2020-03-23 09:43:48 -0700 | [diff] [blame] | 237 | case VK_FORMAT_A8B8G8R8_UNORM_PACK32: |
| 238 | v.x = Float4(*Pointer<Byte4>(source0)); |
| 239 | v.y = Float4(*Pointer<Byte4>(source1)); |
| 240 | v.z = Float4(*Pointer<Byte4>(source2)); |
| 241 | v.w = Float4(*Pointer<Byte4>(source3)); |
| 242 | |
| 243 | transpose4xN(v.x, v.y, v.z, v.w, componentCount); |
| 244 | |
| 245 | if(componentCount >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte)); |
| 246 | if(componentCount >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte)); |
| 247 | if(componentCount >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte)); |
| 248 | if(componentCount >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte)); |
| 249 | break; |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 250 | case VK_FORMAT_R8_UINT: |
| 251 | case VK_FORMAT_R8G8_UINT: |
| 252 | case VK_FORMAT_R8G8B8A8_UINT: |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 253 | case VK_FORMAT_A8B8G8R8_UINT_PACK32: |
Chris Forbes | 12e857a | 2020-03-23 09:43:48 -0700 | [diff] [blame] | 254 | v.x = As<Float4>(Int4(*Pointer<Byte4>(source0))); |
| 255 | v.y = As<Float4>(Int4(*Pointer<Byte4>(source1))); |
| 256 | v.z = As<Float4>(Int4(*Pointer<Byte4>(source2))); |
| 257 | v.w = As<Float4>(Int4(*Pointer<Byte4>(source3))); |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 258 | |
Chris Forbes | 12e857a | 2020-03-23 09:43:48 -0700 | [diff] [blame] | 259 | transpose4xN(v.x, v.y, v.z, v.w, componentCount); |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 260 | break; |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 261 | case VK_FORMAT_R8_SNORM: |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 262 | case VK_FORMAT_R8G8_SNORM: |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 263 | case VK_FORMAT_R8G8B8A8_SNORM: |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 264 | case VK_FORMAT_A8B8G8R8_SNORM_PACK32: |
Chris Forbes | 12e857a | 2020-03-23 09:43:48 -0700 | [diff] [blame] | 265 | v.x = Float4(*Pointer<SByte4>(source0)); |
| 266 | v.y = Float4(*Pointer<SByte4>(source1)); |
| 267 | v.z = Float4(*Pointer<SByte4>(source2)); |
| 268 | v.w = Float4(*Pointer<SByte4>(source3)); |
| 269 | |
| 270 | transpose4xN(v.x, v.y, v.z, v.w, componentCount); |
| 271 | |
Corentin Wallez | 5c883d3 | 2020-04-08 15:26:26 +0200 | [diff] [blame] | 272 | if(componentCount >= 1) v.x = Max(v.x * *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte)), Float4(-1.0f)); |
| 273 | if(componentCount >= 2) v.y = Max(v.y * *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte)), Float4(-1.0f)); |
| 274 | if(componentCount >= 3) v.z = Max(v.z * *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte)), Float4(-1.0f)); |
| 275 | if(componentCount >= 4) v.w = Max(v.w * *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte)), Float4(-1.0f)); |
Chris Forbes | 12e857a | 2020-03-23 09:43:48 -0700 | [diff] [blame] | 276 | break; |
| 277 | case VK_FORMAT_R8_SINT: |
| 278 | case VK_FORMAT_R8G8_SINT: |
| 279 | case VK_FORMAT_R8G8B8A8_SINT: |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 280 | case VK_FORMAT_A8B8G8R8_SINT_PACK32: |
Chris Forbes | 12e857a | 2020-03-23 09:43:48 -0700 | [diff] [blame] | 281 | v.x = As<Float4>(Int4(*Pointer<SByte4>(source0))); |
| 282 | v.y = As<Float4>(Int4(*Pointer<SByte4>(source1))); |
| 283 | v.z = As<Float4>(Int4(*Pointer<SByte4>(source2))); |
| 284 | v.w = As<Float4>(Int4(*Pointer<SByte4>(source3))); |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 285 | |
Chris Forbes | 12e857a | 2020-03-23 09:43:48 -0700 | [diff] [blame] | 286 | transpose4xN(v.x, v.y, v.z, v.w, componentCount); |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 287 | break; |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 288 | case VK_FORMAT_R16_SNORM: |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 289 | case VK_FORMAT_R16G16_SNORM: |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 290 | case VK_FORMAT_R16G16B16A16_SNORM: |
Chris Forbes | 12e857a | 2020-03-23 09:43:48 -0700 | [diff] [blame] | 291 | v.x = Float4(*Pointer<Short4>(source0)); |
| 292 | v.y = Float4(*Pointer<Short4>(source1)); |
| 293 | v.z = Float4(*Pointer<Short4>(source2)); |
| 294 | v.w = Float4(*Pointer<Short4>(source3)); |
| 295 | |
| 296 | transpose4xN(v.x, v.y, v.z, v.w, componentCount); |
| 297 | |
Corentin Wallez | 5c883d3 | 2020-04-08 15:26:26 +0200 | [diff] [blame] | 298 | if(componentCount >= 1) v.x = Max(v.x * *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort)), Float4(-1.0f)); |
| 299 | if(componentCount >= 2) v.y = Max(v.y * *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort)), Float4(-1.0f)); |
| 300 | if(componentCount >= 3) v.z = Max(v.z * *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort)), Float4(-1.0f)); |
| 301 | if(componentCount >= 4) v.w = Max(v.w * *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort)), Float4(-1.0f)); |
Chris Forbes | 12e857a | 2020-03-23 09:43:48 -0700 | [diff] [blame] | 302 | break; |
| 303 | case VK_FORMAT_R16_SINT: |
| 304 | case VK_FORMAT_R16G16_SINT: |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 305 | case VK_FORMAT_R16G16B16A16_SINT: |
Chris Forbes | 12e857a | 2020-03-23 09:43:48 -0700 | [diff] [blame] | 306 | v.x = As<Float4>(Int4(*Pointer<Short4>(source0))); |
| 307 | v.y = As<Float4>(Int4(*Pointer<Short4>(source1))); |
| 308 | v.z = As<Float4>(Int4(*Pointer<Short4>(source2))); |
| 309 | v.w = As<Float4>(Int4(*Pointer<Short4>(source3))); |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 310 | |
Chris Forbes | 12e857a | 2020-03-23 09:43:48 -0700 | [diff] [blame] | 311 | transpose4xN(v.x, v.y, v.z, v.w, componentCount); |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 312 | break; |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 313 | case VK_FORMAT_R16_UNORM: |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 314 | case VK_FORMAT_R16G16_UNORM: |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 315 | case VK_FORMAT_R16G16B16A16_UNORM: |
Chris Forbes | 12e857a | 2020-03-23 09:43:48 -0700 | [diff] [blame] | 316 | v.x = Float4(*Pointer<UShort4>(source0)); |
| 317 | v.y = Float4(*Pointer<UShort4>(source1)); |
| 318 | v.z = Float4(*Pointer<UShort4>(source2)); |
| 319 | v.w = Float4(*Pointer<UShort4>(source3)); |
| 320 | |
| 321 | transpose4xN(v.x, v.y, v.z, v.w, componentCount); |
| 322 | |
| 323 | if(componentCount >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort)); |
| 324 | if(componentCount >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort)); |
| 325 | if(componentCount >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort)); |
| 326 | if(componentCount >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort)); |
| 327 | break; |
| 328 | case VK_FORMAT_R16_UINT: |
| 329 | case VK_FORMAT_R16G16_UINT: |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 330 | case VK_FORMAT_R16G16B16A16_UINT: |
Chris Forbes | 12e857a | 2020-03-23 09:43:48 -0700 | [diff] [blame] | 331 | v.x = As<Float4>(Int4(*Pointer<UShort4>(source0))); |
| 332 | v.y = As<Float4>(Int4(*Pointer<UShort4>(source1))); |
| 333 | v.z = As<Float4>(Int4(*Pointer<UShort4>(source2))); |
| 334 | v.w = As<Float4>(Int4(*Pointer<UShort4>(source3))); |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 335 | |
Chris Forbes | 12e857a | 2020-03-23 09:43:48 -0700 | [diff] [blame] | 336 | transpose4xN(v.x, v.y, v.z, v.w, componentCount); |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 337 | break; |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 338 | case VK_FORMAT_R32_SINT: |
| 339 | case VK_FORMAT_R32G32_SINT: |
| 340 | case VK_FORMAT_R32G32B32_SINT: |
| 341 | case VK_FORMAT_R32G32B32A32_SINT: |
Chris Forbes | 12e857a | 2020-03-23 09:43:48 -0700 | [diff] [blame] | 342 | v.x = *Pointer<Float4>(source0); |
| 343 | v.y = *Pointer<Float4>(source1); |
| 344 | v.z = *Pointer<Float4>(source2); |
| 345 | v.w = *Pointer<Float4>(source3); |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 346 | |
Chris Forbes | 12e857a | 2020-03-23 09:43:48 -0700 | [diff] [blame] | 347 | transpose4xN(v.x, v.y, v.z, v.w, componentCount); |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 348 | break; |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 349 | case VK_FORMAT_R32_UINT: |
| 350 | case VK_FORMAT_R32G32_UINT: |
| 351 | case VK_FORMAT_R32G32B32_UINT: |
| 352 | case VK_FORMAT_R32G32B32A32_UINT: |
Chris Forbes | 12e857a | 2020-03-23 09:43:48 -0700 | [diff] [blame] | 353 | v.x = *Pointer<Float4>(source0); |
| 354 | v.y = *Pointer<Float4>(source1); |
| 355 | v.z = *Pointer<Float4>(source2); |
| 356 | v.w = *Pointer<Float4>(source3); |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 357 | |
Chris Forbes | 12e857a | 2020-03-23 09:43:48 -0700 | [diff] [blame] | 358 | transpose4xN(v.x, v.y, v.z, v.w, componentCount); |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 359 | break; |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 360 | case VK_FORMAT_R16_SFLOAT: |
| 361 | case VK_FORMAT_R16G16_SFLOAT: |
| 362 | case VK_FORMAT_R16G16B16A16_SFLOAT: |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 363 | { |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 364 | if(componentCount >= 1) |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 365 | { |
| 366 | UShort x0 = *Pointer<UShort>(source0 + 0); |
| 367 | UShort x1 = *Pointer<UShort>(source1 + 0); |
| 368 | UShort x2 = *Pointer<UShort>(source2 + 0); |
| 369 | UShort x3 = *Pointer<UShort>(source3 + 0); |
| 370 | |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 371 | v.x.x = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(x0) * 4); |
| 372 | v.x.y = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(x1) * 4); |
| 373 | v.x.z = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(x2) * 4); |
| 374 | v.x.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(x3) * 4); |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 375 | } |
| 376 | |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 377 | if(componentCount >= 2) |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 378 | { |
| 379 | UShort y0 = *Pointer<UShort>(source0 + 2); |
| 380 | UShort y1 = *Pointer<UShort>(source1 + 2); |
| 381 | UShort y2 = *Pointer<UShort>(source2 + 2); |
| 382 | UShort y3 = *Pointer<UShort>(source3 + 2); |
| 383 | |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 384 | v.y.x = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(y0) * 4); |
| 385 | v.y.y = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(y1) * 4); |
| 386 | v.y.z = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(y2) * 4); |
| 387 | v.y.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(y3) * 4); |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 388 | } |
| 389 | |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 390 | if(componentCount >= 3) |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 391 | { |
| 392 | UShort z0 = *Pointer<UShort>(source0 + 4); |
| 393 | UShort z1 = *Pointer<UShort>(source1 + 4); |
| 394 | UShort z2 = *Pointer<UShort>(source2 + 4); |
| 395 | UShort z3 = *Pointer<UShort>(source3 + 4); |
| 396 | |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 397 | v.z.x = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(z0) * 4); |
| 398 | v.z.y = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(z1) * 4); |
| 399 | v.z.z = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(z2) * 4); |
| 400 | v.z.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(z3) * 4); |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 401 | } |
| 402 | |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 403 | if(componentCount >= 4) |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 404 | { |
| 405 | UShort w0 = *Pointer<UShort>(source0 + 6); |
| 406 | UShort w1 = *Pointer<UShort>(source1 + 6); |
| 407 | UShort w2 = *Pointer<UShort>(source2 + 6); |
| 408 | UShort w3 = *Pointer<UShort>(source3 + 6); |
| 409 | |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 410 | v.w.x = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(w0) * 4); |
| 411 | v.w.y = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(w1) * 4); |
| 412 | v.w.z = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(w2) * 4); |
| 413 | v.w.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(w3) * 4); |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 414 | } |
| 415 | } |
| 416 | break; |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 417 | case VK_FORMAT_A2R10G10B10_SNORM_PACK32: |
Chris Forbes | 12e857a | 2020-03-23 09:43:48 -0700 | [diff] [blame] | 418 | bgra = true; |
| 419 | // [[fallthrough]] |
| 420 | case VK_FORMAT_A2B10G10R10_SNORM_PACK32: |
| 421 | { |
| 422 | Int4 src; |
| 423 | src = Insert(src, *Pointer<Int>(source0), 0); |
| 424 | src = Insert(src, *Pointer<Int>(source1), 1); |
| 425 | src = Insert(src, *Pointer<Int>(source2), 2); |
| 426 | src = Insert(src, *Pointer<Int>(source3), 3); |
| 427 | v.x = Float4((src << 22) >> 22); |
| 428 | v.y = Float4((src << 12) >> 22); |
| 429 | v.z = Float4((src << 02) >> 22); |
| 430 | v.w = Float4(src >> 30); |
| 431 | |
| 432 | v.x = Max(v.x * Float4(1.0f / 0x1FF), Float4(-1.0f)); |
| 433 | v.y = Max(v.y * Float4(1.0f / 0x1FF), Float4(-1.0f)); |
| 434 | v.z = Max(v.z * Float4(1.0f / 0x1FF), Float4(-1.0f)); |
| 435 | v.w = Max(v.w, Float4(-1.0f)); |
| 436 | } |
| 437 | break; |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 438 | case VK_FORMAT_A2R10G10B10_SINT_PACK32: |
| 439 | bgra = true; |
Chris Forbes | 12e857a | 2020-03-23 09:43:48 -0700 | [diff] [blame] | 440 | // [[fallthrough]] |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 441 | case VK_FORMAT_A2B10G10R10_SINT_PACK32: |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 442 | { |
| 443 | Int4 src; |
| 444 | src = Insert(src, *Pointer<Int>(source0), 0); |
| 445 | src = Insert(src, *Pointer<Int>(source1), 1); |
| 446 | src = Insert(src, *Pointer<Int>(source2), 2); |
| 447 | src = Insert(src, *Pointer<Int>(source3), 3); |
Chris Forbes | 12e857a | 2020-03-23 09:43:48 -0700 | [diff] [blame] | 448 | v.x = As<Float4>((src << 22) >> 22); |
| 449 | v.y = As<Float4>((src << 12) >> 22); |
| 450 | v.z = As<Float4>((src << 02) >> 22); |
| 451 | v.w = As<Float4>(src >> 30); |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 452 | } |
| 453 | break; |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 454 | case VK_FORMAT_A2R10G10B10_UNORM_PACK32: |
Chris Forbes | 12e857a | 2020-03-23 09:43:48 -0700 | [diff] [blame] | 455 | bgra = true; |
| 456 | // [[fallthrough]] |
| 457 | case VK_FORMAT_A2B10G10R10_UNORM_PACK32: |
| 458 | { |
| 459 | Int4 src; |
| 460 | src = Insert(src, *Pointer<Int>(source0), 0); |
| 461 | src = Insert(src, *Pointer<Int>(source1), 1); |
| 462 | src = Insert(src, *Pointer<Int>(source2), 2); |
| 463 | src = Insert(src, *Pointer<Int>(source3), 3); |
| 464 | |
| 465 | v.x = Float4(src & Int4(0x3FF)); |
| 466 | v.y = Float4((src >> 10) & Int4(0x3FF)); |
| 467 | v.z = Float4((src >> 20) & Int4(0x3FF)); |
| 468 | v.w = Float4((src >> 30) & Int4(0x3)); |
| 469 | |
| 470 | v.x *= Float4(1.0f / 0x3FF); |
| 471 | v.y *= Float4(1.0f / 0x3FF); |
| 472 | v.z *= Float4(1.0f / 0x3FF); |
| 473 | v.w *= Float4(1.0f / 0x3); |
| 474 | } |
| 475 | break; |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 476 | case VK_FORMAT_A2R10G10B10_UINT_PACK32: |
| 477 | bgra = true; |
Chris Forbes | 12e857a | 2020-03-23 09:43:48 -0700 | [diff] [blame] | 478 | // [[fallthrough]] |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 479 | case VK_FORMAT_A2B10G10R10_UINT_PACK32: |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 480 | { |
| 481 | Int4 src; |
| 482 | src = Insert(src, *Pointer<Int>(source0), 0); |
| 483 | src = Insert(src, *Pointer<Int>(source1), 1); |
| 484 | src = Insert(src, *Pointer<Int>(source2), 2); |
| 485 | src = Insert(src, *Pointer<Int>(source3), 3); |
| 486 | |
Chris Forbes | 12e857a | 2020-03-23 09:43:48 -0700 | [diff] [blame] | 487 | v.x = As<Float4>(src & Int4(0x3FF)); |
| 488 | v.y = As<Float4>((src >> 10) & Int4(0x3FF)); |
| 489 | v.z = As<Float4>((src >> 20) & Int4(0x3FF)); |
| 490 | v.w = As<Float4>((src >> 30) & Int4(0x3)); |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 491 | } |
| 492 | break; |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 493 | default: |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 494 | UNSUPPORTED("stream.format %d", int(stream.format)); |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 495 | } |
| 496 | |
Alexis Hetu | b766e5e | 2020-01-20 11:40:28 -0500 | [diff] [blame] | 497 | if(bgra) |
| 498 | { |
| 499 | // Swap red and blue |
| 500 | Float4 t = v.x; |
| 501 | v.x = v.z; |
| 502 | v.z = t; |
| 503 | } |
| 504 | |
| 505 | if(componentCount < 1) v.x = Float4(0.0f); |
| 506 | if(componentCount < 2) v.y = Float4(0.0f); |
| 507 | if(componentCount < 3) v.z = Float4(0.0f); |
| 508 | if(componentCount < 4) v.w = isNativeFloatAttrib ? As<Float4>(Float4(1.0f)) : As<Float4>(Int4(1)); |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 509 | |
| 510 | return v; |
| 511 | } |
| 512 | |
| 513 | void VertexRoutine::writeCache(Pointer<Byte> &vertexCache, Pointer<UInt> &tagCache, Pointer<UInt> &batch) |
| 514 | { |
| 515 | UInt index0 = batch[0]; |
| 516 | UInt index1 = batch[1]; |
| 517 | UInt index2 = batch[2]; |
| 518 | UInt index3 = batch[3]; |
| 519 | |
| 520 | UInt cacheIndex0 = index0 & VertexCache::TAG_MASK; |
| 521 | UInt cacheIndex1 = index1 & VertexCache::TAG_MASK; |
| 522 | UInt cacheIndex2 = index2 & VertexCache::TAG_MASK; |
| 523 | UInt cacheIndex3 = index3 & VertexCache::TAG_MASK; |
| 524 | |
| 525 | // We processed a SIMD group of vertices, with the first one being the one that missed the cache tag check. |
| 526 | // Write them out in reverse order here and below to ensure the first one is now guaranteed to be in the cache. |
| 527 | tagCache[cacheIndex3] = index3; |
| 528 | tagCache[cacheIndex2] = index2; |
| 529 | tagCache[cacheIndex1] = index1; |
| 530 | tagCache[cacheIndex0] = index0; |
| 531 | |
| 532 | auto it = spirvShader->outputBuiltins.find(spv::BuiltInPosition); |
| 533 | assert(it != spirvShader->outputBuiltins.end()); |
| 534 | assert(it->second.SizeInComponents == 4); |
| 535 | auto &position = routine.getVariable(it->second.Id); |
| 536 | |
| 537 | Vector4f pos; |
| 538 | pos.x = position[it->second.FirstComponent + 0]; |
| 539 | pos.y = position[it->second.FirstComponent + 1]; |
| 540 | pos.z = position[it->second.FirstComponent + 2]; |
| 541 | pos.w = position[it->second.FirstComponent + 3]; |
| 542 | |
| 543 | // Projection and viewport transform. |
| 544 | Float4 w = As<Float4>(As<Int4>(pos.w) | (As<Int4>(CmpEQ(pos.w, Float4(0.0f))) & As<Int4>(Float4(1.0f)))); |
| 545 | Float4 rhw = Float4(1.0f) / w; |
| 546 | |
| 547 | Vector4f proj; |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 548 | proj.x = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData, X0xF)) + pos.x * rhw * *Pointer<Float4>(data + OFFSET(DrawData, WxF)))); |
| 549 | proj.y = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData, Y0xF)) + pos.y * rhw * *Pointer<Float4>(data + OFFSET(DrawData, HxF)))); |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 550 | proj.z = pos.z * rhw; |
| 551 | proj.w = rhw; |
| 552 | |
| 553 | transpose4x4(pos.x, pos.y, pos.z, pos.w); |
| 554 | |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 555 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, position), 16) = pos.w; |
| 556 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, position), 16) = pos.z; |
| 557 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, position), 16) = pos.y; |
| 558 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, position), 16) = pos.x; |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 559 | |
| 560 | it = spirvShader->outputBuiltins.find(spv::BuiltInPointSize); |
| 561 | if(it != spirvShader->outputBuiltins.end()) |
| 562 | { |
| 563 | ASSERT(it->second.SizeInComponents == 1); |
| 564 | auto psize = routine.getVariable(it->second.Id)[it->second.FirstComponent]; |
| 565 | |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 566 | *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, pointSize)) = Extract(psize, 3); |
| 567 | *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, pointSize)) = Extract(psize, 2); |
| 568 | *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, pointSize)) = Extract(psize, 1); |
| 569 | *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, pointSize)) = Extract(psize, 0); |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 570 | } |
| 571 | |
| 572 | it = spirvShader->outputBuiltins.find(spv::BuiltInClipDistance); |
| 573 | if(it != spirvShader->outputBuiltins.end()) |
| 574 | { |
| 575 | auto count = spirvShader->getNumOutputClipDistances(); |
| 576 | for(unsigned int i = 0; i < count; i++) |
| 577 | { |
| 578 | auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + i]; |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 579 | *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, clipDistance[i])) = Extract(dist, 3); |
| 580 | *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, clipDistance[i])) = Extract(dist, 2); |
| 581 | *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, clipDistance[i])) = Extract(dist, 1); |
| 582 | *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, clipDistance[i])) = Extract(dist, 0); |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 583 | } |
| 584 | } |
| 585 | |
| 586 | it = spirvShader->outputBuiltins.find(spv::BuiltInCullDistance); |
| 587 | if(it != spirvShader->outputBuiltins.end()) |
| 588 | { |
| 589 | auto count = spirvShader->getNumOutputCullDistances(); |
| 590 | for(unsigned int i = 0; i < count; i++) |
| 591 | { |
| 592 | auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + i]; |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 593 | *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, cullDistance[i])) = Extract(dist, 3); |
| 594 | *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, cullDistance[i])) = Extract(dist, 2); |
| 595 | *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, cullDistance[i])) = Extract(dist, 1); |
| 596 | *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, cullDistance[i])) = Extract(dist, 0); |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 597 | } |
| 598 | } |
| 599 | |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 600 | *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, clipFlags)) = (clipFlags >> 24) & 0x0000000FF; |
| 601 | *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, clipFlags)) = (clipFlags >> 16) & 0x0000000FF; |
| 602 | *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, clipFlags)) = (clipFlags >> 8) & 0x0000000FF; |
| 603 | *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, clipFlags)) = (clipFlags >> 0) & 0x0000000FF; |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 604 | |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 605 | *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, cullMask)) = -((cullMask >> 3) & 1); |
| 606 | *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, cullMask)) = -((cullMask >> 2) & 1); |
| 607 | *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, cullMask)) = -((cullMask >> 1) & 1); |
| 608 | *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, cullMask)) = -((cullMask >> 0) & 1); |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 609 | |
| 610 | transpose4x4(proj.x, proj.y, proj.z, proj.w); |
| 611 | |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 612 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, projected), 16) = proj.w; |
| 613 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, projected), 16) = proj.z; |
| 614 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, projected), 16) = proj.y; |
| 615 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, projected), 16) = proj.x; |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 616 | |
| 617 | for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i += 4) |
| 618 | { |
| 619 | if(spirvShader->outputs[i + 0].Type != SpirvShader::ATTRIBTYPE_UNUSED || |
| 620 | spirvShader->outputs[i + 1].Type != SpirvShader::ATTRIBTYPE_UNUSED || |
| 621 | spirvShader->outputs[i + 2].Type != SpirvShader::ATTRIBTYPE_UNUSED || |
| 622 | spirvShader->outputs[i + 3].Type != SpirvShader::ATTRIBTYPE_UNUSED) |
| 623 | { |
| 624 | Vector4f v; |
| 625 | v.x = routine.outputs[i + 0]; |
| 626 | v.y = routine.outputs[i + 1]; |
| 627 | v.z = routine.outputs[i + 2]; |
| 628 | v.w = routine.outputs[i + 3]; |
| 629 | |
| 630 | transpose4x4(v.x, v.y, v.z, v.w); |
| 631 | |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 632 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, v[i]), 16) = v.w; |
| 633 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, v[i]), 16) = v.z; |
| 634 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, v[i]), 16) = v.y; |
| 635 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, v[i]), 16) = v.x; |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 636 | } |
| 637 | } |
| 638 | } |
| 639 | |
| 640 | void VertexRoutine::writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cacheEntry) |
| 641 | { |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 642 | *Pointer<Int4>(vertex + OFFSET(Vertex, position)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex, position)); |
| 643 | *Pointer<Int>(vertex + OFFSET(Vertex, pointSize)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, pointSize)); |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 644 | |
Ben Clayton | bc1c067 | 2019-12-17 20:37:37 +0000 | [diff] [blame] | 645 | *Pointer<Int>(vertex + OFFSET(Vertex, clipFlags)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, clipFlags)); |
| 646 | *Pointer<Int>(vertex + OFFSET(Vertex, cullMask)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, cullMask)); |
| 647 | *Pointer<Int4>(vertex + OFFSET(Vertex, projected)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex, projected)); |
Nicolas Capens | 157ba26 | 2019-12-10 17:49:14 -0500 | [diff] [blame] | 648 | |
| 649 | for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i++) |
| 650 | { |
| 651 | if(spirvShader->outputs[i].Type != SpirvShader::ATTRIBTYPE_UNUSED) |
| 652 | { |
| 653 | *Pointer<Int>(vertex + OFFSET(Vertex, v[i]), 4) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, v[i]), 4); |
| 654 | } |
| 655 | } |
| 656 | for(unsigned int i = 0; i < spirvShader->getNumOutputClipDistances(); i++) |
| 657 | { |
| 658 | *Pointer<Float>(vertex + OFFSET(Vertex, clipDistance[i]), 4) = *Pointer<Float>(cacheEntry + OFFSET(Vertex, clipDistance[i]), 4); |
| 659 | } |
| 660 | for(unsigned int i = 0; i < spirvShader->getNumOutputCullDistances(); i++) |
| 661 | { |
| 662 | *Pointer<Float>(vertex + OFFSET(Vertex, cullDistance[i]), 4) = *Pointer<Float>(cacheEntry + OFFSET(Vertex, cullDistance[i]), 4); |
| 663 | } |
| 664 | } |
| 665 | |
| 666 | } // namespace sw |