blob: 6d8e8909f0d00f9c8f08ff55e779ec8802100293 [file] [log] [blame]
Ben Claytonf2be26a2019-03-08 12:02:05 +00001// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "ComputeProgram.hpp"
Chris Forbes548e3662019-04-25 10:00:06 -070016#include "Constants.hpp"
Ben Claytonf2be26a2019-03-08 12:02:05 +000017
Ben Clayton25e06e02020-02-07 11:19:08 +000018#include "System/Debug.hpp"
Ben Claytonf2be26a2019-03-08 12:02:05 +000019#include "Vulkan/VkPipelineLayout.hpp"
20
Ben Claytone693b622019-09-05 12:48:37 +010021#include "marl/defer.h"
22#include "marl/trace.h"
23#include "marl/waitgroup.h"
Ben Claytonf7b7b702019-08-27 10:23:29 +010024
Ben Claytonecfeede2019-05-08 08:51:01 +010025#include <queue>
26
namespace {

// Symbolic names for the three axes of a workgroup / invocation coordinate.
// They are used both as indices into 3-element arrays and as lane numbers
// when inserting into / extracting from 4-wide integer vectors.
enum
{
	X = 0,
	Y = 1,
	Z = 2
};

}  // anonymous namespace
37
38namespace sw {
39
40ComputeProgram::ComputeProgram(SpirvShader const *shader, vk::PipelineLayout const *pipelineLayout, const vk::DescriptorSet::Bindings &descriptorSets)
Ben Claytonbc1c0672019-12-17 20:37:37 +000041 : shader(shader)
42 , pipelineLayout(pipelineLayout)
43 , descriptorSets(descriptorSets)
Ben Claytonc2bb50b2019-03-13 14:28:32 +000044{
Nicolas Capens157ba262019-12-10 17:49:14 -050045}
Ben Claytonc2bb50b2019-03-13 14:28:32 +000046
Nicolas Capens157ba262019-12-10 17:49:14 -050047ComputeProgram::~ComputeProgram()
Ben Claytonf2be26a2019-03-08 12:02:05 +000048{
Nicolas Capens157ba262019-12-10 17:49:14 -050049}
50
51void ComputeProgram::generate()
52{
53 MARL_SCOPED_EVENT("ComputeProgram::generate");
54
55 SpirvRoutine routine(pipelineLayout);
56 shader->emitProlog(&routine);
57 emit(&routine);
58 shader->emitEpilog(&routine);
59}
60
Ben Claytonbc1c0672019-12-17 20:37:37 +000061void ComputeProgram::setWorkgroupBuiltins(Pointer<Byte> data, SpirvRoutine *routine, Int workgroupID[3])
Nicolas Capens157ba262019-12-10 17:49:14 -050062{
Ben Clayton5beaef92019-12-03 12:23:35 +000063 // TODO(b/146486064): Consider only assigning these to the SpirvRoutine iff
64 // they are ever going to be read.
65 routine->numWorkgroups = *Pointer<Int4>(data + OFFSET(Data, numWorkgroups));
66 routine->workgroupID = Insert(Insert(Insert(Int4(0), workgroupID[X], X), workgroupID[Y], Y), workgroupID[Z], Z);
67 routine->workgroupSize = *Pointer<Int4>(data + OFFSET(Data, workgroupSize));
68 routine->subgroupsPerWorkgroup = *Pointer<Int>(data + OFFSET(Data, subgroupsPerWorkgroup));
69 routine->invocationsPerSubgroup = *Pointer<Int>(data + OFFSET(Data, invocationsPerSubgroup));
70
Ben Claytonbc1c0672019-12-17 20:37:37 +000071 routine->setInputBuiltin(shader, spv::BuiltInNumWorkgroups, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
Nicolas Capens81bc9d92019-12-16 15:05:57 -050072 for(uint32_t component = 0; component < builtin.SizeInComponents; component++)
Ben Claytonf2be26a2019-03-08 12:02:05 +000073 {
Nicolas Capens157ba262019-12-10 17:49:14 -050074 value[builtin.FirstComponent + component] =
Ben Clayton5beaef92019-12-03 12:23:35 +000075 As<SIMD::Float>(SIMD::Int(Extract(routine->numWorkgroups, component)));
Ben Clayton13dcbec2019-05-08 08:43:55 +010076 }
Nicolas Capens157ba262019-12-10 17:49:14 -050077 });
Ben Clayton13dcbec2019-05-08 08:43:55 +010078
Ben Claytonbc1c0672019-12-17 20:37:37 +000079 routine->setInputBuiltin(shader, spv::BuiltInWorkgroupId, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
Nicolas Capens81bc9d92019-12-16 15:05:57 -050080 for(uint32_t component = 0; component < builtin.SizeInComponents; component++)
Ben Clayton13dcbec2019-05-08 08:43:55 +010081 {
Nicolas Capens157ba262019-12-10 17:49:14 -050082 value[builtin.FirstComponent + component] =
Ben Claytonbc1c0672019-12-17 20:37:37 +000083 As<SIMD::Float>(SIMD::Int(workgroupID[component]));
Ben Claytonf2be26a2019-03-08 12:02:05 +000084 }
Nicolas Capens157ba262019-12-10 17:49:14 -050085 });
86
Ben Claytonbc1c0672019-12-17 20:37:37 +000087 routine->setInputBuiltin(shader, spv::BuiltInWorkgroupSize, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
Nicolas Capens81bc9d92019-12-16 15:05:57 -050088 for(uint32_t component = 0; component < builtin.SizeInComponents; component++)
Nicolas Capens157ba262019-12-10 17:49:14 -050089 {
90 value[builtin.FirstComponent + component] =
Ben Clayton5beaef92019-12-03 12:23:35 +000091 As<SIMD::Float>(SIMD::Int(Extract(routine->workgroupSize, component)));
Nicolas Capens157ba262019-12-10 17:49:14 -050092 }
93 });
94
Ben Claytonbc1c0672019-12-17 20:37:37 +000095 routine->setInputBuiltin(shader, spv::BuiltInNumSubgroups, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
Nicolas Capens157ba262019-12-10 17:49:14 -050096 ASSERT(builtin.SizeInComponents == 1);
Ben Clayton5beaef92019-12-03 12:23:35 +000097 value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(routine->subgroupsPerWorkgroup));
Nicolas Capens157ba262019-12-10 17:49:14 -050098 });
99
Ben Claytonbc1c0672019-12-17 20:37:37 +0000100 routine->setInputBuiltin(shader, spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
Nicolas Capens157ba262019-12-10 17:49:14 -0500101 ASSERT(builtin.SizeInComponents == 1);
Ben Clayton5beaef92019-12-03 12:23:35 +0000102 value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(routine->invocationsPerSubgroup));
Nicolas Capens157ba262019-12-10 17:49:14 -0500103 });
104
105 routine->setImmutableInputBuiltins(shader);
106}
107
Ben Claytonbc1c0672019-12-17 20:37:37 +0000108void ComputeProgram::setSubgroupBuiltins(Pointer<Byte> data, SpirvRoutine *routine, Int workgroupID[3], SIMD::Int localInvocationIndex, Int subgroupIndex)
Nicolas Capens157ba262019-12-10 17:49:14 -0500109{
110 Int4 numWorkgroups = *Pointer<Int4>(data + OFFSET(Data, numWorkgroups));
111 Int4 workgroupSize = *Pointer<Int4>(data + OFFSET(Data, workgroupSize));
112
113 // TODO: Fix Int4 swizzles so we can just use workgroupSize.x, workgroupSize.y.
114 Int workgroupSizeX = Extract(workgroupSize, X);
115 Int workgroupSizeY = Extract(workgroupSize, Y);
116
117 SIMD::Int localInvocationID[3];
118 {
119 SIMD::Int idx = localInvocationIndex;
120 localInvocationID[Z] = idx / SIMD::Int(workgroupSizeX * workgroupSizeY);
Ben Claytonbc1c0672019-12-17 20:37:37 +0000121 idx -= localInvocationID[Z] * SIMD::Int(workgroupSizeX * workgroupSizeY); // modulo
Nicolas Capens157ba262019-12-10 17:49:14 -0500122 localInvocationID[Y] = idx / SIMD::Int(workgroupSizeX);
Ben Claytonbc1c0672019-12-17 20:37:37 +0000123 idx -= localInvocationID[Y] * SIMD::Int(workgroupSizeX); // modulo
Nicolas Capens157ba262019-12-10 17:49:14 -0500124 localInvocationID[X] = idx;
Ben Claytonf2be26a2019-03-08 12:02:05 +0000125 }
126
Ben Clayton5beaef92019-12-03 12:23:35 +0000127 Int4 wgID = Insert(Insert(Insert(SIMD::Int(0), workgroupID[X], X), workgroupID[Y], Y), workgroupID[Z], Z);
128 auto localBase = workgroupSize * wgID;
129 SIMD::Int globalInvocationID[3];
130 globalInvocationID[X] = SIMD::Int(Extract(localBase, X)) + localInvocationID[X];
131 globalInvocationID[Y] = SIMD::Int(Extract(localBase, Y)) + localInvocationID[Y];
132 globalInvocationID[Z] = SIMD::Int(Extract(localBase, Z)) + localInvocationID[Z];
133
134 routine->localInvocationIndex = localInvocationIndex;
135 routine->subgroupIndex = subgroupIndex;
136 routine->localInvocationID[X] = localInvocationID[X];
137 routine->localInvocationID[Y] = localInvocationID[Y];
138 routine->localInvocationID[Z] = localInvocationID[Z];
139 routine->globalInvocationID[X] = globalInvocationID[X];
140 routine->globalInvocationID[Y] = globalInvocationID[Y];
141 routine->globalInvocationID[Z] = globalInvocationID[Z];
142
Ben Claytonbc1c0672019-12-17 20:37:37 +0000143 routine->setInputBuiltin(shader, spv::BuiltInLocalInvocationIndex, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
Nicolas Capens157ba262019-12-10 17:49:14 -0500144 ASSERT(builtin.SizeInComponents == 1);
145 value[builtin.FirstComponent] = As<SIMD::Float>(localInvocationIndex);
146 });
Ben Clayton13dcbec2019-05-08 08:43:55 +0100147
Ben Claytonbc1c0672019-12-17 20:37:37 +0000148 routine->setInputBuiltin(shader, spv::BuiltInSubgroupId, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
Nicolas Capens157ba262019-12-10 17:49:14 -0500149 ASSERT(builtin.SizeInComponents == 1);
150 value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(subgroupIndex));
151 });
Ben Claytonf2be26a2019-03-08 12:02:05 +0000152
Ben Claytonbc1c0672019-12-17 20:37:37 +0000153 routine->setInputBuiltin(shader, spv::BuiltInLocalInvocationId, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500154 for(uint32_t component = 0; component < builtin.SizeInComponents; component++)
Ben Claytonf7b7b702019-08-27 10:23:29 +0100155 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500156 value[builtin.FirstComponent + component] =
Ben Claytonbc1c0672019-12-17 20:37:37 +0000157 As<SIMD::Float>(localInvocationID[component]);
Nicolas Capens157ba262019-12-10 17:49:14 -0500158 }
159 });
Ben Claytonf7b7b702019-08-27 10:23:29 +0100160
Ben Claytonbc1c0672019-12-17 20:37:37 +0000161 routine->setInputBuiltin(shader, spv::BuiltInGlobalInvocationId, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500162 for(uint32_t component = 0; component < builtin.SizeInComponents; component++)
Nicolas Capens157ba262019-12-10 17:49:14 -0500163 {
Ben Clayton5beaef92019-12-03 12:23:35 +0000164 value[builtin.FirstComponent + component] =
165 As<SIMD::Float>(globalInvocationID[component]);
Nicolas Capens157ba262019-12-10 17:49:14 -0500166 }
167 });
168}
169
Ben Claytonbc1c0672019-12-17 20:37:37 +0000170void ComputeProgram::emit(SpirvRoutine *routine)
Nicolas Capens157ba262019-12-10 17:49:14 -0500171{
172 Pointer<Byte> data = Arg<0>();
173 Int workgroupX = Arg<1>();
174 Int workgroupY = Arg<2>();
175 Int workgroupZ = Arg<3>();
176 Pointer<Byte> workgroupMemory = Arg<4>();
177 Int firstSubgroup = Arg<5>();
178 Int subgroupCount = Arg<6>();
179
180 routine->descriptorSets = data + OFFSET(Data, descriptorSets);
181 routine->descriptorDynamicOffsets = data + OFFSET(Data, descriptorDynamicOffsets);
182 routine->pushConstants = data + OFFSET(Data, pushConstants);
183 routine->constants = *Pointer<Pointer<Byte>>(data + OFFSET(Data, constants));
184 routine->workgroupMemory = workgroupMemory;
185
186 Int invocationsPerWorkgroup = *Pointer<Int>(data + OFFSET(Data, invocationsPerWorkgroup));
187
Ben Claytonbc1c0672019-12-17 20:37:37 +0000188 Int workgroupID[3] = { workgroupX, workgroupY, workgroupZ };
Nicolas Capens157ba262019-12-10 17:49:14 -0500189 setWorkgroupBuiltins(data, routine, workgroupID);
190
191 For(Int i = 0, i < subgroupCount, i++)
192 {
193 auto subgroupIndex = firstSubgroup + i;
194
195 // TODO: Replace SIMD::Int(0, 1, 2, 3) with SIMD-width equivalent
196 auto localInvocationIndex = SIMD::Int(subgroupIndex * SIMD::Width) + SIMD::Int(0, 1, 2, 3);
197
198 // Disable lanes where (invocationIDs >= invocationsPerWorkgroup)
199 auto activeLaneMask = CmpLT(localInvocationIndex, SIMD::Int(invocationsPerWorkgroup));
200
201 setSubgroupBuiltins(data, routine, workgroupID, localInvocationIndex, subgroupIndex);
202
203 shader->emit(routine, activeLaneMask, activeLaneMask, descriptorSets);
204 }
205}
206
207void ComputeProgram::run(
Alexis Hetu4f438a52020-06-15 16:13:51 -0400208 vk::DescriptorSet::Array const &descriptorSetObjects,
Ben Claytonbc1c0672019-12-17 20:37:37 +0000209 vk::DescriptorSet::Bindings const &descriptorSets,
210 vk::DescriptorSet::DynamicOffsets const &descriptorDynamicOffsets,
211 PushConstantStorage const &pushConstants,
212 uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ,
213 uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ)
Nicolas Capens157ba262019-12-10 17:49:14 -0500214{
215 auto &modes = shader->getModes();
216
217 auto invocationsPerSubgroup = SIMD::Width;
218 auto invocationsPerWorkgroup = modes.WorkgroupSizeX * modes.WorkgroupSizeY * modes.WorkgroupSizeZ;
219 auto subgroupsPerWorkgroup = (invocationsPerWorkgroup + invocationsPerSubgroup - 1) / invocationsPerSubgroup;
220
221 Data data;
222 data.descriptorSets = descriptorSets;
223 data.descriptorDynamicOffsets = descriptorDynamicOffsets;
224 data.numWorkgroups[X] = groupCountX;
225 data.numWorkgroups[Y] = groupCountY;
226 data.numWorkgroups[Z] = groupCountZ;
227 data.numWorkgroups[3] = 0;
228 data.workgroupSize[X] = modes.WorkgroupSizeX;
229 data.workgroupSize[Y] = modes.WorkgroupSizeY;
230 data.workgroupSize[Z] = modes.WorkgroupSizeZ;
231 data.workgroupSize[3] = 0;
232 data.invocationsPerSubgroup = invocationsPerSubgroup;
233 data.invocationsPerWorkgroup = invocationsPerWorkgroup;
234 data.subgroupsPerWorkgroup = subgroupsPerWorkgroup;
235 data.pushConstants = pushConstants;
236 data.constants = &sw::constants;
237
238 marl::WaitGroup wg;
239 const uint32_t batchCount = 16;
240
241 auto groupCount = groupCountX * groupCountY * groupCountZ;
242
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500243 for(uint32_t batchID = 0; batchID < batchCount && batchID < groupCount; batchID++)
Nicolas Capens157ba262019-12-10 17:49:14 -0500244 {
245 wg.add(1);
Ben Claytonbc1c0672019-12-17 20:37:37 +0000246 marl::schedule([=, &data] {
Nicolas Capens157ba262019-12-10 17:49:14 -0500247 defer(wg.done());
248 std::vector<uint8_t> workgroupMemory(shader->workgroupMemory.size());
249
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500250 for(uint32_t groupIndex = batchID; groupIndex < groupCount; groupIndex += batchCount)
Nicolas Capens157ba262019-12-10 17:49:14 -0500251 {
252 auto modulo = groupIndex;
253 auto groupOffsetZ = modulo / (groupCountX * groupCountY);
254 modulo -= groupOffsetZ * (groupCountX * groupCountY);
255 auto groupOffsetY = modulo / groupCountX;
256 modulo -= groupOffsetY * groupCountX;
257 auto groupOffsetX = modulo;
258
259 auto groupZ = baseGroupZ + groupOffsetZ;
260 auto groupY = baseGroupY + groupOffsetY;
261 auto groupX = baseGroupX + groupOffsetX;
262 MARL_SCOPED_EVENT("groupX: %d, groupY: %d, groupZ: %d", groupX, groupY, groupZ);
263
264 using Coroutine = std::unique_ptr<rr::Stream<SpirvShader::YieldResult>>;
265 std::queue<Coroutine> coroutines;
266
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500267 if(modes.ContainsControlBarriers)
Ben Claytonf7b7b702019-08-27 10:23:29 +0100268 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500269 // Make a function call per subgroup so each subgroup
270 // can yield, bringing all subgroups to the barrier
271 // together.
272 for(int subgroupIndex = 0; subgroupIndex < subgroupsPerWorkgroup; subgroupIndex++)
Ben Claytonecfeede2019-05-08 08:51:01 +0100273 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500274 auto coroutine = (*this)(&data, groupX, groupY, groupZ, workgroupMemory.data(), subgroupIndex, 1);
Ben Claytonecfeede2019-05-08 08:51:01 +0100275 coroutines.push(std::move(coroutine));
276 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500277 }
278 else
279 {
280 auto coroutine = (*this)(&data, groupX, groupY, groupZ, workgroupMemory.data(), 0, subgroupsPerWorkgroup);
281 coroutines.push(std::move(coroutine));
282 }
Ben Claytonecfeede2019-05-08 08:51:01 +0100283
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500284 while(coroutines.size() > 0)
Nicolas Capens157ba262019-12-10 17:49:14 -0500285 {
286 auto coroutine = std::move(coroutines.front());
287 coroutines.pop();
288
289 SpirvShader::YieldResult result;
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500290 if(coroutine->await(result))
Ben Claytonecfeede2019-05-08 08:51:01 +0100291 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500292 // TODO: Consider result (when the enum is more than 1 entry).
293 coroutines.push(std::move(coroutine));
Ben Claytonecfeede2019-05-08 08:51:01 +0100294 }
Ben Claytonf7b7b702019-08-27 10:23:29 +0100295 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500296 }
297 });
Ben Claytonf2be26a2019-03-08 12:02:05 +0000298 }
Ben Clayton13dcbec2019-05-08 08:43:55 +0100299
Nicolas Capens157ba262019-12-10 17:49:14 -0500300 wg.wait();
Alexis Hetu4f438a52020-06-15 16:13:51 -0400301
302 if(shader->containsImageWrite())
303 {
304 vk::DescriptorSet::ContentsChanged(descriptorSetObjects, pipelineLayout);
305 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500306}
307
308} // namespace sw