blob: 8ce207a73143b62884658f13f9c97928f87cde45 [file] [log] [blame]
Nicolas Capens0bac2852016-05-07 06:09:58 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
John Bauman89401822014-05-06 15:04:28 -04002//
Nicolas Capens0bac2852016-05-07 06:09:58 -04003// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
John Bauman89401822014-05-06 15:04:28 -04006//
Nicolas Capens0bac2852016-05-07 06:09:58 -04007// http://www.apache.org/licenses/LICENSE-2.0
John Bauman89401822014-05-06 15:04:28 -04008//
Nicolas Capens0bac2852016-05-07 06:09:58 -04009// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
John Bauman89401822014-05-06 15:04:28 -040014
15#include "Surface.hpp"
16
17#include "Color.hpp"
18#include "Context.hpp"
Alexis Hetu0de50d42015-09-09 13:56:41 -040019#include "ETC_Decoder.hpp"
John Bauman19bac1e2014-05-06 15:23:49 -040020#include "Renderer.hpp"
John Bauman89401822014-05-06 15:04:28 -040021#include "Common/Half.hpp"
22#include "Common/Memory.hpp"
23#include "Common/CPUID.hpp"
24#include "Common/Resource.hpp"
25#include "Common/Debug.hpp"
John Bauman19bac1e2014-05-06 15:23:49 -040026#include "Reactor/Reactor.hpp"
John Bauman89401822014-05-06 15:04:28 -040027
Nicolas Capens47dc8672017-04-25 12:54:39 -040028#if defined(__i386__) || defined(__x86_64__)
29 #include <xmmintrin.h>
30 #include <emmintrin.h>
31#endif
John Bauman89401822014-05-06 15:04:28 -040032
33#undef min
34#undef max
35
36namespace sw
37{
38 extern bool quadLayoutEnabled;
39 extern bool complementaryDepthBuffer;
40 extern TranscendentalPrecision logPrecision;
41
42 unsigned int *Surface::palette = 0;
43 unsigned int Surface::paletteID = 0;
44
45 void Surface::Buffer::write(int x, int y, int z, const Color<float> &color)
46 {
Alexis Hetu1fa20672018-06-18 15:19:55 -040047 ASSERT((x >= -border) && (x < (width + border)));
48 ASSERT((y >= -border) && (y < (height + border)));
49 ASSERT((z >= 0) && (z < depth));
Nicolas Capens700a1a62018-06-15 11:41:28 -040050
Nicolas Capensbfa23b32017-12-11 10:06:37 -050051 byte *element = (byte*)buffer + (x + border) * bytes + (y + border) * pitchB + z * samples * sliceB;
John Bauman89401822014-05-06 15:04:28 -040052
Nicolas Capensbfa23b32017-12-11 10:06:37 -050053 for(int i = 0; i < samples; i++)
54 {
55 write(element, color);
56 element += sliceB;
57 }
John Bauman89401822014-05-06 15:04:28 -040058 }
59
60 void Surface::Buffer::write(int x, int y, const Color<float> &color)
61 {
Alexis Hetu1fa20672018-06-18 15:19:55 -040062 ASSERT((x >= -border) && (x < (width + border)));
63 ASSERT((y >= -border) && (y < (height + border)));
Nicolas Capens700a1a62018-06-15 11:41:28 -040064
Nicolas Capensbfa23b32017-12-11 10:06:37 -050065 byte *element = (byte*)buffer + (x + border) * bytes + (y + border) * pitchB;
John Bauman89401822014-05-06 15:04:28 -040066
Nicolas Capensbfa23b32017-12-11 10:06:37 -050067 for(int i = 0; i < samples; i++)
68 {
69 write(element, color);
70 element += sliceB;
71 }
John Bauman89401822014-05-06 15:04:28 -040072 }
73
74 inline void Surface::Buffer::write(void *element, const Color<float> &color)
75 {
Nicolas Capens1efac522017-12-01 16:59:38 -050076 float r = color.r;
77 float g = color.g;
78 float b = color.b;
79 float a = color.a;
80
81 if(isSRGBformat(format))
82 {
83 r = linearToSRGB(r);
84 g = linearToSRGB(g);
85 b = linearToSRGB(b);
86 }
87
John Bauman89401822014-05-06 15:04:28 -040088 switch(format)
89 {
90 case FORMAT_A8:
Nicolas Capens1efac522017-12-01 16:59:38 -050091 *(unsigned char*)element = unorm<8>(a);
John Bauman89401822014-05-06 15:04:28 -040092 break;
Nicolas Capens975adb72017-12-19 15:34:20 -050093 case FORMAT_R8_SNORM:
Nicolas Capens1efac522017-12-01 16:59:38 -050094 *(char*)element = snorm<8>(r);
Alexis Hetud3a2d3d2015-10-22 10:57:58 -040095 break;
John Bauman89401822014-05-06 15:04:28 -040096 case FORMAT_R8:
Nicolas Capens1efac522017-12-01 16:59:38 -050097 *(unsigned char*)element = unorm<8>(r);
John Bauman89401822014-05-06 15:04:28 -040098 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -040099 case FORMAT_R8I:
Nicolas Capens1efac522017-12-01 16:59:38 -0500100 *(char*)element = scast<8>(r);
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400101 break;
102 case FORMAT_R8UI:
Nicolas Capens1efac522017-12-01 16:59:38 -0500103 *(unsigned char*)element = ucast<8>(r);
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400104 break;
105 case FORMAT_R16I:
Nicolas Capens1efac522017-12-01 16:59:38 -0500106 *(short*)element = scast<16>(r);
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400107 break;
108 case FORMAT_R16UI:
Nicolas Capens1efac522017-12-01 16:59:38 -0500109 *(unsigned short*)element = ucast<16>(r);
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400110 break;
111 case FORMAT_R32I:
Nicolas Capens1efac522017-12-01 16:59:38 -0500112 *(int*)element = static_cast<int>(r);
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400113 break;
114 case FORMAT_R32UI:
Nicolas Capens1efac522017-12-01 16:59:38 -0500115 *(unsigned int*)element = static_cast<unsigned int>(r);
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400116 break;
John Bauman89401822014-05-06 15:04:28 -0400117 case FORMAT_R3G3B2:
Nicolas Capens1efac522017-12-01 16:59:38 -0500118 *(unsigned char*)element = (unorm<3>(r) << 5) | (unorm<3>(g) << 2) | (unorm<2>(b) << 0);
John Bauman89401822014-05-06 15:04:28 -0400119 break;
120 case FORMAT_A8R3G3B2:
Nicolas Capens1efac522017-12-01 16:59:38 -0500121 *(unsigned short*)element = (unorm<8>(a) << 8) | (unorm<3>(r) << 5) | (unorm<3>(g) << 2) | (unorm<2>(b) << 0);
John Bauman89401822014-05-06 15:04:28 -0400122 break;
123 case FORMAT_X4R4G4B4:
Nicolas Capens1efac522017-12-01 16:59:38 -0500124 *(unsigned short*)element = 0xF000 | (unorm<4>(r) << 8) | (unorm<4>(g) << 4) | (unorm<4>(b) << 0);
John Bauman89401822014-05-06 15:04:28 -0400125 break;
126 case FORMAT_A4R4G4B4:
Nicolas Capens1efac522017-12-01 16:59:38 -0500127 *(unsigned short*)element = (unorm<4>(a) << 12) | (unorm<4>(r) << 8) | (unorm<4>(g) << 4) | (unorm<4>(b) << 0);
John Bauman89401822014-05-06 15:04:28 -0400128 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400129 case FORMAT_R4G4B4A4:
Nicolas Capens1efac522017-12-01 16:59:38 -0500130 *(unsigned short*)element = (unorm<4>(r) << 12) | (unorm<4>(g) << 8) | (unorm<4>(b) << 4) | (unorm<4>(a) << 0);
Nicolas Capens80594422015-06-09 16:42:56 -0400131 break;
John Bauman89401822014-05-06 15:04:28 -0400132 case FORMAT_R5G6B5:
Nicolas Capens1efac522017-12-01 16:59:38 -0500133 *(unsigned short*)element = (unorm<5>(r) << 11) | (unorm<6>(g) << 5) | (unorm<5>(b) << 0);
John Bauman89401822014-05-06 15:04:28 -0400134 break;
135 case FORMAT_A1R5G5B5:
Nicolas Capens1efac522017-12-01 16:59:38 -0500136 *(unsigned short*)element = (unorm<1>(a) << 15) | (unorm<5>(r) << 10) | (unorm<5>(g) << 5) | (unorm<5>(b) << 0);
John Bauman89401822014-05-06 15:04:28 -0400137 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400138 case FORMAT_R5G5B5A1:
Nicolas Capens1efac522017-12-01 16:59:38 -0500139 *(unsigned short*)element = (unorm<5>(r) << 11) | (unorm<5>(g) << 6) | (unorm<5>(b) << 1) | (unorm<5>(a) << 0);
Nicolas Capens80594422015-06-09 16:42:56 -0400140 break;
John Bauman89401822014-05-06 15:04:28 -0400141 case FORMAT_X1R5G5B5:
Nicolas Capens1efac522017-12-01 16:59:38 -0500142 *(unsigned short*)element = 0x8000 | (unorm<5>(r) << 10) | (unorm<5>(g) << 5) | (unorm<5>(b) << 0);
John Bauman89401822014-05-06 15:04:28 -0400143 break;
144 case FORMAT_A8R8G8B8:
Nicolas Capens1efac522017-12-01 16:59:38 -0500145 *(unsigned int*)element = (unorm<8>(a) << 24) | (unorm<8>(r) << 16) | (unorm<8>(g) << 8) | (unorm<8>(b) << 0);
John Bauman89401822014-05-06 15:04:28 -0400146 break;
147 case FORMAT_X8R8G8B8:
Nicolas Capens1efac522017-12-01 16:59:38 -0500148 *(unsigned int*)element = 0xFF000000 | (unorm<8>(r) << 16) | (unorm<8>(g) << 8) | (unorm<8>(b) << 0);
John Bauman89401822014-05-06 15:04:28 -0400149 break;
Nicolas Capens975adb72017-12-19 15:34:20 -0500150 case FORMAT_A8B8G8R8_SNORM:
Nicolas Capens1efac522017-12-01 16:59:38 -0500151 *(unsigned int*)element = (static_cast<unsigned int>(snorm<8>(a)) << 24) |
152 (static_cast<unsigned int>(snorm<8>(b)) << 16) |
153 (static_cast<unsigned int>(snorm<8>(g)) << 8) |
154 (static_cast<unsigned int>(snorm<8>(r)) << 0);
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400155 break;
John Bauman89401822014-05-06 15:04:28 -0400156 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400157 case FORMAT_SRGB8_A8:
Nicolas Capens1efac522017-12-01 16:59:38 -0500158 *(unsigned int*)element = (unorm<8>(a) << 24) | (unorm<8>(b) << 16) | (unorm<8>(g) << 8) | (unorm<8>(r) << 0);
John Bauman89401822014-05-06 15:04:28 -0400159 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400160 case FORMAT_A8B8G8R8I:
Nicolas Capens1efac522017-12-01 16:59:38 -0500161 *(unsigned int*)element = (static_cast<unsigned int>(scast<8>(a)) << 24) |
162 (static_cast<unsigned int>(scast<8>(b)) << 16) |
163 (static_cast<unsigned int>(scast<8>(g)) << 8) |
164 (static_cast<unsigned int>(scast<8>(r)) << 0);
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400165 break;
166 case FORMAT_A8B8G8R8UI:
Nicolas Capens1efac522017-12-01 16:59:38 -0500167 *(unsigned int*)element = (ucast<8>(a) << 24) | (ucast<8>(b) << 16) | (ucast<8>(g) << 8) | (ucast<8>(r) << 0);
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400168 break;
Nicolas Capens975adb72017-12-19 15:34:20 -0500169 case FORMAT_X8B8G8R8_SNORM:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400170 *(unsigned int*)element = 0x7F000000 |
Nicolas Capens1efac522017-12-01 16:59:38 -0500171 (static_cast<unsigned int>(snorm<8>(b)) << 16) |
172 (static_cast<unsigned int>(snorm<8>(g)) << 8) |
173 (static_cast<unsigned int>(snorm<8>(r)) << 0);
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400174 break;
John Bauman89401822014-05-06 15:04:28 -0400175 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400176 case FORMAT_SRGB8_X8:
Nicolas Capens1efac522017-12-01 16:59:38 -0500177 *(unsigned int*)element = 0xFF000000 | (unorm<8>(b) << 16) | (unorm<8>(g) << 8) | (unorm<8>(r) << 0);
John Bauman89401822014-05-06 15:04:28 -0400178 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400179 case FORMAT_X8B8G8R8I:
180 *(unsigned int*)element = 0x7F000000 |
Nicolas Capens1efac522017-12-01 16:59:38 -0500181 (static_cast<unsigned int>(scast<8>(b)) << 16) |
182 (static_cast<unsigned int>(scast<8>(g)) << 8) |
183 (static_cast<unsigned int>(scast<8>(r)) << 0);
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400184 case FORMAT_X8B8G8R8UI:
Nicolas Capens1efac522017-12-01 16:59:38 -0500185 *(unsigned int*)element = 0xFF000000 | (ucast<8>(b) << 16) | (ucast<8>(g) << 8) | (ucast<8>(r) << 0);
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400186 break;
John Bauman89401822014-05-06 15:04:28 -0400187 case FORMAT_A2R10G10B10:
Nicolas Capens1efac522017-12-01 16:59:38 -0500188 *(unsigned int*)element = (unorm<2>(a) << 30) | (unorm<10>(r) << 20) | (unorm<10>(g) << 10) | (unorm<10>(b) << 0);
John Bauman89401822014-05-06 15:04:28 -0400189 break;
190 case FORMAT_A2B10G10R10:
Nicolas Capens5555af42017-12-14 13:14:03 -0500191 case FORMAT_A2B10G10R10UI:
Nicolas Capens1efac522017-12-01 16:59:38 -0500192 *(unsigned int*)element = (unorm<2>(a) << 30) | (unorm<10>(b) << 20) | (unorm<10>(g) << 10) | (unorm<10>(r) << 0);
John Bauman89401822014-05-06 15:04:28 -0400193 break;
Nicolas Capens975adb72017-12-19 15:34:20 -0500194 case FORMAT_G8R8_SNORM:
Nicolas Capens1efac522017-12-01 16:59:38 -0500195 *(unsigned short*)element = (static_cast<unsigned short>(snorm<8>(g)) << 8) |
196 (static_cast<unsigned short>(snorm<8>(r)) << 0);
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400197 break;
John Bauman89401822014-05-06 15:04:28 -0400198 case FORMAT_G8R8:
Nicolas Capens1efac522017-12-01 16:59:38 -0500199 *(unsigned short*)element = (unorm<8>(g) << 8) | (unorm<8>(r) << 0);
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400200 break;
201 case FORMAT_G8R8I:
Nicolas Capens1efac522017-12-01 16:59:38 -0500202 *(unsigned short*)element = (static_cast<unsigned short>(scast<8>(g)) << 8) |
203 (static_cast<unsigned short>(scast<8>(r)) << 0);
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400204 break;
205 case FORMAT_G8R8UI:
Nicolas Capens1efac522017-12-01 16:59:38 -0500206 *(unsigned short*)element = (ucast<8>(g) << 8) | (ucast<8>(r) << 0);
John Bauman89401822014-05-06 15:04:28 -0400207 break;
208 case FORMAT_G16R16:
Nicolas Capens1efac522017-12-01 16:59:38 -0500209 *(unsigned int*)element = (unorm<16>(g) << 16) | (unorm<16>(r) << 0);
John Bauman89401822014-05-06 15:04:28 -0400210 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400211 case FORMAT_G16R16I:
Nicolas Capens1efac522017-12-01 16:59:38 -0500212 *(unsigned int*)element = (static_cast<unsigned int>(scast<16>(g)) << 16) |
213 (static_cast<unsigned int>(scast<16>(r)) << 0);
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400214 break;
215 case FORMAT_G16R16UI:
Nicolas Capens1efac522017-12-01 16:59:38 -0500216 *(unsigned int*)element = (ucast<16>(g) << 16) | (ucast<16>(r) << 0);
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400217 break;
218 case FORMAT_G32R32I:
219 case FORMAT_G32R32UI:
Nicolas Capens1efac522017-12-01 16:59:38 -0500220 ((unsigned int*)element)[0] = static_cast<unsigned int>(r);
221 ((unsigned int*)element)[1] = static_cast<unsigned int>(g);
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400222 break;
John Bauman89401822014-05-06 15:04:28 -0400223 case FORMAT_A16B16G16R16:
Nicolas Capens1efac522017-12-01 16:59:38 -0500224 ((unsigned short*)element)[0] = unorm<16>(r);
225 ((unsigned short*)element)[1] = unorm<16>(g);
226 ((unsigned short*)element)[2] = unorm<16>(b);
227 ((unsigned short*)element)[3] = unorm<16>(a);
John Bauman89401822014-05-06 15:04:28 -0400228 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400229 case FORMAT_A16B16G16R16I:
Nicolas Capens1efac522017-12-01 16:59:38 -0500230 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(r));
231 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(g));
232 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(b));
233 ((unsigned short*)element)[3] = static_cast<unsigned short>(scast<16>(a));
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400234 break;
235 case FORMAT_A16B16G16R16UI:
Nicolas Capens1efac522017-12-01 16:59:38 -0500236 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(r));
237 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(g));
238 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(b));
239 ((unsigned short*)element)[3] = static_cast<unsigned short>(ucast<16>(a));
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400240 break;
241 case FORMAT_X16B16G16R16I:
Nicolas Capens1efac522017-12-01 16:59:38 -0500242 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(r));
243 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(g));
244 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(b));
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400245 break;
246 case FORMAT_X16B16G16R16UI:
Nicolas Capens1efac522017-12-01 16:59:38 -0500247 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(r));
248 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(g));
249 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(b));
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400250 break;
251 case FORMAT_A32B32G32R32I:
252 case FORMAT_A32B32G32R32UI:
Nicolas Capens1efac522017-12-01 16:59:38 -0500253 ((unsigned int*)element)[0] = static_cast<unsigned int>(r);
254 ((unsigned int*)element)[1] = static_cast<unsigned int>(g);
255 ((unsigned int*)element)[2] = static_cast<unsigned int>(b);
256 ((unsigned int*)element)[3] = static_cast<unsigned int>(a);
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400257 break;
258 case FORMAT_X32B32G32R32I:
259 case FORMAT_X32B32G32R32UI:
Nicolas Capens1efac522017-12-01 16:59:38 -0500260 ((unsigned int*)element)[0] = static_cast<unsigned int>(r);
261 ((unsigned int*)element)[1] = static_cast<unsigned int>(g);
262 ((unsigned int*)element)[2] = static_cast<unsigned int>(b);
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400263 break;
John Bauman89401822014-05-06 15:04:28 -0400264 case FORMAT_V8U8:
Nicolas Capens1efac522017-12-01 16:59:38 -0500265 *(unsigned short*)element = (snorm<8>(g) << 8) | (snorm<8>(r) << 0);
John Bauman89401822014-05-06 15:04:28 -0400266 break;
267 case FORMAT_L6V5U5:
Nicolas Capens1efac522017-12-01 16:59:38 -0500268 *(unsigned short*)element = (unorm<6>(b) << 10) | (snorm<5>(g) << 5) | (snorm<5>(r) << 0);
John Bauman89401822014-05-06 15:04:28 -0400269 break;
270 case FORMAT_Q8W8V8U8:
Nicolas Capens1efac522017-12-01 16:59:38 -0500271 *(unsigned int*)element = (snorm<8>(a) << 24) | (snorm<8>(b) << 16) | (snorm<8>(g) << 8) | (snorm<8>(r) << 0);
John Bauman89401822014-05-06 15:04:28 -0400272 break;
273 case FORMAT_X8L8V8U8:
Nicolas Capens1efac522017-12-01 16:59:38 -0500274 *(unsigned int*)element = 0xFF000000 | (unorm<8>(b) << 16) | (snorm<8>(g) << 8) | (snorm<8>(r) << 0);
John Bauman89401822014-05-06 15:04:28 -0400275 break;
276 case FORMAT_V16U16:
Nicolas Capens1efac522017-12-01 16:59:38 -0500277 *(unsigned int*)element = (snorm<16>(g) << 16) | (snorm<16>(r) << 0);
John Bauman89401822014-05-06 15:04:28 -0400278 break;
279 case FORMAT_A2W10V10U10:
Nicolas Capens1efac522017-12-01 16:59:38 -0500280 *(unsigned int*)element = (unorm<2>(a) << 30) | (snorm<10>(b) << 20) | (snorm<10>(g) << 10) | (snorm<10>(r) << 0);
John Bauman89401822014-05-06 15:04:28 -0400281 break;
282 case FORMAT_A16W16V16U16:
Nicolas Capens1efac522017-12-01 16:59:38 -0500283 ((unsigned short*)element)[0] = snorm<16>(r);
284 ((unsigned short*)element)[1] = snorm<16>(g);
285 ((unsigned short*)element)[2] = snorm<16>(b);
286 ((unsigned short*)element)[3] = unorm<16>(a);
John Bauman89401822014-05-06 15:04:28 -0400287 break;
288 case FORMAT_Q16W16V16U16:
Nicolas Capens1efac522017-12-01 16:59:38 -0500289 ((unsigned short*)element)[0] = snorm<16>(r);
290 ((unsigned short*)element)[1] = snorm<16>(g);
291 ((unsigned short*)element)[2] = snorm<16>(b);
292 ((unsigned short*)element)[3] = snorm<16>(a);
John Bauman89401822014-05-06 15:04:28 -0400293 break;
294 case FORMAT_R8G8B8:
Nicolas Capens1efac522017-12-01 16:59:38 -0500295 ((unsigned char*)element)[0] = unorm<8>(b);
296 ((unsigned char*)element)[1] = unorm<8>(g);
297 ((unsigned char*)element)[2] = unorm<8>(r);
John Bauman89401822014-05-06 15:04:28 -0400298 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400299 case FORMAT_B8G8R8:
Nicolas Capens1efac522017-12-01 16:59:38 -0500300 ((unsigned char*)element)[0] = unorm<8>(r);
301 ((unsigned char*)element)[1] = unorm<8>(g);
302 ((unsigned char*)element)[2] = unorm<8>(b);
Nicolas Capens80594422015-06-09 16:42:56 -0400303 break;
John Bauman89401822014-05-06 15:04:28 -0400304 case FORMAT_R16F:
Nicolas Capens1efac522017-12-01 16:59:38 -0500305 *(half*)element = (half)r;
John Bauman89401822014-05-06 15:04:28 -0400306 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400307 case FORMAT_A16F:
Nicolas Capens1efac522017-12-01 16:59:38 -0500308 *(half*)element = (half)a;
Nicolas Capens80594422015-06-09 16:42:56 -0400309 break;
John Bauman89401822014-05-06 15:04:28 -0400310 case FORMAT_G16R16F:
Nicolas Capens1efac522017-12-01 16:59:38 -0500311 ((half*)element)[0] = (half)r;
312 ((half*)element)[1] = (half)g;
John Bauman89401822014-05-06 15:04:28 -0400313 break;
Nicolas Capens67fdd832017-12-21 11:20:54 -0500314 case FORMAT_X16B16G16R16F_UNSIGNED:
315 r = max(r, 0.0f); g = max(g, 0.0f); b = max(b, 0.0f);
316 // Fall through to FORMAT_X16B16G16R16F.
Nicolas Capensa6bc61d2017-12-20 11:07:45 -0500317 case FORMAT_X16B16G16R16F:
318 ((half*)element)[3] = 1.0f;
Nicolas Capens67fdd832017-12-21 11:20:54 -0500319 // Fall through to FORMAT_B16G16R16F.
Nicolas Capens80594422015-06-09 16:42:56 -0400320 case FORMAT_B16G16R16F:
Nicolas Capens1efac522017-12-01 16:59:38 -0500321 ((half*)element)[0] = (half)r;
322 ((half*)element)[1] = (half)g;
323 ((half*)element)[2] = (half)b;
Nicolas Capens80594422015-06-09 16:42:56 -0400324 break;
John Bauman89401822014-05-06 15:04:28 -0400325 case FORMAT_A16B16G16R16F:
Nicolas Capens1efac522017-12-01 16:59:38 -0500326 ((half*)element)[0] = (half)r;
327 ((half*)element)[1] = (half)g;
328 ((half*)element)[2] = (half)b;
329 ((half*)element)[3] = (half)a;
John Bauman89401822014-05-06 15:04:28 -0400330 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400331 case FORMAT_A32F:
Nicolas Capens1efac522017-12-01 16:59:38 -0500332 *(float*)element = a;
Nicolas Capens80594422015-06-09 16:42:56 -0400333 break;
John Bauman89401822014-05-06 15:04:28 -0400334 case FORMAT_R32F:
Nicolas Capens1efac522017-12-01 16:59:38 -0500335 *(float*)element = r;
John Bauman89401822014-05-06 15:04:28 -0400336 break;
337 case FORMAT_G32R32F:
Nicolas Capens1efac522017-12-01 16:59:38 -0500338 ((float*)element)[0] = r;
339 ((float*)element)[1] = g;
John Bauman89401822014-05-06 15:04:28 -0400340 break;
Nicolas Capens67fdd832017-12-21 11:20:54 -0500341 case FORMAT_X32B32G32R32F_UNSIGNED:
342 r = max(r, 0.0f); g = max(g, 0.0f); b = max(b, 0.0f);
343 // Fall through to FORMAT_X32B32G32R32F.
Alexis Hetudbd1a8e2016-04-13 11:40:30 -0400344 case FORMAT_X32B32G32R32F:
345 ((float*)element)[3] = 1.0f;
Nicolas Capens67fdd832017-12-21 11:20:54 -0500346 // Fall through to FORMAT_B32G32R32F.
Nicolas Capens80594422015-06-09 16:42:56 -0400347 case FORMAT_B32G32R32F:
Nicolas Capens1efac522017-12-01 16:59:38 -0500348 ((float*)element)[0] = r;
349 ((float*)element)[1] = g;
350 ((float*)element)[2] = b;
Nicolas Capens80594422015-06-09 16:42:56 -0400351 break;
John Bauman89401822014-05-06 15:04:28 -0400352 case FORMAT_A32B32G32R32F:
Nicolas Capens1efac522017-12-01 16:59:38 -0500353 ((float*)element)[0] = r;
354 ((float*)element)[1] = g;
355 ((float*)element)[2] = b;
356 ((float*)element)[3] = a;
John Bauman89401822014-05-06 15:04:28 -0400357 break;
358 case FORMAT_D32F:
Nicolas Capens57e7cea2017-12-13 22:25:04 -0500359 case FORMAT_D32FS8:
John Bauman89401822014-05-06 15:04:28 -0400360 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -0400361 case FORMAT_D32FS8_TEXTURE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -0500362 case FORMAT_D32F_SHADOW:
John Bauman66b8ab22014-05-06 15:57:45 -0400363 case FORMAT_D32FS8_SHADOW:
Nicolas Capens1efac522017-12-01 16:59:38 -0500364 *((float*)element) = r;
John Bauman89401822014-05-06 15:04:28 -0400365 break;
366 case FORMAT_D32F_COMPLEMENTARY:
Nicolas Capens57e7cea2017-12-13 22:25:04 -0500367 case FORMAT_D32FS8_COMPLEMENTARY:
Nicolas Capens1efac522017-12-01 16:59:38 -0500368 *((float*)element) = 1 - r;
John Bauman89401822014-05-06 15:04:28 -0400369 break;
370 case FORMAT_S8:
Nicolas Capens1efac522017-12-01 16:59:38 -0500371 *((unsigned char*)element) = unorm<8>(r);
John Bauman89401822014-05-06 15:04:28 -0400372 break;
373 case FORMAT_L8:
Nicolas Capens1efac522017-12-01 16:59:38 -0500374 *(unsigned char*)element = unorm<8>(r);
John Bauman89401822014-05-06 15:04:28 -0400375 break;
376 case FORMAT_A4L4:
Nicolas Capens1efac522017-12-01 16:59:38 -0500377 *(unsigned char*)element = (unorm<4>(a) << 4) | (unorm<4>(r) << 0);
John Bauman89401822014-05-06 15:04:28 -0400378 break;
379 case FORMAT_L16:
Nicolas Capens1efac522017-12-01 16:59:38 -0500380 *(unsigned short*)element = unorm<16>(r);
John Bauman89401822014-05-06 15:04:28 -0400381 break;
382 case FORMAT_A8L8:
Nicolas Capens1efac522017-12-01 16:59:38 -0500383 *(unsigned short*)element = (unorm<8>(a) << 8) | (unorm<8>(r) << 0);
John Bauman89401822014-05-06 15:04:28 -0400384 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400385 case FORMAT_L16F:
Nicolas Capens1efac522017-12-01 16:59:38 -0500386 *(half*)element = (half)r;
Nicolas Capens80594422015-06-09 16:42:56 -0400387 break;
388 case FORMAT_A16L16F:
Nicolas Capens1efac522017-12-01 16:59:38 -0500389 ((half*)element)[0] = (half)r;
390 ((half*)element)[1] = (half)a;
Nicolas Capens80594422015-06-09 16:42:56 -0400391 break;
392 case FORMAT_L32F:
Nicolas Capens1efac522017-12-01 16:59:38 -0500393 *(float*)element = r;
Nicolas Capens80594422015-06-09 16:42:56 -0400394 break;
395 case FORMAT_A32L32F:
Nicolas Capens1efac522017-12-01 16:59:38 -0500396 ((float*)element)[0] = r;
397 ((float*)element)[1] = a;
Nicolas Capens80594422015-06-09 16:42:56 -0400398 break;
John Bauman89401822014-05-06 15:04:28 -0400399 default:
400 ASSERT(false);
401 }
402 }
403
404 Color<float> Surface::Buffer::read(int x, int y, int z) const
405 {
Alexis Hetu1fa20672018-06-18 15:19:55 -0400406 ASSERT((x >= -border) && (x < (width + border)));
407 ASSERT((y >= -border) && (y < (height + border)));
408 ASSERT((z >= 0) && (z < depth));
Nicolas Capens700a1a62018-06-15 11:41:28 -0400409
Nicolas Capensbfa23b32017-12-11 10:06:37 -0500410 void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB + z * samples * sliceB;
John Bauman89401822014-05-06 15:04:28 -0400411
412 return read(element);
413 }
414
415 Color<float> Surface::Buffer::read(int x, int y) const
416 {
Alexis Hetu1fa20672018-06-18 15:19:55 -0400417 ASSERT((x >= -border) && (x < (width + border)));
418 ASSERT((y >= -border) && (y < (height + border)));
Nicolas Capens700a1a62018-06-15 11:41:28 -0400419
Alexis Hetu9c6d5222016-11-29 17:02:14 -0500420 void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB;
John Bauman89401822014-05-06 15:04:28 -0400421
422 return read(element);
423 }
424
425 inline Color<float> Surface::Buffer::read(void *element) const
426 {
Nicolas Capens3f439242015-06-09 16:33:50 -0400427 float r = 0.0f;
428 float g = 0.0f;
429 float b = 0.0f;
430 float a = 1.0f;
John Bauman89401822014-05-06 15:04:28 -0400431
432 switch(format)
433 {
434 case FORMAT_P8:
435 {
436 ASSERT(palette);
437
438 unsigned int abgr = palette[*(unsigned char*)element];
Nicolas Capensc39901e2016-03-21 16:37:44 -0400439
John Bauman89401822014-05-06 15:04:28 -0400440 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
441 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
442 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
443 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
444 }
445 break;
446 case FORMAT_A8P8:
447 {
448 ASSERT(palette);
449
450 unsigned int bgr = palette[((unsigned char*)element)[0]];
Nicolas Capensc39901e2016-03-21 16:37:44 -0400451
John Bauman89401822014-05-06 15:04:28 -0400452 r = (bgr & 0x000000FF) * (1.0f / 0x000000FF);
453 g = (bgr & 0x0000FF00) * (1.0f / 0x0000FF00);
454 b = (bgr & 0x00FF0000) * (1.0f / 0x00FF0000);
455 a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
456 }
457 break;
458 case FORMAT_A8:
459 r = 0;
460 g = 0;
461 b = 0;
462 a = *(unsigned char*)element * (1.0f / 0xFF);
463 break;
Nicolas Capens975adb72017-12-19 15:34:20 -0500464 case FORMAT_R8_SNORM:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400465 r = max((*(signed char*)element) * (1.0f / 0x7F), -1.0f);
466 break;
John Bauman89401822014-05-06 15:04:28 -0400467 case FORMAT_R8:
468 r = *(unsigned char*)element * (1.0f / 0xFF);
469 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400470 case FORMAT_R8I:
471 r = *(signed char*)element;
472 break;
473 case FORMAT_R8UI:
474 r = *(unsigned char*)element;
475 break;
John Bauman89401822014-05-06 15:04:28 -0400476 case FORMAT_R3G3B2:
477 {
478 unsigned char rgb = *(unsigned char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400479
John Bauman89401822014-05-06 15:04:28 -0400480 r = (rgb & 0xE0) * (1.0f / 0xE0);
481 g = (rgb & 0x1C) * (1.0f / 0x1C);
482 b = (rgb & 0x03) * (1.0f / 0x03);
483 }
484 break;
485 case FORMAT_A8R3G3B2:
486 {
487 unsigned short argb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400488
John Bauman89401822014-05-06 15:04:28 -0400489 a = (argb & 0xFF00) * (1.0f / 0xFF00);
490 r = (argb & 0x00E0) * (1.0f / 0x00E0);
491 g = (argb & 0x001C) * (1.0f / 0x001C);
492 b = (argb & 0x0003) * (1.0f / 0x0003);
493 }
494 break;
495 case FORMAT_X4R4G4B4:
496 {
497 unsigned short rgb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400498
John Bauman89401822014-05-06 15:04:28 -0400499 r = (rgb & 0x0F00) * (1.0f / 0x0F00);
500 g = (rgb & 0x00F0) * (1.0f / 0x00F0);
501 b = (rgb & 0x000F) * (1.0f / 0x000F);
502 }
503 break;
504 case FORMAT_A4R4G4B4:
505 {
506 unsigned short argb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400507
John Bauman89401822014-05-06 15:04:28 -0400508 a = (argb & 0xF000) * (1.0f / 0xF000);
509 r = (argb & 0x0F00) * (1.0f / 0x0F00);
510 g = (argb & 0x00F0) * (1.0f / 0x00F0);
511 b = (argb & 0x000F) * (1.0f / 0x000F);
512 }
513 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400514 case FORMAT_R4G4B4A4:
515 {
516 unsigned short rgba = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400517
Nicolas Capens80594422015-06-09 16:42:56 -0400518 r = (rgba & 0xF000) * (1.0f / 0xF000);
519 g = (rgba & 0x0F00) * (1.0f / 0x0F00);
520 b = (rgba & 0x00F0) * (1.0f / 0x00F0);
521 a = (rgba & 0x000F) * (1.0f / 0x000F);
522 }
523 break;
John Bauman89401822014-05-06 15:04:28 -0400524 case FORMAT_R5G6B5:
525 {
526 unsigned short rgb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400527
John Bauman89401822014-05-06 15:04:28 -0400528 r = (rgb & 0xF800) * (1.0f / 0xF800);
529 g = (rgb & 0x07E0) * (1.0f / 0x07E0);
530 b = (rgb & 0x001F) * (1.0f / 0x001F);
531 }
532 break;
533 case FORMAT_A1R5G5B5:
534 {
535 unsigned short argb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400536
John Bauman89401822014-05-06 15:04:28 -0400537 a = (argb & 0x8000) * (1.0f / 0x8000);
538 r = (argb & 0x7C00) * (1.0f / 0x7C00);
539 g = (argb & 0x03E0) * (1.0f / 0x03E0);
540 b = (argb & 0x001F) * (1.0f / 0x001F);
541 }
542 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400543 case FORMAT_R5G5B5A1:
544 {
545 unsigned short rgba = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400546
Nicolas Capens80594422015-06-09 16:42:56 -0400547 r = (rgba & 0xF800) * (1.0f / 0xF800);
548 g = (rgba & 0x07C0) * (1.0f / 0x07C0);
549 b = (rgba & 0x003E) * (1.0f / 0x003E);
550 a = (rgba & 0x0001) * (1.0f / 0x0001);
551 }
552 break;
John Bauman89401822014-05-06 15:04:28 -0400553 case FORMAT_X1R5G5B5:
554 {
555 unsigned short xrgb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400556
John Bauman89401822014-05-06 15:04:28 -0400557 r = (xrgb & 0x7C00) * (1.0f / 0x7C00);
558 g = (xrgb & 0x03E0) * (1.0f / 0x03E0);
559 b = (xrgb & 0x001F) * (1.0f / 0x001F);
560 }
561 break;
562 case FORMAT_A8R8G8B8:
563 {
564 unsigned int argb = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400565
John Bauman89401822014-05-06 15:04:28 -0400566 a = (argb & 0xFF000000) * (1.0f / 0xFF000000);
567 r = (argb & 0x00FF0000) * (1.0f / 0x00FF0000);
568 g = (argb & 0x0000FF00) * (1.0f / 0x0000FF00);
569 b = (argb & 0x000000FF) * (1.0f / 0x000000FF);
570 }
571 break;
572 case FORMAT_X8R8G8B8:
573 {
574 unsigned int xrgb = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400575
John Bauman89401822014-05-06 15:04:28 -0400576 r = (xrgb & 0x00FF0000) * (1.0f / 0x00FF0000);
577 g = (xrgb & 0x0000FF00) * (1.0f / 0x0000FF00);
578 b = (xrgb & 0x000000FF) * (1.0f / 0x000000FF);
579 }
580 break;
Nicolas Capens975adb72017-12-19 15:34:20 -0500581 case FORMAT_A8B8G8R8_SNORM:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400582 {
583 signed char* abgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400584
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400585 r = max(abgr[0] * (1.0f / 0x7F), -1.0f);
586 g = max(abgr[1] * (1.0f / 0x7F), -1.0f);
587 b = max(abgr[2] * (1.0f / 0x7F), -1.0f);
588 a = max(abgr[3] * (1.0f / 0x7F), -1.0f);
589 }
590 break;
John Bauman89401822014-05-06 15:04:28 -0400591 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400592 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -0400593 {
594 unsigned int abgr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400595
John Bauman89401822014-05-06 15:04:28 -0400596 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
597 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
598 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
599 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
600 }
601 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400602 case FORMAT_A8B8G8R8I:
603 {
604 signed char* abgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400605
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400606 r = abgr[0];
607 g = abgr[1];
608 b = abgr[2];
609 a = abgr[3];
610 }
611 break;
612 case FORMAT_A8B8G8R8UI:
613 {
614 unsigned char* abgr = (unsigned char*)element;
615
616 r = abgr[0];
617 g = abgr[1];
618 b = abgr[2];
619 a = abgr[3];
620 }
621 break;
Nicolas Capens975adb72017-12-19 15:34:20 -0500622 case FORMAT_X8B8G8R8_SNORM:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400623 {
624 signed char* bgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400625
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400626 r = max(bgr[0] * (1.0f / 0x7F), -1.0f);
627 g = max(bgr[1] * (1.0f / 0x7F), -1.0f);
628 b = max(bgr[2] * (1.0f / 0x7F), -1.0f);
629 }
630 break;
John Bauman89401822014-05-06 15:04:28 -0400631 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400632 case FORMAT_SRGB8_X8:
John Bauman89401822014-05-06 15:04:28 -0400633 {
634 unsigned int xbgr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400635
John Bauman89401822014-05-06 15:04:28 -0400636 b = (xbgr & 0x00FF0000) * (1.0f / 0x00FF0000);
637 g = (xbgr & 0x0000FF00) * (1.0f / 0x0000FF00);
638 r = (xbgr & 0x000000FF) * (1.0f / 0x000000FF);
639 }
640 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400641 case FORMAT_X8B8G8R8I:
642 {
643 signed char* bgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400644
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400645 r = bgr[0];
646 g = bgr[1];
647 b = bgr[2];
648 }
649 break;
650 case FORMAT_X8B8G8R8UI:
651 {
652 unsigned char* bgr = (unsigned char*)element;
653
654 r = bgr[0];
655 g = bgr[1];
656 b = bgr[2];
657 }
658 break;
Nicolas Capens975adb72017-12-19 15:34:20 -0500659 case FORMAT_G8R8_SNORM:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400660 {
661 signed char* gr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400662
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400663 r = (gr[0] & 0xFF00) * (1.0f / 0xFF00);
664 g = (gr[1] & 0x00FF) * (1.0f / 0x00FF);
665 }
666 break;
John Bauman89401822014-05-06 15:04:28 -0400667 case FORMAT_G8R8:
668 {
669 unsigned short gr = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400670
John Bauman89401822014-05-06 15:04:28 -0400671 g = (gr & 0xFF00) * (1.0f / 0xFF00);
672 r = (gr & 0x00FF) * (1.0f / 0x00FF);
673 }
674 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400675 case FORMAT_G8R8I:
676 {
677 signed char* gr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400678
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400679 r = gr[0];
680 g = gr[1];
681 }
682 break;
683 case FORMAT_G8R8UI:
684 {
685 unsigned char* gr = (unsigned char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400686
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400687 r = gr[0];
688 g = gr[1];
689 }
690 break;
691 case FORMAT_R16I:
692 r = *((short*)element);
693 break;
694 case FORMAT_R16UI:
695 r = *((unsigned short*)element);
696 break;
697 case FORMAT_G16R16I:
698 {
699 short* gr = (short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400700
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400701 r = gr[0];
702 g = gr[1];
703 }
704 break;
John Bauman89401822014-05-06 15:04:28 -0400705 case FORMAT_G16R16:
706 {
707 unsigned int gr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400708
John Bauman89401822014-05-06 15:04:28 -0400709 g = (gr & 0xFFFF0000) * (1.0f / 0xFFFF0000);
710 r = (gr & 0x0000FFFF) * (1.0f / 0x0000FFFF);
711 }
712 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400713 case FORMAT_G16R16UI:
714 {
715 unsigned short* gr = (unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400716
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400717 r = gr[0];
718 g = gr[1];
719 }
720 break;
John Bauman89401822014-05-06 15:04:28 -0400721 case FORMAT_A2R10G10B10:
722 {
723 unsigned int argb = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400724
John Bauman89401822014-05-06 15:04:28 -0400725 a = (argb & 0xC0000000) * (1.0f / 0xC0000000);
726 r = (argb & 0x3FF00000) * (1.0f / 0x3FF00000);
727 g = (argb & 0x000FFC00) * (1.0f / 0x000FFC00);
728 b = (argb & 0x000003FF) * (1.0f / 0x000003FF);
729 }
730 break;
731 case FORMAT_A2B10G10R10:
732 {
733 unsigned int abgr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400734
John Bauman89401822014-05-06 15:04:28 -0400735 a = (abgr & 0xC0000000) * (1.0f / 0xC0000000);
736 b = (abgr & 0x3FF00000) * (1.0f / 0x3FF00000);
737 g = (abgr & 0x000FFC00) * (1.0f / 0x000FFC00);
738 r = (abgr & 0x000003FF) * (1.0f / 0x000003FF);
739 }
740 break;
Nicolas Capens5555af42017-12-14 13:14:03 -0500741 case FORMAT_A2B10G10R10UI:
742 {
743 unsigned int abgr = *(unsigned int*)element;
744
745 a = static_cast<float>((abgr & 0xC0000000) >> 30);
746 b = static_cast<float>((abgr & 0x3FF00000) >> 20);
747 g = static_cast<float>((abgr & 0x000FFC00) >> 10);
748 r = static_cast<float>(abgr & 0x000003FF);
749 }
750 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400751 case FORMAT_A16B16G16R16I:
752 {
753 short* abgr = (short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400754
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400755 r = abgr[0];
756 g = abgr[1];
757 b = abgr[2];
758 a = abgr[3];
759 }
760 break;
John Bauman89401822014-05-06 15:04:28 -0400761 case FORMAT_A16B16G16R16:
762 r = ((unsigned short*)element)[0] * (1.0f / 0xFFFF);
763 g = ((unsigned short*)element)[1] * (1.0f / 0xFFFF);
764 b = ((unsigned short*)element)[2] * (1.0f / 0xFFFF);
765 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
766 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400767 case FORMAT_A16B16G16R16UI:
768 {
769 unsigned short* abgr = (unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400770
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400771 r = abgr[0];
772 g = abgr[1];
773 b = abgr[2];
774 a = abgr[3];
775 }
776 break;
777 case FORMAT_X16B16G16R16I:
778 {
779 short* bgr = (short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400780
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400781 r = bgr[0];
782 g = bgr[1];
783 b = bgr[2];
784 }
785 break;
786 case FORMAT_X16B16G16R16UI:
787 {
788 unsigned short* bgr = (unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400789
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400790 r = bgr[0];
791 g = bgr[1];
792 b = bgr[2];
793 }
794 break;
795 case FORMAT_A32B32G32R32I:
796 {
797 int* abgr = (int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400798
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400799 r = static_cast<float>(abgr[0]);
800 g = static_cast<float>(abgr[1]);
801 b = static_cast<float>(abgr[2]);
802 a = static_cast<float>(abgr[3]);
803 }
804 break;
805 case FORMAT_A32B32G32R32UI:
806 {
807 unsigned int* abgr = (unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400808
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400809 r = static_cast<float>(abgr[0]);
810 g = static_cast<float>(abgr[1]);
811 b = static_cast<float>(abgr[2]);
812 a = static_cast<float>(abgr[3]);
813 }
814 break;
815 case FORMAT_X32B32G32R32I:
816 {
817 int* bgr = (int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400818
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400819 r = static_cast<float>(bgr[0]);
820 g = static_cast<float>(bgr[1]);
821 b = static_cast<float>(bgr[2]);
822 }
823 break;
824 case FORMAT_X32B32G32R32UI:
825 {
826 unsigned int* bgr = (unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400827
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400828 r = static_cast<float>(bgr[0]);
829 g = static_cast<float>(bgr[1]);
830 b = static_cast<float>(bgr[2]);
831 }
832 break;
833 case FORMAT_G32R32I:
834 {
835 int* gr = (int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400836
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400837 r = static_cast<float>(gr[0]);
838 g = static_cast<float>(gr[1]);
839 }
840 break;
841 case FORMAT_G32R32UI:
842 {
843 unsigned int* gr = (unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400844
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400845 r = static_cast<float>(gr[0]);
846 g = static_cast<float>(gr[1]);
847 }
848 break;
849 case FORMAT_R32I:
850 r = static_cast<float>(*((int*)element));
851 break;
852 case FORMAT_R32UI:
853 r = static_cast<float>(*((unsigned int*)element));
854 break;
John Bauman89401822014-05-06 15:04:28 -0400855 case FORMAT_V8U8:
856 {
857 unsigned short vu = *(unsigned short*)element;
858
859 r = ((int)(vu & 0x00FF) << 24) * (1.0f / 0x7F000000);
860 g = ((int)(vu & 0xFF00) << 16) * (1.0f / 0x7F000000);
861 }
862 break;
863 case FORMAT_L6V5U5:
864 {
865 unsigned short lvu = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400866
John Bauman89401822014-05-06 15:04:28 -0400867 r = ((int)(lvu & 0x001F) << 27) * (1.0f / 0x78000000);
868 g = ((int)(lvu & 0x03E0) << 22) * (1.0f / 0x78000000);
869 b = (lvu & 0xFC00) * (1.0f / 0xFC00);
870 }
871 break;
872 case FORMAT_Q8W8V8U8:
873 {
874 unsigned int qwvu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400875
John Bauman89401822014-05-06 15:04:28 -0400876 r = ((int)(qwvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
877 g = ((int)(qwvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
878 b = ((int)(qwvu & 0x00FF0000) << 8) * (1.0f / 0x7F000000);
879 a = ((int)(qwvu & 0xFF000000) << 0) * (1.0f / 0x7F000000);
880 }
881 break;
882 case FORMAT_X8L8V8U8:
883 {
884 unsigned int xlvu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400885
John Bauman89401822014-05-06 15:04:28 -0400886 r = ((int)(xlvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
887 g = ((int)(xlvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
888 b = (xlvu & 0x00FF0000) * (1.0f / 0x00FF0000);
889 }
890 break;
891 case FORMAT_R8G8B8:
892 r = ((unsigned char*)element)[2] * (1.0f / 0xFF);
893 g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
894 b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
895 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400896 case FORMAT_B8G8R8:
897 r = ((unsigned char*)element)[0] * (1.0f / 0xFF);
898 g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
899 b = ((unsigned char*)element)[2] * (1.0f / 0xFF);
900 break;
John Bauman89401822014-05-06 15:04:28 -0400901 case FORMAT_V16U16:
902 {
903 unsigned int vu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400904
John Bauman89401822014-05-06 15:04:28 -0400905 r = ((int)(vu & 0x0000FFFF) << 16) * (1.0f / 0x7FFF0000);
906 g = ((int)(vu & 0xFFFF0000) << 0) * (1.0f / 0x7FFF0000);
907 }
908 break;
909 case FORMAT_A2W10V10U10:
910 {
911 unsigned int awvu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400912
John Bauman89401822014-05-06 15:04:28 -0400913 r = ((int)(awvu & 0x000003FF) << 22) * (1.0f / 0x7FC00000);
914 g = ((int)(awvu & 0x000FFC00) << 12) * (1.0f / 0x7FC00000);
915 b = ((int)(awvu & 0x3FF00000) << 2) * (1.0f / 0x7FC00000);
916 a = (awvu & 0xC0000000) * (1.0f / 0xC0000000);
917 }
918 break;
919 case FORMAT_A16W16V16U16:
920 r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
921 g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
922 b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
923 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
924 break;
925 case FORMAT_Q16W16V16U16:
926 r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
927 g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
928 b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
929 a = ((signed short*)element)[3] * (1.0f / 0x7FFF);
930 break;
931 case FORMAT_L8:
932 r =
933 g =
934 b = *(unsigned char*)element * (1.0f / 0xFF);
935 break;
936 case FORMAT_A4L4:
937 {
938 unsigned char al = *(unsigned char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400939
John Bauman89401822014-05-06 15:04:28 -0400940 r =
941 g =
942 b = (al & 0x0F) * (1.0f / 0x0F);
943 a = (al & 0xF0) * (1.0f / 0xF0);
944 }
945 break;
946 case FORMAT_L16:
947 r =
948 g =
949 b = *(unsigned short*)element * (1.0f / 0xFFFF);
950 break;
951 case FORMAT_A8L8:
952 r =
953 g =
954 b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
955 a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
956 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400957 case FORMAT_L16F:
958 r =
959 g =
960 b = *(half*)element;
961 break;
962 case FORMAT_A16L16F:
963 r =
964 g =
965 b = ((half*)element)[0];
966 a = ((half*)element)[1];
967 break;
968 case FORMAT_L32F:
969 r =
970 g =
971 b = *(float*)element;
972 break;
973 case FORMAT_A32L32F:
974 r =
975 g =
976 b = ((float*)element)[0];
977 a = ((float*)element)[1];
978 break;
979 case FORMAT_A16F:
980 a = *(half*)element;
981 break;
John Bauman89401822014-05-06 15:04:28 -0400982 case FORMAT_R16F:
983 r = *(half*)element;
984 break;
985 case FORMAT_G16R16F:
986 r = ((half*)element)[0];
987 g = ((half*)element)[1];
988 break;
Nicolas Capensa6bc61d2017-12-20 11:07:45 -0500989 case FORMAT_X16B16G16R16F:
Nicolas Capens67fdd832017-12-21 11:20:54 -0500990 case FORMAT_X16B16G16R16F_UNSIGNED:
Nicolas Capens80594422015-06-09 16:42:56 -0400991 case FORMAT_B16G16R16F:
992 r = ((half*)element)[0];
993 g = ((half*)element)[1];
994 b = ((half*)element)[2];
995 break;
John Bauman89401822014-05-06 15:04:28 -0400996 case FORMAT_A16B16G16R16F:
997 r = ((half*)element)[0];
998 g = ((half*)element)[1];
999 b = ((half*)element)[2];
1000 a = ((half*)element)[3];
1001 break;
Nicolas Capens80594422015-06-09 16:42:56 -04001002 case FORMAT_A32F:
1003 a = *(float*)element;
1004 break;
John Bauman89401822014-05-06 15:04:28 -04001005 case FORMAT_R32F:
1006 r = *(float*)element;
1007 break;
1008 case FORMAT_G32R32F:
1009 r = ((float*)element)[0];
1010 g = ((float*)element)[1];
1011 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04001012 case FORMAT_X32B32G32R32F:
Nicolas Capens67fdd832017-12-21 11:20:54 -05001013 case FORMAT_X32B32G32R32F_UNSIGNED:
Nicolas Capens80594422015-06-09 16:42:56 -04001014 case FORMAT_B32G32R32F:
1015 r = ((float*)element)[0];
1016 g = ((float*)element)[1];
1017 b = ((float*)element)[2];
1018 break;
John Bauman89401822014-05-06 15:04:28 -04001019 case FORMAT_A32B32G32R32F:
1020 r = ((float*)element)[0];
1021 g = ((float*)element)[1];
1022 b = ((float*)element)[2];
1023 a = ((float*)element)[3];
1024 break;
1025 case FORMAT_D32F:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001026 case FORMAT_D32FS8:
John Bauman89401822014-05-06 15:04:28 -04001027 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04001028 case FORMAT_D32FS8_TEXTURE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001029 case FORMAT_D32F_SHADOW:
John Bauman66b8ab22014-05-06 15:57:45 -04001030 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04001031 r = *(float*)element;
1032 g = r;
1033 b = r;
1034 a = r;
1035 break;
1036 case FORMAT_D32F_COMPLEMENTARY:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001037 case FORMAT_D32FS8_COMPLEMENTARY:
John Bauman66b8ab22014-05-06 15:57:45 -04001038 r = 1.0f - *(float*)element;
John Bauman89401822014-05-06 15:04:28 -04001039 g = r;
1040 b = r;
1041 a = r;
1042 break;
1043 case FORMAT_S8:
1044 r = *(unsigned char*)element * (1.0f / 0xFF);
1045 break;
1046 default:
1047 ASSERT(false);
1048 }
1049
Nicolas Capens1efac522017-12-01 16:59:38 -05001050 if(isSRGBformat(format))
1051 {
1052 r = sRGBtoLinear(r);
1053 g = sRGBtoLinear(g);
1054 b = sRGBtoLinear(b);
1055 }
John Bauman89401822014-05-06 15:04:28 -04001056
1057 return Color<float>(r, g, b, a);
1058 }
1059
1060 Color<float> Surface::Buffer::sample(float x, float y, float z) const
1061 {
1062 x -= 0.5f;
1063 y -= 0.5f;
1064 z -= 0.5f;
1065
1066 int x0 = clamp((int)x, 0, width - 1);
1067 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
1068
1069 int y0 = clamp((int)y, 0, height - 1);
1070 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
1071
1072 int z0 = clamp((int)z, 0, depth - 1);
1073 int z1 = (z0 + 1 >= depth) ? z0 : z0 + 1;
1074
1075 Color<float> c000 = read(x0, y0, z0);
1076 Color<float> c100 = read(x1, y0, z0);
1077 Color<float> c010 = read(x0, y1, z0);
1078 Color<float> c110 = read(x1, y1, z0);
1079 Color<float> c001 = read(x0, y0, z1);
1080 Color<float> c101 = read(x1, y0, z1);
1081 Color<float> c011 = read(x0, y1, z1);
1082 Color<float> c111 = read(x1, y1, z1);
1083
1084 float fx = x - x0;
1085 float fy = y - y0;
1086 float fz = z - z0;
1087
1088 c000 *= (1 - fx) * (1 - fy) * (1 - fz);
1089 c100 *= fx * (1 - fy) * (1 - fz);
1090 c010 *= (1 - fx) * fy * (1 - fz);
1091 c110 *= fx * fy * (1 - fz);
1092 c001 *= (1 - fx) * (1 - fy) * fz;
1093 c101 *= fx * (1 - fy) * fz;
1094 c011 *= (1 - fx) * fy * fz;
1095 c111 *= fx * fy * fz;
1096
1097 return c000 + c100 + c010 + c110 + c001 + c101 + c011 + c111;
1098 }
1099
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001100 Color<float> Surface::Buffer::sample(float x, float y, int layer) const
John Bauman89401822014-05-06 15:04:28 -04001101 {
1102 x -= 0.5f;
1103 y -= 0.5f;
1104
1105 int x0 = clamp((int)x, 0, width - 1);
1106 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
1107
1108 int y0 = clamp((int)y, 0, height - 1);
1109 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
1110
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001111 Color<float> c00 = read(x0, y0, layer);
1112 Color<float> c10 = read(x1, y0, layer);
1113 Color<float> c01 = read(x0, y1, layer);
1114 Color<float> c11 = read(x1, y1, layer);
John Bauman89401822014-05-06 15:04:28 -04001115
1116 float fx = x - x0;
1117 float fy = y - y0;
1118
1119 c00 *= (1 - fx) * (1 - fy);
1120 c10 *= fx * (1 - fy);
1121 c01 *= (1 - fx) * fy;
1122 c11 *= fx * fy;
1123
1124 return c00 + c10 + c01 + c11;
1125 }
1126
John Bauman19bac1e2014-05-06 15:23:49 -04001127 void *Surface::Buffer::lockRect(int x, int y, int z, Lock lock)
John Bauman89401822014-05-06 15:04:28 -04001128 {
1129 this->lock = lock;
1130
1131 switch(lock)
1132 {
1133 case LOCK_UNLOCKED:
1134 case LOCK_READONLY:
Nicolas Capensae7756e2018-02-22 16:13:01 -05001135 case LOCK_UPDATE:
John Bauman89401822014-05-06 15:04:28 -04001136 break;
1137 case LOCK_WRITEONLY:
1138 case LOCK_READWRITE:
1139 case LOCK_DISCARD:
1140 dirty = true;
1141 break;
1142 default:
1143 ASSERT(false);
1144 }
1145
John Baumand4ae8632014-05-06 16:18:33 -04001146 if(buffer)
John Bauman89401822014-05-06 15:04:28 -04001147 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001148 x += border;
1149 y += border;
1150
John Baumand4ae8632014-05-06 16:18:33 -04001151 switch(format)
1152 {
John Baumand4ae8632014-05-06 16:18:33 -04001153 case FORMAT_DXT1:
John Baumand4ae8632014-05-06 16:18:33 -04001154 case FORMAT_ATI1:
Nicolas Capens22658242014-11-29 00:31:41 -05001155 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001156 case FORMAT_R11_EAC:
1157 case FORMAT_SIGNED_R11_EAC:
1158 case FORMAT_RGB8_ETC2:
1159 case FORMAT_SRGB8_ETC2:
1160 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1161 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Baumand4ae8632014-05-06 16:18:33 -04001162 return (unsigned char*)buffer + 8 * (x / 4) + (y / 4) * pitchB + z * sliceB;
Alexis Hetu460e41f2015-09-01 10:58:37 -04001163 case FORMAT_RG11_EAC:
1164 case FORMAT_SIGNED_RG11_EAC:
1165 case FORMAT_RGBA8_ETC2_EAC:
1166 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001167 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
John Baumand4ae8632014-05-06 16:18:33 -04001168 case FORMAT_DXT3:
1169 case FORMAT_DXT5:
John Baumand4ae8632014-05-06 16:18:33 -04001170 case FORMAT_ATI2:
1171 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
1172 default:
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001173 return (unsigned char*)buffer + x * bytes + y * pitchB + z * samples * sliceB;
John Baumand4ae8632014-05-06 16:18:33 -04001174 }
John Bauman89401822014-05-06 15:04:28 -04001175 }
1176
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001177 return nullptr;
John Bauman89401822014-05-06 15:04:28 -04001178 }
1179
1180 void Surface::Buffer::unlockRect()
1181 {
1182 lock = LOCK_UNLOCKED;
1183 }
1184
Nicolas Capensf41f0332017-05-30 15:25:50 -04001185 class SurfaceImplementation : public Surface
1186 {
1187 public:
1188 SurfaceImplementation(int width, int height, int depth, Format format, void *pixels, int pitch, int slice)
1189 : Surface(width, height, depth, format, pixels, pitch, slice) {}
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001190 SurfaceImplementation(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchP = 0)
1191 : Surface(texture, width, height, depth, border, samples, format, lockable, renderTarget, pitchP) {}
Ben Clayton2a582382019-04-24 12:11:36 +01001192 ~SurfaceImplementation() override {}
Nicolas Capensf41f0332017-05-30 15:25:50 -04001193
1194 void *lockInternal(int x, int y, int z, Lock lock, Accessor client) override
1195 {
1196 return Surface::lockInternal(x, y, z, lock, client);
1197 }
1198
1199 void unlockInternal() override
1200 {
1201 Surface::unlockInternal();
1202 }
1203 };
1204
1205 Surface *Surface::create(int width, int height, int depth, Format format, void *pixels, int pitch, int slice)
1206 {
1207 return new SurfaceImplementation(width, height, depth, format, pixels, pitch, slice);
1208 }
1209
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001210 Surface *Surface::create(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchPprovided)
Nicolas Capensf41f0332017-05-30 15:25:50 -04001211 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001212 return new SurfaceImplementation(texture, width, height, depth, border, samples, format, lockable, renderTarget, pitchPprovided);
Nicolas Capensf41f0332017-05-30 15:25:50 -04001213 }
1214
Nicolas Capens477314b2015-06-09 16:47:29 -04001215 Surface::Surface(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) : lockable(true), renderTarget(false)
1216 {
1217 resource = new Resource(0);
1218 hasParent = false;
1219 ownExternal = false;
1220 depth = max(1, depth);
1221
1222 external.buffer = pixels;
1223 external.width = width;
1224 external.height = height;
1225 external.depth = depth;
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001226 external.samples = 1;
Nicolas Capens477314b2015-06-09 16:47:29 -04001227 external.format = format;
1228 external.bytes = bytes(external.format);
1229 external.pitchB = pitch;
Nicolas Capens0a8d3d12016-02-12 17:10:58 -05001230 external.pitchP = external.bytes ? pitch / external.bytes : 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001231 external.sliceB = slice;
Nicolas Capens0a8d3d12016-02-12 17:10:58 -05001232 external.sliceP = external.bytes ? slice / external.bytes : 0;
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001233 external.border = 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001234 external.lock = LOCK_UNLOCKED;
1235 external.dirty = true;
1236
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001237 internal.buffer = nullptr;
Nicolas Capens477314b2015-06-09 16:47:29 -04001238 internal.width = width;
1239 internal.height = height;
1240 internal.depth = depth;
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001241 internal.samples = 1;
Nicolas Capens477314b2015-06-09 16:47:29 -04001242 internal.format = selectInternalFormat(format);
1243 internal.bytes = bytes(internal.format);
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001244 internal.pitchB = pitchB(internal.width, 0, internal.format, false);
1245 internal.pitchP = pitchP(internal.width, 0, internal.format, false);
1246 internal.sliceB = sliceB(internal.width, internal.height, 0, internal.format, false);
1247 internal.sliceP = sliceP(internal.width, internal.height, 0, internal.format, false);
1248 internal.border = 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001249 internal.lock = LOCK_UNLOCKED;
1250 internal.dirty = false;
1251
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001252 stencil.buffer = nullptr;
Nicolas Capens477314b2015-06-09 16:47:29 -04001253 stencil.width = width;
1254 stencil.height = height;
1255 stencil.depth = depth;
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001256 stencil.samples = 1;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001257 stencil.format = isStencil(format) ? FORMAT_S8 : FORMAT_NULL;
Nicolas Capens477314b2015-06-09 16:47:29 -04001258 stencil.bytes = bytes(stencil.format);
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001259 stencil.pitchB = pitchB(stencil.width, 0, stencil.format, false);
1260 stencil.pitchP = pitchP(stencil.width, 0, stencil.format, false);
1261 stencil.sliceB = sliceB(stencil.width, stencil.height, 0, stencil.format, false);
1262 stencil.sliceP = sliceP(stencil.width, stencil.height, 0, stencil.format, false);
1263 stencil.border = 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001264 stencil.lock = LOCK_UNLOCKED;
1265 stencil.dirty = false;
1266
Nicolas Capens73e18c12017-11-28 13:31:35 -05001267 dirtyContents = true;
Nicolas Capens477314b2015-06-09 16:47:29 -04001268 paletteUsed = 0;
1269 }
1270
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001271 Surface::Surface(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchPprovided) : lockable(lockable), renderTarget(renderTarget)
John Bauman89401822014-05-06 15:04:28 -04001272 {
1273 resource = texture ? texture : new Resource(0);
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001274 hasParent = texture != nullptr;
Nicolas Capens477314b2015-06-09 16:47:29 -04001275 ownExternal = true;
John Bauman89401822014-05-06 15:04:28 -04001276 depth = max(1, depth);
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001277 samples = max(1, samples);
John Bauman89401822014-05-06 15:04:28 -04001278
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001279 external.buffer = nullptr;
John Bauman89401822014-05-06 15:04:28 -04001280 external.width = width;
1281 external.height = height;
1282 external.depth = depth;
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001283 external.samples = (short)samples;
John Bauman89401822014-05-06 15:04:28 -04001284 external.format = format;
1285 external.bytes = bytes(external.format);
Alexis Hetu5fd1cfa2018-05-11 15:31:12 -04001286 external.pitchB = !pitchPprovided ? pitchB(external.width, 0, external.format, renderTarget && !texture) : pitchPprovided * external.bytes;
1287 external.pitchP = !pitchPprovided ? pitchP(external.width, 0, external.format, renderTarget && !texture) : pitchPprovided;
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001288 external.sliceB = sliceB(external.width, external.height, 0, external.format, renderTarget && !texture);
1289 external.sliceP = sliceP(external.width, external.height, 0, external.format, renderTarget && !texture);
1290 external.border = 0;
John Bauman89401822014-05-06 15:04:28 -04001291 external.lock = LOCK_UNLOCKED;
1292 external.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001293
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001294 internal.buffer = nullptr;
John Bauman89401822014-05-06 15:04:28 -04001295 internal.width = width;
1296 internal.height = height;
1297 internal.depth = depth;
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001298 internal.samples = (short)samples;
John Bauman89401822014-05-06 15:04:28 -04001299 internal.format = selectInternalFormat(format);
1300 internal.bytes = bytes(internal.format);
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001301 internal.pitchB = !pitchPprovided ? pitchB(internal.width, border, internal.format, renderTarget) : pitchPprovided * internal.bytes;
1302 internal.pitchP = !pitchPprovided ? pitchP(internal.width, border, internal.format, renderTarget) : pitchPprovided;
1303 internal.sliceB = sliceB(internal.width, internal.height, border, internal.format, renderTarget);
1304 internal.sliceP = sliceP(internal.width, internal.height, border, internal.format, renderTarget);
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001305 internal.border = (short)border;
John Bauman89401822014-05-06 15:04:28 -04001306 internal.lock = LOCK_UNLOCKED;
1307 internal.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001308
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001309 stencil.buffer = nullptr;
John Bauman89401822014-05-06 15:04:28 -04001310 stencil.width = width;
1311 stencil.height = height;
1312 stencil.depth = depth;
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001313 stencil.samples = (short)samples;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001314 stencil.format = isStencil(format) ? FORMAT_S8 : FORMAT_NULL;
John Bauman89401822014-05-06 15:04:28 -04001315 stencil.bytes = bytes(stencil.format);
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001316 stencil.pitchB = pitchB(stencil.width, 0, stencil.format, renderTarget);
1317 stencil.pitchP = pitchP(stencil.width, 0, stencil.format, renderTarget);
1318 stencil.sliceB = sliceB(stencil.width, stencil.height, 0, stencil.format, renderTarget);
1319 stencil.sliceP = sliceP(stencil.width, stencil.height, 0, stencil.format, renderTarget);
1320 stencil.border = 0;
John Bauman89401822014-05-06 15:04:28 -04001321 stencil.lock = LOCK_UNLOCKED;
1322 stencil.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001323
Nicolas Capens73e18c12017-11-28 13:31:35 -05001324 dirtyContents = true;
John Bauman66b8ab22014-05-06 15:57:45 -04001325 paletteUsed = 0;
John Bauman89401822014-05-06 15:04:28 -04001326 }
1327
1328 Surface::~Surface()
1329 {
Nicolas Capensbf7a8142017-05-19 10:57:28 -04001330 // sync() must be called before this destructor to ensure all locks have been released.
1331 // We can't call it here because the parent resource may already have been destroyed.
Antoine Labourfc2b84d2017-06-09 18:14:05 -07001332 ASSERT(isUnlocked());
John Bauman8a4f6fc2014-05-06 15:26:18 -04001333
John Bauman89401822014-05-06 15:04:28 -04001334 if(!hasParent)
1335 {
1336 resource->destruct();
1337 }
1338
Nicolas Capens477314b2015-06-09 16:47:29 -04001339 if(ownExternal)
1340 {
1341 deallocate(external.buffer);
1342 }
John Bauman89401822014-05-06 15:04:28 -04001343
1344 if(internal.buffer != external.buffer)
1345 {
1346 deallocate(internal.buffer);
1347 }
1348
1349 deallocate(stencil.buffer);
1350
Nicolas Capens810f3db2018-09-29 11:58:42 -04001351 external.buffer = nullptr;
1352 internal.buffer = nullptr;
1353 stencil.buffer = nullptr;
John Bauman89401822014-05-06 15:04:28 -04001354 }
1355
John Bauman19bac1e2014-05-06 15:23:49 -04001356 void *Surface::lockExternal(int x, int y, int z, Lock lock, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001357 {
1358 resource->lock(client);
1359
1360 if(!external.buffer)
1361 {
Nicolas Capens539468c2018-06-28 11:28:40 -04001362 if(internal.buffer && identicalBuffers())
John Bauman89401822014-05-06 15:04:28 -04001363 {
1364 external.buffer = internal.buffer;
1365 }
1366 else
1367 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001368 external.buffer = allocateBuffer(external.width, external.height, external.depth, external.border, external.samples, external.format);
John Bauman89401822014-05-06 15:04:28 -04001369 }
1370 }
1371
1372 if(internal.dirty)
1373 {
1374 if(lock != LOCK_DISCARD)
1375 {
1376 update(external, internal);
1377 }
John Bauman66b8ab22014-05-06 15:57:45 -04001378
1379 internal.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001380 }
1381
1382 switch(lock)
1383 {
1384 case LOCK_READONLY:
1385 break;
1386 case LOCK_WRITEONLY:
1387 case LOCK_READWRITE:
1388 case LOCK_DISCARD:
Nicolas Capens73e18c12017-11-28 13:31:35 -05001389 dirtyContents = true;
John Bauman89401822014-05-06 15:04:28 -04001390 break;
1391 default:
1392 ASSERT(false);
1393 }
1394
John Bauman19bac1e2014-05-06 15:23:49 -04001395 return external.lockRect(x, y, z, lock);
John Bauman89401822014-05-06 15:04:28 -04001396 }
1397
1398 void Surface::unlockExternal()
1399 {
John Bauman89401822014-05-06 15:04:28 -04001400 external.unlockRect();
Antoine Labourfc2b84d2017-06-09 18:14:05 -07001401
1402 resource->unlock();
John Bauman89401822014-05-06 15:04:28 -04001403 }
1404
John Bauman19bac1e2014-05-06 15:23:49 -04001405 void *Surface::lockInternal(int x, int y, int z, Lock lock, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001406 {
1407 if(lock != LOCK_UNLOCKED)
1408 {
1409 resource->lock(client);
1410 }
1411
1412 if(!internal.buffer)
1413 {
Nicolas Capens539468c2018-06-28 11:28:40 -04001414 if(external.buffer && identicalBuffers())
John Bauman89401822014-05-06 15:04:28 -04001415 {
1416 internal.buffer = external.buffer;
1417 }
1418 else
1419 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001420 internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.border, internal.samples, internal.format);
John Bauman89401822014-05-06 15:04:28 -04001421 }
1422 }
1423
1424 // FIXME: WHQL requires conversion to lower external precision and back
1425 if(logPrecision >= WHQL)
1426 {
1427 if(internal.dirty && renderTarget && internal.format != external.format)
1428 {
1429 if(lock != LOCK_DISCARD)
1430 {
1431 switch(external.format)
1432 {
1433 case FORMAT_R3G3B2:
1434 case FORMAT_A8R3G3B2:
1435 case FORMAT_A1R5G5B5:
1436 case FORMAT_A2R10G10B10:
1437 case FORMAT_A2B10G10R10:
1438 lockExternal(0, 0, 0, LOCK_READWRITE, client);
1439 unlockExternal();
1440 break;
1441 default:
1442 // Difference passes WHQL
1443 break;
1444 }
1445 }
1446 }
1447 }
1448
John Bauman66b8ab22014-05-06 15:57:45 -04001449 if(external.dirty || (isPalette(external.format) && paletteUsed != Surface::paletteID))
John Bauman89401822014-05-06 15:04:28 -04001450 {
1451 if(lock != LOCK_DISCARD)
1452 {
1453 update(internal, external);
1454 }
John Bauman89401822014-05-06 15:04:28 -04001455
John Bauman66b8ab22014-05-06 15:57:45 -04001456 external.dirty = false;
1457 paletteUsed = Surface::paletteID;
John Bauman89401822014-05-06 15:04:28 -04001458 }
1459
1460 switch(lock)
1461 {
1462 case LOCK_UNLOCKED:
1463 case LOCK_READONLY:
1464 break;
1465 case LOCK_WRITEONLY:
1466 case LOCK_READWRITE:
1467 case LOCK_DISCARD:
Nicolas Capens73e18c12017-11-28 13:31:35 -05001468 dirtyContents = true;
John Bauman89401822014-05-06 15:04:28 -04001469 break;
1470 default:
1471 ASSERT(false);
1472 }
1473
1474 if(lock == LOCK_READONLY && client == PUBLIC)
1475 {
1476 resolve();
1477 }
1478
John Bauman19bac1e2014-05-06 15:23:49 -04001479 return internal.lockRect(x, y, z, lock);
John Bauman89401822014-05-06 15:04:28 -04001480 }
1481
1482 void Surface::unlockInternal()
1483 {
John Bauman89401822014-05-06 15:04:28 -04001484 internal.unlockRect();
Antoine Labourfc2b84d2017-06-09 18:14:05 -07001485
1486 resource->unlock();
John Bauman89401822014-05-06 15:04:28 -04001487 }
1488
Alexis Hetua52dfbd2016-10-05 17:03:30 -04001489 void *Surface::lockStencil(int x, int y, int front, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001490 {
Nicolas Capens810f3db2018-09-29 11:58:42 -04001491 resource->lock(client);
1492
Nicolas Capensae7756e2018-02-22 16:13:01 -05001493 if(stencil.format == FORMAT_NULL)
1494 {
1495 return nullptr;
1496 }
1497
John Bauman89401822014-05-06 15:04:28 -04001498 if(!stencil.buffer)
1499 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001500 stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.border, stencil.samples, stencil.format);
John Bauman89401822014-05-06 15:04:28 -04001501 }
1502
Alexis Hetua52dfbd2016-10-05 17:03:30 -04001503 return stencil.lockRect(x, y, front, LOCK_READWRITE); // FIXME
John Bauman89401822014-05-06 15:04:28 -04001504 }
1505
1506 void Surface::unlockStencil()
1507 {
John Bauman89401822014-05-06 15:04:28 -04001508 stencil.unlockRect();
Antoine Labourfc2b84d2017-06-09 18:14:05 -07001509
1510 resource->unlock();
John Bauman89401822014-05-06 15:04:28 -04001511 }
1512
1513 int Surface::bytes(Format format)
1514 {
1515 switch(format)
1516 {
1517 case FORMAT_NULL: return 0;
1518 case FORMAT_P8: return 1;
1519 case FORMAT_A8P8: return 2;
1520 case FORMAT_A8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001521 case FORMAT_R8I: return 1;
John Bauman89401822014-05-06 15:04:28 -04001522 case FORMAT_R8: return 1;
1523 case FORMAT_R3G3B2: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001524 case FORMAT_R16I: return 2;
1525 case FORMAT_R16UI: return 2;
John Bauman89401822014-05-06 15:04:28 -04001526 case FORMAT_A8R3G3B2: return 2;
1527 case FORMAT_R5G6B5: return 2;
1528 case FORMAT_A1R5G5B5: return 2;
1529 case FORMAT_X1R5G5B5: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001530 case FORMAT_R5G5B5A1: return 2;
John Bauman89401822014-05-06 15:04:28 -04001531 case FORMAT_X4R4G4B4: return 2;
1532 case FORMAT_A4R4G4B4: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001533 case FORMAT_R4G4B4A4: return 2;
John Bauman89401822014-05-06 15:04:28 -04001534 case FORMAT_R8G8B8: return 3;
Nicolas Capens80594422015-06-09 16:42:56 -04001535 case FORMAT_B8G8R8: return 3;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001536 case FORMAT_R32I: return 4;
1537 case FORMAT_R32UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001538 case FORMAT_X8R8G8B8: return 4;
1539 // case FORMAT_X8G8R8B8Q: return 4;
1540 case FORMAT_A8R8G8B8: return 4;
1541 // case FORMAT_A8G8R8B8Q: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001542 case FORMAT_X8B8G8R8I: return 4;
John Bauman89401822014-05-06 15:04:28 -04001543 case FORMAT_X8B8G8R8: return 4;
Alexis Hetu049a1872016-04-25 16:59:58 -04001544 case FORMAT_SRGB8_X8: return 4;
1545 case FORMAT_SRGB8_A8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001546 case FORMAT_A8B8G8R8I: return 4;
1547 case FORMAT_R8UI: return 1;
1548 case FORMAT_G8R8UI: return 2;
1549 case FORMAT_X8B8G8R8UI: return 4;
1550 case FORMAT_A8B8G8R8UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001551 case FORMAT_A8B8G8R8: return 4;
Nicolas Capens975adb72017-12-19 15:34:20 -05001552 case FORMAT_R8_SNORM: return 1;
1553 case FORMAT_G8R8_SNORM: return 2;
1554 case FORMAT_X8B8G8R8_SNORM: return 4;
1555 case FORMAT_A8B8G8R8_SNORM: return 4;
John Bauman89401822014-05-06 15:04:28 -04001556 case FORMAT_A2R10G10B10: return 4;
1557 case FORMAT_A2B10G10R10: return 4;
Nicolas Capens5555af42017-12-14 13:14:03 -05001558 case FORMAT_A2B10G10R10UI: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001559 case FORMAT_G8R8I: return 2;
John Bauman89401822014-05-06 15:04:28 -04001560 case FORMAT_G8R8: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001561 case FORMAT_G16R16I: return 4;
1562 case FORMAT_G16R16UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001563 case FORMAT_G16R16: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001564 case FORMAT_G32R32I: return 8;
1565 case FORMAT_G32R32UI: return 8;
1566 case FORMAT_X16B16G16R16I: return 8;
1567 case FORMAT_X16B16G16R16UI: return 8;
1568 case FORMAT_A16B16G16R16I: return 8;
1569 case FORMAT_A16B16G16R16UI: return 8;
John Bauman89401822014-05-06 15:04:28 -04001570 case FORMAT_A16B16G16R16: return 8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001571 case FORMAT_X32B32G32R32I: return 16;
1572 case FORMAT_X32B32G32R32UI: return 16;
1573 case FORMAT_A32B32G32R32I: return 16;
1574 case FORMAT_A32B32G32R32UI: return 16;
John Bauman89401822014-05-06 15:04:28 -04001575 // Compressed formats
John Bauman89401822014-05-06 15:04:28 -04001576 case FORMAT_DXT1: return 2; // Column of four pixels
1577 case FORMAT_DXT3: return 4; // Column of four pixels
1578 case FORMAT_DXT5: return 4; // Column of four pixels
1579 case FORMAT_ATI1: return 2; // Column of four pixels
1580 case FORMAT_ATI2: return 4; // Column of four pixels
Nicolas Capens22658242014-11-29 00:31:41 -05001581 case FORMAT_ETC1: return 2; // Column of four pixels
Alexis Hetu460e41f2015-09-01 10:58:37 -04001582 case FORMAT_R11_EAC: return 2;
1583 case FORMAT_SIGNED_R11_EAC: return 2;
1584 case FORMAT_RG11_EAC: return 4;
1585 case FORMAT_SIGNED_RG11_EAC: return 4;
1586 case FORMAT_RGB8_ETC2: return 2;
1587 case FORMAT_SRGB8_ETC2: return 2;
1588 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
1589 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
1590 case FORMAT_RGBA8_ETC2_EAC: return 4;
1591 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: return 4;
John Bauman89401822014-05-06 15:04:28 -04001592 // Bumpmap formats
1593 case FORMAT_V8U8: return 2;
1594 case FORMAT_L6V5U5: return 2;
1595 case FORMAT_Q8W8V8U8: return 4;
1596 case FORMAT_X8L8V8U8: return 4;
1597 case FORMAT_A2W10V10U10: return 4;
1598 case FORMAT_V16U16: return 4;
1599 case FORMAT_A16W16V16U16: return 8;
1600 case FORMAT_Q16W16V16U16: return 8;
1601 // Luminance formats
1602 case FORMAT_L8: return 1;
1603 case FORMAT_A4L4: return 1;
1604 case FORMAT_L16: return 2;
1605 case FORMAT_A8L8: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001606 case FORMAT_L16F: return 2;
1607 case FORMAT_A16L16F: return 4;
1608 case FORMAT_L32F: return 4;
1609 case FORMAT_A32L32F: return 8;
John Bauman89401822014-05-06 15:04:28 -04001610 // Floating-point formats
Nicolas Capens80594422015-06-09 16:42:56 -04001611 case FORMAT_A16F: return 2;
John Bauman89401822014-05-06 15:04:28 -04001612 case FORMAT_R16F: return 2;
1613 case FORMAT_G16R16F: return 4;
Nicolas Capens80594422015-06-09 16:42:56 -04001614 case FORMAT_B16G16R16F: return 6;
Nicolas Capensa6bc61d2017-12-20 11:07:45 -05001615 case FORMAT_X16B16G16R16F: return 8;
John Bauman89401822014-05-06 15:04:28 -04001616 case FORMAT_A16B16G16R16F: return 8;
Nicolas Capens67fdd832017-12-21 11:20:54 -05001617 case FORMAT_X16B16G16R16F_UNSIGNED: return 8;
Nicolas Capens80594422015-06-09 16:42:56 -04001618 case FORMAT_A32F: return 4;
John Bauman89401822014-05-06 15:04:28 -04001619 case FORMAT_R32F: return 4;
1620 case FORMAT_G32R32F: return 8;
Nicolas Capens80594422015-06-09 16:42:56 -04001621 case FORMAT_B32G32R32F: return 12;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04001622 case FORMAT_X32B32G32R32F: return 16;
John Bauman89401822014-05-06 15:04:28 -04001623 case FORMAT_A32B32G32R32F: return 16;
Nicolas Capens67fdd832017-12-21 11:20:54 -05001624 case FORMAT_X32B32G32R32F_UNSIGNED: return 16;
John Bauman89401822014-05-06 15:04:28 -04001625 // Depth/stencil formats
1626 case FORMAT_D16: return 2;
1627 case FORMAT_D32: return 4;
1628 case FORMAT_D24X8: return 4;
1629 case FORMAT_D24S8: return 4;
1630 case FORMAT_D24FS8: return 4;
1631 case FORMAT_D32F: return 4;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001632 case FORMAT_D32FS8: return 4;
John Bauman89401822014-05-06 15:04:28 -04001633 case FORMAT_D32F_COMPLEMENTARY: return 4;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001634 case FORMAT_D32FS8_COMPLEMENTARY: return 4;
John Bauman89401822014-05-06 15:04:28 -04001635 case FORMAT_D32F_LOCKABLE: return 4;
John Bauman66b8ab22014-05-06 15:57:45 -04001636 case FORMAT_D32FS8_TEXTURE: return 4;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001637 case FORMAT_D32F_SHADOW: return 4;
John Bauman66b8ab22014-05-06 15:57:45 -04001638 case FORMAT_D32FS8_SHADOW: return 4;
1639 case FORMAT_DF24S8: return 4;
1640 case FORMAT_DF16S8: return 2;
John Bauman89401822014-05-06 15:04:28 -04001641 case FORMAT_INTZ: return 4;
1642 case FORMAT_S8: return 1;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001643 case FORMAT_YV12_BT601: return 1; // Y plane only
1644 case FORMAT_YV12_BT709: return 1; // Y plane only
1645 case FORMAT_YV12_JFIF: return 1; // Y plane only
John Bauman89401822014-05-06 15:04:28 -04001646 default:
1647 ASSERT(false);
1648 }
1649
1650 return 0;
1651 }
1652
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001653 int Surface::pitchB(int width, int border, Format format, bool target)
John Bauman89401822014-05-06 15:04:28 -04001654 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001655 width += 2 * border;
1656
Nicolas Capens419a5802018-05-08 17:20:50 -04001657 // Render targets require 2x2 quads
John Bauman89401822014-05-06 15:04:28 -04001658 if(target || isDepth(format) || isStencil(format))
1659 {
Nicolas Capens419a5802018-05-08 17:20:50 -04001660 width = align<2>(width);
John Bauman89401822014-05-06 15:04:28 -04001661 }
1662
1663 switch(format)
1664 {
John Bauman89401822014-05-06 15:04:28 -04001665 case FORMAT_DXT1:
Nicolas Capens22658242014-11-29 00:31:41 -05001666 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001667 case FORMAT_R11_EAC:
1668 case FORMAT_SIGNED_R11_EAC:
1669 case FORMAT_RGB8_ETC2:
1670 case FORMAT_SRGB8_ETC2:
1671 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1672 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Bauman89401822014-05-06 15:04:28 -04001673 return 8 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per 4 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001674 case FORMAT_RG11_EAC:
1675 case FORMAT_SIGNED_RG11_EAC:
1676 case FORMAT_RGBA8_ETC2_EAC:
1677 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001678 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows
John Bauman89401822014-05-06 15:04:28 -04001679 case FORMAT_DXT3:
1680 case FORMAT_DXT5:
1681 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows
1682 case FORMAT_ATI1:
1683 return 2 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per row
1684 case FORMAT_ATI2:
1685 return 4 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per row
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001686 case FORMAT_YV12_BT601:
1687 case FORMAT_YV12_BT709:
1688 case FORMAT_YV12_JFIF:
Nicolas Capens419a5802018-05-08 17:20:50 -04001689 return align<16>(width);
John Bauman89401822014-05-06 15:04:28 -04001690 default:
1691 return bytes(format) * width;
1692 }
1693 }
1694
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001695 int Surface::pitchP(int width, int border, Format format, bool target)
John Bauman89401822014-05-06 15:04:28 -04001696 {
1697 int B = bytes(format);
1698
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001699 return B > 0 ? pitchB(width, border, format, target) / B : 0;
John Bauman89401822014-05-06 15:04:28 -04001700 }
1701
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001702 int Surface::sliceB(int width, int height, int border, Format format, bool target)
John Bauman89401822014-05-06 15:04:28 -04001703 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001704 height += 2 * border;
1705
Nicolas Capens419a5802018-05-08 17:20:50 -04001706 // Render targets require 2x2 quads
John Bauman89401822014-05-06 15:04:28 -04001707 if(target || isDepth(format) || isStencil(format))
1708 {
Nicolas Capens419a5802018-05-08 17:20:50 -04001709 height = align<2>(height);
John Bauman89401822014-05-06 15:04:28 -04001710 }
1711
1712 switch(format)
1713 {
John Bauman89401822014-05-06 15:04:28 -04001714 case FORMAT_DXT1:
1715 case FORMAT_DXT3:
1716 case FORMAT_DXT5:
Nicolas Capens22658242014-11-29 00:31:41 -05001717 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001718 case FORMAT_R11_EAC:
1719 case FORMAT_SIGNED_R11_EAC:
1720 case FORMAT_RG11_EAC:
1721 case FORMAT_SIGNED_RG11_EAC:
1722 case FORMAT_RGB8_ETC2:
1723 case FORMAT_SRGB8_ETC2:
1724 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1725 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1726 case FORMAT_RGBA8_ETC2_EAC:
1727 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001728 return pitchB(width, border, format, target) * ((height + 3) / 4); // Pitch computed per 4 rows
Nicolas Capens22658242014-11-29 00:31:41 -05001729 case FORMAT_ATI1:
1730 case FORMAT_ATI2:
Nicolas Capens419a5802018-05-08 17:20:50 -04001731 return pitchB(width, border, format, target) * align<4>(height); // Pitch computed per row
John Bauman89401822014-05-06 15:04:28 -04001732 default:
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001733 return pitchB(width, border, format, target) * height; // Pitch computed per row
John Bauman89401822014-05-06 15:04:28 -04001734 }
1735 }
1736
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001737 int Surface::sliceP(int width, int height, int border, Format format, bool target)
John Bauman89401822014-05-06 15:04:28 -04001738 {
1739 int B = bytes(format);
1740
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001741 return B > 0 ? sliceB(width, height, border, format, target) / B : 0;
John Bauman89401822014-05-06 15:04:28 -04001742 }
1743
1744 void Surface::update(Buffer &destination, Buffer &source)
1745 {
1746 // ASSERT(source.lock != LOCK_UNLOCKED);
1747 // ASSERT(destination.lock != LOCK_UNLOCKED);
Nicolas Capensc39901e2016-03-21 16:37:44 -04001748
John Bauman89401822014-05-06 15:04:28 -04001749 if(destination.buffer != source.buffer)
1750 {
1751 ASSERT(source.dirty && !destination.dirty);
1752
1753 switch(source.format)
1754 {
1755 case FORMAT_R8G8B8: decodeR8G8B8(destination, source); break; // FIXME: Check destination format
John Bauman89401822014-05-06 15:04:28 -04001756 case FORMAT_X1R5G5B5: decodeX1R5G5B5(destination, source); break; // FIXME: Check destination format
1757 case FORMAT_A1R5G5B5: decodeA1R5G5B5(destination, source); break; // FIXME: Check destination format
1758 case FORMAT_X4R4G4B4: decodeX4R4G4B4(destination, source); break; // FIXME: Check destination format
1759 case FORMAT_A4R4G4B4: decodeA4R4G4B4(destination, source); break; // FIXME: Check destination format
1760 case FORMAT_P8: decodeP8(destination, source); break; // FIXME: Check destination format
John Bauman89401822014-05-06 15:04:28 -04001761 case FORMAT_DXT1: decodeDXT1(destination, source); break; // FIXME: Check destination format
1762 case FORMAT_DXT3: decodeDXT3(destination, source); break; // FIXME: Check destination format
1763 case FORMAT_DXT5: decodeDXT5(destination, source); break; // FIXME: Check destination format
1764 case FORMAT_ATI1: decodeATI1(destination, source); break; // FIXME: Check destination format
1765 case FORMAT_ATI2: decodeATI2(destination, source); break; // FIXME: Check destination format
Alexis Hetu460e41f2015-09-01 10:58:37 -04001766 case FORMAT_R11_EAC: decodeEAC(destination, source, 1, false); break; // FIXME: Check destination format
1767 case FORMAT_SIGNED_R11_EAC: decodeEAC(destination, source, 1, true); break; // FIXME: Check destination format
1768 case FORMAT_RG11_EAC: decodeEAC(destination, source, 2, false); break; // FIXME: Check destination format
1769 case FORMAT_SIGNED_RG11_EAC: decodeEAC(destination, source, 2, true); break; // FIXME: Check destination format
Alexis Hetu0de50d42015-09-09 13:56:41 -04001770 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001771 case FORMAT_RGB8_ETC2: decodeETC2(destination, source, 0, false); break; // FIXME: Check destination format
1772 case FORMAT_SRGB8_ETC2: decodeETC2(destination, source, 0, true); break; // FIXME: Check destination format
1773 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, false); break; // FIXME: Check destination format
1774 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, true); break; // FIXME: Check destination format
1775 case FORMAT_RGBA8_ETC2_EAC: decodeETC2(destination, source, 8, false); break; // FIXME: Check destination format
1776 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: decodeETC2(destination, source, 8, true); break; // FIXME: Check destination format
John Bauman89401822014-05-06 15:04:28 -04001777 default: genericUpdate(destination, source); break;
1778 }
1779 }
John Bauman89401822014-05-06 15:04:28 -04001780 }
1781
1782 void Surface::genericUpdate(Buffer &destination, Buffer &source)
1783 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001784 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
Nicolas Capensae7756e2018-02-22 16:13:01 -05001785 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);
John Bauman89401822014-05-06 15:04:28 -04001786
1787 int depth = min(destination.depth, source.depth);
1788 int height = min(destination.height, source.height);
1789 int width = min(destination.width, source.width);
1790 int rowBytes = width * source.bytes;
1791
1792 for(int z = 0; z < depth; z++)
1793 {
1794 unsigned char *sourceRow = sourceSlice;
1795 unsigned char *destinationRow = destinationSlice;
1796
1797 for(int y = 0; y < height; y++)
1798 {
1799 if(source.format == destination.format)
1800 {
1801 memcpy(destinationRow, sourceRow, rowBytes);
1802 }
1803 else
1804 {
1805 unsigned char *sourceElement = sourceRow;
1806 unsigned char *destinationElement = destinationRow;
1807
1808 for(int x = 0; x < width; x++)
1809 {
1810 Color<float> color = source.read(sourceElement);
1811 destination.write(destinationElement, color);
1812
1813 sourceElement += source.bytes;
1814 destinationElement += destination.bytes;
1815 }
1816 }
1817
1818 sourceRow += source.pitchB;
1819 destinationRow += destination.pitchB;
1820 }
1821
1822 sourceSlice += source.sliceB;
1823 destinationSlice += destination.sliceB;
1824 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001825
1826 source.unlockRect();
1827 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04001828 }
1829
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001830 void Surface::decodeR8G8B8(Buffer &destination, Buffer &source)
John Bauman89401822014-05-06 15:04:28 -04001831 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001832 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
Nicolas Capensae7756e2018-02-22 16:13:01 -05001833 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);
John Bauman89401822014-05-06 15:04:28 -04001834
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001835 int depth = min(destination.depth, source.depth);
1836 int height = min(destination.height, source.height);
1837 int width = min(destination.width, source.width);
1838
1839 for(int z = 0; z < depth; z++)
John Bauman89401822014-05-06 15:04:28 -04001840 {
1841 unsigned char *sourceRow = sourceSlice;
1842 unsigned char *destinationRow = destinationSlice;
1843
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001844 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04001845 {
1846 unsigned char *sourceElement = sourceRow;
1847 unsigned char *destinationElement = destinationRow;
1848
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001849 for(int x = 0; x < width; x++)
John Bauman89401822014-05-06 15:04:28 -04001850 {
1851 unsigned int b = sourceElement[0];
1852 unsigned int g = sourceElement[1];
1853 unsigned int r = sourceElement[2];
1854
1855 *(unsigned int*)destinationElement = 0xFF000000 | (r << 16) | (g << 8) | (b << 0);
1856
1857 sourceElement += source.bytes;
1858 destinationElement += destination.bytes;
1859 }
1860
1861 sourceRow += source.pitchB;
1862 destinationRow += destination.pitchB;
1863 }
1864
1865 sourceSlice += source.sliceB;
1866 destinationSlice += destination.sliceB;
1867 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001868
1869 source.unlockRect();
1870 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04001871 }
1872
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001873 void Surface::decodeX1R5G5B5(Buffer &destination, Buffer &source)
John Bauman89401822014-05-06 15:04:28 -04001874 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001875 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
Nicolas Capensae7756e2018-02-22 16:13:01 -05001876 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);
John Bauman89401822014-05-06 15:04:28 -04001877
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001878 int depth = min(destination.depth, source.depth);
1879 int height = min(destination.height, source.height);
1880 int width = min(destination.width, source.width);
1881
1882 for(int z = 0; z < depth; z++)
John Bauman89401822014-05-06 15:04:28 -04001883 {
1884 unsigned char *sourceRow = sourceSlice;
1885 unsigned char *destinationRow = destinationSlice;
1886
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001887 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04001888 {
1889 unsigned char *sourceElement = sourceRow;
1890 unsigned char *destinationElement = destinationRow;
1891
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001892 for(int x = 0; x < width; x++)
John Bauman89401822014-05-06 15:04:28 -04001893 {
1894 unsigned int xrgb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04001895
John Bauman89401822014-05-06 15:04:28 -04001896 unsigned int r = (((xrgb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
1897 unsigned int g = (((xrgb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
1898 unsigned int b = (((xrgb & 0x001F) * 2106 + 0x80) >> 8);
1899
1900 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
1901
1902 sourceElement += source.bytes;
1903 destinationElement += destination.bytes;
1904 }
1905
1906 sourceRow += source.pitchB;
1907 destinationRow += destination.pitchB;
1908 }
1909
1910 sourceSlice += source.sliceB;
1911 destinationSlice += destination.sliceB;
1912 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001913
1914 source.unlockRect();
1915 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04001916 }
1917
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001918 void Surface::decodeA1R5G5B5(Buffer &destination, Buffer &source)
John Bauman89401822014-05-06 15:04:28 -04001919 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001920 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
Nicolas Capensae7756e2018-02-22 16:13:01 -05001921 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);
John Bauman89401822014-05-06 15:04:28 -04001922
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001923 int depth = min(destination.depth, source.depth);
1924 int height = min(destination.height, source.height);
1925 int width = min(destination.width, source.width);
1926
1927 for(int z = 0; z < depth; z++)
John Bauman89401822014-05-06 15:04:28 -04001928 {
1929 unsigned char *sourceRow = sourceSlice;
1930 unsigned char *destinationRow = destinationSlice;
1931
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001932 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04001933 {
1934 unsigned char *sourceElement = sourceRow;
1935 unsigned char *destinationElement = destinationRow;
1936
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001937 for(int x = 0; x < width; x++)
John Bauman89401822014-05-06 15:04:28 -04001938 {
1939 unsigned int argb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04001940
John Bauman89401822014-05-06 15:04:28 -04001941 unsigned int a = (argb & 0x8000) * 130560;
1942 unsigned int r = (((argb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
1943 unsigned int g = (((argb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
1944 unsigned int b = (((argb & 0x001F) * 2106 + 0x80) >> 8);
1945
1946 *(unsigned int*)destinationElement = a | r | g | b;
1947
1948 sourceElement += source.bytes;
1949 destinationElement += destination.bytes;
1950 }
1951
1952 sourceRow += source.pitchB;
1953 destinationRow += destination.pitchB;
1954 }
1955
1956 sourceSlice += source.sliceB;
1957 destinationSlice += destination.sliceB;
1958 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001959
1960 source.unlockRect();
1961 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04001962 }
1963
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001964 void Surface::decodeX4R4G4B4(Buffer &destination, Buffer &source)
John Bauman89401822014-05-06 15:04:28 -04001965 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001966 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
Nicolas Capensae7756e2018-02-22 16:13:01 -05001967 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);
John Bauman89401822014-05-06 15:04:28 -04001968
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001969 int depth = min(destination.depth, source.depth);
1970 int height = min(destination.height, source.height);
1971 int width = min(destination.width, source.width);
1972
1973 for(int z = 0; z < depth; z++)
John Bauman89401822014-05-06 15:04:28 -04001974 {
1975 unsigned char *sourceRow = sourceSlice;
1976 unsigned char *destinationRow = destinationSlice;
1977
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001978 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04001979 {
1980 unsigned char *sourceElement = sourceRow;
1981 unsigned char *destinationElement = destinationRow;
1982
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001983 for(int x = 0; x < width; x++)
John Bauman89401822014-05-06 15:04:28 -04001984 {
1985 unsigned int xrgb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04001986
John Bauman89401822014-05-06 15:04:28 -04001987 unsigned int r = ((xrgb & 0x0F00) * 0x00001100) & 0x00FF0000;
1988 unsigned int g = ((xrgb & 0x00F0) * 0x00000110) & 0x0000FF00;
1989 unsigned int b = (xrgb & 0x000F) * 0x00000011;
1990
1991 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
1992
1993 sourceElement += source.bytes;
1994 destinationElement += destination.bytes;
1995 }
1996
1997 sourceRow += source.pitchB;
1998 destinationRow += destination.pitchB;
1999 }
2000
2001 sourceSlice += source.sliceB;
2002 destinationSlice += destination.sliceB;
2003 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002004
2005 source.unlockRect();
2006 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002007 }
2008
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002009 void Surface::decodeA4R4G4B4(Buffer &destination, Buffer &source)
John Bauman89401822014-05-06 15:04:28 -04002010 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002011 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
Nicolas Capensae7756e2018-02-22 16:13:01 -05002012 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);
John Bauman89401822014-05-06 15:04:28 -04002013
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002014 int depth = min(destination.depth, source.depth);
2015 int height = min(destination.height, source.height);
2016 int width = min(destination.width, source.width);
2017
2018 for(int z = 0; z < depth; z++)
John Bauman89401822014-05-06 15:04:28 -04002019 {
2020 unsigned char *sourceRow = sourceSlice;
2021 unsigned char *destinationRow = destinationSlice;
2022
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002023 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04002024 {
2025 unsigned char *sourceElement = sourceRow;
2026 unsigned char *destinationElement = destinationRow;
2027
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002028 for(int x = 0; x < width; x++)
John Bauman89401822014-05-06 15:04:28 -04002029 {
2030 unsigned int argb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002031
John Bauman89401822014-05-06 15:04:28 -04002032 unsigned int a = ((argb & 0xF000) * 0x00011000) & 0xFF000000;
2033 unsigned int r = ((argb & 0x0F00) * 0x00001100) & 0x00FF0000;
2034 unsigned int g = ((argb & 0x00F0) * 0x00000110) & 0x0000FF00;
2035 unsigned int b = (argb & 0x000F) * 0x00000011;
2036
2037 *(unsigned int*)destinationElement = a | r | g | b;
2038
2039 sourceElement += source.bytes;
2040 destinationElement += destination.bytes;
2041 }
2042
2043 sourceRow += source.pitchB;
2044 destinationRow += destination.pitchB;
2045 }
2046
2047 sourceSlice += source.sliceB;
2048 destinationSlice += destination.sliceB;
2049 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002050
2051 source.unlockRect();
2052 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002053 }
2054
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002055 void Surface::decodeP8(Buffer &destination, Buffer &source)
John Bauman89401822014-05-06 15:04:28 -04002056 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002057 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
Nicolas Capensae7756e2018-02-22 16:13:01 -05002058 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);
John Bauman89401822014-05-06 15:04:28 -04002059
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002060 int depth = min(destination.depth, source.depth);
2061 int height = min(destination.height, source.height);
2062 int width = min(destination.width, source.width);
2063
2064 for(int z = 0; z < depth; z++)
John Bauman89401822014-05-06 15:04:28 -04002065 {
2066 unsigned char *sourceRow = sourceSlice;
2067 unsigned char *destinationRow = destinationSlice;
2068
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002069 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04002070 {
2071 unsigned char *sourceElement = sourceRow;
2072 unsigned char *destinationElement = destinationRow;
2073
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002074 for(int x = 0; x < width; x++)
John Bauman89401822014-05-06 15:04:28 -04002075 {
2076 unsigned int abgr = palette[*(unsigned char*)sourceElement];
2077
2078 unsigned int r = (abgr & 0x000000FF) << 16;
2079 unsigned int g = (abgr & 0x0000FF00) << 0;
2080 unsigned int b = (abgr & 0x00FF0000) >> 16;
2081 unsigned int a = (abgr & 0xFF000000) >> 0;
2082
2083 *(unsigned int*)destinationElement = a | r | g | b;
2084
2085 sourceElement += source.bytes;
2086 destinationElement += destination.bytes;
2087 }
2088
2089 sourceRow += source.pitchB;
2090 destinationRow += destination.pitchB;
2091 }
2092
2093 sourceSlice += source.sliceB;
2094 destinationSlice += destination.sliceB;
2095 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002096
2097 source.unlockRect();
2098 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002099 }
2100
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002101 void Surface::decodeDXT1(Buffer &internal, Buffer &external)
John Bauman89401822014-05-06 15:04:28 -04002102 {
Nicolas Capensae7756e2018-02-22 16:13:01 -05002103 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_UPDATE);
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002104 const DXT1 *source = (const DXT1*)external.lockRect(0, 0, 0, LOCK_READONLY);
John Bauman89401822014-05-06 15:04:28 -04002105
2106 for(int z = 0; z < external.depth; z++)
2107 {
2108 unsigned int *dest = destSlice;
2109
2110 for(int y = 0; y < external.height; y += 4)
2111 {
2112 for(int x = 0; x < external.width; x += 4)
2113 {
2114 Color<byte> c[4];
2115
2116 c[0] = source->c0;
2117 c[1] = source->c1;
2118
2119 if(source->c0 > source->c1) // No transparency
2120 {
2121 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2122 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2123 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2124 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2125 c[2].a = 0xFF;
2126
2127 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2128 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2129 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2130 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2131 c[3].a = 0xFF;
2132 }
2133 else // c3 transparent
2134 {
2135 // c2 = 1 / 2 * c0 + 1 / 2 * c1
2136 c[2].r = (byte)(((word)c[0].r + (word)c[1].r) / 2);
2137 c[2].g = (byte)(((word)c[0].g + (word)c[1].g) / 2);
2138 c[2].b = (byte)(((word)c[0].b + (word)c[1].b) / 2);
2139 c[2].a = 0xFF;
2140
2141 c[3].r = 0;
2142 c[3].g = 0;
2143 c[3].b = 0;
2144 c[3].a = 0;
2145 }
2146
2147 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2148 {
2149 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2150 {
Nicolas Capens539468c2018-06-28 11:28:40 -04002151 dest[(x + i) + (y + j) * internal.pitchP] = c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4];
John Bauman89401822014-05-06 15:04:28 -04002152 }
2153 }
2154
2155 source++;
2156 }
2157 }
2158
2159 (byte*&)destSlice += internal.sliceB;
2160 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002161
2162 external.unlockRect();
2163 internal.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002164 }
2165
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002166 void Surface::decodeDXT3(Buffer &internal, Buffer &external)
John Bauman89401822014-05-06 15:04:28 -04002167 {
Nicolas Capensae7756e2018-02-22 16:13:01 -05002168 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_UPDATE);
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002169 const DXT3 *source = (const DXT3*)external.lockRect(0, 0, 0, LOCK_READONLY);
John Bauman89401822014-05-06 15:04:28 -04002170
2171 for(int z = 0; z < external.depth; z++)
2172 {
2173 unsigned int *dest = destSlice;
2174
2175 for(int y = 0; y < external.height; y += 4)
2176 {
2177 for(int x = 0; x < external.width; x += 4)
2178 {
2179 Color<byte> c[4];
2180
2181 c[0] = source->c0;
2182 c[1] = source->c1;
2183
2184 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2185 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2186 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2187 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2188
2189 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2190 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2191 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2192 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2193
2194 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2195 {
2196 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2197 {
2198 unsigned int a = (unsigned int)(source->a >> 4 * (i + j * 4)) & 0x0F;
2199 unsigned int color = (c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | ((a << 28) + (a << 24));
2200
Nicolas Capens539468c2018-06-28 11:28:40 -04002201 dest[(x + i) + (y + j) * internal.pitchP] = color;
John Bauman89401822014-05-06 15:04:28 -04002202 }
2203 }
2204
2205 source++;
2206 }
2207 }
2208
2209 (byte*&)destSlice += internal.sliceB;
2210 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002211
2212 external.unlockRect();
2213 internal.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002214 }
2215
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002216 void Surface::decodeDXT5(Buffer &internal, Buffer &external)
John Bauman89401822014-05-06 15:04:28 -04002217 {
Nicolas Capensae7756e2018-02-22 16:13:01 -05002218 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_UPDATE);
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002219 const DXT5 *source = (const DXT5*)external.lockRect(0, 0, 0, LOCK_READONLY);
John Bauman89401822014-05-06 15:04:28 -04002220
2221 for(int z = 0; z < external.depth; z++)
2222 {
2223 unsigned int *dest = destSlice;
2224
2225 for(int y = 0; y < external.height; y += 4)
2226 {
2227 for(int x = 0; x < external.width; x += 4)
2228 {
2229 Color<byte> c[4];
2230
2231 c[0] = source->c0;
2232 c[1] = source->c1;
2233
2234 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2235 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2236 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2237 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2238
2239 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2240 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2241 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2242 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2243
2244 byte a[8];
2245
2246 a[0] = source->a0;
2247 a[1] = source->a1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002248
John Bauman89401822014-05-06 15:04:28 -04002249 if(a[0] > a[1])
2250 {
2251 a[2] = (byte)((6 * (word)a[0] + 1 * (word)a[1] + 3) / 7);
2252 a[3] = (byte)((5 * (word)a[0] + 2 * (word)a[1] + 3) / 7);
2253 a[4] = (byte)((4 * (word)a[0] + 3 * (word)a[1] + 3) / 7);
2254 a[5] = (byte)((3 * (word)a[0] + 4 * (word)a[1] + 3) / 7);
2255 a[6] = (byte)((2 * (word)a[0] + 5 * (word)a[1] + 3) / 7);
2256 a[7] = (byte)((1 * (word)a[0] + 6 * (word)a[1] + 3) / 7);
2257 }
2258 else
2259 {
2260 a[2] = (byte)((4 * (word)a[0] + 1 * (word)a[1] + 2) / 5);
2261 a[3] = (byte)((3 * (word)a[0] + 2 * (word)a[1] + 2) / 5);
2262 a[4] = (byte)((2 * (word)a[0] + 3 * (word)a[1] + 2) / 5);
2263 a[5] = (byte)((1 * (word)a[0] + 4 * (word)a[1] + 2) / 5);
2264 a[6] = 0;
2265 a[7] = 0xFF;
2266 }
2267
2268 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2269 {
2270 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2271 {
2272 unsigned int alpha = (unsigned int)a[(unsigned int)(source->alut >> (16 + 3 * (i + j * 4))) % 8] << 24;
2273 unsigned int color = (c[(source->clut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | alpha;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002274
Nicolas Capens539468c2018-06-28 11:28:40 -04002275 dest[(x + i) + (y + j) * internal.pitchP] = color;
John Bauman89401822014-05-06 15:04:28 -04002276 }
2277 }
2278
2279 source++;
2280 }
2281 }
2282
2283 (byte*&)destSlice += internal.sliceB;
2284 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002285
2286 external.unlockRect();
2287 internal.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002288 }
2289
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002290 void Surface::decodeATI1(Buffer &internal, Buffer &external)
John Bauman89401822014-05-06 15:04:28 -04002291 {
Nicolas Capensae7756e2018-02-22 16:13:01 -05002292 byte *destSlice = (byte*)internal.lockRect(0, 0, 0, LOCK_UPDATE);
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002293 const ATI1 *source = (const ATI1*)external.lockRect(0, 0, 0, LOCK_READONLY);
John Bauman89401822014-05-06 15:04:28 -04002294
2295 for(int z = 0; z < external.depth; z++)
2296 {
2297 byte *dest = destSlice;
2298
2299 for(int y = 0; y < external.height; y += 4)
2300 {
2301 for(int x = 0; x < external.width; x += 4)
2302 {
2303 byte r[8];
2304
2305 r[0] = source->r0;
2306 r[1] = source->r1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002307
John Bauman89401822014-05-06 15:04:28 -04002308 if(r[0] > r[1])
2309 {
2310 r[2] = (byte)((6 * (word)r[0] + 1 * (word)r[1] + 3) / 7);
2311 r[3] = (byte)((5 * (word)r[0] + 2 * (word)r[1] + 3) / 7);
2312 r[4] = (byte)((4 * (word)r[0] + 3 * (word)r[1] + 3) / 7);
2313 r[5] = (byte)((3 * (word)r[0] + 4 * (word)r[1] + 3) / 7);
2314 r[6] = (byte)((2 * (word)r[0] + 5 * (word)r[1] + 3) / 7);
2315 r[7] = (byte)((1 * (word)r[0] + 6 * (word)r[1] + 3) / 7);
2316 }
2317 else
2318 {
2319 r[2] = (byte)((4 * (word)r[0] + 1 * (word)r[1] + 2) / 5);
2320 r[3] = (byte)((3 * (word)r[0] + 2 * (word)r[1] + 2) / 5);
2321 r[4] = (byte)((2 * (word)r[0] + 3 * (word)r[1] + 2) / 5);
2322 r[5] = (byte)((1 * (word)r[0] + 4 * (word)r[1] + 2) / 5);
2323 r[6] = 0;
2324 r[7] = 0xFF;
2325 }
2326
2327 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2328 {
2329 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2330 {
Nicolas Capens539468c2018-06-28 11:28:40 -04002331 dest[(x + i) + (y + j) * internal.pitchP] = r[(unsigned int)(source->rlut >> (16 + 3 * (i + j * 4))) % 8];
John Bauman89401822014-05-06 15:04:28 -04002332 }
2333 }
2334
2335 source++;
2336 }
2337 }
2338
2339 destSlice += internal.sliceB;
2340 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002341
2342 external.unlockRect();
2343 internal.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002344 }
2345
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002346 void Surface::decodeATI2(Buffer &internal, Buffer &external)
John Bauman89401822014-05-06 15:04:28 -04002347 {
Nicolas Capensae7756e2018-02-22 16:13:01 -05002348 word *destSlice = (word*)internal.lockRect(0, 0, 0, LOCK_UPDATE);
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002349 const ATI2 *source = (const ATI2*)external.lockRect(0, 0, 0, LOCK_READONLY);
John Bauman89401822014-05-06 15:04:28 -04002350
2351 for(int z = 0; z < external.depth; z++)
2352 {
2353 word *dest = destSlice;
2354
2355 for(int y = 0; y < external.height; y += 4)
2356 {
2357 for(int x = 0; x < external.width; x += 4)
2358 {
2359 byte X[8];
2360
2361 X[0] = source->x0;
2362 X[1] = source->x1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002363
John Bauman89401822014-05-06 15:04:28 -04002364 if(X[0] > X[1])
2365 {
2366 X[2] = (byte)((6 * (word)X[0] + 1 * (word)X[1] + 3) / 7);
2367 X[3] = (byte)((5 * (word)X[0] + 2 * (word)X[1] + 3) / 7);
2368 X[4] = (byte)((4 * (word)X[0] + 3 * (word)X[1] + 3) / 7);
2369 X[5] = (byte)((3 * (word)X[0] + 4 * (word)X[1] + 3) / 7);
2370 X[6] = (byte)((2 * (word)X[0] + 5 * (word)X[1] + 3) / 7);
2371 X[7] = (byte)((1 * (word)X[0] + 6 * (word)X[1] + 3) / 7);
2372 }
2373 else
2374 {
2375 X[2] = (byte)((4 * (word)X[0] + 1 * (word)X[1] + 2) / 5);
2376 X[3] = (byte)((3 * (word)X[0] + 2 * (word)X[1] + 2) / 5);
2377 X[4] = (byte)((2 * (word)X[0] + 3 * (word)X[1] + 2) / 5);
2378 X[5] = (byte)((1 * (word)X[0] + 4 * (word)X[1] + 2) / 5);
2379 X[6] = 0;
2380 X[7] = 0xFF;
2381 }
2382
2383 byte Y[8];
2384
2385 Y[0] = source->y0;
2386 Y[1] = source->y1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002387
John Bauman89401822014-05-06 15:04:28 -04002388 if(Y[0] > Y[1])
2389 {
2390 Y[2] = (byte)((6 * (word)Y[0] + 1 * (word)Y[1] + 3) / 7);
2391 Y[3] = (byte)((5 * (word)Y[0] + 2 * (word)Y[1] + 3) / 7);
2392 Y[4] = (byte)((4 * (word)Y[0] + 3 * (word)Y[1] + 3) / 7);
2393 Y[5] = (byte)((3 * (word)Y[0] + 4 * (word)Y[1] + 3) / 7);
2394 Y[6] = (byte)((2 * (word)Y[0] + 5 * (word)Y[1] + 3) / 7);
2395 Y[7] = (byte)((1 * (word)Y[0] + 6 * (word)Y[1] + 3) / 7);
2396 }
2397 else
2398 {
2399 Y[2] = (byte)((4 * (word)Y[0] + 1 * (word)Y[1] + 2) / 5);
2400 Y[3] = (byte)((3 * (word)Y[0] + 2 * (word)Y[1] + 2) / 5);
2401 Y[4] = (byte)((2 * (word)Y[0] + 3 * (word)Y[1] + 2) / 5);
2402 Y[5] = (byte)((1 * (word)Y[0] + 4 * (word)Y[1] + 2) / 5);
2403 Y[6] = 0;
2404 Y[7] = 0xFF;
2405 }
2406
2407 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2408 {
2409 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2410 {
2411 word r = X[(unsigned int)(source->xlut >> (16 + 3 * (i + j * 4))) % 8];
2412 word g = Y[(unsigned int)(source->ylut >> (16 + 3 * (i + j * 4))) % 8];
2413
Nicolas Capens539468c2018-06-28 11:28:40 -04002414 dest[(x + i) + (y + j) * internal.pitchP] = (g << 8) + r;
John Bauman89401822014-05-06 15:04:28 -04002415 }
2416 }
2417
2418 source++;
2419 }
2420 }
2421
2422 (byte*&)destSlice += internal.sliceB;
2423 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002424
2425 external.unlockRect();
2426 internal.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002427 }
Nicolas Capens22658242014-11-29 00:31:41 -05002428
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002429 void Surface::decodeETC2(Buffer &internal, Buffer &external, int nbAlphaBits, bool isSRGB)
Nicolas Capens22658242014-11-29 00:31:41 -05002430 {
Nicolas Capensae7756e2018-02-22 16:13:01 -05002431 ETC_Decoder::Decode((const byte*)external.lockRect(0, 0, 0, LOCK_READONLY), (byte*)internal.lockRect(0, 0, 0, LOCK_UPDATE), external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
Alexis Hetu0de50d42015-09-09 13:56:41 -04002432 (nbAlphaBits == 8) ? ETC_Decoder::ETC_RGBA : ((nbAlphaBits == 1) ? ETC_Decoder::ETC_RGB_PUNCHTHROUGH_ALPHA : ETC_Decoder::ETC_RGB));
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002433 external.unlockRect();
2434 internal.unlockRect();
Nicolas Capens22658242014-11-29 00:31:41 -05002435
Alexis Hetu0de50d42015-09-09 13:56:41 -04002436 if(isSRGB)
Nicolas Capens22658242014-11-29 00:31:41 -05002437 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002438 static byte sRGBtoLinearTable[256];
2439 static bool sRGBtoLinearTableDirty = true;
2440 if(sRGBtoLinearTableDirty)
Nicolas Capens22658242014-11-29 00:31:41 -05002441 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002442 for(int i = 0; i < 256; i++)
Nicolas Capens22658242014-11-29 00:31:41 -05002443 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002444 sRGBtoLinearTable[i] = static_cast<byte>(sRGBtoLinear(static_cast<float>(i) / 255.0f) * 255.0f + 0.5f);
Nicolas Capens22658242014-11-29 00:31:41 -05002445 }
Alexis Hetu0de50d42015-09-09 13:56:41 -04002446 sRGBtoLinearTableDirty = false;
Nicolas Capens22658242014-11-29 00:31:41 -05002447 }
2448
Alexis Hetu0de50d42015-09-09 13:56:41 -04002449 // Perform sRGB conversion in place after decoding
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002450 byte *src = (byte*)internal.lockRect(0, 0, 0, LOCK_READWRITE);
Alexis Hetu0de50d42015-09-09 13:56:41 -04002451 for(int y = 0; y < internal.height; y++)
2452 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002453 byte *srcRow = src + y * internal.pitchB;
Alexis Hetu0de50d42015-09-09 13:56:41 -04002454 for(int x = 0; x < internal.width; x++)
2455 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002456 byte *srcPix = srcRow + x * internal.bytes;
Alexis Hetu0de50d42015-09-09 13:56:41 -04002457 for(int i = 0; i < 3; i++)
2458 {
2459 srcPix[i] = sRGBtoLinearTable[srcPix[i]];
2460 }
2461 }
2462 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002463 internal.unlockRect();
Nicolas Capens22658242014-11-29 00:31:41 -05002464 }
2465 }
John Bauman89401822014-05-06 15:04:28 -04002466
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002467 void Surface::decodeEAC(Buffer &internal, Buffer &external, int nbChannels, bool isSigned)
Alexis Hetu460e41f2015-09-01 10:58:37 -04002468 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002469 ASSERT(nbChannels == 1 || nbChannels == 2);
Alexis Hetu460e41f2015-09-01 10:58:37 -04002470
Alexis Hetuf46493f2017-12-18 15:32:26 -05002471 byte *src = (byte*)internal.lockRect(0, 0, 0, LOCK_READWRITE);
2472 ETC_Decoder::Decode((const byte*)external.lockRect(0, 0, 0, LOCK_READONLY), src, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
Alexis Hetu0de50d42015-09-09 13:56:41 -04002473 (nbChannels == 1) ? (isSigned ? ETC_Decoder::ETC_R_SIGNED : ETC_Decoder::ETC_R_UNSIGNED) : (isSigned ? ETC_Decoder::ETC_RG_SIGNED : ETC_Decoder::ETC_RG_UNSIGNED));
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002474 external.unlockRect();
Alexis Hetu0de50d42015-09-09 13:56:41 -04002475
Alexis Hetuf46493f2017-12-18 15:32:26 -05002476 // FIXME: We convert EAC data to float, until signed short internal formats are supported
2477 // This code can be removed if ETC2 images are decoded to internal 16 bit signed R/RG formats
2478 const float normalization = isSigned ? (1.0f / (8.0f * 127.875f)) : (1.0f / (8.0f * 255.875f));
2479 for(int y = 0; y < internal.height; y++)
Alexis Hetu0de50d42015-09-09 13:56:41 -04002480 {
Alexis Hetuf46493f2017-12-18 15:32:26 -05002481 byte* srcRow = src + y * internal.pitchB;
2482 for(int x = internal.width - 1; x >= 0; x--)
Alexis Hetu0de50d42015-09-09 13:56:41 -04002483 {
Alexis Hetuf46493f2017-12-18 15:32:26 -05002484 int* srcPix = reinterpret_cast<int*>(srcRow + x * internal.bytes);
2485 float* dstPix = reinterpret_cast<float*>(srcPix);
2486 for(int c = nbChannels - 1; c >= 0; c--)
Alexis Hetu0de50d42015-09-09 13:56:41 -04002487 {
Alexis Hetuf46493f2017-12-18 15:32:26 -05002488 dstPix[c] = clamp(static_cast<float>(srcPix[c]) * normalization, -1.0f, 1.0f);
Alexis Hetu0de50d42015-09-09 13:56:41 -04002489 }
2490 }
2491 }
Alexis Hetuf46493f2017-12-18 15:32:26 -05002492
2493 internal.unlockRect();
Alexis Hetu460e41f2015-09-01 10:58:37 -04002494 }
2495
Nicolas Capens419a5802018-05-08 17:20:50 -04002496 size_t Surface::size(int width, int height, int depth, int border, int samples, Format format)
John Bauman89401822014-05-06 15:04:28 -04002497 {
Nicolas Capens607771b2018-05-08 17:20:50 -04002498 samples = max(1, samples);
2499
John Bauman89401822014-05-06 15:04:28 -04002500 switch(format)
2501 {
Nicolas Capens419a5802018-05-08 17:20:50 -04002502 default:
Nicolas Capens607771b2018-05-08 17:20:50 -04002503 {
2504 uint64_t size = (uint64_t)sliceB(width, height, border, format, true) * depth * samples;
Nicolas Capens419a5802018-05-08 17:20:50 -04002505
Nicolas Capens6e3a3872019-12-18 10:48:09 -05002506 // We can only sample buffers smaller than 2 GiB, due to signed 32-bit offset calculations.
2507 // Force an out-of-memory if larger, or let the caller report an error.
2508 if(size >= 0x80000000u)
2509 {
2510 return std::numeric_limits<size_t>::max();
2511 }
2512
2513 // Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes,
Nicolas Capens607771b2018-05-08 17:20:50 -04002514 // and stencil operations also read 8 bytes per four 8-bit stencil values,
2515 // so we have to allocate 4 extra bytes to avoid buffer overruns.
Nicolas Capens6e3a3872019-12-18 10:48:09 -05002516 // TODO(b/145229887): Eliminate if possible, or don't hard-code.
Nicolas Capens5cf1e9a2020-03-30 15:09:16 -04002517 return static_cast<size_t>(size) + 4;
Nicolas Capens607771b2018-05-08 17:20:50 -04002518 }
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002519 case FORMAT_YV12_BT601:
2520 case FORMAT_YV12_BT709:
2521 case FORMAT_YV12_JFIF:
2522 {
Nicolas Capens419a5802018-05-08 17:20:50 -04002523 width += 2 * border;
2524 height += 2 * border;
2525
2526 size_t YStride = align<16>(width);
2527 size_t YSize = YStride * height;
2528 size_t CStride = align<16>(YStride / 2);
2529 size_t CSize = CStride * height / 2;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002530
2531 return YSize + 2 * CSize;
2532 }
John Bauman89401822014-05-06 15:04:28 -04002533 }
John Bauman89401822014-05-06 15:04:28 -04002534 }
2535
2536 bool Surface::isStencil(Format format)
2537 {
2538 switch(format)
2539 {
2540 case FORMAT_D32:
2541 case FORMAT_D16:
2542 case FORMAT_D24X8:
2543 case FORMAT_D32F:
2544 case FORMAT_D32F_COMPLEMENTARY:
2545 case FORMAT_D32F_LOCKABLE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002546 case FORMAT_D32F_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002547 return false;
2548 case FORMAT_D24S8:
2549 case FORMAT_D24FS8:
2550 case FORMAT_S8:
John Bauman66b8ab22014-05-06 15:57:45 -04002551 case FORMAT_DF24S8:
2552 case FORMAT_DF16S8:
2553 case FORMAT_D32FS8_TEXTURE:
2554 case FORMAT_D32FS8_SHADOW:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002555 case FORMAT_D32FS8:
2556 case FORMAT_D32FS8_COMPLEMENTARY:
John Bauman89401822014-05-06 15:04:28 -04002557 case FORMAT_INTZ:
2558 return true;
2559 default:
2560 return false;
2561 }
2562 }
2563
2564 bool Surface::isDepth(Format format)
2565 {
2566 switch(format)
2567 {
2568 case FORMAT_D32:
2569 case FORMAT_D16:
2570 case FORMAT_D24X8:
2571 case FORMAT_D24S8:
2572 case FORMAT_D24FS8:
2573 case FORMAT_D32F:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002574 case FORMAT_D32FS8:
John Bauman89401822014-05-06 15:04:28 -04002575 case FORMAT_D32F_COMPLEMENTARY:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002576 case FORMAT_D32FS8_COMPLEMENTARY:
John Bauman89401822014-05-06 15:04:28 -04002577 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002578 case FORMAT_DF24S8:
2579 case FORMAT_DF16S8:
2580 case FORMAT_D32FS8_TEXTURE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002581 case FORMAT_D32F_SHADOW:
John Bauman66b8ab22014-05-06 15:57:45 -04002582 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002583 case FORMAT_INTZ:
2584 return true;
2585 case FORMAT_S8:
2586 return false;
2587 default:
2588 return false;
2589 }
2590 }
2591
Alexis Hetub9dda642016-10-06 11:25:32 -04002592 bool Surface::hasQuadLayout(Format format)
2593 {
2594 switch(format)
2595 {
2596 case FORMAT_D32:
2597 case FORMAT_D16:
2598 case FORMAT_D24X8:
2599 case FORMAT_D24S8:
2600 case FORMAT_D24FS8:
2601 case FORMAT_D32F:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002602 case FORMAT_D32FS8:
Alexis Hetub9dda642016-10-06 11:25:32 -04002603 case FORMAT_D32F_COMPLEMENTARY:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002604 case FORMAT_D32FS8_COMPLEMENTARY:
Alexis Hetub9dda642016-10-06 11:25:32 -04002605 case FORMAT_DF24S8:
2606 case FORMAT_DF16S8:
2607 case FORMAT_INTZ:
2608 case FORMAT_S8:
2609 case FORMAT_A8G8R8B8Q:
2610 case FORMAT_X8G8R8B8Q:
2611 return true;
2612 case FORMAT_D32F_LOCKABLE:
2613 case FORMAT_D32FS8_TEXTURE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002614 case FORMAT_D32F_SHADOW:
Alexis Hetub9dda642016-10-06 11:25:32 -04002615 case FORMAT_D32FS8_SHADOW:
2616 default:
2617 break;
2618 }
2619
2620 return false;
2621 }
2622
John Bauman89401822014-05-06 15:04:28 -04002623 bool Surface::isPalette(Format format)
2624 {
2625 switch(format)
2626 {
2627 case FORMAT_P8:
2628 case FORMAT_A8P8:
2629 return true;
2630 default:
2631 return false;
2632 }
2633 }
2634
2635 bool Surface::isFloatFormat(Format format)
2636 {
2637 switch(format)
2638 {
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002639 case FORMAT_R5G6B5:
Alexis Hetu925c2822015-11-24 14:09:34 -05002640 case FORMAT_R8G8B8:
2641 case FORMAT_B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002642 case FORMAT_X8R8G8B8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002643 case FORMAT_X8B8G8R8I:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002644 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002645 case FORMAT_A8R8G8B8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002646 case FORMAT_SRGB8_X8:
2647 case FORMAT_SRGB8_A8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002648 case FORMAT_A8B8G8R8I:
2649 case FORMAT_R8UI:
2650 case FORMAT_G8R8UI:
2651 case FORMAT_X8B8G8R8UI:
2652 case FORMAT_A8B8G8R8UI:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002653 case FORMAT_A8B8G8R8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002654 case FORMAT_G8R8I:
John Bauman89401822014-05-06 15:04:28 -04002655 case FORMAT_G8R8:
Alexis Hetuf999a002015-12-17 11:09:36 -05002656 case FORMAT_A2B10G10R10:
Nicolas Capens5555af42017-12-14 13:14:03 -05002657 case FORMAT_A2B10G10R10UI:
Nicolas Capens975adb72017-12-19 15:34:20 -05002658 case FORMAT_R8_SNORM:
2659 case FORMAT_G8R8_SNORM:
2660 case FORMAT_X8B8G8R8_SNORM:
2661 case FORMAT_A8B8G8R8_SNORM:
Alexis Hetu43577b82015-10-21 15:32:16 -04002662 case FORMAT_R16I:
2663 case FORMAT_R16UI:
2664 case FORMAT_G16R16I:
2665 case FORMAT_G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002666 case FORMAT_G16R16:
Alexis Hetu43577b82015-10-21 15:32:16 -04002667 case FORMAT_X16B16G16R16I:
2668 case FORMAT_X16B16G16R16UI:
2669 case FORMAT_A16B16G16R16I:
2670 case FORMAT_A16B16G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002671 case FORMAT_A16B16G16R16:
2672 case FORMAT_V8U8:
2673 case FORMAT_Q8W8V8U8:
2674 case FORMAT_X8L8V8U8:
2675 case FORMAT_V16U16:
2676 case FORMAT_A16W16V16U16:
2677 case FORMAT_Q16W16V16U16:
2678 case FORMAT_A8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002679 case FORMAT_R8I:
John Bauman89401822014-05-06 15:04:28 -04002680 case FORMAT_R8:
Alexis Hetub9dda642016-10-06 11:25:32 -04002681 case FORMAT_S8:
John Bauman89401822014-05-06 15:04:28 -04002682 case FORMAT_L8:
2683 case FORMAT_L16:
2684 case FORMAT_A8L8:
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002685 case FORMAT_YV12_BT601:
2686 case FORMAT_YV12_BT709:
2687 case FORMAT_YV12_JFIF:
Alexis Hetu43577b82015-10-21 15:32:16 -04002688 case FORMAT_R32I:
2689 case FORMAT_R32UI:
2690 case FORMAT_G32R32I:
2691 case FORMAT_G32R32UI:
2692 case FORMAT_X32B32G32R32I:
2693 case FORMAT_X32B32G32R32UI:
2694 case FORMAT_A32B32G32R32I:
2695 case FORMAT_A32B32G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002696 return false;
Nicolas Capens400667e2017-03-29 14:40:14 -04002697 case FORMAT_R16F:
2698 case FORMAT_G16R16F:
2699 case FORMAT_B16G16R16F:
Nicolas Capensa6bc61d2017-12-20 11:07:45 -05002700 case FORMAT_X16B16G16R16F:
Nicolas Capens400667e2017-03-29 14:40:14 -04002701 case FORMAT_A16B16G16R16F:
Nicolas Capens67fdd832017-12-21 11:20:54 -05002702 case FORMAT_X16B16G16R16F_UNSIGNED:
John Bauman89401822014-05-06 15:04:28 -04002703 case FORMAT_R32F:
2704 case FORMAT_G32R32F:
Nicolas Capensc018e082016-12-13 10:19:33 -05002705 case FORMAT_B32G32R32F:
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002706 case FORMAT_X32B32G32R32F:
John Bauman89401822014-05-06 15:04:28 -04002707 case FORMAT_A32B32G32R32F:
Nicolas Capens67fdd832017-12-21 11:20:54 -05002708 case FORMAT_X32B32G32R32F_UNSIGNED:
John Bauman89401822014-05-06 15:04:28 -04002709 case FORMAT_D32F:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002710 case FORMAT_D32FS8:
John Bauman89401822014-05-06 15:04:28 -04002711 case FORMAT_D32F_COMPLEMENTARY:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002712 case FORMAT_D32FS8_COMPLEMENTARY:
John Bauman89401822014-05-06 15:04:28 -04002713 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002714 case FORMAT_D32FS8_TEXTURE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002715 case FORMAT_D32F_SHADOW:
John Bauman66b8ab22014-05-06 15:57:45 -04002716 case FORMAT_D32FS8_SHADOW:
Nicolas Capens80594422015-06-09 16:42:56 -04002717 case FORMAT_L16F:
2718 case FORMAT_A16L16F:
2719 case FORMAT_L32F:
2720 case FORMAT_A32L32F:
John Bauman89401822014-05-06 15:04:28 -04002721 return true;
2722 default:
2723 ASSERT(false);
2724 }
Nicolas Capensc39901e2016-03-21 16:37:44 -04002725
John Bauman89401822014-05-06 15:04:28 -04002726 return false;
2727 }
2728
2729 bool Surface::isUnsignedComponent(Format format, int component)
2730 {
2731 switch(format)
2732 {
2733 case FORMAT_NULL:
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002734 case FORMAT_R5G6B5:
Alexis Hetu925c2822015-11-24 14:09:34 -05002735 case FORMAT_R8G8B8:
2736 case FORMAT_B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002737 case FORMAT_X8R8G8B8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002738 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002739 case FORMAT_A8R8G8B8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002740 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002741 case FORMAT_SRGB8_X8:
2742 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -04002743 case FORMAT_G8R8:
Alexis Hetuf999a002015-12-17 11:09:36 -05002744 case FORMAT_A2B10G10R10:
Nicolas Capens5555af42017-12-14 13:14:03 -05002745 case FORMAT_A2B10G10R10UI:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002746 case FORMAT_R16UI:
John Bauman89401822014-05-06 15:04:28 -04002747 case FORMAT_G16R16:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002748 case FORMAT_G16R16UI:
2749 case FORMAT_X16B16G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002750 case FORMAT_A16B16G16R16:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002751 case FORMAT_A16B16G16R16UI:
2752 case FORMAT_R32UI:
2753 case FORMAT_G32R32UI:
2754 case FORMAT_X32B32G32R32UI:
2755 case FORMAT_A32B32G32R32UI:
Nicolas Capens67fdd832017-12-21 11:20:54 -05002756 case FORMAT_X32B32G32R32F_UNSIGNED:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002757 case FORMAT_R8UI:
2758 case FORMAT_G8R8UI:
2759 case FORMAT_X8B8G8R8UI:
2760 case FORMAT_A8B8G8R8UI:
John Bauman89401822014-05-06 15:04:28 -04002761 case FORMAT_D32F:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002762 case FORMAT_D32FS8:
John Bauman89401822014-05-06 15:04:28 -04002763 case FORMAT_D32F_COMPLEMENTARY:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002764 case FORMAT_D32FS8_COMPLEMENTARY:
John Bauman89401822014-05-06 15:04:28 -04002765 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002766 case FORMAT_D32FS8_TEXTURE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002767 case FORMAT_D32F_SHADOW:
John Bauman66b8ab22014-05-06 15:57:45 -04002768 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002769 case FORMAT_A8:
2770 case FORMAT_R8:
2771 case FORMAT_L8:
2772 case FORMAT_L16:
2773 case FORMAT_A8L8:
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002774 case FORMAT_YV12_BT601:
2775 case FORMAT_YV12_BT709:
2776 case FORMAT_YV12_JFIF:
John Bauman89401822014-05-06 15:04:28 -04002777 return true;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002778 case FORMAT_A8B8G8R8I:
2779 case FORMAT_A16B16G16R16I:
2780 case FORMAT_A32B32G32R32I:
Nicolas Capens975adb72017-12-19 15:34:20 -05002781 case FORMAT_A8B8G8R8_SNORM:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002782 case FORMAT_Q8W8V8U8:
2783 case FORMAT_Q16W16V16U16:
2784 case FORMAT_A32B32G32R32F:
2785 return false;
2786 case FORMAT_R32F:
2787 case FORMAT_R8I:
2788 case FORMAT_R16I:
2789 case FORMAT_R32I:
Nicolas Capens975adb72017-12-19 15:34:20 -05002790 case FORMAT_R8_SNORM:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002791 return component >= 1;
John Bauman89401822014-05-06 15:04:28 -04002792 case FORMAT_V8U8:
2793 case FORMAT_X8L8V8U8:
2794 case FORMAT_V16U16:
John Bauman89401822014-05-06 15:04:28 -04002795 case FORMAT_G32R32F:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002796 case FORMAT_G8R8I:
2797 case FORMAT_G16R16I:
2798 case FORMAT_G32R32I:
Nicolas Capens975adb72017-12-19 15:34:20 -05002799 case FORMAT_G8R8_SNORM:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002800 return component >= 2;
2801 case FORMAT_A16W16V16U16:
Nicolas Capens2e363b02016-12-14 10:32:36 -05002802 case FORMAT_B32G32R32F:
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002803 case FORMAT_X32B32G32R32F:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002804 case FORMAT_X8B8G8R8I:
2805 case FORMAT_X16B16G16R16I:
2806 case FORMAT_X32B32G32R32I:
Nicolas Capens975adb72017-12-19 15:34:20 -05002807 case FORMAT_X8B8G8R8_SNORM:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002808 return component >= 3;
John Bauman89401822014-05-06 15:04:28 -04002809 default:
2810 ASSERT(false);
2811 }
Nicolas Capensc39901e2016-03-21 16:37:44 -04002812
John Bauman89401822014-05-06 15:04:28 -04002813 return false;
2814 }
2815
2816 bool Surface::isSRGBreadable(Format format)
2817 {
2818 // Keep in sync with Capabilities::isSRGBreadable
2819 switch(format)
2820 {
2821 case FORMAT_L8:
2822 case FORMAT_A8L8:
2823 case FORMAT_R8G8B8:
2824 case FORMAT_A8R8G8B8:
2825 case FORMAT_X8R8G8B8:
2826 case FORMAT_A8B8G8R8:
2827 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002828 case FORMAT_SRGB8_X8:
2829 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -04002830 case FORMAT_R5G6B5:
2831 case FORMAT_X1R5G5B5:
2832 case FORMAT_A1R5G5B5:
2833 case FORMAT_A4R4G4B4:
John Bauman89401822014-05-06 15:04:28 -04002834 case FORMAT_DXT1:
2835 case FORMAT_DXT3:
2836 case FORMAT_DXT5:
2837 case FORMAT_ATI1:
2838 case FORMAT_ATI2:
John Bauman89401822014-05-06 15:04:28 -04002839 return true;
2840 default:
2841 return false;
2842 }
John Bauman89401822014-05-06 15:04:28 -04002843 }
2844
2845 bool Surface::isSRGBwritable(Format format)
2846 {
2847 // Keep in sync with Capabilities::isSRGBwritable
2848 switch(format)
2849 {
2850 case FORMAT_NULL:
2851 case FORMAT_A8R8G8B8:
2852 case FORMAT_X8R8G8B8:
2853 case FORMAT_A8B8G8R8:
2854 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002855 case FORMAT_SRGB8_X8:
2856 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -04002857 case FORMAT_R5G6B5:
2858 return true;
2859 default:
2860 return false;
2861 }
2862 }
2863
Nicolas Capens5555af42017-12-14 13:14:03 -05002864 bool Surface::isSRGBformat(Format format)
2865 {
2866 switch(format)
2867 {
2868 case FORMAT_SRGB8_X8:
2869 case FORMAT_SRGB8_A8:
2870 return true;
2871 default:
2872 return false;
2873 }
2874 }
2875
John Bauman89401822014-05-06 15:04:28 -04002876 bool Surface::isCompressed(Format format)
2877 {
2878 switch(format)
2879 {
John Bauman89401822014-05-06 15:04:28 -04002880 case FORMAT_DXT1:
2881 case FORMAT_DXT3:
2882 case FORMAT_DXT5:
2883 case FORMAT_ATI1:
2884 case FORMAT_ATI2:
Nicolas Capens22658242014-11-29 00:31:41 -05002885 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04002886 case FORMAT_R11_EAC:
2887 case FORMAT_SIGNED_R11_EAC:
2888 case FORMAT_RG11_EAC:
2889 case FORMAT_SIGNED_RG11_EAC:
2890 case FORMAT_RGB8_ETC2:
2891 case FORMAT_SRGB8_ETC2:
2892 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
2893 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
2894 case FORMAT_RGBA8_ETC2_EAC:
2895 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
John Bauman89401822014-05-06 15:04:28 -04002896 return true;
John Bauman89401822014-05-06 15:04:28 -04002897 default:
2898 return false;
2899 }
2900 }
2901
Nicolas Capens492887a2017-03-27 14:50:51 -04002902 bool Surface::isSignedNonNormalizedInteger(Format format)
Alexis Hetu43577b82015-10-21 15:32:16 -04002903 {
2904 switch(format)
2905 {
2906 case FORMAT_A8B8G8R8I:
2907 case FORMAT_X8B8G8R8I:
2908 case FORMAT_G8R8I:
2909 case FORMAT_R8I:
Alexis Hetu43577b82015-10-21 15:32:16 -04002910 case FORMAT_A16B16G16R16I:
2911 case FORMAT_X16B16G16R16I:
2912 case FORMAT_G16R16I:
2913 case FORMAT_R16I:
Alexis Hetu91dd1c42017-07-18 13:03:42 -04002914 case FORMAT_A32B32G32R32I:
2915 case FORMAT_X32B32G32R32I:
2916 case FORMAT_G32R32I:
2917 case FORMAT_R32I:
Nicolas Capens492887a2017-03-27 14:50:51 -04002918 return true;
2919 default:
2920 return false;
2921 }
2922 }
2923
2924 bool Surface::isUnsignedNonNormalizedInteger(Format format)
2925 {
2926 switch(format)
2927 {
Alexis Hetu91dd1c42017-07-18 13:03:42 -04002928 case FORMAT_A8B8G8R8UI:
2929 case FORMAT_X8B8G8R8UI:
2930 case FORMAT_G8R8UI:
2931 case FORMAT_R8UI:
Alexis Hetu43577b82015-10-21 15:32:16 -04002932 case FORMAT_A16B16G16R16UI:
2933 case FORMAT_X16B16G16R16UI:
2934 case FORMAT_G16R16UI:
2935 case FORMAT_R16UI:
Alexis Hetu43577b82015-10-21 15:32:16 -04002936 case FORMAT_A32B32G32R32UI:
2937 case FORMAT_X32B32G32R32UI:
2938 case FORMAT_G32R32UI:
2939 case FORMAT_R32UI:
2940 return true;
2941 default:
2942 return false;
2943 }
2944 }
2945
Nicolas Capens492887a2017-03-27 14:50:51 -04002946 bool Surface::isNonNormalizedInteger(Format format)
2947 {
2948 return isSignedNonNormalizedInteger(format) ||
2949 isUnsignedNonNormalizedInteger(format);
2950 }
2951
2952 bool Surface::isNormalizedInteger(Format format)
2953 {
2954 return !isFloatFormat(format) &&
2955 !isNonNormalizedInteger(format) &&
2956 !isCompressed(format) &&
2957 !isDepth(format) &&
2958 !isStencil(format);
2959 }
2960
John Bauman89401822014-05-06 15:04:28 -04002961 int Surface::componentCount(Format format)
2962 {
2963 switch(format)
2964 {
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002965 case FORMAT_R5G6B5: return 3;
2966 case FORMAT_X8R8G8B8: return 3;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002967 case FORMAT_X8B8G8R8I: return 3;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002968 case FORMAT_X8B8G8R8: return 3;
2969 case FORMAT_A8R8G8B8: return 4;
Alexis Hetu049a1872016-04-25 16:59:58 -04002970 case FORMAT_SRGB8_X8: return 3;
2971 case FORMAT_SRGB8_A8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002972 case FORMAT_A8B8G8R8I: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002973 case FORMAT_A8B8G8R8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002974 case FORMAT_G8R8I: return 2;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002975 case FORMAT_G8R8: return 2;
Nicolas Capens975adb72017-12-19 15:34:20 -05002976 case FORMAT_R8_SNORM: return 1;
2977 case FORMAT_G8R8_SNORM: return 2;
2978 case FORMAT_X8B8G8R8_SNORM:return 3;
2979 case FORMAT_A8B8G8R8_SNORM:return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002980 case FORMAT_R8UI: return 1;
2981 case FORMAT_G8R8UI: return 2;
2982 case FORMAT_X8B8G8R8UI: return 3;
2983 case FORMAT_A8B8G8R8UI: return 4;
Alexis Hetuf999a002015-12-17 11:09:36 -05002984 case FORMAT_A2B10G10R10: return 4;
Nicolas Capens5555af42017-12-14 13:14:03 -05002985 case FORMAT_A2B10G10R10UI: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002986 case FORMAT_G16R16I: return 2;
2987 case FORMAT_G16R16UI: return 2;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002988 case FORMAT_G16R16: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002989 case FORMAT_G32R32I: return 2;
2990 case FORMAT_G32R32UI: return 2;
2991 case FORMAT_X16B16G16R16I: return 3;
2992 case FORMAT_X16B16G16R16UI: return 3;
2993 case FORMAT_A16B16G16R16I: return 4;
2994 case FORMAT_A16B16G16R16UI: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002995 case FORMAT_A16B16G16R16: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002996 case FORMAT_X32B32G32R32I: return 3;
2997 case FORMAT_X32B32G32R32UI: return 3;
2998 case FORMAT_A32B32G32R32I: return 4;
2999 case FORMAT_A32B32G32R32UI: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003000 case FORMAT_V8U8: return 2;
3001 case FORMAT_Q8W8V8U8: return 4;
3002 case FORMAT_X8L8V8U8: return 3;
3003 case FORMAT_V16U16: return 2;
3004 case FORMAT_A16W16V16U16: return 4;
3005 case FORMAT_Q16W16V16U16: return 4;
3006 case FORMAT_R32F: return 1;
3007 case FORMAT_G32R32F: return 2;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003008 case FORMAT_X32B32G32R32F: return 3;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003009 case FORMAT_A32B32G32R32F: return 4;
Nicolas Capens67fdd832017-12-21 11:20:54 -05003010 case FORMAT_X32B32G32R32F_UNSIGNED: return 3;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003011 case FORMAT_D32F: return 1;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003012 case FORMAT_D32FS8: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003013 case FORMAT_D32F_LOCKABLE: return 1;
3014 case FORMAT_D32FS8_TEXTURE: return 1;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003015 case FORMAT_D32F_SHADOW: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003016 case FORMAT_D32FS8_SHADOW: return 1;
3017 case FORMAT_A8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003018 case FORMAT_R8I: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003019 case FORMAT_R8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003020 case FORMAT_R16I: return 1;
3021 case FORMAT_R16UI: return 1;
3022 case FORMAT_R32I: return 1;
3023 case FORMAT_R32UI: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003024 case FORMAT_L8: return 1;
3025 case FORMAT_L16: return 1;
3026 case FORMAT_A8L8: return 2;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04003027 case FORMAT_YV12_BT601: return 3;
3028 case FORMAT_YV12_BT709: return 3;
3029 case FORMAT_YV12_JFIF: return 3;
John Bauman89401822014-05-06 15:04:28 -04003030 default:
3031 ASSERT(false);
3032 }
3033
3034 return 1;
3035 }
3036
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003037 void *Surface::allocateBuffer(int width, int height, int depth, int border, int samples, Format format)
John Bauman89401822014-05-06 15:04:28 -04003038 {
Nicolas Capens419a5802018-05-08 17:20:50 -04003039 return allocate(size(width, height, depth, border, samples, format));
John Bauman89401822014-05-06 15:04:28 -04003040 }
3041
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003042 void Surface::memfill4(void *buffer, int pattern, int bytes)
John Bauman89401822014-05-06 15:04:28 -04003043 {
3044 while((size_t)buffer & 0x1 && bytes >= 1)
3045 {
3046 *(char*)buffer = (char)pattern;
3047 (char*&)buffer += 1;
3048 bytes -= 1;
3049 }
3050
3051 while((size_t)buffer & 0x3 && bytes >= 2)
3052 {
3053 *(short*)buffer = (short)pattern;
3054 (short*&)buffer += 1;
3055 bytes -= 2;
3056 }
3057
Nicolas Capens47dc8672017-04-25 12:54:39 -04003058 #if defined(__i386__) || defined(__x86_64__)
3059 if(CPUID::supportsSSE())
John Bauman89401822014-05-06 15:04:28 -04003060 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003061 while((size_t)buffer & 0xF && bytes >= 4)
3062 {
3063 *(int*)buffer = pattern;
3064 (int*&)buffer += 1;
3065 bytes -= 4;
3066 }
3067
3068 __m128 quad = _mm_set_ps1((float&)pattern);
3069
3070 float *pointer = (float*)buffer;
3071 int qxwords = bytes / 64;
3072 bytes -= qxwords * 64;
3073
3074 while(qxwords--)
3075 {
3076 _mm_stream_ps(pointer + 0, quad);
3077 _mm_stream_ps(pointer + 4, quad);
3078 _mm_stream_ps(pointer + 8, quad);
3079 _mm_stream_ps(pointer + 12, quad);
3080
3081 pointer += 16;
3082 }
3083
3084 buffer = pointer;
John Bauman89401822014-05-06 15:04:28 -04003085 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003086 #endif
John Bauman89401822014-05-06 15:04:28 -04003087
3088 while(bytes >= 4)
3089 {
3090 *(int*)buffer = (int)pattern;
3091 (int*&)buffer += 1;
3092 bytes -= 4;
3093 }
3094
3095 while(bytes >= 2)
3096 {
3097 *(short*)buffer = (short)pattern;
3098 (short*&)buffer += 1;
3099 bytes -= 2;
3100 }
3101
3102 while(bytes >= 1)
3103 {
3104 *(char*)buffer = (char)pattern;
3105 (char*&)buffer += 1;
3106 bytes -= 1;
3107 }
3108 }
3109
Nicolas Capensbf7a8142017-05-19 10:57:28 -04003110 void Surface::sync()
3111 {
3112 resource->lock(EXCLUSIVE);
3113 resource->unlock();
3114 }
3115
Nicolas Capens426cb5e2017-07-20 14:14:09 -04003116 bool Surface::isEntire(const Rect& rect) const
John Bauman89401822014-05-06 15:04:28 -04003117 {
Alexis Hetu75b650f2015-11-19 17:40:15 -05003118 return (rect.x0 == 0 && rect.y0 == 0 && rect.x1 == internal.width && rect.y1 == internal.height && internal.depth == 1);
3119 }
John Bauman89401822014-05-06 15:04:28 -04003120
Nicolas Capens426cb5e2017-07-20 14:14:09 -04003121 Rect Surface::getRect() const
Alexis Hetu75b650f2015-11-19 17:40:15 -05003122 {
Nicolas Capens426cb5e2017-07-20 14:14:09 -04003123 return Rect(0, 0, internal.width, internal.height);
John Bauman89401822014-05-06 15:04:28 -04003124 }
3125
Nicolas Capensc39901e2016-03-21 16:37:44 -04003126 void Surface::clearDepth(float depth, int x0, int y0, int width, int height)
John Bauman89401822014-05-06 15:04:28 -04003127 {
Nicolas Capens38488762018-04-12 16:31:32 -04003128 if(width == 0 || height == 0)
3129 {
3130 return;
3131 }
3132
3133 if(internal.format == FORMAT_NULL)
3134 {
3135 return;
3136 }
Alexis Hetu358a1442015-12-03 14:23:10 -05003137
John Bauman89401822014-05-06 15:04:28 -04003138 // Not overlapping
3139 if(x0 > internal.width) return;
3140 if(y0 > internal.height) return;
3141 if(x0 + width < 0) return;
3142 if(y0 + height < 0) return;
3143
3144 // Clip against dimensions
3145 if(x0 < 0) {width += x0; x0 = 0;}
3146 if(x0 + width > internal.width) width = internal.width - x0;
3147 if(y0 < 0) {height += y0; y0 = 0;}
3148 if(y0 + height > internal.height) height = internal.height - y0;
3149
3150 const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height;
3151 const Lock lock = entire ? LOCK_DISCARD : LOCK_WRITEONLY;
3152
John Bauman89401822014-05-06 15:04:28 -04003153 int x1 = x0 + width;
3154 int y1 = y0 + height;
3155
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003156 if(!hasQuadLayout(internal.format))
John Bauman89401822014-05-06 15:04:28 -04003157 {
Nicolas Capensc4a3f242017-12-11 15:07:53 -05003158 float *target = (float*)lockInternal(x0, y0, 0, lock, PUBLIC);
John Bauman89401822014-05-06 15:04:28 -04003159
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003160 for(int z = 0; z < internal.samples; z++)
John Bauman89401822014-05-06 15:04:28 -04003161 {
Nicolas Capensc4a3f242017-12-11 15:07:53 -05003162 float *row = target;
John Bauman89401822014-05-06 15:04:28 -04003163 for(int y = y0; y < y1; y++)
3164 {
Nicolas Capensc4a3f242017-12-11 15:07:53 -05003165 memfill4(row, (int&)depth, width * sizeof(float));
3166 row += internal.pitchP;
John Bauman89401822014-05-06 15:04:28 -04003167 }
Nicolas Capensc4a3f242017-12-11 15:07:53 -05003168 target += internal.sliceP;
John Bauman89401822014-05-06 15:04:28 -04003169 }
3170
3171 unlockInternal();
3172 }
3173 else // Quad layout
3174 {
3175 if(complementaryDepthBuffer)
3176 {
3177 depth = 1 - depth;
3178 }
3179
3180 float *buffer = (float*)lockInternal(0, 0, 0, lock, PUBLIC);
3181
Alexis Hetu358a1442015-12-03 14:23:10 -05003182 int oddX0 = (x0 & ~1) * 2 + (x0 & 1);
3183 int oddX1 = (x1 & ~1) * 2;
3184 int evenX0 = ((x0 + 1) & ~1) * 2;
3185 int evenBytes = (oddX1 - evenX0) * sizeof(float);
3186
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003187 for(int z = 0; z < internal.samples; z++)
John Bauman89401822014-05-06 15:04:28 -04003188 {
3189 for(int y = y0; y < y1; y++)
3190 {
Nicolas Capensc4a3f242017-12-11 15:07:53 -05003191 float *target = buffer + (y & ~1) * internal.pitchP + (y & 1) * 2;
Nicolas Capensc39901e2016-03-21 16:37:44 -04003192
John Bauman89401822014-05-06 15:04:28 -04003193 if((y & 1) == 0 && y + 1 < y1) // Fill quad line at once
3194 {
3195 if((x0 & 1) != 0)
3196 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003197 target[oddX0 + 0] = depth;
3198 target[oddX0 + 2] = depth;
John Bauman89401822014-05-06 15:04:28 -04003199 }
3200
Alexis Hetu358a1442015-12-03 14:23:10 -05003201 // for(int x2 = evenX0; x2 < x1 * 2; x2 += 4)
John Bauman89401822014-05-06 15:04:28 -04003202 // {
3203 // target[x2 + 0] = depth;
3204 // target[x2 + 1] = depth;
3205 // target[x2 + 2] = depth;
3206 // target[x2 + 3] = depth;
3207 // }
3208
3209 // __asm
3210 // {
3211 // movss xmm0, depth
3212 // shufps xmm0, xmm0, 0x00
3213 //
3214 // mov eax, x0
3215 // add eax, 1
3216 // and eax, 0xFFFFFFFE
3217 // cmp eax, x1
3218 // jge qEnd
3219 //
3220 // mov edi, target
3221 //
3222 // qLoop:
3223 // movntps [edi+8*eax], xmm0
3224 //
3225 // add eax, 2
3226 // cmp eax, x1
3227 // jl qLoop
3228 // qEnd:
3229 // }
3230
Alexis Hetu358a1442015-12-03 14:23:10 -05003231 memfill4(&target[evenX0], (int&)depth, evenBytes);
John Bauman89401822014-05-06 15:04:28 -04003232
3233 if((x1 & 1) != 0)
3234 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003235 target[oddX1 + 0] = depth;
3236 target[oddX1 + 2] = depth;
John Bauman89401822014-05-06 15:04:28 -04003237 }
3238
3239 y++;
3240 }
3241 else
3242 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003243 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1))
John Bauman89401822014-05-06 15:04:28 -04003244 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003245 target[i] = depth;
John Bauman89401822014-05-06 15:04:28 -04003246 }
3247 }
3248 }
3249
3250 buffer += internal.sliceP;
3251 }
3252
3253 unlockInternal();
3254 }
3255 }
3256
Nicolas Capensc39901e2016-03-21 16:37:44 -04003257 void Surface::clearStencil(unsigned char s, unsigned char mask, int x0, int y0, int width, int height)
John Bauman89401822014-05-06 15:04:28 -04003258 {
Nicolas Capens38488762018-04-12 16:31:32 -04003259 if(mask == 0 || width == 0 || height == 0)
3260 {
3261 return;
3262 }
3263
3264 if(stencil.format == FORMAT_NULL)
3265 {
3266 return;
3267 }
Alexis Hetu2b052f82015-11-25 13:57:28 -05003268
John Bauman89401822014-05-06 15:04:28 -04003269 // Not overlapping
3270 if(x0 > internal.width) return;
3271 if(y0 > internal.height) return;
3272 if(x0 + width < 0) return;
3273 if(y0 + height < 0) return;
3274
3275 // Clip against dimensions
3276 if(x0 < 0) {width += x0; x0 = 0;}
3277 if(x0 + width > internal.width) width = internal.width - x0;
3278 if(y0 < 0) {height += y0; y0 = 0;}
3279 if(y0 + height > internal.height) height = internal.height - y0;
3280
John Bauman89401822014-05-06 15:04:28 -04003281 int x1 = x0 + width;
3282 int y1 = y0 + height;
3283
Alexis Hetu358a1442015-12-03 14:23:10 -05003284 int oddX0 = (x0 & ~1) * 2 + (x0 & 1);
3285 int oddX1 = (x1 & ~1) * 2;
3286 int evenX0 = ((x0 + 1) & ~1) * 2;
3287 int evenBytes = oddX1 - evenX0;
3288
John Bauman89401822014-05-06 15:04:28 -04003289 unsigned char maskedS = s & mask;
3290 unsigned char invMask = ~mask;
3291 unsigned int fill = maskedS;
Tom Anderson69bc6e82017-03-20 11:54:29 -07003292 fill = fill | (fill << 8) | (fill << 16) | (fill << 24);
John Bauman89401822014-05-06 15:04:28 -04003293
Alexis Hetua52dfbd2016-10-05 17:03:30 -04003294 char *buffer = (char*)lockStencil(0, 0, 0, PUBLIC);
Alexis Hetu2b052f82015-11-25 13:57:28 -05003295
3296 // Stencil buffers are assumed to use quad layout
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003297 for(int z = 0; z < stencil.samples; z++)
John Bauman89401822014-05-06 15:04:28 -04003298 {
Alexis Hetu2b052f82015-11-25 13:57:28 -05003299 for(int y = y0; y < y1; y++)
John Bauman89401822014-05-06 15:04:28 -04003300 {
Nicolas Capensc4a3f242017-12-11 15:07:53 -05003301 char *target = buffer + (y & ~1) * stencil.pitchP + (y & 1) * 2;
Alexis Hetu2b052f82015-11-25 13:57:28 -05003302
3303 if((y & 1) == 0 && y + 1 < y1 && mask == 0xFF) // Fill quad line at once
John Bauman89401822014-05-06 15:04:28 -04003304 {
Alexis Hetu2b052f82015-11-25 13:57:28 -05003305 if((x0 & 1) != 0)
John Bauman89401822014-05-06 15:04:28 -04003306 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003307 target[oddX0 + 0] = fill;
3308 target[oddX0 + 2] = fill;
John Bauman89401822014-05-06 15:04:28 -04003309 }
3310
Alexis Hetu358a1442015-12-03 14:23:10 -05003311 memfill4(&target[evenX0], fill, evenBytes);
Alexis Hetu2b052f82015-11-25 13:57:28 -05003312
3313 if((x1 & 1) != 0)
3314 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003315 target[oddX1 + 0] = fill;
3316 target[oddX1 + 2] = fill;
Alexis Hetu2b052f82015-11-25 13:57:28 -05003317 }
3318
3319 y++;
3320 }
3321 else
3322 {
Nicolas Capensc4a3f242017-12-11 15:07:53 -05003323 for(int x = x0; x < x1; x++)
Alexis Hetu2b052f82015-11-25 13:57:28 -05003324 {
Nicolas Capensc4a3f242017-12-11 15:07:53 -05003325 int i = (x & ~1) * 2 + (x & 1);
Alexis Hetu358a1442015-12-03 14:23:10 -05003326 target[i] = maskedS | (target[i] & invMask);
Alexis Hetu2b052f82015-11-25 13:57:28 -05003327 }
John Bauman89401822014-05-06 15:04:28 -04003328 }
3329 }
3330
Alexis Hetu2b052f82015-11-25 13:57:28 -05003331 buffer += stencil.sliceP;
John Bauman89401822014-05-06 15:04:28 -04003332 }
John Bauman89401822014-05-06 15:04:28 -04003333
Alexis Hetu2b052f82015-11-25 13:57:28 -05003334 unlockStencil();
John Bauman89401822014-05-06 15:04:28 -04003335 }
3336
3337 void Surface::fill(const Color<float> &color, int x0, int y0, int width, int height)
3338 {
3339 unsigned char *row;
3340 Buffer *buffer;
Nicolas Capensc39901e2016-03-21 16:37:44 -04003341
John Bauman89401822014-05-06 15:04:28 -04003342 if(internal.dirty)
3343 {
3344 row = (unsigned char*)lockInternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
3345 buffer = &internal;
3346 }
3347 else
3348 {
3349 row = (unsigned char*)lockExternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
3350 buffer = &external;
3351 }
3352
3353 if(buffer->bytes <= 4)
3354 {
3355 int c;
3356 buffer->write(&c, color);
3357
3358 if(buffer->bytes <= 1) c = (c << 8) | c;
3359 if(buffer->bytes <= 2) c = (c << 16) | c;
3360
3361 for(int y = 0; y < height; y++)
3362 {
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003363 memfill4(row, c, width * buffer->bytes);
John Bauman89401822014-05-06 15:04:28 -04003364
3365 row += buffer->pitchB;
3366 }
3367 }
3368 else // Generic
3369 {
3370 for(int y = 0; y < height; y++)
3371 {
3372 unsigned char *element = row;
3373
3374 for(int x = 0; x < width; x++)
3375 {
3376 buffer->write(element, color);
3377
3378 element += buffer->bytes;
3379 }
3380
3381 row += buffer->pitchB;
3382 }
3383 }
3384
3385 if(buffer == &internal)
3386 {
3387 unlockInternal();
3388 }
3389 else
3390 {
3391 unlockExternal();
3392 }
3393 }
3394
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003395 void Surface::copyInternal(const Surface *source, int x, int y, float srcX, float srcY, bool filter)
John Bauman89401822014-05-06 15:04:28 -04003396 {
Alexis Hetu43577b82015-10-21 15:32:16 -04003397 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
John Bauman89401822014-05-06 15:04:28 -04003398
Alexis Hetu43577b82015-10-21 15:32:16 -04003399 sw::Color<float> color;
John Bauman89401822014-05-06 15:04:28 -04003400
Alexis Hetu43577b82015-10-21 15:32:16 -04003401 if(!filter)
3402 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003403 color = source->internal.read((int)srcX, (int)srcY, 0);
Alexis Hetu43577b82015-10-21 15:32:16 -04003404 }
3405 else // Bilinear filtering
3406 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003407 color = source->internal.sample(srcX, srcY, 0);
Alexis Hetu43577b82015-10-21 15:32:16 -04003408 }
John Bauman89401822014-05-06 15:04:28 -04003409
3410 internal.write(x, y, color);
3411 }
3412
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003413 void Surface::copyInternal(const Surface *source, int x, int y, int z, float srcX, float srcY, float srcZ, bool filter)
Alexis Hetu43577b82015-10-21 15:32:16 -04003414 {
3415 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
3416
3417 sw::Color<float> color;
3418
3419 if(!filter)
3420 {
3421 color = source->internal.read((int)srcX, (int)srcY, int(srcZ));
3422 }
3423 else // Bilinear filtering
3424 {
3425 color = source->internal.sample(srcX, srcY, srcZ);
3426 }
3427
3428 internal.write(x, y, z, color);
3429 }
3430
Alexis Hetua76a1bf2016-11-29 17:17:26 -05003431 void Surface::copyCubeEdge(Edge dstEdge, Surface *src, Edge srcEdge)
3432 {
3433 Surface *dst = this;
3434
3435 // Figure out if the edges to be copied in reverse order respectively from one another
3436 // The copy should be reversed whenever the same edges are contiguous or if we're
3437 // copying top <-> right or bottom <-> left. This is explained by the layout, which is:
3438 //
3439 // | +y |
3440 // | -x | +z | +x | -z |
3441 // | -y |
3442
3443 bool reverse = (srcEdge == dstEdge) ||
3444 ((srcEdge == TOP) && (dstEdge == RIGHT)) ||
3445 ((srcEdge == RIGHT) && (dstEdge == TOP)) ||
3446 ((srcEdge == BOTTOM) && (dstEdge == LEFT)) ||
3447 ((srcEdge == LEFT) && (dstEdge == BOTTOM));
3448
3449 int srcBytes = src->bytes(src->Surface::getInternalFormat());
3450 int srcPitch = src->getInternalPitchB();
3451 int dstBytes = dst->bytes(dst->Surface::getInternalFormat());
3452 int dstPitch = dst->getInternalPitchB();
3453
3454 int srcW = src->getWidth();
3455 int srcH = src->getHeight();
3456 int dstW = dst->getWidth();
3457 int dstH = dst->getHeight();
3458
3459 ASSERT(srcW == srcH && dstW == dstH && srcW == dstW && srcBytes == dstBytes);
3460
3461 // Src is expressed in the regular [0, width-1], [0, height-1] space
3462 int srcDelta = ((srcEdge == TOP) || (srcEdge == BOTTOM)) ? srcBytes : srcPitch;
3463 int srcStart = ((srcEdge == BOTTOM) ? srcPitch * (srcH - 1) : ((srcEdge == RIGHT) ? srcBytes * (srcW - 1) : 0));
3464
3465 // Dst contains borders, so it is expressed in the [-1, width+1], [-1, height+1] space
3466 int dstDelta = (((dstEdge == TOP) || (dstEdge == BOTTOM)) ? dstBytes : dstPitch) * (reverse ? -1 : 1);
3467 int dstStart = ((dstEdge == BOTTOM) ? dstPitch * (dstH + 1) : ((dstEdge == RIGHT) ? dstBytes * (dstW + 1) : 0)) + (reverse ? dstW * -dstDelta : dstDelta);
3468
3469 char *srcBuf = (char*)src->lockInternal(0, 0, 0, sw::LOCK_READONLY, sw::PRIVATE) + srcStart;
3470 char *dstBuf = (char*)dst->lockInternal(-1, -1, 0, sw::LOCK_READWRITE, sw::PRIVATE) + dstStart;
3471
3472 for(int i = 0; i < srcW; ++i, dstBuf += dstDelta, srcBuf += srcDelta)
3473 {
3474 memcpy(dstBuf, srcBuf, srcBytes);
3475 }
3476
3477 if(dstEdge == LEFT || dstEdge == RIGHT)
3478 {
3479 // TOP and BOTTOM are already set, let's average out the corners
3480 int x0 = (dstEdge == RIGHT) ? dstW : -1;
3481 int y0 = -1;
3482 int x1 = (dstEdge == RIGHT) ? dstW - 1 : 0;
3483 int y1 = 0;
3484 dst->computeCubeCorner(x0, y0, x1, y1);
3485 y0 = dstH;
3486 y1 = dstH - 1;
3487 dst->computeCubeCorner(x0, y0, x1, y1);
3488 }
3489
3490 src->unlockInternal();
3491 dst->unlockInternal();
3492 }
3493
3494 void Surface::computeCubeCorner(int x0, int y0, int x1, int y1)
3495 {
3496 ASSERT(internal.lock != LOCK_UNLOCKED);
3497
3498 sw::Color<float> color = internal.read(x0, y1);
3499 color += internal.read(x1, y0);
3500 color += internal.read(x1, y1);
3501 color *= (1.0f / 3.0f);
3502
3503 internal.write(x0, y0, color);
3504 }
3505
John Bauman89401822014-05-06 15:04:28 -04003506 bool Surface::hasStencil() const
3507 {
3508 return isStencil(external.format);
3509 }
Nicolas Capensc39901e2016-03-21 16:37:44 -04003510
John Bauman89401822014-05-06 15:04:28 -04003511 bool Surface::hasDepth() const
3512 {
3513 return isDepth(external.format);
3514 }
3515
3516 bool Surface::hasPalette() const
3517 {
3518 return isPalette(external.format);
3519 }
3520
3521 bool Surface::isRenderTarget() const
3522 {
3523 return renderTarget;
3524 }
3525
Nicolas Capens73e18c12017-11-28 13:31:35 -05003526 bool Surface::hasDirtyContents() const
John Bauman89401822014-05-06 15:04:28 -04003527 {
Nicolas Capens73e18c12017-11-28 13:31:35 -05003528 return dirtyContents;
John Bauman89401822014-05-06 15:04:28 -04003529 }
3530
Nicolas Capens73e18c12017-11-28 13:31:35 -05003531 void Surface::markContentsClean()
John Bauman89401822014-05-06 15:04:28 -04003532 {
Nicolas Capens73e18c12017-11-28 13:31:35 -05003533 dirtyContents = false;
John Bauman89401822014-05-06 15:04:28 -04003534 }
3535
3536 Resource *Surface::getResource()
3537 {
3538 return resource;
3539 }
3540
Nicolas Capens539468c2018-06-28 11:28:40 -04003541 bool Surface::identicalBuffers() const
John Bauman89401822014-05-06 15:04:28 -04003542 {
John Bauman66b8ab22014-05-06 15:57:45 -04003543 return external.format == internal.format &&
3544 external.width == internal.width &&
Nicolas Capens22658242014-11-29 00:31:41 -05003545 external.height == internal.height &&
3546 external.depth == internal.depth &&
3547 external.pitchB == internal.pitchB &&
Alexis Hetu9c6d5222016-11-29 17:02:14 -05003548 external.sliceB == internal.sliceB &&
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003549 external.border == internal.border &&
3550 external.samples == internal.samples;
John Bauman89401822014-05-06 15:04:28 -04003551 }
3552
3553 Format Surface::selectInternalFormat(Format format) const
3554 {
3555 switch(format)
3556 {
3557 case FORMAT_NULL:
3558 return FORMAT_NULL;
3559 case FORMAT_P8:
3560 case FORMAT_A8P8:
3561 case FORMAT_A4R4G4B4:
3562 case FORMAT_A1R5G5B5:
3563 case FORMAT_A8R3G3B2:
3564 return FORMAT_A8R8G8B8;
3565 case FORMAT_A8:
3566 return FORMAT_A8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003567 case FORMAT_R8I:
3568 return FORMAT_R8I;
3569 case FORMAT_R8UI:
3570 return FORMAT_R8UI;
Nicolas Capens975adb72017-12-19 15:34:20 -05003571 case FORMAT_R8_SNORM:
3572 return FORMAT_R8_SNORM;
John Bauman89401822014-05-06 15:04:28 -04003573 case FORMAT_R8:
3574 return FORMAT_R8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003575 case FORMAT_R16I:
3576 return FORMAT_R16I;
3577 case FORMAT_R16UI:
3578 return FORMAT_R16UI;
3579 case FORMAT_R32I:
3580 return FORMAT_R32I;
3581 case FORMAT_R32UI:
3582 return FORMAT_R32UI;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003583 case FORMAT_X16B16G16R16I:
Nicolas Capense4a88b92017-11-30 00:14:57 -05003584 return FORMAT_X16B16G16R16I;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003585 case FORMAT_A16B16G16R16I:
3586 return FORMAT_A16B16G16R16I;
3587 case FORMAT_X16B16G16R16UI:
Nicolas Capense4a88b92017-11-30 00:14:57 -05003588 return FORMAT_X16B16G16R16UI;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003589 case FORMAT_A16B16G16R16UI:
3590 return FORMAT_A16B16G16R16UI;
Alexis Hetuf999a002015-12-17 11:09:36 -05003591 case FORMAT_A2R10G10B10:
3592 case FORMAT_A2B10G10R10:
John Bauman89401822014-05-06 15:04:28 -04003593 case FORMAT_A16B16G16R16:
3594 return FORMAT_A16B16G16R16;
Nicolas Capens5555af42017-12-14 13:14:03 -05003595 case FORMAT_A2B10G10R10UI:
3596 return FORMAT_A16B16G16R16UI;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003597 case FORMAT_X32B32G32R32I:
Nicolas Capense4a88b92017-11-30 00:14:57 -05003598 return FORMAT_X32B32G32R32I;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003599 case FORMAT_A32B32G32R32I:
3600 return FORMAT_A32B32G32R32I;
3601 case FORMAT_X32B32G32R32UI:
Nicolas Capense4a88b92017-11-30 00:14:57 -05003602 return FORMAT_X32B32G32R32UI;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003603 case FORMAT_A32B32G32R32UI:
3604 return FORMAT_A32B32G32R32UI;
3605 case FORMAT_G8R8I:
3606 return FORMAT_G8R8I;
3607 case FORMAT_G8R8UI:
3608 return FORMAT_G8R8UI;
Nicolas Capens975adb72017-12-19 15:34:20 -05003609 case FORMAT_G8R8_SNORM:
3610 return FORMAT_G8R8_SNORM;
John Bauman89401822014-05-06 15:04:28 -04003611 case FORMAT_G8R8:
3612 return FORMAT_G8R8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003613 case FORMAT_G16R16I:
3614 return FORMAT_G16R16I;
3615 case FORMAT_G16R16UI:
3616 return FORMAT_G16R16UI;
John Bauman89401822014-05-06 15:04:28 -04003617 case FORMAT_G16R16:
3618 return FORMAT_G16R16;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003619 case FORMAT_G32R32I:
3620 return FORMAT_G32R32I;
3621 case FORMAT_G32R32UI:
3622 return FORMAT_G32R32UI;
John Bauman89401822014-05-06 15:04:28 -04003623 case FORMAT_A8R8G8B8:
John Bauman89401822014-05-06 15:04:28 -04003624 if(lockable || !quadLayoutEnabled)
3625 {
3626 return FORMAT_A8R8G8B8;
3627 }
3628 else
3629 {
3630 return FORMAT_A8G8R8B8Q;
3631 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003632 case FORMAT_A8B8G8R8I:
3633 return FORMAT_A8B8G8R8I;
3634 case FORMAT_A8B8G8R8UI:
3635 return FORMAT_A8B8G8R8UI;
Nicolas Capens975adb72017-12-19 15:34:20 -05003636 case FORMAT_A8B8G8R8_SNORM:
3637 return FORMAT_A8B8G8R8_SNORM;
Nicolas Capens80594422015-06-09 16:42:56 -04003638 case FORMAT_R5G5B5A1:
3639 case FORMAT_R4G4B4A4:
Nicolas Capensef77ac12015-03-28 21:48:51 -04003640 case FORMAT_A8B8G8R8:
3641 return FORMAT_A8B8G8R8;
John Bauman89401822014-05-06 15:04:28 -04003642 case FORMAT_R5G6B5:
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003643 return FORMAT_R5G6B5;
3644 case FORMAT_R3G3B2:
John Bauman89401822014-05-06 15:04:28 -04003645 case FORMAT_R8G8B8:
3646 case FORMAT_X4R4G4B4:
3647 case FORMAT_X1R5G5B5:
3648 case FORMAT_X8R8G8B8:
John Bauman89401822014-05-06 15:04:28 -04003649 if(lockable || !quadLayoutEnabled)
3650 {
3651 return FORMAT_X8R8G8B8;
3652 }
3653 else
3654 {
3655 return FORMAT_X8G8R8B8Q;
3656 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003657 case FORMAT_X8B8G8R8I:
3658 return FORMAT_X8B8G8R8I;
3659 case FORMAT_X8B8G8R8UI:
3660 return FORMAT_X8B8G8R8UI;
Nicolas Capens975adb72017-12-19 15:34:20 -05003661 case FORMAT_X8B8G8R8_SNORM:
3662 return FORMAT_X8B8G8R8_SNORM;
Nicolas Capens80594422015-06-09 16:42:56 -04003663 case FORMAT_B8G8R8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04003664 case FORMAT_X8B8G8R8:
3665 return FORMAT_X8B8G8R8;
Alexis Hetu049a1872016-04-25 16:59:58 -04003666 case FORMAT_SRGB8_X8:
3667 return FORMAT_SRGB8_X8;
3668 case FORMAT_SRGB8_A8:
3669 return FORMAT_SRGB8_A8;
John Bauman89401822014-05-06 15:04:28 -04003670 // Compressed formats
John Bauman89401822014-05-06 15:04:28 -04003671 case FORMAT_DXT1:
3672 case FORMAT_DXT3:
3673 case FORMAT_DXT5:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003674 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3675 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3676 case FORMAT_RGBA8_ETC2_EAC:
3677 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003678 return FORMAT_A8R8G8B8;
John Bauman89401822014-05-06 15:04:28 -04003679 case FORMAT_ATI1:
3680 return FORMAT_R8;
Alexis Hetuf46493f2017-12-18 15:32:26 -05003681 case FORMAT_R11_EAC:
Alexis Hetu0de50d42015-09-09 13:56:41 -04003682 case FORMAT_SIGNED_R11_EAC:
3683 return FORMAT_R32F; // FIXME: Signed 8bit format would be sufficient
John Bauman89401822014-05-06 15:04:28 -04003684 case FORMAT_ATI2:
3685 return FORMAT_G8R8;
Alexis Hetuf46493f2017-12-18 15:32:26 -05003686 case FORMAT_RG11_EAC:
Alexis Hetu0de50d42015-09-09 13:56:41 -04003687 case FORMAT_SIGNED_RG11_EAC:
3688 return FORMAT_G32R32F; // FIXME: Signed 8bit format would be sufficient
Nicolas Capens22658242014-11-29 00:31:41 -05003689 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003690 case FORMAT_RGB8_ETC2:
3691 case FORMAT_SRGB8_ETC2:
Nicolas Capens22658242014-11-29 00:31:41 -05003692 return FORMAT_X8R8G8B8;
John Bauman89401822014-05-06 15:04:28 -04003693 // Bumpmap formats
3694 case FORMAT_V8U8: return FORMAT_V8U8;
3695 case FORMAT_L6V5U5: return FORMAT_X8L8V8U8;
3696 case FORMAT_Q8W8V8U8: return FORMAT_Q8W8V8U8;
3697 case FORMAT_X8L8V8U8: return FORMAT_X8L8V8U8;
3698 case FORMAT_V16U16: return FORMAT_V16U16;
3699 case FORMAT_A2W10V10U10: return FORMAT_A16W16V16U16;
3700 case FORMAT_Q16W16V16U16: return FORMAT_Q16W16V16U16;
3701 // Floating-point formats
Nicolas Capens80594422015-06-09 16:42:56 -04003702 case FORMAT_A16F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003703 case FORMAT_R16F: return FORMAT_R32F;
3704 case FORMAT_G16R16F: return FORMAT_G32R32F;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003705 case FORMAT_B16G16R16F: return FORMAT_X32B32G32R32F;
Nicolas Capensa6bc61d2017-12-20 11:07:45 -05003706 case FORMAT_X16B16G16R16F: return FORMAT_X32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003707 case FORMAT_A16B16G16R16F: return FORMAT_A32B32G32R32F;
Nicolas Capens67fdd832017-12-21 11:20:54 -05003708 case FORMAT_X16B16G16R16F_UNSIGNED: return FORMAT_X32B32G32R32F_UNSIGNED;
Nicolas Capens80594422015-06-09 16:42:56 -04003709 case FORMAT_A32F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003710 case FORMAT_R32F: return FORMAT_R32F;
3711 case FORMAT_G32R32F: return FORMAT_G32R32F;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003712 case FORMAT_B32G32R32F: return FORMAT_X32B32G32R32F;
3713 case FORMAT_X32B32G32R32F: return FORMAT_X32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003714 case FORMAT_A32B32G32R32F: return FORMAT_A32B32G32R32F;
Nicolas Capens67fdd832017-12-21 11:20:54 -05003715 case FORMAT_X32B32G32R32F_UNSIGNED: return FORMAT_X32B32G32R32F_UNSIGNED;
John Bauman89401822014-05-06 15:04:28 -04003716 // Luminance formats
3717 case FORMAT_L8: return FORMAT_L8;
3718 case FORMAT_A4L4: return FORMAT_A8L8;
3719 case FORMAT_L16: return FORMAT_L16;
3720 case FORMAT_A8L8: return FORMAT_A8L8;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003721 case FORMAT_L16F: return FORMAT_X32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003722 case FORMAT_A16L16F: return FORMAT_A32B32G32R32F;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003723 case FORMAT_L32F: return FORMAT_X32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003724 case FORMAT_A32L32F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003725 // Depth/stencil formats
3726 case FORMAT_D16:
3727 case FORMAT_D32:
3728 case FORMAT_D24X8:
John Bauman89401822014-05-06 15:04:28 -04003729 if(hasParent) // Texture
3730 {
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003731 return FORMAT_D32F_SHADOW;
John Bauman89401822014-05-06 15:04:28 -04003732 }
3733 else if(complementaryDepthBuffer)
3734 {
3735 return FORMAT_D32F_COMPLEMENTARY;
3736 }
3737 else
3738 {
3739 return FORMAT_D32F;
3740 }
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003741 case FORMAT_D24S8:
3742 case FORMAT_D24FS8:
3743 if(hasParent) // Texture
3744 {
3745 return FORMAT_D32FS8_SHADOW;
3746 }
3747 else if(complementaryDepthBuffer)
3748 {
3749 return FORMAT_D32FS8_COMPLEMENTARY;
3750 }
3751 else
3752 {
3753 return FORMAT_D32FS8;
3754 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003755 case FORMAT_D32F: return FORMAT_D32F;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003756 case FORMAT_D32FS8: return FORMAT_D32FS8;
John Bauman66b8ab22014-05-06 15:57:45 -04003757 case FORMAT_D32F_LOCKABLE: return FORMAT_D32F_LOCKABLE;
3758 case FORMAT_D32FS8_TEXTURE: return FORMAT_D32FS8_TEXTURE;
3759 case FORMAT_INTZ: return FORMAT_D32FS8_TEXTURE;
3760 case FORMAT_DF24S8: return FORMAT_D32FS8_SHADOW;
3761 case FORMAT_DF16S8: return FORMAT_D32FS8_SHADOW;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003762 case FORMAT_S8: return FORMAT_S8;
3763 // YUV formats
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04003764 case FORMAT_YV12_BT601: return FORMAT_YV12_BT601;
3765 case FORMAT_YV12_BT709: return FORMAT_YV12_BT709;
3766 case FORMAT_YV12_JFIF: return FORMAT_YV12_JFIF;
John Bauman89401822014-05-06 15:04:28 -04003767 default:
3768 ASSERT(false);
3769 }
3770
3771 return FORMAT_NULL;
3772 }
3773
3774 void Surface::setTexturePalette(unsigned int *palette)
3775 {
3776 Surface::palette = palette;
3777 Surface::paletteID++;
3778 }
3779
3780 void Surface::resolve()
3781 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003782 if(internal.samples <= 1 || !internal.dirty || !renderTarget || internal.format == FORMAT_NULL)
John Bauman89401822014-05-06 15:04:28 -04003783 {
3784 return;
3785 }
3786
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003787 ASSERT(internal.depth == 1); // Unimplemented
3788
John Bauman89401822014-05-06 15:04:28 -04003789 void *source = internal.lockRect(0, 0, 0, LOCK_READWRITE);
3790
John Bauman89401822014-05-06 15:04:28 -04003791 int width = internal.width;
3792 int height = internal.height;
3793 int pitch = internal.pitchB;
3794 int slice = internal.sliceB;
3795
3796 unsigned char *source0 = (unsigned char*)source;
3797 unsigned char *source1 = source0 + slice;
3798 unsigned char *source2 = source1 + slice;
3799 unsigned char *source3 = source2 + slice;
3800 unsigned char *source4 = source3 + slice;
3801 unsigned char *source5 = source4 + slice;
3802 unsigned char *source6 = source5 + slice;
3803 unsigned char *source7 = source6 + slice;
3804 unsigned char *source8 = source7 + slice;
3805 unsigned char *source9 = source8 + slice;
3806 unsigned char *sourceA = source9 + slice;
3807 unsigned char *sourceB = sourceA + slice;
3808 unsigned char *sourceC = sourceB + slice;
3809 unsigned char *sourceD = sourceC + slice;
3810 unsigned char *sourceE = sourceD + slice;
3811 unsigned char *sourceF = sourceE + slice;
3812
Alexis Hetu049a1872016-04-25 16:59:58 -04003813 if(internal.format == FORMAT_X8R8G8B8 || internal.format == FORMAT_A8R8G8B8 ||
3814 internal.format == FORMAT_X8B8G8R8 || internal.format == FORMAT_A8B8G8R8 ||
3815 internal.format == FORMAT_SRGB8_X8 || internal.format == FORMAT_SRGB8_A8)
John Bauman89401822014-05-06 15:04:28 -04003816 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003817 #if defined(__i386__) || defined(__x86_64__)
3818 if(CPUID::supportsSSE2() && (width % 4) == 0)
John Bauman89401822014-05-06 15:04:28 -04003819 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003820 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04003821 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003822 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04003823 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003824 for(int x = 0; x < width; x += 4)
3825 {
3826 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3827 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04003828
Nicolas Capens47dc8672017-04-25 12:54:39 -04003829 c0 = _mm_avg_epu8(c0, c1);
John Bauman89401822014-05-06 15:04:28 -04003830
Nicolas Capens47dc8672017-04-25 12:54:39 -04003831 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3832 }
3833
3834 source0 += pitch;
3835 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04003836 }
John Bauman89401822014-05-06 15:04:28 -04003837 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003838 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04003839 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003840 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04003841 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003842 for(int x = 0; x < width; x += 4)
3843 {
3844 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3845 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3846 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
3847 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04003848
Nicolas Capens47dc8672017-04-25 12:54:39 -04003849 c0 = _mm_avg_epu8(c0, c1);
3850 c2 = _mm_avg_epu8(c2, c3);
3851 c0 = _mm_avg_epu8(c0, c2);
John Bauman89401822014-05-06 15:04:28 -04003852
Nicolas Capens47dc8672017-04-25 12:54:39 -04003853 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3854 }
3855
3856 source0 += pitch;
3857 source1 += pitch;
3858 source2 += pitch;
3859 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04003860 }
John Bauman89401822014-05-06 15:04:28 -04003861 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003862 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04003863 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003864 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04003865 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003866 for(int x = 0; x < width; x += 4)
3867 {
3868 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3869 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3870 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
3871 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
3872 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
3873 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
3874 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
3875 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04003876
Nicolas Capens47dc8672017-04-25 12:54:39 -04003877 c0 = _mm_avg_epu8(c0, c1);
3878 c2 = _mm_avg_epu8(c2, c3);
3879 c4 = _mm_avg_epu8(c4, c5);
3880 c6 = _mm_avg_epu8(c6, c7);
3881 c0 = _mm_avg_epu8(c0, c2);
3882 c4 = _mm_avg_epu8(c4, c6);
3883 c0 = _mm_avg_epu8(c0, c4);
John Bauman89401822014-05-06 15:04:28 -04003884
Nicolas Capens47dc8672017-04-25 12:54:39 -04003885 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3886 }
3887
3888 source0 += pitch;
3889 source1 += pitch;
3890 source2 += pitch;
3891 source3 += pitch;
3892 source4 += pitch;
3893 source5 += pitch;
3894 source6 += pitch;
3895 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04003896 }
John Bauman89401822014-05-06 15:04:28 -04003897 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003898 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04003899 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003900 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04003901 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003902 for(int x = 0; x < width; x += 4)
3903 {
3904 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3905 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3906 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
3907 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
3908 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
3909 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
3910 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
3911 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
3912 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
3913 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
3914 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
3915 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
3916 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
3917 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
3918 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
3919 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
John Bauman89401822014-05-06 15:04:28 -04003920
Nicolas Capens47dc8672017-04-25 12:54:39 -04003921 c0 = _mm_avg_epu8(c0, c1);
3922 c2 = _mm_avg_epu8(c2, c3);
3923 c4 = _mm_avg_epu8(c4, c5);
3924 c6 = _mm_avg_epu8(c6, c7);
3925 c8 = _mm_avg_epu8(c8, c9);
3926 cA = _mm_avg_epu8(cA, cB);
3927 cC = _mm_avg_epu8(cC, cD);
3928 cE = _mm_avg_epu8(cE, cF);
3929 c0 = _mm_avg_epu8(c0, c2);
3930 c4 = _mm_avg_epu8(c4, c6);
3931 c8 = _mm_avg_epu8(c8, cA);
3932 cC = _mm_avg_epu8(cC, cE);
3933 c0 = _mm_avg_epu8(c0, c4);
3934 c8 = _mm_avg_epu8(c8, cC);
3935 c0 = _mm_avg_epu8(c0, c8);
John Bauman89401822014-05-06 15:04:28 -04003936
Nicolas Capens47dc8672017-04-25 12:54:39 -04003937 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3938 }
3939
3940 source0 += pitch;
3941 source1 += pitch;
3942 source2 += pitch;
3943 source3 += pitch;
3944 source4 += pitch;
3945 source5 += pitch;
3946 source6 += pitch;
3947 source7 += pitch;
3948 source8 += pitch;
3949 source9 += pitch;
3950 sourceA += pitch;
3951 sourceB += pitch;
3952 sourceC += pitch;
3953 sourceD += pitch;
3954 sourceE += pitch;
3955 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04003956 }
John Bauman89401822014-05-06 15:04:28 -04003957 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003958 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04003959 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003960 else
3961 #endif
John Bauman89401822014-05-06 15:04:28 -04003962 {
3963 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7F7F7F7F) + (((x) ^ (y)) & 0x01010101))
3964
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003965 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04003966 {
3967 for(int y = 0; y < height; y++)
3968 {
3969 for(int x = 0; x < width; x++)
3970 {
3971 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3972 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3973
3974 c0 = AVERAGE(c0, c1);
3975
3976 *(unsigned int*)(source0 + 4 * x) = c0;
3977 }
3978
3979 source0 += pitch;
3980 source1 += pitch;
3981 }
3982 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003983 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04003984 {
3985 for(int y = 0; y < height; y++)
3986 {
3987 for(int x = 0; x < width; x++)
3988 {
3989 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3990 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3991 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
3992 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
3993
3994 c0 = AVERAGE(c0, c1);
3995 c2 = AVERAGE(c2, c3);
3996 c0 = AVERAGE(c0, c2);
3997
3998 *(unsigned int*)(source0 + 4 * x) = c0;
3999 }
4000
4001 source0 += pitch;
4002 source1 += pitch;
4003 source2 += pitch;
4004 source3 += pitch;
4005 }
4006 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004007 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04004008 {
4009 for(int y = 0; y < height; y++)
4010 {
4011 for(int x = 0; x < width; x++)
4012 {
4013 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4014 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4015 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4016 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4017 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4018 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4019 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4020 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4021
4022 c0 = AVERAGE(c0, c1);
4023 c2 = AVERAGE(c2, c3);
4024 c4 = AVERAGE(c4, c5);
4025 c6 = AVERAGE(c6, c7);
4026 c0 = AVERAGE(c0, c2);
4027 c4 = AVERAGE(c4, c6);
4028 c0 = AVERAGE(c0, c4);
4029
4030 *(unsigned int*)(source0 + 4 * x) = c0;
4031 }
4032
4033 source0 += pitch;
4034 source1 += pitch;
4035 source2 += pitch;
4036 source3 += pitch;
4037 source4 += pitch;
4038 source5 += pitch;
4039 source6 += pitch;
4040 source7 += pitch;
4041 }
4042 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004043 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04004044 {
4045 for(int y = 0; y < height; y++)
4046 {
4047 for(int x = 0; x < width; x++)
4048 {
4049 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4050 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4051 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4052 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4053 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4054 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4055 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4056 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4057 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4058 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4059 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4060 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4061 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4062 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4063 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4064 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4065
4066 c0 = AVERAGE(c0, c1);
4067 c2 = AVERAGE(c2, c3);
4068 c4 = AVERAGE(c4, c5);
4069 c6 = AVERAGE(c6, c7);
4070 c8 = AVERAGE(c8, c9);
4071 cA = AVERAGE(cA, cB);
4072 cC = AVERAGE(cC, cD);
4073 cE = AVERAGE(cE, cF);
4074 c0 = AVERAGE(c0, c2);
4075 c4 = AVERAGE(c4, c6);
4076 c8 = AVERAGE(c8, cA);
4077 cC = AVERAGE(cC, cE);
4078 c0 = AVERAGE(c0, c4);
4079 c8 = AVERAGE(c8, cC);
4080 c0 = AVERAGE(c0, c8);
4081
4082 *(unsigned int*)(source0 + 4 * x) = c0;
4083 }
4084
4085 source0 += pitch;
4086 source1 += pitch;
4087 source2 += pitch;
4088 source3 += pitch;
4089 source4 += pitch;
4090 source5 += pitch;
4091 source6 += pitch;
4092 source7 += pitch;
4093 source8 += pitch;
4094 source9 += pitch;
4095 sourceA += pitch;
4096 sourceB += pitch;
4097 sourceC += pitch;
4098 sourceD += pitch;
4099 sourceE += pitch;
4100 sourceF += pitch;
4101 }
4102 }
4103 else ASSERT(false);
4104
4105 #undef AVERAGE
4106 }
4107 }
4108 else if(internal.format == FORMAT_G16R16)
4109 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004110
4111 #if defined(__i386__) || defined(__x86_64__)
4112 if(CPUID::supportsSSE2() && (width % 4) == 0)
John Bauman89401822014-05-06 15:04:28 -04004113 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004114 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04004115 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004116 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004117 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004118 for(int x = 0; x < width; x += 4)
4119 {
4120 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4121 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004122
Nicolas Capens47dc8672017-04-25 12:54:39 -04004123 c0 = _mm_avg_epu16(c0, c1);
John Bauman89401822014-05-06 15:04:28 -04004124
Nicolas Capens47dc8672017-04-25 12:54:39 -04004125 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4126 }
4127
4128 source0 += pitch;
4129 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004130 }
John Bauman89401822014-05-06 15:04:28 -04004131 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004132 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04004133 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004134 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004135 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004136 for(int x = 0; x < width; x += 4)
4137 {
4138 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4139 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4140 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4141 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004142
Nicolas Capens47dc8672017-04-25 12:54:39 -04004143 c0 = _mm_avg_epu16(c0, c1);
4144 c2 = _mm_avg_epu16(c2, c3);
4145 c0 = _mm_avg_epu16(c0, c2);
John Bauman89401822014-05-06 15:04:28 -04004146
Nicolas Capens47dc8672017-04-25 12:54:39 -04004147 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4148 }
4149
4150 source0 += pitch;
4151 source1 += pitch;
4152 source2 += pitch;
4153 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004154 }
John Bauman89401822014-05-06 15:04:28 -04004155 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004156 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04004157 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004158 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004159 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004160 for(int x = 0; x < width; x += 4)
4161 {
4162 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4163 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4164 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4165 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4166 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4167 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4168 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4169 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004170
Nicolas Capens47dc8672017-04-25 12:54:39 -04004171 c0 = _mm_avg_epu16(c0, c1);
4172 c2 = _mm_avg_epu16(c2, c3);
4173 c4 = _mm_avg_epu16(c4, c5);
4174 c6 = _mm_avg_epu16(c6, c7);
4175 c0 = _mm_avg_epu16(c0, c2);
4176 c4 = _mm_avg_epu16(c4, c6);
4177 c0 = _mm_avg_epu16(c0, c4);
John Bauman89401822014-05-06 15:04:28 -04004178
Nicolas Capens47dc8672017-04-25 12:54:39 -04004179 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4180 }
4181
4182 source0 += pitch;
4183 source1 += pitch;
4184 source2 += pitch;
4185 source3 += pitch;
4186 source4 += pitch;
4187 source5 += pitch;
4188 source6 += pitch;
4189 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004190 }
John Bauman89401822014-05-06 15:04:28 -04004191 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004192 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04004193 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004194 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004195 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004196 for(int x = 0; x < width; x += 4)
4197 {
4198 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4199 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4200 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4201 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4202 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4203 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4204 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4205 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
4206 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
4207 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
4208 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
4209 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
4210 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
4211 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
4212 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
4213 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
John Bauman89401822014-05-06 15:04:28 -04004214
Nicolas Capens47dc8672017-04-25 12:54:39 -04004215 c0 = _mm_avg_epu16(c0, c1);
4216 c2 = _mm_avg_epu16(c2, c3);
4217 c4 = _mm_avg_epu16(c4, c5);
4218 c6 = _mm_avg_epu16(c6, c7);
4219 c8 = _mm_avg_epu16(c8, c9);
4220 cA = _mm_avg_epu16(cA, cB);
4221 cC = _mm_avg_epu16(cC, cD);
4222 cE = _mm_avg_epu16(cE, cF);
4223 c0 = _mm_avg_epu16(c0, c2);
4224 c4 = _mm_avg_epu16(c4, c6);
4225 c8 = _mm_avg_epu16(c8, cA);
4226 cC = _mm_avg_epu16(cC, cE);
4227 c0 = _mm_avg_epu16(c0, c4);
4228 c8 = _mm_avg_epu16(c8, cC);
4229 c0 = _mm_avg_epu16(c0, c8);
John Bauman89401822014-05-06 15:04:28 -04004230
Nicolas Capens47dc8672017-04-25 12:54:39 -04004231 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4232 }
4233
4234 source0 += pitch;
4235 source1 += pitch;
4236 source2 += pitch;
4237 source3 += pitch;
4238 source4 += pitch;
4239 source5 += pitch;
4240 source6 += pitch;
4241 source7 += pitch;
4242 source8 += pitch;
4243 source9 += pitch;
4244 sourceA += pitch;
4245 sourceB += pitch;
4246 sourceC += pitch;
4247 sourceD += pitch;
4248 sourceE += pitch;
4249 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04004250 }
John Bauman89401822014-05-06 15:04:28 -04004251 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004252 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04004253 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004254 else
4255 #endif
John Bauman89401822014-05-06 15:04:28 -04004256 {
4257 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
4258
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004259 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04004260 {
4261 for(int y = 0; y < height; y++)
4262 {
4263 for(int x = 0; x < width; x++)
4264 {
4265 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4266 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4267
4268 c0 = AVERAGE(c0, c1);
4269
4270 *(unsigned int*)(source0 + 4 * x) = c0;
4271 }
4272
4273 source0 += pitch;
4274 source1 += pitch;
4275 }
4276 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004277 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04004278 {
4279 for(int y = 0; y < height; y++)
4280 {
4281 for(int x = 0; x < width; x++)
4282 {
4283 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4284 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4285 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4286 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4287
4288 c0 = AVERAGE(c0, c1);
4289 c2 = AVERAGE(c2, c3);
4290 c0 = AVERAGE(c0, c2);
4291
4292 *(unsigned int*)(source0 + 4 * x) = c0;
4293 }
4294
4295 source0 += pitch;
4296 source1 += pitch;
4297 source2 += pitch;
4298 source3 += pitch;
4299 }
4300 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004301 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04004302 {
4303 for(int y = 0; y < height; y++)
4304 {
4305 for(int x = 0; x < width; x++)
4306 {
4307 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4308 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4309 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4310 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4311 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4312 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4313 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4314 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4315
4316 c0 = AVERAGE(c0, c1);
4317 c2 = AVERAGE(c2, c3);
4318 c4 = AVERAGE(c4, c5);
4319 c6 = AVERAGE(c6, c7);
4320 c0 = AVERAGE(c0, c2);
4321 c4 = AVERAGE(c4, c6);
4322 c0 = AVERAGE(c0, c4);
4323
4324 *(unsigned int*)(source0 + 4 * x) = c0;
4325 }
4326
4327 source0 += pitch;
4328 source1 += pitch;
4329 source2 += pitch;
4330 source3 += pitch;
4331 source4 += pitch;
4332 source5 += pitch;
4333 source6 += pitch;
4334 source7 += pitch;
4335 }
4336 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004337 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04004338 {
4339 for(int y = 0; y < height; y++)
4340 {
4341 for(int x = 0; x < width; x++)
4342 {
4343 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4344 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4345 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4346 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4347 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4348 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4349 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4350 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4351 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4352 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4353 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4354 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4355 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4356 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4357 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4358 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4359
4360 c0 = AVERAGE(c0, c1);
4361 c2 = AVERAGE(c2, c3);
4362 c4 = AVERAGE(c4, c5);
4363 c6 = AVERAGE(c6, c7);
4364 c8 = AVERAGE(c8, c9);
4365 cA = AVERAGE(cA, cB);
4366 cC = AVERAGE(cC, cD);
4367 cE = AVERAGE(cE, cF);
4368 c0 = AVERAGE(c0, c2);
4369 c4 = AVERAGE(c4, c6);
4370 c8 = AVERAGE(c8, cA);
4371 cC = AVERAGE(cC, cE);
4372 c0 = AVERAGE(c0, c4);
4373 c8 = AVERAGE(c8, cC);
4374 c0 = AVERAGE(c0, c8);
4375
4376 *(unsigned int*)(source0 + 4 * x) = c0;
4377 }
4378
4379 source0 += pitch;
4380 source1 += pitch;
4381 source2 += pitch;
4382 source3 += pitch;
4383 source4 += pitch;
4384 source5 += pitch;
4385 source6 += pitch;
4386 source7 += pitch;
4387 source8 += pitch;
4388 source9 += pitch;
4389 sourceA += pitch;
4390 sourceB += pitch;
4391 sourceC += pitch;
4392 sourceD += pitch;
4393 sourceE += pitch;
4394 sourceF += pitch;
4395 }
4396 }
4397 else ASSERT(false);
4398
4399 #undef AVERAGE
4400 }
4401 }
4402 else if(internal.format == FORMAT_A16B16G16R16)
4403 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004404 #if defined(__i386__) || defined(__x86_64__)
4405 if(CPUID::supportsSSE2() && (width % 2) == 0)
John Bauman89401822014-05-06 15:04:28 -04004406 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004407 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04004408 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004409 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004410 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004411 for(int x = 0; x < width; x += 2)
4412 {
4413 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4414 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004415
Nicolas Capens47dc8672017-04-25 12:54:39 -04004416 c0 = _mm_avg_epu16(c0, c1);
John Bauman89401822014-05-06 15:04:28 -04004417
Nicolas Capens47dc8672017-04-25 12:54:39 -04004418 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4419 }
4420
4421 source0 += pitch;
4422 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004423 }
John Bauman89401822014-05-06 15:04:28 -04004424 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004425 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04004426 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004427 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004428 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004429 for(int x = 0; x < width; x += 2)
4430 {
4431 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4432 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4433 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4434 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004435
Nicolas Capens47dc8672017-04-25 12:54:39 -04004436 c0 = _mm_avg_epu16(c0, c1);
4437 c2 = _mm_avg_epu16(c2, c3);
4438 c0 = _mm_avg_epu16(c0, c2);
John Bauman89401822014-05-06 15:04:28 -04004439
Nicolas Capens47dc8672017-04-25 12:54:39 -04004440 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4441 }
4442
4443 source0 += pitch;
4444 source1 += pitch;
4445 source2 += pitch;
4446 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004447 }
John Bauman89401822014-05-06 15:04:28 -04004448 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004449 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04004450 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004451 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004452 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004453 for(int x = 0; x < width; x += 2)
4454 {
4455 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4456 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4457 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4458 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4459 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
4460 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
4461 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
4462 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004463
Nicolas Capens47dc8672017-04-25 12:54:39 -04004464 c0 = _mm_avg_epu16(c0, c1);
4465 c2 = _mm_avg_epu16(c2, c3);
4466 c4 = _mm_avg_epu16(c4, c5);
4467 c6 = _mm_avg_epu16(c6, c7);
4468 c0 = _mm_avg_epu16(c0, c2);
4469 c4 = _mm_avg_epu16(c4, c6);
4470 c0 = _mm_avg_epu16(c0, c4);
John Bauman89401822014-05-06 15:04:28 -04004471
Nicolas Capens47dc8672017-04-25 12:54:39 -04004472 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4473 }
4474
4475 source0 += pitch;
4476 source1 += pitch;
4477 source2 += pitch;
4478 source3 += pitch;
4479 source4 += pitch;
4480 source5 += pitch;
4481 source6 += pitch;
4482 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004483 }
John Bauman89401822014-05-06 15:04:28 -04004484 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004485 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04004486 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004487 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004488 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004489 for(int x = 0; x < width; x += 2)
4490 {
4491 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4492 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4493 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4494 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4495 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
4496 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
4497 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
4498 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
4499 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 8 * x));
4500 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 8 * x));
4501 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 8 * x));
4502 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 8 * x));
4503 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 8 * x));
4504 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 8 * x));
4505 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 8 * x));
4506 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 8 * x));
John Bauman89401822014-05-06 15:04:28 -04004507
Nicolas Capens47dc8672017-04-25 12:54:39 -04004508 c0 = _mm_avg_epu16(c0, c1);
4509 c2 = _mm_avg_epu16(c2, c3);
4510 c4 = _mm_avg_epu16(c4, c5);
4511 c6 = _mm_avg_epu16(c6, c7);
4512 c8 = _mm_avg_epu16(c8, c9);
4513 cA = _mm_avg_epu16(cA, cB);
4514 cC = _mm_avg_epu16(cC, cD);
4515 cE = _mm_avg_epu16(cE, cF);
4516 c0 = _mm_avg_epu16(c0, c2);
4517 c4 = _mm_avg_epu16(c4, c6);
4518 c8 = _mm_avg_epu16(c8, cA);
4519 cC = _mm_avg_epu16(cC, cE);
4520 c0 = _mm_avg_epu16(c0, c4);
4521 c8 = _mm_avg_epu16(c8, cC);
4522 c0 = _mm_avg_epu16(c0, c8);
John Bauman89401822014-05-06 15:04:28 -04004523
Nicolas Capens47dc8672017-04-25 12:54:39 -04004524 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4525 }
4526
4527 source0 += pitch;
4528 source1 += pitch;
4529 source2 += pitch;
4530 source3 += pitch;
4531 source4 += pitch;
4532 source5 += pitch;
4533 source6 += pitch;
4534 source7 += pitch;
4535 source8 += pitch;
4536 source9 += pitch;
4537 sourceA += pitch;
4538 sourceB += pitch;
4539 sourceC += pitch;
4540 sourceD += pitch;
4541 sourceE += pitch;
4542 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04004543 }
John Bauman89401822014-05-06 15:04:28 -04004544 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004545 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04004546 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004547 else
4548 #endif
John Bauman89401822014-05-06 15:04:28 -04004549 {
4550 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
4551
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004552 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04004553 {
4554 for(int y = 0; y < height; y++)
4555 {
4556 for(int x = 0; x < 2 * width; x++)
4557 {
4558 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4559 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4560
4561 c0 = AVERAGE(c0, c1);
4562
4563 *(unsigned int*)(source0 + 4 * x) = c0;
4564 }
4565
4566 source0 += pitch;
4567 source1 += pitch;
4568 }
4569 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004570 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04004571 {
4572 for(int y = 0; y < height; y++)
4573 {
4574 for(int x = 0; x < 2 * width; x++)
4575 {
4576 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4577 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4578 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4579 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4580
4581 c0 = AVERAGE(c0, c1);
4582 c2 = AVERAGE(c2, c3);
4583 c0 = AVERAGE(c0, c2);
4584
4585 *(unsigned int*)(source0 + 4 * x) = c0;
4586 }
4587
4588 source0 += pitch;
4589 source1 += pitch;
4590 source2 += pitch;
4591 source3 += pitch;
4592 }
4593 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004594 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04004595 {
4596 for(int y = 0; y < height; y++)
4597 {
4598 for(int x = 0; x < 2 * width; x++)
4599 {
4600 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4601 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4602 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4603 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4604 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4605 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4606 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4607 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4608
4609 c0 = AVERAGE(c0, c1);
4610 c2 = AVERAGE(c2, c3);
4611 c4 = AVERAGE(c4, c5);
4612 c6 = AVERAGE(c6, c7);
4613 c0 = AVERAGE(c0, c2);
4614 c4 = AVERAGE(c4, c6);
4615 c0 = AVERAGE(c0, c4);
4616
4617 *(unsigned int*)(source0 + 4 * x) = c0;
4618 }
4619
4620 source0 += pitch;
4621 source1 += pitch;
4622 source2 += pitch;
4623 source3 += pitch;
4624 source4 += pitch;
4625 source5 += pitch;
4626 source6 += pitch;
4627 source7 += pitch;
4628 }
4629 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004630 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04004631 {
4632 for(int y = 0; y < height; y++)
4633 {
4634 for(int x = 0; x < 2 * width; x++)
4635 {
4636 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4637 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4638 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4639 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4640 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4641 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4642 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4643 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4644 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4645 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4646 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4647 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4648 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4649 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4650 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4651 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4652
4653 c0 = AVERAGE(c0, c1);
4654 c2 = AVERAGE(c2, c3);
4655 c4 = AVERAGE(c4, c5);
4656 c6 = AVERAGE(c6, c7);
4657 c8 = AVERAGE(c8, c9);
4658 cA = AVERAGE(cA, cB);
4659 cC = AVERAGE(cC, cD);
4660 cE = AVERAGE(cE, cF);
4661 c0 = AVERAGE(c0, c2);
4662 c4 = AVERAGE(c4, c6);
4663 c8 = AVERAGE(c8, cA);
4664 cC = AVERAGE(cC, cE);
4665 c0 = AVERAGE(c0, c4);
4666 c8 = AVERAGE(c8, cC);
4667 c0 = AVERAGE(c0, c8);
4668
4669 *(unsigned int*)(source0 + 4 * x) = c0;
4670 }
4671
4672 source0 += pitch;
4673 source1 += pitch;
4674 source2 += pitch;
4675 source3 += pitch;
4676 source4 += pitch;
4677 source5 += pitch;
4678 source6 += pitch;
4679 source7 += pitch;
4680 source8 += pitch;
4681 source9 += pitch;
4682 sourceA += pitch;
4683 sourceB += pitch;
4684 sourceC += pitch;
4685 sourceD += pitch;
4686 sourceE += pitch;
4687 sourceF += pitch;
4688 }
4689 }
4690 else ASSERT(false);
4691
4692 #undef AVERAGE
4693 }
4694 }
4695 else if(internal.format == FORMAT_R32F)
4696 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004697 #if defined(__i386__) || defined(__x86_64__)
4698 if(CPUID::supportsSSE() && (width % 4) == 0)
John Bauman89401822014-05-06 15:04:28 -04004699 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004700 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04004701 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004702 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004703 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004704 for(int x = 0; x < width; x += 4)
4705 {
4706 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4707 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004708
Nicolas Capens47dc8672017-04-25 12:54:39 -04004709 c0 = _mm_add_ps(c0, c1);
4710 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
John Bauman89401822014-05-06 15:04:28 -04004711
Nicolas Capens47dc8672017-04-25 12:54:39 -04004712 _mm_store_ps((float*)(source0 + 4 * x), c0);
4713 }
4714
4715 source0 += pitch;
4716 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004717 }
John Bauman89401822014-05-06 15:04:28 -04004718 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004719 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04004720 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004721 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004722 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004723 for(int x = 0; x < width; x += 4)
4724 {
4725 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4726 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4727 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4728 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004729
Nicolas Capens47dc8672017-04-25 12:54:39 -04004730 c0 = _mm_add_ps(c0, c1);
4731 c2 = _mm_add_ps(c2, c3);
4732 c0 = _mm_add_ps(c0, c2);
4733 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
John Bauman89401822014-05-06 15:04:28 -04004734
Nicolas Capens47dc8672017-04-25 12:54:39 -04004735 _mm_store_ps((float*)(source0 + 4 * x), c0);
4736 }
4737
4738 source0 += pitch;
4739 source1 += pitch;
4740 source2 += pitch;
4741 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004742 }
John Bauman89401822014-05-06 15:04:28 -04004743 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004744 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04004745 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004746 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004747 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004748 for(int x = 0; x < width; x += 4)
4749 {
4750 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4751 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4752 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4753 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
4754 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
4755 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
4756 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
4757 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004758
Nicolas Capens47dc8672017-04-25 12:54:39 -04004759 c0 = _mm_add_ps(c0, c1);
4760 c2 = _mm_add_ps(c2, c3);
4761 c4 = _mm_add_ps(c4, c5);
4762 c6 = _mm_add_ps(c6, c7);
4763 c0 = _mm_add_ps(c0, c2);
4764 c4 = _mm_add_ps(c4, c6);
4765 c0 = _mm_add_ps(c0, c4);
4766 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
John Bauman89401822014-05-06 15:04:28 -04004767
Nicolas Capens47dc8672017-04-25 12:54:39 -04004768 _mm_store_ps((float*)(source0 + 4 * x), c0);
4769 }
4770
4771 source0 += pitch;
4772 source1 += pitch;
4773 source2 += pitch;
4774 source3 += pitch;
4775 source4 += pitch;
4776 source5 += pitch;
4777 source6 += pitch;
4778 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004779 }
John Bauman89401822014-05-06 15:04:28 -04004780 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004781 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04004782 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004783 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004784 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004785 for(int x = 0; x < width; x += 4)
4786 {
4787 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4788 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4789 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4790 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
4791 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
4792 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
4793 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
4794 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
4795 __m128 c8 = _mm_load_ps((float*)(source8 + 4 * x));
4796 __m128 c9 = _mm_load_ps((float*)(source9 + 4 * x));
4797 __m128 cA = _mm_load_ps((float*)(sourceA + 4 * x));
4798 __m128 cB = _mm_load_ps((float*)(sourceB + 4 * x));
4799 __m128 cC = _mm_load_ps((float*)(sourceC + 4 * x));
4800 __m128 cD = _mm_load_ps((float*)(sourceD + 4 * x));
4801 __m128 cE = _mm_load_ps((float*)(sourceE + 4 * x));
4802 __m128 cF = _mm_load_ps((float*)(sourceF + 4 * x));
John Bauman89401822014-05-06 15:04:28 -04004803
Nicolas Capens47dc8672017-04-25 12:54:39 -04004804 c0 = _mm_add_ps(c0, c1);
4805 c2 = _mm_add_ps(c2, c3);
4806 c4 = _mm_add_ps(c4, c5);
4807 c6 = _mm_add_ps(c6, c7);
4808 c8 = _mm_add_ps(c8, c9);
4809 cA = _mm_add_ps(cA, cB);
4810 cC = _mm_add_ps(cC, cD);
4811 cE = _mm_add_ps(cE, cF);
4812 c0 = _mm_add_ps(c0, c2);
4813 c4 = _mm_add_ps(c4, c6);
4814 c8 = _mm_add_ps(c8, cA);
4815 cC = _mm_add_ps(cC, cE);
4816 c0 = _mm_add_ps(c0, c4);
4817 c8 = _mm_add_ps(c8, cC);
4818 c0 = _mm_add_ps(c0, c8);
4819 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
John Bauman89401822014-05-06 15:04:28 -04004820
Nicolas Capens47dc8672017-04-25 12:54:39 -04004821 _mm_store_ps((float*)(source0 + 4 * x), c0);
4822 }
4823
4824 source0 += pitch;
4825 source1 += pitch;
4826 source2 += pitch;
4827 source3 += pitch;
4828 source4 += pitch;
4829 source5 += pitch;
4830 source6 += pitch;
4831 source7 += pitch;
4832 source8 += pitch;
4833 source9 += pitch;
4834 sourceA += pitch;
4835 sourceB += pitch;
4836 sourceC += pitch;
4837 sourceD += pitch;
4838 sourceE += pitch;
4839 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04004840 }
John Bauman89401822014-05-06 15:04:28 -04004841 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004842 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04004843 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004844 else
4845 #endif
John Bauman89401822014-05-06 15:04:28 -04004846 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004847 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04004848 {
4849 for(int y = 0; y < height; y++)
4850 {
4851 for(int x = 0; x < width; x++)
4852 {
4853 float c0 = *(float*)(source0 + 4 * x);
4854 float c1 = *(float*)(source1 + 4 * x);
4855
4856 c0 = c0 + c1;
4857 c0 *= 1.0f / 2.0f;
4858
4859 *(float*)(source0 + 4 * x) = c0;
4860 }
4861
4862 source0 += pitch;
4863 source1 += pitch;
4864 }
4865 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004866 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04004867 {
4868 for(int y = 0; y < height; y++)
4869 {
4870 for(int x = 0; x < width; x++)
4871 {
4872 float c0 = *(float*)(source0 + 4 * x);
4873 float c1 = *(float*)(source1 + 4 * x);
4874 float c2 = *(float*)(source2 + 4 * x);
4875 float c3 = *(float*)(source3 + 4 * x);
4876
4877 c0 = c0 + c1;
4878 c2 = c2 + c3;
4879 c0 = c0 + c2;
4880 c0 *= 1.0f / 4.0f;
4881
4882 *(float*)(source0 + 4 * x) = c0;
4883 }
4884
4885 source0 += pitch;
4886 source1 += pitch;
4887 source2 += pitch;
4888 source3 += pitch;
4889 }
4890 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004891 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04004892 {
4893 for(int y = 0; y < height; y++)
4894 {
4895 for(int x = 0; x < width; x++)
4896 {
4897 float c0 = *(float*)(source0 + 4 * x);
4898 float c1 = *(float*)(source1 + 4 * x);
4899 float c2 = *(float*)(source2 + 4 * x);
4900 float c3 = *(float*)(source3 + 4 * x);
4901 float c4 = *(float*)(source4 + 4 * x);
4902 float c5 = *(float*)(source5 + 4 * x);
4903 float c6 = *(float*)(source6 + 4 * x);
4904 float c7 = *(float*)(source7 + 4 * x);
4905
4906 c0 = c0 + c1;
4907 c2 = c2 + c3;
4908 c4 = c4 + c5;
4909 c6 = c6 + c7;
4910 c0 = c0 + c2;
4911 c4 = c4 + c6;
4912 c0 = c0 + c4;
4913 c0 *= 1.0f / 8.0f;
4914
4915 *(float*)(source0 + 4 * x) = c0;
4916 }
4917
4918 source0 += pitch;
4919 source1 += pitch;
4920 source2 += pitch;
4921 source3 += pitch;
4922 source4 += pitch;
4923 source5 += pitch;
4924 source6 += pitch;
4925 source7 += pitch;
4926 }
4927 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004928 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04004929 {
4930 for(int y = 0; y < height; y++)
4931 {
4932 for(int x = 0; x < width; x++)
4933 {
4934 float c0 = *(float*)(source0 + 4 * x);
4935 float c1 = *(float*)(source1 + 4 * x);
4936 float c2 = *(float*)(source2 + 4 * x);
4937 float c3 = *(float*)(source3 + 4 * x);
4938 float c4 = *(float*)(source4 + 4 * x);
4939 float c5 = *(float*)(source5 + 4 * x);
4940 float c6 = *(float*)(source6 + 4 * x);
4941 float c7 = *(float*)(source7 + 4 * x);
4942 float c8 = *(float*)(source8 + 4 * x);
4943 float c9 = *(float*)(source9 + 4 * x);
4944 float cA = *(float*)(sourceA + 4 * x);
4945 float cB = *(float*)(sourceB + 4 * x);
4946 float cC = *(float*)(sourceC + 4 * x);
4947 float cD = *(float*)(sourceD + 4 * x);
4948 float cE = *(float*)(sourceE + 4 * x);
4949 float cF = *(float*)(sourceF + 4 * x);
4950
4951 c0 = c0 + c1;
4952 c2 = c2 + c3;
4953 c4 = c4 + c5;
4954 c6 = c6 + c7;
4955 c8 = c8 + c9;
4956 cA = cA + cB;
4957 cC = cC + cD;
4958 cE = cE + cF;
4959 c0 = c0 + c2;
4960 c4 = c4 + c6;
4961 c8 = c8 + cA;
4962 cC = cC + cE;
4963 c0 = c0 + c4;
4964 c8 = c8 + cC;
4965 c0 = c0 + c8;
4966 c0 *= 1.0f / 16.0f;
4967
4968 *(float*)(source0 + 4 * x) = c0;
4969 }
4970
4971 source0 += pitch;
4972 source1 += pitch;
4973 source2 += pitch;
4974 source3 += pitch;
4975 source4 += pitch;
4976 source5 += pitch;
4977 source6 += pitch;
4978 source7 += pitch;
4979 source8 += pitch;
4980 source9 += pitch;
4981 sourceA += pitch;
4982 sourceB += pitch;
4983 sourceC += pitch;
4984 sourceD += pitch;
4985 sourceE += pitch;
4986 sourceF += pitch;
4987 }
4988 }
4989 else ASSERT(false);
4990 }
4991 }
4992 else if(internal.format == FORMAT_G32R32F)
4993 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004994 #if defined(__i386__) || defined(__x86_64__)
4995 if(CPUID::supportsSSE() && (width % 2) == 0)
John Bauman89401822014-05-06 15:04:28 -04004996 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004997 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04004998 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004999 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005000 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005001 for(int x = 0; x < width; x += 2)
5002 {
5003 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5004 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005005
Nicolas Capens47dc8672017-04-25 12:54:39 -04005006 c0 = _mm_add_ps(c0, c1);
5007 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
John Bauman89401822014-05-06 15:04:28 -04005008
Nicolas Capens47dc8672017-04-25 12:54:39 -04005009 _mm_store_ps((float*)(source0 + 8 * x), c0);
5010 }
5011
5012 source0 += pitch;
5013 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005014 }
John Bauman89401822014-05-06 15:04:28 -04005015 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005016 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04005017 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005018 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005019 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005020 for(int x = 0; x < width; x += 2)
5021 {
5022 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5023 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
5024 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
5025 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005026
Nicolas Capens47dc8672017-04-25 12:54:39 -04005027 c0 = _mm_add_ps(c0, c1);
5028 c2 = _mm_add_ps(c2, c3);
5029 c0 = _mm_add_ps(c0, c2);
5030 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
John Bauman89401822014-05-06 15:04:28 -04005031
Nicolas Capens47dc8672017-04-25 12:54:39 -04005032 _mm_store_ps((float*)(source0 + 8 * x), c0);
5033 }
5034
5035 source0 += pitch;
5036 source1 += pitch;
5037 source2 += pitch;
5038 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005039 }
John Bauman89401822014-05-06 15:04:28 -04005040 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005041 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04005042 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005043 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005044 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005045 for(int x = 0; x < width; x += 2)
5046 {
5047 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5048 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
5049 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
5050 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
5051 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
5052 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
5053 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
5054 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005055
Nicolas Capens47dc8672017-04-25 12:54:39 -04005056 c0 = _mm_add_ps(c0, c1);
5057 c2 = _mm_add_ps(c2, c3);
5058 c4 = _mm_add_ps(c4, c5);
5059 c6 = _mm_add_ps(c6, c7);
5060 c0 = _mm_add_ps(c0, c2);
5061 c4 = _mm_add_ps(c4, c6);
5062 c0 = _mm_add_ps(c0, c4);
5063 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
John Bauman89401822014-05-06 15:04:28 -04005064
Nicolas Capens47dc8672017-04-25 12:54:39 -04005065 _mm_store_ps((float*)(source0 + 8 * x), c0);
5066 }
5067
5068 source0 += pitch;
5069 source1 += pitch;
5070 source2 += pitch;
5071 source3 += pitch;
5072 source4 += pitch;
5073 source5 += pitch;
5074 source6 += pitch;
5075 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005076 }
John Bauman89401822014-05-06 15:04:28 -04005077 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005078 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04005079 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005080 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005081 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005082 for(int x = 0; x < width; x += 2)
5083 {
5084 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5085 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
5086 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
5087 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
5088 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
5089 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
5090 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
5091 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
5092 __m128 c8 = _mm_load_ps((float*)(source8 + 8 * x));
5093 __m128 c9 = _mm_load_ps((float*)(source9 + 8 * x));
5094 __m128 cA = _mm_load_ps((float*)(sourceA + 8 * x));
5095 __m128 cB = _mm_load_ps((float*)(sourceB + 8 * x));
5096 __m128 cC = _mm_load_ps((float*)(sourceC + 8 * x));
5097 __m128 cD = _mm_load_ps((float*)(sourceD + 8 * x));
5098 __m128 cE = _mm_load_ps((float*)(sourceE + 8 * x));
5099 __m128 cF = _mm_load_ps((float*)(sourceF + 8 * x));
John Bauman89401822014-05-06 15:04:28 -04005100
Nicolas Capens47dc8672017-04-25 12:54:39 -04005101 c0 = _mm_add_ps(c0, c1);
5102 c2 = _mm_add_ps(c2, c3);
5103 c4 = _mm_add_ps(c4, c5);
5104 c6 = _mm_add_ps(c6, c7);
5105 c8 = _mm_add_ps(c8, c9);
5106 cA = _mm_add_ps(cA, cB);
5107 cC = _mm_add_ps(cC, cD);
5108 cE = _mm_add_ps(cE, cF);
5109 c0 = _mm_add_ps(c0, c2);
5110 c4 = _mm_add_ps(c4, c6);
5111 c8 = _mm_add_ps(c8, cA);
5112 cC = _mm_add_ps(cC, cE);
5113 c0 = _mm_add_ps(c0, c4);
5114 c8 = _mm_add_ps(c8, cC);
5115 c0 = _mm_add_ps(c0, c8);
5116 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
John Bauman89401822014-05-06 15:04:28 -04005117
Nicolas Capens47dc8672017-04-25 12:54:39 -04005118 _mm_store_ps((float*)(source0 + 8 * x), c0);
5119 }
5120
5121 source0 += pitch;
5122 source1 += pitch;
5123 source2 += pitch;
5124 source3 += pitch;
5125 source4 += pitch;
5126 source5 += pitch;
5127 source6 += pitch;
5128 source7 += pitch;
5129 source8 += pitch;
5130 source9 += pitch;
5131 sourceA += pitch;
5132 sourceB += pitch;
5133 sourceC += pitch;
5134 sourceD += pitch;
5135 sourceE += pitch;
5136 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04005137 }
John Bauman89401822014-05-06 15:04:28 -04005138 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005139 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04005140 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005141 else
5142 #endif
John Bauman89401822014-05-06 15:04:28 -04005143 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005144 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04005145 {
5146 for(int y = 0; y < height; y++)
5147 {
5148 for(int x = 0; x < 2 * width; x++)
5149 {
5150 float c0 = *(float*)(source0 + 4 * x);
5151 float c1 = *(float*)(source1 + 4 * x);
5152
5153 c0 = c0 + c1;
5154 c0 *= 1.0f / 2.0f;
5155
5156 *(float*)(source0 + 4 * x) = c0;
5157 }
5158
5159 source0 += pitch;
5160 source1 += pitch;
5161 }
5162 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005163 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04005164 {
5165 for(int y = 0; y < height; y++)
5166 {
5167 for(int x = 0; x < 2 * width; x++)
5168 {
5169 float c0 = *(float*)(source0 + 4 * x);
5170 float c1 = *(float*)(source1 + 4 * x);
5171 float c2 = *(float*)(source2 + 4 * x);
5172 float c3 = *(float*)(source3 + 4 * x);
5173
5174 c0 = c0 + c1;
5175 c2 = c2 + c3;
5176 c0 = c0 + c2;
5177 c0 *= 1.0f / 4.0f;
5178
5179 *(float*)(source0 + 4 * x) = c0;
5180 }
5181
5182 source0 += pitch;
5183 source1 += pitch;
5184 source2 += pitch;
5185 source3 += pitch;
5186 }
5187 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005188 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04005189 {
5190 for(int y = 0; y < height; y++)
5191 {
5192 for(int x = 0; x < 2 * width; x++)
5193 {
5194 float c0 = *(float*)(source0 + 4 * x);
5195 float c1 = *(float*)(source1 + 4 * x);
5196 float c2 = *(float*)(source2 + 4 * x);
5197 float c3 = *(float*)(source3 + 4 * x);
5198 float c4 = *(float*)(source4 + 4 * x);
5199 float c5 = *(float*)(source5 + 4 * x);
5200 float c6 = *(float*)(source6 + 4 * x);
5201 float c7 = *(float*)(source7 + 4 * x);
5202
5203 c0 = c0 + c1;
5204 c2 = c2 + c3;
5205 c4 = c4 + c5;
5206 c6 = c6 + c7;
5207 c0 = c0 + c2;
5208 c4 = c4 + c6;
5209 c0 = c0 + c4;
5210 c0 *= 1.0f / 8.0f;
5211
5212 *(float*)(source0 + 4 * x) = c0;
5213 }
5214
5215 source0 += pitch;
5216 source1 += pitch;
5217 source2 += pitch;
5218 source3 += pitch;
5219 source4 += pitch;
5220 source5 += pitch;
5221 source6 += pitch;
5222 source7 += pitch;
5223 }
5224 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005225 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04005226 {
5227 for(int y = 0; y < height; y++)
5228 {
5229 for(int x = 0; x < 2 * width; x++)
5230 {
5231 float c0 = *(float*)(source0 + 4 * x);
5232 float c1 = *(float*)(source1 + 4 * x);
5233 float c2 = *(float*)(source2 + 4 * x);
5234 float c3 = *(float*)(source3 + 4 * x);
5235 float c4 = *(float*)(source4 + 4 * x);
5236 float c5 = *(float*)(source5 + 4 * x);
5237 float c6 = *(float*)(source6 + 4 * x);
5238 float c7 = *(float*)(source7 + 4 * x);
5239 float c8 = *(float*)(source8 + 4 * x);
5240 float c9 = *(float*)(source9 + 4 * x);
5241 float cA = *(float*)(sourceA + 4 * x);
5242 float cB = *(float*)(sourceB + 4 * x);
5243 float cC = *(float*)(sourceC + 4 * x);
5244 float cD = *(float*)(sourceD + 4 * x);
5245 float cE = *(float*)(sourceE + 4 * x);
5246 float cF = *(float*)(sourceF + 4 * x);
5247
5248 c0 = c0 + c1;
5249 c2 = c2 + c3;
5250 c4 = c4 + c5;
5251 c6 = c6 + c7;
5252 c8 = c8 + c9;
5253 cA = cA + cB;
5254 cC = cC + cD;
5255 cE = cE + cF;
5256 c0 = c0 + c2;
5257 c4 = c4 + c6;
5258 c8 = c8 + cA;
5259 cC = cC + cE;
5260 c0 = c0 + c4;
5261 c8 = c8 + cC;
5262 c0 = c0 + c8;
5263 c0 *= 1.0f / 16.0f;
5264
5265 *(float*)(source0 + 4 * x) = c0;
5266 }
5267
5268 source0 += pitch;
5269 source1 += pitch;
5270 source2 += pitch;
5271 source3 += pitch;
5272 source4 += pitch;
5273 source5 += pitch;
5274 source6 += pitch;
5275 source7 += pitch;
5276 source8 += pitch;
5277 source9 += pitch;
5278 sourceA += pitch;
5279 sourceB += pitch;
5280 sourceC += pitch;
5281 sourceD += pitch;
5282 sourceE += pitch;
5283 sourceF += pitch;
5284 }
5285 }
5286 else ASSERT(false);
5287 }
5288 }
Nicolas Capens67fdd832017-12-21 11:20:54 -05005289 else if(internal.format == FORMAT_A32B32G32R32F ||
5290 internal.format == FORMAT_X32B32G32R32F ||
5291 internal.format == FORMAT_X32B32G32R32F_UNSIGNED)
John Bauman89401822014-05-06 15:04:28 -04005292 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005293 #if defined(__i386__) || defined(__x86_64__)
5294 if(CPUID::supportsSSE())
John Bauman89401822014-05-06 15:04:28 -04005295 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005296 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04005297 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005298 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005299 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005300 for(int x = 0; x < width; x++)
5301 {
5302 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5303 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005304
Nicolas Capens47dc8672017-04-25 12:54:39 -04005305 c0 = _mm_add_ps(c0, c1);
5306 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
John Bauman89401822014-05-06 15:04:28 -04005307
Nicolas Capens47dc8672017-04-25 12:54:39 -04005308 _mm_store_ps((float*)(source0 + 16 * x), c0);
5309 }
5310
5311 source0 += pitch;
5312 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005313 }
John Bauman89401822014-05-06 15:04:28 -04005314 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005315 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04005316 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005317 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005318 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005319 for(int x = 0; x < width; x++)
5320 {
5321 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5322 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5323 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5324 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005325
Nicolas Capens47dc8672017-04-25 12:54:39 -04005326 c0 = _mm_add_ps(c0, c1);
5327 c2 = _mm_add_ps(c2, c3);
5328 c0 = _mm_add_ps(c0, c2);
5329 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
John Bauman89401822014-05-06 15:04:28 -04005330
Nicolas Capens47dc8672017-04-25 12:54:39 -04005331 _mm_store_ps((float*)(source0 + 16 * x), c0);
5332 }
5333
5334 source0 += pitch;
5335 source1 += pitch;
5336 source2 += pitch;
5337 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005338 }
John Bauman89401822014-05-06 15:04:28 -04005339 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005340 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04005341 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005342 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005343 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005344 for(int x = 0; x < width; x++)
5345 {
5346 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5347 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5348 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5349 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
5350 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
5351 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
5352 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
5353 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005354
Nicolas Capens47dc8672017-04-25 12:54:39 -04005355 c0 = _mm_add_ps(c0, c1);
5356 c2 = _mm_add_ps(c2, c3);
5357 c4 = _mm_add_ps(c4, c5);
5358 c6 = _mm_add_ps(c6, c7);
5359 c0 = _mm_add_ps(c0, c2);
5360 c4 = _mm_add_ps(c4, c6);
5361 c0 = _mm_add_ps(c0, c4);
5362 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
John Bauman89401822014-05-06 15:04:28 -04005363
Nicolas Capens47dc8672017-04-25 12:54:39 -04005364 _mm_store_ps((float*)(source0 + 16 * x), c0);
5365 }
5366
5367 source0 += pitch;
5368 source1 += pitch;
5369 source2 += pitch;
5370 source3 += pitch;
5371 source4 += pitch;
5372 source5 += pitch;
5373 source6 += pitch;
5374 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005375 }
John Bauman89401822014-05-06 15:04:28 -04005376 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005377 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04005378 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005379 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005380 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005381 for(int x = 0; x < width; x++)
5382 {
5383 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5384 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5385 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5386 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
5387 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
5388 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
5389 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
5390 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
5391 __m128 c8 = _mm_load_ps((float*)(source8 + 16 * x));
5392 __m128 c9 = _mm_load_ps((float*)(source9 + 16 * x));
5393 __m128 cA = _mm_load_ps((float*)(sourceA + 16 * x));
5394 __m128 cB = _mm_load_ps((float*)(sourceB + 16 * x));
5395 __m128 cC = _mm_load_ps((float*)(sourceC + 16 * x));
5396 __m128 cD = _mm_load_ps((float*)(sourceD + 16 * x));
5397 __m128 cE = _mm_load_ps((float*)(sourceE + 16 * x));
5398 __m128 cF = _mm_load_ps((float*)(sourceF + 16 * x));
John Bauman89401822014-05-06 15:04:28 -04005399
Nicolas Capens47dc8672017-04-25 12:54:39 -04005400 c0 = _mm_add_ps(c0, c1);
5401 c2 = _mm_add_ps(c2, c3);
5402 c4 = _mm_add_ps(c4, c5);
5403 c6 = _mm_add_ps(c6, c7);
5404 c8 = _mm_add_ps(c8, c9);
5405 cA = _mm_add_ps(cA, cB);
5406 cC = _mm_add_ps(cC, cD);
5407 cE = _mm_add_ps(cE, cF);
5408 c0 = _mm_add_ps(c0, c2);
5409 c4 = _mm_add_ps(c4, c6);
5410 c8 = _mm_add_ps(c8, cA);
5411 cC = _mm_add_ps(cC, cE);
5412 c0 = _mm_add_ps(c0, c4);
5413 c8 = _mm_add_ps(c8, cC);
5414 c0 = _mm_add_ps(c0, c8);
5415 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
John Bauman89401822014-05-06 15:04:28 -04005416
Nicolas Capens47dc8672017-04-25 12:54:39 -04005417 _mm_store_ps((float*)(source0 + 16 * x), c0);
5418 }
5419
5420 source0 += pitch;
5421 source1 += pitch;
5422 source2 += pitch;
5423 source3 += pitch;
5424 source4 += pitch;
5425 source5 += pitch;
5426 source6 += pitch;
5427 source7 += pitch;
5428 source8 += pitch;
5429 source9 += pitch;
5430 sourceA += pitch;
5431 sourceB += pitch;
5432 sourceC += pitch;
5433 sourceD += pitch;
5434 sourceE += pitch;
5435 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04005436 }
John Bauman89401822014-05-06 15:04:28 -04005437 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005438 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04005439 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005440 else
5441 #endif
John Bauman89401822014-05-06 15:04:28 -04005442 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005443 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04005444 {
5445 for(int y = 0; y < height; y++)
5446 {
5447 for(int x = 0; x < 4 * width; x++)
5448 {
5449 float c0 = *(float*)(source0 + 4 * x);
5450 float c1 = *(float*)(source1 + 4 * x);
5451
5452 c0 = c0 + c1;
5453 c0 *= 1.0f / 2.0f;
5454
5455 *(float*)(source0 + 4 * x) = c0;
5456 }
5457
5458 source0 += pitch;
5459 source1 += pitch;
5460 }
5461 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005462 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04005463 {
5464 for(int y = 0; y < height; y++)
5465 {
5466 for(int x = 0; x < 4 * width; x++)
5467 {
5468 float c0 = *(float*)(source0 + 4 * x);
5469 float c1 = *(float*)(source1 + 4 * x);
5470 float c2 = *(float*)(source2 + 4 * x);
5471 float c3 = *(float*)(source3 + 4 * x);
5472
5473 c0 = c0 + c1;
5474 c2 = c2 + c3;
5475 c0 = c0 + c2;
5476 c0 *= 1.0f / 4.0f;
5477
5478 *(float*)(source0 + 4 * x) = c0;
5479 }
5480
5481 source0 += pitch;
5482 source1 += pitch;
5483 source2 += pitch;
5484 source3 += pitch;
5485 }
5486 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005487 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04005488 {
5489 for(int y = 0; y < height; y++)
5490 {
5491 for(int x = 0; x < 4 * width; x++)
5492 {
5493 float c0 = *(float*)(source0 + 4 * x);
5494 float c1 = *(float*)(source1 + 4 * x);
5495 float c2 = *(float*)(source2 + 4 * x);
5496 float c3 = *(float*)(source3 + 4 * x);
5497 float c4 = *(float*)(source4 + 4 * x);
5498 float c5 = *(float*)(source5 + 4 * x);
5499 float c6 = *(float*)(source6 + 4 * x);
5500 float c7 = *(float*)(source7 + 4 * x);
5501
5502 c0 = c0 + c1;
5503 c2 = c2 + c3;
5504 c4 = c4 + c5;
5505 c6 = c6 + c7;
5506 c0 = c0 + c2;
5507 c4 = c4 + c6;
5508 c0 = c0 + c4;
5509 c0 *= 1.0f / 8.0f;
5510
5511 *(float*)(source0 + 4 * x) = c0;
5512 }
5513
5514 source0 += pitch;
5515 source1 += pitch;
5516 source2 += pitch;
5517 source3 += pitch;
5518 source4 += pitch;
5519 source5 += pitch;
5520 source6 += pitch;
5521 source7 += pitch;
5522 }
5523 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005524 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04005525 {
5526 for(int y = 0; y < height; y++)
5527 {
5528 for(int x = 0; x < 4 * width; x++)
5529 {
5530 float c0 = *(float*)(source0 + 4 * x);
5531 float c1 = *(float*)(source1 + 4 * x);
5532 float c2 = *(float*)(source2 + 4 * x);
5533 float c3 = *(float*)(source3 + 4 * x);
5534 float c4 = *(float*)(source4 + 4 * x);
5535 float c5 = *(float*)(source5 + 4 * x);
5536 float c6 = *(float*)(source6 + 4 * x);
5537 float c7 = *(float*)(source7 + 4 * x);
5538 float c8 = *(float*)(source8 + 4 * x);
5539 float c9 = *(float*)(source9 + 4 * x);
5540 float cA = *(float*)(sourceA + 4 * x);
5541 float cB = *(float*)(sourceB + 4 * x);
5542 float cC = *(float*)(sourceC + 4 * x);
5543 float cD = *(float*)(sourceD + 4 * x);
5544 float cE = *(float*)(sourceE + 4 * x);
5545 float cF = *(float*)(sourceF + 4 * x);
5546
5547 c0 = c0 + c1;
5548 c2 = c2 + c3;
5549 c4 = c4 + c5;
5550 c6 = c6 + c7;
5551 c8 = c8 + c9;
5552 cA = cA + cB;
5553 cC = cC + cD;
5554 cE = cE + cF;
5555 c0 = c0 + c2;
5556 c4 = c4 + c6;
5557 c8 = c8 + cA;
5558 cC = cC + cE;
5559 c0 = c0 + c4;
5560 c8 = c8 + cC;
5561 c0 = c0 + c8;
5562 c0 *= 1.0f / 16.0f;
5563
5564 *(float*)(source0 + 4 * x) = c0;
5565 }
5566
5567 source0 += pitch;
5568 source1 += pitch;
5569 source2 += pitch;
5570 source3 += pitch;
5571 source4 += pitch;
5572 source5 += pitch;
5573 source6 += pitch;
5574 source7 += pitch;
5575 source8 += pitch;
5576 source9 += pitch;
5577 sourceA += pitch;
5578 sourceB += pitch;
5579 sourceC += pitch;
5580 sourceD += pitch;
5581 sourceE += pitch;
5582 sourceF += pitch;
5583 }
5584 }
5585 else ASSERT(false);
5586 }
5587 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005588 else if(internal.format == FORMAT_R5G6B5)
5589 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005590 #if defined(__i386__) || defined(__x86_64__)
5591 if(CPUID::supportsSSE2() && (width % 8) == 0)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005592 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005593 if(internal.samples == 2)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005594 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005595 for(int y = 0; y < height; y++)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005596 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005597 for(int x = 0; x < width; x += 8)
5598 {
5599 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5600 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005601
Nicolas Capens47dc8672017-04-25 12:54:39 -04005602 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5603 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5604 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5605 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5606 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5607 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005608
Nicolas Capens47dc8672017-04-25 12:54:39 -04005609 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5610 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5611 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5612 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5613 c0 = _mm_or_si128(c0, c1);
Nicolas Capens0e12a922015-09-04 09:18:15 -04005614
Nicolas Capens47dc8672017-04-25 12:54:39 -04005615 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5616 }
5617
5618 source0 += pitch;
5619 source1 += pitch;
Nicolas Capens0e12a922015-09-04 09:18:15 -04005620 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005621 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005622 else if(internal.samples == 4)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005623 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005624 for(int y = 0; y < height; y++)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005625 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005626 for(int x = 0; x < width; x += 8)
5627 {
5628 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5629 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5630 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5631 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005632
Nicolas Capens47dc8672017-04-25 12:54:39 -04005633 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5634 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5635 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5636 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5637 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5638 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5639 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5640 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5641 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5642 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005643
Nicolas Capens47dc8672017-04-25 12:54:39 -04005644 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5645 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5646 c0 = _mm_avg_epu8(c0, c2);
5647 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5648 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5649 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5650 c1 = _mm_avg_epu16(c1, c3);
5651 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5652 c0 = _mm_or_si128(c0, c1);
Nicolas Capens0e12a922015-09-04 09:18:15 -04005653
Nicolas Capens47dc8672017-04-25 12:54:39 -04005654 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5655 }
5656
5657 source0 += pitch;
5658 source1 += pitch;
5659 source2 += pitch;
5660 source3 += pitch;
Nicolas Capens0e12a922015-09-04 09:18:15 -04005661 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005662 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005663 else if(internal.samples == 8)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005664 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005665 for(int y = 0; y < height; y++)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005666 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005667 for(int x = 0; x < width; x += 8)
5668 {
5669 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5670 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5671 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5672 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
5673 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
5674 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
5675 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
5676 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005677
Nicolas Capens47dc8672017-04-25 12:54:39 -04005678 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5679 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5680 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5681 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5682 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5683 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5684 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5685 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5686 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5687 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5688 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
5689 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
5690 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
5691 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
5692 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
5693 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
5694 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
5695 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005696
Nicolas Capens47dc8672017-04-25 12:54:39 -04005697 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5698 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5699 c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
5700 c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
5701 c0 = _mm_avg_epu8(c0, c2);
5702 c4 = _mm_avg_epu8(c4, c6);
5703 c0 = _mm_avg_epu8(c0, c4);
5704 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5705 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5706 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5707 c5 = _mm_avg_epu16(c4__g_, c5__g_);
5708 c7 = _mm_avg_epu16(c6__g_, c7__g_);
5709 c1 = _mm_avg_epu16(c1, c3);
5710 c5 = _mm_avg_epu16(c5, c7);
5711 c1 = _mm_avg_epu16(c1, c5);
5712 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5713 c0 = _mm_or_si128(c0, c1);
Nicolas Capens0e12a922015-09-04 09:18:15 -04005714
Nicolas Capens47dc8672017-04-25 12:54:39 -04005715 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5716 }
5717
5718 source0 += pitch;
5719 source1 += pitch;
5720 source2 += pitch;
5721 source3 += pitch;
5722 source4 += pitch;
5723 source5 += pitch;
5724 source6 += pitch;
5725 source7 += pitch;
Nicolas Capens0e12a922015-09-04 09:18:15 -04005726 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005727 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005728 else if(internal.samples == 16)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005729 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005730 for(int y = 0; y < height; y++)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005731 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005732 for(int x = 0; x < width; x += 8)
5733 {
5734 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5735 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5736 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5737 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
5738 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
5739 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
5740 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
5741 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
5742 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 2 * x));
5743 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 2 * x));
5744 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 2 * x));
5745 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 2 * x));
5746 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 2 * x));
5747 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 2 * x));
5748 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 2 * x));
5749 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 2 * x));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005750
Nicolas Capens47dc8672017-04-25 12:54:39 -04005751 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5752 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5753 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5754 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5755 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5756 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5757 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5758 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5759 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5760 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5761 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
5762 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
5763 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
5764 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
5765 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
5766 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
5767 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
5768 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
5769 __m128i c8_r_b = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(r_b));
5770 __m128i c8__g_ = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(_g_));
5771 __m128i c9_r_b = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(r_b));
5772 __m128i c9__g_ = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(_g_));
5773 __m128i cA_r_b = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(r_b));
5774 __m128i cA__g_ = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(_g_));
5775 __m128i cB_r_b = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(r_b));
5776 __m128i cB__g_ = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(_g_));
5777 __m128i cC_r_b = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(r_b));
5778 __m128i cC__g_ = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(_g_));
5779 __m128i cD_r_b = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(r_b));
5780 __m128i cD__g_ = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(_g_));
5781 __m128i cE_r_b = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(r_b));
5782 __m128i cE__g_ = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(_g_));
5783 __m128i cF_r_b = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(r_b));
5784 __m128i cF__g_ = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(_g_));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005785
Nicolas Capens47dc8672017-04-25 12:54:39 -04005786 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5787 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5788 c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
5789 c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
5790 c8 = _mm_avg_epu8(c8_r_b, c9_r_b);
5791 cA = _mm_avg_epu8(cA_r_b, cB_r_b);
5792 cC = _mm_avg_epu8(cC_r_b, cD_r_b);
5793 cE = _mm_avg_epu8(cE_r_b, cF_r_b);
5794 c0 = _mm_avg_epu8(c0, c2);
5795 c4 = _mm_avg_epu8(c4, c6);
5796 c8 = _mm_avg_epu8(c8, cA);
5797 cC = _mm_avg_epu8(cC, cE);
5798 c0 = _mm_avg_epu8(c0, c4);
5799 c8 = _mm_avg_epu8(c8, cC);
5800 c0 = _mm_avg_epu8(c0, c8);
5801 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5802 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5803 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5804 c5 = _mm_avg_epu16(c4__g_, c5__g_);
5805 c7 = _mm_avg_epu16(c6__g_, c7__g_);
5806 c9 = _mm_avg_epu16(c8__g_, c9__g_);
5807 cB = _mm_avg_epu16(cA__g_, cB__g_);
5808 cD = _mm_avg_epu16(cC__g_, cD__g_);
5809 cF = _mm_avg_epu16(cE__g_, cF__g_);
5810 c1 = _mm_avg_epu8(c1, c3);
5811 c5 = _mm_avg_epu8(c5, c7);
5812 c9 = _mm_avg_epu8(c9, cB);
5813 cD = _mm_avg_epu8(cD, cF);
5814 c1 = _mm_avg_epu8(c1, c5);
5815 c9 = _mm_avg_epu8(c9, cD);
5816 c1 = _mm_avg_epu8(c1, c9);
5817 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5818 c0 = _mm_or_si128(c0, c1);
Nicolas Capens0e12a922015-09-04 09:18:15 -04005819
Nicolas Capens47dc8672017-04-25 12:54:39 -04005820 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5821 }
5822
5823 source0 += pitch;
5824 source1 += pitch;
5825 source2 += pitch;
5826 source3 += pitch;
5827 source4 += pitch;
5828 source5 += pitch;
5829 source6 += pitch;
5830 source7 += pitch;
5831 source8 += pitch;
5832 source9 += pitch;
5833 sourceA += pitch;
5834 sourceB += pitch;
5835 sourceC += pitch;
5836 sourceD += pitch;
5837 sourceE += pitch;
5838 sourceF += pitch;
Nicolas Capens0e12a922015-09-04 09:18:15 -04005839 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005840 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005841 else ASSERT(false);
Nicolas Capens0e12a922015-09-04 09:18:15 -04005842 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005843 else
5844 #endif
Nicolas Capens0e12a922015-09-04 09:18:15 -04005845 {
5846 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7BEF) + (((x) ^ (y)) & 0x0821))
5847
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005848 if(internal.samples == 2)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005849 {
5850 for(int y = 0; y < height; y++)
5851 {
5852 for(int x = 0; x < width; x++)
5853 {
5854 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
5855 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
5856
5857 c0 = AVERAGE(c0, c1);
5858
5859 *(unsigned short*)(source0 + 2 * x) = c0;
5860 }
5861
5862 source0 += pitch;
5863 source1 += pitch;
5864 }
5865 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005866 else if(internal.samples == 4)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005867 {
5868 for(int y = 0; y < height; y++)
5869 {
5870 for(int x = 0; x < width; x++)
5871 {
5872 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
5873 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
5874 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
5875 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
5876
5877 c0 = AVERAGE(c0, c1);
5878 c2 = AVERAGE(c2, c3);
5879 c0 = AVERAGE(c0, c2);
5880
5881 *(unsigned short*)(source0 + 2 * x) = c0;
5882 }
5883
5884 source0 += pitch;
5885 source1 += pitch;
5886 source2 += pitch;
5887 source3 += pitch;
5888 }
5889 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005890 else if(internal.samples == 8)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005891 {
5892 for(int y = 0; y < height; y++)
5893 {
5894 for(int x = 0; x < width; x++)
5895 {
5896 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
5897 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
5898 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
5899 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
5900 unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
5901 unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
5902 unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
5903 unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
5904
5905 c0 = AVERAGE(c0, c1);
5906 c2 = AVERAGE(c2, c3);
5907 c4 = AVERAGE(c4, c5);
5908 c6 = AVERAGE(c6, c7);
5909 c0 = AVERAGE(c0, c2);
5910 c4 = AVERAGE(c4, c6);
5911 c0 = AVERAGE(c0, c4);
5912
5913 *(unsigned short*)(source0 + 2 * x) = c0;
5914 }
5915
5916 source0 += pitch;
5917 source1 += pitch;
5918 source2 += pitch;
5919 source3 += pitch;
5920 source4 += pitch;
5921 source5 += pitch;
5922 source6 += pitch;
5923 source7 += pitch;
5924 }
5925 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005926 else if(internal.samples == 16)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005927 {
5928 for(int y = 0; y < height; y++)
5929 {
5930 for(int x = 0; x < width; x++)
5931 {
5932 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
5933 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
5934 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
5935 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
5936 unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
5937 unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
5938 unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
5939 unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
5940 unsigned short c8 = *(unsigned short*)(source8 + 2 * x);
5941 unsigned short c9 = *(unsigned short*)(source9 + 2 * x);
5942 unsigned short cA = *(unsigned short*)(sourceA + 2 * x);
5943 unsigned short cB = *(unsigned short*)(sourceB + 2 * x);
5944 unsigned short cC = *(unsigned short*)(sourceC + 2 * x);
5945 unsigned short cD = *(unsigned short*)(sourceD + 2 * x);
5946 unsigned short cE = *(unsigned short*)(sourceE + 2 * x);
5947 unsigned short cF = *(unsigned short*)(sourceF + 2 * x);
5948
5949 c0 = AVERAGE(c0, c1);
5950 c2 = AVERAGE(c2, c3);
5951 c4 = AVERAGE(c4, c5);
5952 c6 = AVERAGE(c6, c7);
5953 c8 = AVERAGE(c8, c9);
5954 cA = AVERAGE(cA, cB);
5955 cC = AVERAGE(cC, cD);
5956 cE = AVERAGE(cE, cF);
5957 c0 = AVERAGE(c0, c2);
5958 c4 = AVERAGE(c4, c6);
5959 c8 = AVERAGE(c8, cA);
5960 cC = AVERAGE(cC, cE);
5961 c0 = AVERAGE(c0, c4);
5962 c8 = AVERAGE(c8, cC);
5963 c0 = AVERAGE(c0, c8);
5964
5965 *(unsigned short*)(source0 + 2 * x) = c0;
5966 }
5967
5968 source0 += pitch;
5969 source1 += pitch;
5970 source2 += pitch;
5971 source3 += pitch;
5972 source4 += pitch;
5973 source5 += pitch;
5974 source6 += pitch;
5975 source7 += pitch;
5976 source8 += pitch;
5977 source9 += pitch;
5978 sourceA += pitch;
5979 sourceB += pitch;
5980 sourceC += pitch;
5981 sourceD += pitch;
5982 sourceE += pitch;
5983 sourceF += pitch;
5984 }
5985 }
5986 else ASSERT(false);
5987
5988 #undef AVERAGE
5989 }
5990 }
John Bauman89401822014-05-06 15:04:28 -04005991 else
5992 {
5993 // UNIMPLEMENTED();
5994 }
5995 }
5996}