blob: 6f73b4d116f122336083eba3ad780dc918186886 [file] [log] [blame]
Nicolas Capens68a82382018-10-02 13:16:55 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#ifndef sw_Half_hpp
16#define sw_Half_hpp
17
#include "Math.hpp"

#include <algorithm>
#include <cmath>
#include <cstring>
Alexis Hetu42528912019-01-16 14:58:33 -050022
Nicolas Capens157ba262019-12-10 17:49:14 -050023namespace sw {
24
// 16-bit floating-point value stored as its raw bit pattern.
// Only the interface is declared here; the conversion routines are defined
// out of line.
class half
{
public:
	// Leaves the stored bits uninitialized.
	half() = default;

	// Builds a half from a single-precision value. Explicit, so a float is
	// never silently narrowed to half.
	explicit half(float f);

	// Assignment from another half, or from a single-precision value.
	half &operator=(half h);
	half &operator=(float f);

	// Implicit widening back to single precision.
	operator float() const;

private:
	unsigned short fp16i;  // Raw 16-bit encoding.
};
39
40inline half shortAsHalf(short s)
41{
42 union
Nicolas Capens68a82382018-10-02 13:16:55 -040043 {
Nicolas Capens157ba262019-12-10 17:49:14 -050044 half h;
45 short s;
46 } hs;
Nicolas Capens68a82382018-10-02 13:16:55 -040047
Nicolas Capens157ba262019-12-10 17:49:14 -050048 hs.s = s;
Nicolas Capens68a82382018-10-02 13:16:55 -040049
Nicolas Capens157ba262019-12-10 17:49:14 -050050 return hs.h;
51}
Nicolas Capens68a82382018-10-02 13:16:55 -040052
Nicolas Capens157ba262019-12-10 17:49:14 -050053class RGB9E5
54{
55 unsigned int R : 9;
56 unsigned int G : 9;
57 unsigned int B : 9;
58 unsigned int E : 5;
Nicolas Capens68a82382018-10-02 13:16:55 -040059
Nicolas Capens157ba262019-12-10 17:49:14 -050060public:
Ben Clayton595d9112019-12-17 20:37:57 +000061 RGB9E5(float rgb[3])
62 : RGB9E5(rgb[0], rgb[1], rgb[2])
Nicolas Capens68a82382018-10-02 13:16:55 -040063 {
Nicolas Capens68a82382018-10-02 13:16:55 -040064 }
65
Nicolas Capens157ba262019-12-10 17:49:14 -050066 RGB9E5(float r, float g, float b)
Nicolas Capens68a82382018-10-02 13:16:55 -040067 {
Nicolas Capens157ba262019-12-10 17:49:14 -050068 // Vulkan 1.1.117 section 15.2.1 RGB to Shared Exponent Conversion
Nicolas Capens68a82382018-10-02 13:16:55 -040069
Nicolas Capens157ba262019-12-10 17:49:14 -050070 // B is the exponent bias (15)
71 constexpr int g_sharedexp_bias = 15;
Nicolas Capens5c09b6a2019-08-07 11:13:03 -040072
Nicolas Capens157ba262019-12-10 17:49:14 -050073 // N is the number of mantissa bits per component (9)
74 constexpr int g_sharedexp_mantissabits = 9;
Nicolas Capens5c09b6a2019-08-07 11:13:03 -040075
Nicolas Capens157ba262019-12-10 17:49:14 -050076 // Emax is the maximum allowed biased exponent value (31)
77 constexpr int g_sharedexp_maxexponent = 31;
Alexis Hetu42528912019-01-16 14:58:33 -050078
Nicolas Capens157ba262019-12-10 17:49:14 -050079 constexpr float g_sharedexp_max =
Ben Clayton595d9112019-12-17 20:37:57 +000080 ((static_cast<float>(1 << g_sharedexp_mantissabits) - 1) /
81 static_cast<float>(1 << g_sharedexp_mantissabits)) *
82 static_cast<float>(1 << (g_sharedexp_maxexponent - g_sharedexp_bias));
Alexis Hetu42528912019-01-16 14:58:33 -050083
Nicolas Capens157ba262019-12-10 17:49:14 -050084 // Clamp components to valid range. NaN becomes 0.
Ben Clayton595d9112019-12-17 20:37:57 +000085 const float red_c = std::min(!(r > 0) ? 0 : r, g_sharedexp_max);
Nicolas Capens157ba262019-12-10 17:49:14 -050086 const float green_c = std::min(!(g > 0) ? 0 : g, g_sharedexp_max);
Ben Clayton595d9112019-12-17 20:37:57 +000087 const float blue_c = std::min(!(b > 0) ? 0 : b, g_sharedexp_max);
Alexis Hetu42528912019-01-16 14:58:33 -050088
Nicolas Capens157ba262019-12-10 17:49:14 -050089 // We're reducing the mantissa to 9 bits, so we must round up if the next
90 // bit is 1. In other words add 0.5 to the new mantissa's position and
91 // allow overflow into the exponent so we can scale correctly.
92 constexpr int half = 1 << (23 - g_sharedexp_mantissabits);
93 const float red_r = bit_cast<float>(bit_cast<int>(red_c) + half);
94 const float green_r = bit_cast<float>(bit_cast<int>(green_c) + half);
95 const float blue_r = bit_cast<float>(bit_cast<int>(blue_c) + half);
Alexis Hetu42528912019-01-16 14:58:33 -050096
Nicolas Capens157ba262019-12-10 17:49:14 -050097 // The largest component determines the shared exponent. It can't be lower
98 // than 0 (after bias subtraction) so also limit to the mimimum representable.
99 constexpr float min_s = 0.5f / (1 << g_sharedexp_bias);
100 float max_s = std::max(std::max(red_r, green_r), std::max(blue_r, min_s));
Alexis Hetu42528912019-01-16 14:58:33 -0500101
Nicolas Capens157ba262019-12-10 17:49:14 -0500102 // Obtain the reciprocal of the shared exponent by inverting the bits,
103 // and scale by the new mantissa's size. Note that the IEEE-754 single-precision
104 // format has an implicit leading 1, but this shared component format does not.
105 float scale = bit_cast<float>((bit_cast<int>(max_s) & 0x7F800000) ^ 0x7F800000) * (1 << (g_sharedexp_mantissabits - 2));
Alexis Hetu42528912019-01-16 14:58:33 -0500106
Nicolas Capens157ba262019-12-10 17:49:14 -0500107 R = static_cast<unsigned int>(round(red_c * scale));
108 G = static_cast<unsigned int>(round(green_c * scale));
109 B = static_cast<unsigned int>(round(blue_c * scale));
110 E = (bit_cast<unsigned int>(max_s) >> 23) - 127 + 15 + 1;
111 }
Nicolas Capens5c09b6a2019-08-07 11:13:03 -0400112
Nicolas Capens157ba262019-12-10 17:49:14 -0500113 operator unsigned int() const
Nicolas Capens68a82382018-10-02 13:16:55 -0400114 {
Ben Clayton595d9112019-12-17 20:37:57 +0000115 return *reinterpret_cast<const unsigned int *>(this);
Nicolas Capens157ba262019-12-10 17:49:14 -0500116 }
Nicolas Capens68a82382018-10-02 13:16:55 -0400117
Nicolas Capens157ba262019-12-10 17:49:14 -0500118 void toRGB16F(half rgb[3]) const
119 {
Ben Clayton595d9112019-12-17 20:37:57 +0000120 constexpr int offset = 24; // Exponent bias (15) + number of mantissa bits per component (9) = 24
Nicolas Capens157ba262019-12-10 17:49:14 -0500121
122 const float factor = (1u << E) * (1.0f / (1 << offset));
123 rgb[0] = half(R * factor);
124 rgb[1] = half(G * factor);
125 rgb[2] = half(B * factor);
126 }
127};
128
129class R11G11B10F
130{
131 unsigned int R : 11;
132 unsigned int G : 11;
133 unsigned int B : 10;
134
135 static inline half float11ToFloat16(unsigned short fp11)
136 {
Ben Clayton595d9112019-12-17 20:37:57 +0000137 return shortAsHalf(fp11 << 4); // Sign bit 0
Nicolas Capens157ba262019-12-10 17:49:14 -0500138 }
139
140 static inline half float10ToFloat16(unsigned short fp10)
141 {
Ben Clayton595d9112019-12-17 20:37:57 +0000142 return shortAsHalf(fp10 << 5); // Sign bit 0
Nicolas Capens157ba262019-12-10 17:49:14 -0500143 }
144
145 inline unsigned short float32ToFloat11(float fp32)
146 {
147 const unsigned int float32MantissaMask = 0x7FFFFF;
148 const unsigned int float32ExponentMask = 0x7F800000;
149 const unsigned int float32SignMask = 0x80000000;
150 const unsigned int float32ValueMask = ~float32SignMask;
151 const unsigned int float32ExponentFirstBit = 23;
152 const unsigned int float32ExponentBias = 127;
153
154 const unsigned short float11Max = 0x7BF;
155 const unsigned short float11MantissaMask = 0x3F;
156 const unsigned short float11ExponentMask = 0x7C0;
157 const unsigned short float11BitMask = 0x7FF;
158 const unsigned int float11ExponentBias = 14;
159
160 const unsigned int float32Maxfloat11 = 0x477E0000;
161 const unsigned int float32Minfloat11 = 0x38800000;
162
Ben Clayton595d9112019-12-17 20:37:57 +0000163 const unsigned int float32Bits = *reinterpret_cast<unsigned int *>(&fp32);
Nicolas Capens157ba262019-12-10 17:49:14 -0500164 const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;
165
166 unsigned int float32Val = float32Bits & float32ValueMask;
167
168 if((float32Val & float32ExponentMask) == float32ExponentMask)
Nicolas Capens68a82382018-10-02 13:16:55 -0400169 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500170 // INF or NAN
171 if((float32Val & float32MantissaMask) != 0)
Alexis Hetu42528912019-01-16 14:58:33 -0500172 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500173 return float11ExponentMask |
Ben Clayton595d9112019-12-17 20:37:57 +0000174 (((float32Val >> 17) | (float32Val >> 11) | (float32Val >> 6) | (float32Val)) &
175 float11MantissaMask);
Alexis Hetu42528912019-01-16 14:58:33 -0500176 }
177 else if(float32Sign)
178 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500179 // -INF is clamped to 0 since float11 is positive only
Alexis Hetu42528912019-01-16 14:58:33 -0500180 return 0;
181 }
Alexis Hetu42528912019-01-16 14:58:33 -0500182 else
183 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500184 return float11ExponentMask;
Alexis Hetu42528912019-01-16 14:58:33 -0500185 }
186 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500187 else if(float32Sign)
Alexis Hetu42528912019-01-16 14:58:33 -0500188 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500189 // float11 is positive only, so clamp to zero
190 return 0;
191 }
192 else if(float32Val > float32Maxfloat11)
193 {
194 // The number is too large to be represented as a float11, set to max
195 return float11Max;
196 }
197 else
198 {
199 if(float32Val < float32Minfloat11)
Alexis Hetu42528912019-01-16 14:58:33 -0500200 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500201 // The number is too small to be represented as a normalized float11
202 // Convert it to a denormalized value.
203 const unsigned int shift = (float32ExponentBias - float11ExponentBias) -
Ben Clayton595d9112019-12-17 20:37:57 +0000204 (float32Val >> float32ExponentFirstBit);
Nicolas Capens157ba262019-12-10 17:49:14 -0500205 float32Val =
Ben Clayton595d9112019-12-17 20:37:57 +0000206 ((1 << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift;
Nicolas Capens157ba262019-12-10 17:49:14 -0500207 }
208 else
209 {
210 // Rebias the exponent to represent the value as a normalized float11
211 float32Val += 0xC8000000;
212 }
213
214 return ((float32Val + 0xFFFF + ((float32Val >> 17) & 1)) >> 17) & float11BitMask;
215 }
216 }
217
218 inline unsigned short float32ToFloat10(float fp32)
219 {
220 const unsigned int float32MantissaMask = 0x7FFFFF;
221 const unsigned int float32ExponentMask = 0x7F800000;
222 const unsigned int float32SignMask = 0x80000000;
223 const unsigned int float32ValueMask = ~float32SignMask;
224 const unsigned int float32ExponentFirstBit = 23;
225 const unsigned int float32ExponentBias = 127;
226
227 const unsigned short float10Max = 0x3DF;
228 const unsigned short float10MantissaMask = 0x1F;
229 const unsigned short float10ExponentMask = 0x3E0;
230 const unsigned short float10BitMask = 0x3FF;
231 const unsigned int float10ExponentBias = 14;
232
233 const unsigned int float32Maxfloat10 = 0x477C0000;
234 const unsigned int float32Minfloat10 = 0x38800000;
235
Ben Clayton595d9112019-12-17 20:37:57 +0000236 const unsigned int float32Bits = *reinterpret_cast<unsigned int *>(&fp32);
Nicolas Capens157ba262019-12-10 17:49:14 -0500237 const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;
238
239 unsigned int float32Val = float32Bits & float32ValueMask;
240
241 if((float32Val & float32ExponentMask) == float32ExponentMask)
242 {
243 // INF or NAN
244 if((float32Val & float32MantissaMask) != 0)
245 {
246 return float10ExponentMask |
Ben Clayton595d9112019-12-17 20:37:57 +0000247 (((float32Val >> 18) | (float32Val >> 13) | (float32Val >> 3) | (float32Val)) &
248 float10MantissaMask);
Alexis Hetu42528912019-01-16 14:58:33 -0500249 }
250 else if(float32Sign)
251 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500252 // -INF is clamped to 0 since float11 is positive only
Alexis Hetu42528912019-01-16 14:58:33 -0500253 return 0;
254 }
Alexis Hetu42528912019-01-16 14:58:33 -0500255 else
256 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500257 return float10ExponentMask;
Alexis Hetu42528912019-01-16 14:58:33 -0500258 }
259 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500260 else if(float32Sign)
Alexis Hetu42528912019-01-16 14:58:33 -0500261 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500262 // float10 is positive only, so clamp to zero
263 return 0;
Alexis Hetu42528912019-01-16 14:58:33 -0500264 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500265 else if(float32Val > float32Maxfloat10)
Alexis Hetu42528912019-01-16 14:58:33 -0500266 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500267 // The number is too large to be represented as a float11, set to max
268 return float10Max;
Alexis Hetu42528912019-01-16 14:58:33 -0500269 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500270 else
Nicolas Capens68a82382018-10-02 13:16:55 -0400271 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500272 if(float32Val < float32Minfloat10)
273 {
274 // The number is too small to be represented as a normalized float11
275 // Convert it to a denormalized value.
276 const unsigned int shift = (float32ExponentBias - float10ExponentBias) -
Ben Clayton595d9112019-12-17 20:37:57 +0000277 (float32Val >> float32ExponentFirstBit);
Nicolas Capens157ba262019-12-10 17:49:14 -0500278 float32Val =
Ben Clayton595d9112019-12-17 20:37:57 +0000279 ((1 << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift;
Nicolas Capens157ba262019-12-10 17:49:14 -0500280 }
281 else
282 {
283 // Rebias the exponent to represent the value as a normalized float11
284 float32Val += 0xC8000000;
285 }
286
287 return ((float32Val + 0x1FFFF + ((float32Val >> 18) & 1)) >> 18) & float10BitMask;
Nicolas Capens68a82382018-10-02 13:16:55 -0400288 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500289 }
290
291public:
292 R11G11B10F(float rgb[3])
293 {
294 R = float32ToFloat11(rgb[0]);
295 G = float32ToFloat11(rgb[1]);
296 B = float32ToFloat10(rgb[2]);
297 }
298
299 operator unsigned int() const
300 {
Ben Clayton595d9112019-12-17 20:37:57 +0000301 return *reinterpret_cast<const unsigned int *>(this);
Nicolas Capens157ba262019-12-10 17:49:14 -0500302 }
303
304 void toRGB16F(half rgb[3]) const
305 {
306 rgb[0] = float11ToFloat16(R);
307 rgb[1] = float11ToFloat16(G);
308 rgb[2] = float10ToFloat16(B);
309 }
310};
311
312} // namespace sw
Nicolas Capens68a82382018-10-02 13:16:55 -0400313
Ben Clayton595d9112019-12-17 20:37:57 +0000314#endif // sw_Half_hpp