1 /* 2 * License for Berkeley SoftFloat Release 3e 3 * 4 * John R. Hauser 5 * 2018 January 20 6 * 7 * The following applies to the whole of SoftFloat Release 3e as well as to 8 * each source file individually. 9 * 10 * Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 The Regents of the 11 * University of California. All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions are met: 15 * 16 * 1. Redistributions of source code must retain the above copyright notice, 17 * this list of conditions, and the following disclaimer. 18 * 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions, and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 23 * 3. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY 28 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 29 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE 30 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY 31 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 32 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 33 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 34 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 35 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 36 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 37 * 38 * 39 * The functions listed in this file are modified versions of the ones 40 * from the Berkeley SoftFloat 3e Library. 41 * 42 * Their implementation correctness has been checked with the Berkeley 43 * TestFloat Release 3e tool for x86_64. 44 */ 45 46 #include "rounding.h" 47 #include "bitscan.h" 48 #include "softfloat.h" 49 50 #if defined(BIG_ENDIAN) 51 #define word_incr -1 52 #define index_word(total, n) ((total) - 1 - (n)) 53 #define index_word_hi(total) 0 54 #define index_word_lo(total) ((total) - 1) 55 #define index_multiword_hi(total, n) 0 56 #define index_multiword_lo(total, n) ((total) - (n)) 57 #define index_multiword_hi_but(total, n) 0 58 #define index_multiword_lo_but(total, n) (n) 59 #else 60 #define word_incr 1 61 #define index_word(total, n) (n) 62 #define index_word_hi(total) ((total) - 1) 63 #define index_word_lo(total) 0 64 #define index_multiword_hi(total, n) ((total) - (n)) 65 #define index_multiword_lo(total, n) 0 66 #define index_multiword_hi_but(total, n) (n) 67 #define index_multiword_lo_but(total, n) 0 68 #endif 69 70 typedef union { double f; int64_t i; uint64_t u; } di_type; 71 typedef union { float f; int32_t i; uint32_t u; } fi_type; 72 73 const uint8_t count_leading_zeros8[256] = { 74 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 75 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 76 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 77 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 78 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 79 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 80 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 81 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 82 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 83 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 84 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 86 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 87 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 89 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 90 }; 91 92 /** 93 * \brief Shifts 'a' right by the number of bits given in 'dist', which must be in 94 * the range 1 to 63. If any nonzero bits are shifted off, they are "jammed" 95 * into the least-significant bit of the shifted value by setting the 96 * least-significant bit to 1. This shifted-and-jammed value is returned. 97 * 98 * From softfloat_shortShiftRightJam64() 99 */ 100 static inline _mesa_short_shift_right_jam64(uint64_t a,uint8_t dist)101 uint64_t _mesa_short_shift_right_jam64(uint64_t a, uint8_t dist) 102 { 103 return a >> dist | ((a & (((uint64_t) 1 << dist) - 1)) != 0); 104 } 105 106 /** 107 * \brief Shifts 'a' right by the number of bits given in 'dist', which must not 108 * be zero. If any nonzero bits are shifted off, they are "jammed" into the 109 * least-significant bit of the shifted value by setting the least-significant 110 * bit to 1. This shifted-and-jammed value is returned. 111 * The value of 'dist' can be arbitrarily large. In particular, if 'dist' is 112 * greater than 64, the result will be either 0 or 1, depending on whether 'a' 113 * is zero or nonzero. 114 * 115 * From softfloat_shiftRightJam64() 116 */ 117 static inline _mesa_shift_right_jam64(uint64_t a,uint32_t dist)118 uint64_t _mesa_shift_right_jam64(uint64_t a, uint32_t dist) 119 { 120 return 121 (dist < 63) ? a >> dist | ((uint64_t) (a << (-dist & 63)) != 0) : (a != 0); 122 } 123 124 /** 125 * \brief Shifts 'a' right by the number of bits given in 'dist', which must not be 126 * zero. If any nonzero bits are shifted off, they are "jammed" into the 127 * least-significant bit of the shifted value by setting the least-significant 128 * bit to 1. This shifted-and-jammed value is returned. 129 * The value of 'dist' can be arbitrarily large. In particular, if 'dist' is 130 * greater than 32, the result will be either 0 or 1, depending on whether 'a' 131 * is zero or nonzero. 132 * 133 * From softfloat_shiftRightJam32() 134 */ 135 static inline _mesa_shift_right_jam32(uint32_t a,uint16_t dist)136 uint32_t _mesa_shift_right_jam32(uint32_t a, uint16_t dist) 137 { 138 return 139 (dist < 31) ? a >> dist | ((uint32_t) (a << (-dist & 31)) != 0) : (a != 0); 140 } 141 142 /** 143 * \brief Extracted from softfloat_roundPackToF64() 144 */ 145 static inline _mesa_roundtozero_f64(int64_t s,int64_t e,int64_t m)146 double _mesa_roundtozero_f64(int64_t s, int64_t e, int64_t m) 147 { 148 di_type result; 149 150 if ((uint64_t) e >= 0x7fd) { 151 if (e < 0) { 152 m = _mesa_shift_right_jam64(m, -e); 153 e = 0; 154 } else if ((e > 0x7fd) || (0x8000000000000000 <= m)) { 155 e = 0x7ff; 156 m = 0; 157 result.u = (s << 63) + (e << 52) + m; 158 result.u -= 1; 159 return result.f; 160 } 161 } 162 163 m >>= 10; 164 if (m == 0) 165 e = 0; 166 167 result.u = (s << 63) + (e << 52) + m; 168 return result.f; 169 } 170 171 /** 172 * \brief Extracted from softfloat_roundPackToF32() 173 */ 174 static inline _mesa_round_f32(int32_t s,int32_t e,int32_t m,bool rtz)175 float _mesa_round_f32(int32_t s, int32_t e, int32_t m, bool rtz) 176 { 177 fi_type result; 178 uint8_t round_increment = rtz ? 0 : 0x40; 179 180 if ((uint32_t) e >= 0xfd) { 181 if (e < 0) { 182 m = _mesa_shift_right_jam32(m, -e); 183 e = 0; 184 } else if ((e > 0xfd) || (0x80000000 <= m + round_increment)) { 185 e = 0xff; 186 m = 0; 187 result.u = (s << 31) + (e << 23) + m; 188 result.u -= !round_increment; 189 return result.f; 190 } 191 } 192 193 uint8_t round_bits; 194 round_bits = m & 0x7f; 195 m = ((uint32_t) m + round_increment) >> 7; 196 m &= ~(uint32_t) (! (round_bits ^ 0x40) & !rtz); 197 if (m == 0) 198 e = 0; 199 200 result.u = (s << 31) + (e << 23) + m; 201 return result.f; 202 } 203 204 /** 205 * \brief Extracted from softfloat_roundPackToF16() 206 */ 207 static inline _mesa_roundtozero_f16(int16_t s,int16_t e,int16_t m)208 uint16_t _mesa_roundtozero_f16(int16_t s, int16_t e, int16_t m) 209 { 210 if ((uint16_t) e >= 0x1d) { 211 if (e < 0) { 212 m = _mesa_shift_right_jam32(m, -e); 213 e = 0; 214 } else if (e > 0x1d) { 215 e = 0x1f; 216 m = 0; 217 return (s << 15) + (e << 10) + m - 1; 218 } 219 } 220 221 m >>= 4; 222 if (m == 0) 223 e = 0; 224 225 return (s << 15) + (e << 10) + m; 226 } 227 228 /** 229 * \brief Shifts the N-bit unsigned integer pointed to by 'a' left by the number of 230 * bits given in 'dist', where N = 'size_words' * 32. The value of 'dist' 231 * must be in the range 1 to 31. Any nonzero bits shifted off are lost. The 232 * shifted N-bit result is stored at the location pointed to by 'm_out'. Each 233 * of 'a' and 'm_out' points to a 'size_words'-long array of 32-bit elements 234 * that concatenate in the platform's normal endian order to form an N-bit 235 * integer. 236 * 237 * From softfloat_shortShiftLeftM() 238 */ 239 static inline void _mesa_short_shift_left_m(uint8_t size_words,const uint32_t * a,uint8_t dist,uint32_t * m_out)240 _mesa_short_shift_left_m(uint8_t size_words, const uint32_t *a, uint8_t dist, uint32_t *m_out) 241 { 242 uint8_t neg_dist; 243 unsigned index, last_index; 244 uint32_t part_word, a_word; 245 246 neg_dist = -dist; 247 index = index_word_hi(size_words); 248 last_index = index_word_lo(size_words); 249 part_word = a[index] << dist; 250 while (index != last_index) { 251 a_word = a[index - word_incr]; 252 m_out[index] = part_word | a_word >> (neg_dist & 31); 253 index -= word_incr; 254 part_word = a_word << dist; 255 } 256 m_out[index] = part_word; 257 } 258 259 /** 260 * \brief Shifts the N-bit unsigned integer pointed to by 'a' left by the number of 261 * bits given in 'dist', where N = 'size_words' * 32. The value of 'dist' 262 * must not be zero. Any nonzero bits shifted off are lost. The shifted 263 * N-bit result is stored at the location pointed to by 'm_out'. Each of 'a' 264 * and 'm_out' points to a 'size_words'-long array of 32-bit elements that 265 * concatenate in the platform's normal endian order to form an N-bit 266 * integer. The value of 'dist' can be arbitrarily large. In particular, if 267 * 'dist' is greater than N, the stored result will be 0. 268 * 269 * From softfloat_shiftLeftM() 270 */ 271 static inline void _mesa_shift_left_m(uint8_t size_words,const uint32_t * a,uint32_t dist,uint32_t * m_out)272 _mesa_shift_left_m(uint8_t size_words, const uint32_t *a, uint32_t dist, uint32_t *m_out) 273 { 274 uint32_t word_dist; 275 uint8_t inner_dist; 276 uint8_t i; 277 278 word_dist = dist >> 5; 279 if (word_dist < size_words) { 280 a += index_multiword_lo_but(size_words, word_dist); 281 inner_dist = dist & 31; 282 if (inner_dist) { 283 _mesa_short_shift_left_m(size_words - word_dist, a, inner_dist, 284 m_out + index_multiword_hi_but(size_words, word_dist)); 285 if (!word_dist) 286 return; 287 } else { 288 uint32_t *dest = m_out + index_word_hi(size_words); 289 a += index_word_hi(size_words - word_dist); 290 for (i = size_words - word_dist; i; --i) { 291 *dest = *a; 292 a -= word_incr; 293 dest -= word_incr; 294 } 295 } 296 m_out += index_multiword_lo(size_words, word_dist); 297 } else { 298 word_dist = size_words; 299 } 300 do { 301 *m_out++ = 0; 302 --word_dist; 303 } while (word_dist); 304 } 305 306 /** 307 * \brief Shifts the N-bit unsigned integer pointed to by 'a' right by the number of 308 * bits given in 'dist', where N = 'size_words' * 32. The value of 'dist' 309 * must be in the range 1 to 31. Any nonzero bits shifted off are lost. The 310 * shifted N-bit result is stored at the location pointed to by 'm_out'. Each 311 * of 'a' and 'm_out' points to a 'size_words'-long array of 32-bit elements 312 * that concatenate in the platform's normal endian order to form an N-bit 313 * integer. 314 * 315 * From softfloat_shortShiftRightM() 316 */ 317 static inline void _mesa_short_shift_right_m(uint8_t size_words,const uint32_t * a,uint8_t dist,uint32_t * m_out)318 _mesa_short_shift_right_m(uint8_t size_words, const uint32_t *a, uint8_t dist, uint32_t *m_out) 319 { 320 uint8_t neg_dist; 321 unsigned index, last_index; 322 uint32_t part_word, a_word; 323 324 neg_dist = -dist; 325 index = index_word_lo(size_words); 326 last_index = index_word_hi(size_words); 327 part_word = a[index] >> dist; 328 while (index != last_index) { 329 a_word = a[index + word_incr]; 330 m_out[index] = a_word << (neg_dist & 31) | part_word; 331 index += word_incr; 332 part_word = a_word >> dist; 333 } 334 m_out[index] = part_word; 335 } 336 337 /** 338 * \brief Shifts the N-bit unsigned integer pointed to by 'a' right by the number of 339 * bits given in 'dist', where N = 'size_words' * 32. The value of 'dist' 340 * must be in the range 1 to 31. If any nonzero bits are shifted off, they 341 * are "jammed" into the least-significant bit of the shifted value by setting 342 * the least-significant bit to 1. This shifted-and-jammed N-bit result is 343 * stored at the location pointed to by 'm_out'. Each of 'a' and 'm_out' 344 * points to a 'size_words'-long array of 32-bit elements that concatenate in 345 * the platform's normal endian order to form an N-bit integer. 346 * 347 * 348 * From softfloat_shortShiftRightJamM() 349 */ 350 static inline void _mesa_short_shift_right_jam_m(uint8_t size_words,const uint32_t * a,uint8_t dist,uint32_t * m_out)351 _mesa_short_shift_right_jam_m(uint8_t size_words, const uint32_t *a, uint8_t dist, uint32_t *m_out) 352 { 353 uint8_t neg_dist; 354 unsigned index, last_index; 355 uint64_t part_word, a_word; 356 357 neg_dist = -dist; 358 index = index_word_lo(size_words); 359 last_index = index_word_hi(size_words); 360 a_word = a[index]; 361 part_word = a_word >> dist; 362 if (part_word << dist != a_word ) 363 part_word |= 1; 364 while (index != last_index) { 365 a_word = a[index + word_incr]; 366 m_out[index] = a_word << (neg_dist & 31) | part_word; 367 index += word_incr; 368 part_word = a_word >> dist; 369 } 370 m_out[index] = part_word; 371 } 372 373 /** 374 * \brief Shifts the N-bit unsigned integer pointed to by 'a' right by the number of 375 * bits given in 'dist', where N = 'size_words' * 32. The value of 'dist' 376 * must not be zero. If any nonzero bits are shifted off, they are "jammed" 377 * into the least-significant bit of the shifted value by setting the 378 * least-significant bit to 1. This shifted-and-jammed N-bit result is stored 379 * at the location pointed to by 'm_out'. Each of 'a' and 'm_out' points to a 380 * 'size_words'-long array of 32-bit elements that concatenate in the 381 * platform's normal endian order to form an N-bit integer. The value of 382 * 'dist' can be arbitrarily large. In particular, if 'dist' is greater than 383 * N, the stored result will be either 0 or 1, depending on whether the 384 * original N bits are all zeros. 385 * 386 * From softfloat_shiftRightJamM() 387 */ 388 static inline void _mesa_shift_right_jam_m(uint8_t size_words,const uint32_t * a,uint32_t dist,uint32_t * m_out)389 _mesa_shift_right_jam_m(uint8_t size_words, const uint32_t *a, uint32_t dist, uint32_t *m_out) 390 { 391 uint32_t word_jam, word_dist, *tmp; 392 uint8_t i, inner_dist; 393 394 word_jam = 0; 395 word_dist = dist >> 5; 396 tmp = NULL; 397 if (word_dist) { 398 if (size_words < word_dist) 399 word_dist = size_words; 400 tmp = (uint32_t *) (a + index_multiword_lo(size_words, word_dist)); 401 i = word_dist; 402 do { 403 word_jam = *tmp++; 404 if (word_jam) 405 break; 406 --i; 407 } while (i); 408 tmp = m_out; 409 } 410 if (word_dist < size_words) { 411 a += index_multiword_hi_but(size_words, word_dist); 412 inner_dist = dist & 31; 413 if (inner_dist) { 414 _mesa_short_shift_right_jam_m(size_words - word_dist, a, inner_dist, 415 m_out + index_multiword_lo_but(size_words, word_dist)); 416 if (!word_dist) { 417 if (word_jam) 418 m_out[index_word_lo(size_words)] |= 1; 419 return; 420 } 421 } else { 422 a += index_word_lo(size_words - word_dist); 423 tmp = m_out + index_word_lo(size_words); 424 for (i = size_words - word_dist; i; --i) { 425 *tmp = *a; 426 a += word_incr; 427 tmp += word_incr; 428 } 429 } 430 tmp = m_out + index_multiword_hi(size_words, word_dist); 431 } 432 if (tmp) { 433 do { 434 *tmp++ = 0; 435 --word_dist; 436 } while (word_dist); 437 } 438 if (word_jam) 439 m_out[index_word_lo(size_words)] |= 1; 440 } 441 442 /** 443 * \brief Calculate a + b but rounding to zero. 444 * 445 * Notice that this mainly differs from the original Berkeley SoftFloat 3e 446 * implementation in that we don't really treat NaNs, Zeroes nor the 447 * signalling flags. Any NaN is good for us and the sign of the Zero is not 448 * important. 449 * 450 * From f64_add() 451 */ 452 double _mesa_double_add_rtz(double a,double b)453 _mesa_double_add_rtz(double a, double b) 454 { 455 const di_type a_di = {a}; 456 uint64_t a_flt_m = a_di.u & 0x0fffffffffffff; 457 uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff; 458 uint64_t a_flt_s = (a_di.u >> 63) & 0x1; 459 const di_type b_di = {b}; 460 uint64_t b_flt_m = b_di.u & 0x0fffffffffffff; 461 uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff; 462 uint64_t b_flt_s = (b_di.u >> 63) & 0x1; 463 int64_t s, e, m = 0; 464 465 s = a_flt_s; 466 467 const int64_t exp_diff = a_flt_e - b_flt_e; 468 469 /* Handle special cases */ 470 471 if (a_flt_s != b_flt_s) { 472 return _mesa_double_sub_rtz(a, -b); 473 } else if ((a_flt_e == 0) && (a_flt_m == 0)) { 474 /* 'a' is zero, return 'b' */ 475 return b; 476 } else if ((b_flt_e == 0) && (b_flt_m == 0)) { 477 /* 'b' is zero, return 'a' */ 478 return a; 479 } else if (a_flt_e == 0x7ff && a_flt_m != 0) { 480 /* 'a' is a NaN, return NaN */ 481 return a; 482 } else if (b_flt_e == 0x7ff && b_flt_m != 0) { 483 /* 'b' is a NaN, return NaN */ 484 return b; 485 } else if (a_flt_e == 0x7ff && a_flt_m == 0) { 486 /* Inf + x = Inf */ 487 return a; 488 } else if (b_flt_e == 0x7ff && b_flt_m == 0) { 489 /* x + Inf = Inf */ 490 return b; 491 } else if (exp_diff == 0 && a_flt_e == 0) { 492 di_type result_di; 493 result_di.u = a_di.u + b_flt_m; 494 return result_di.f; 495 } else if (exp_diff == 0) { 496 e = a_flt_e; 497 m = 0x0020000000000000 + a_flt_m + b_flt_m; 498 m <<= 9; 499 } else if (exp_diff < 0) { 500 a_flt_m <<= 9; 501 b_flt_m <<= 9; 502 e = b_flt_e; 503 504 if (a_flt_e != 0) 505 a_flt_m += 0x2000000000000000; 506 else 507 a_flt_m <<= 1; 508 509 a_flt_m = _mesa_shift_right_jam64(a_flt_m, -exp_diff); 510 m = 0x2000000000000000 + a_flt_m + b_flt_m; 511 if (m < 0x4000000000000000) { 512 --e; 513 m <<= 1; 514 } 515 } else { 516 a_flt_m <<= 9; 517 b_flt_m <<= 9; 518 e = a_flt_e; 519 520 if (b_flt_e != 0) 521 b_flt_m += 0x2000000000000000; 522 else 523 b_flt_m <<= 1; 524 525 b_flt_m = _mesa_shift_right_jam64(b_flt_m, exp_diff); 526 m = 0x2000000000000000 + a_flt_m + b_flt_m; 527 if (m < 0x4000000000000000) { 528 --e; 529 m <<= 1; 530 } 531 } 532 533 return _mesa_roundtozero_f64(s, e, m); 534 } 535 536 /** 537 * \brief Returns the number of leading 0 bits before the most-significant 1 bit of 538 * 'a'. If 'a' is zero, 64 is returned. 539 */ 540 static inline unsigned _mesa_count_leading_zeros64(uint64_t a)541 _mesa_count_leading_zeros64(uint64_t a) 542 { 543 return 64 - util_last_bit64(a); 544 } 545 546 /** 547 * \brief Returns the number of leading 0 bits before the most-significant 1 bit of 548 * 'a'. If 'a' is zero, 32 is returned. 549 */ 550 static inline unsigned _mesa_count_leading_zeros32(uint32_t a)551 _mesa_count_leading_zeros32(uint32_t a) 552 { 553 return 32 - util_last_bit(a); 554 } 555 556 static inline double _mesa_norm_round_pack_f64(int64_t s,int64_t e,int64_t m)557 _mesa_norm_round_pack_f64(int64_t s, int64_t e, int64_t m) 558 { 559 int8_t shift_dist; 560 561 shift_dist = _mesa_count_leading_zeros64(m) - 1; 562 e -= shift_dist; 563 if ((10 <= shift_dist) && ((unsigned) e < 0x7fd)) { 564 di_type result; 565 result.u = (s << 63) + ((m ? e : 0) << 52) + (m << (shift_dist - 10)); 566 return result.f; 567 } else { 568 return _mesa_roundtozero_f64(s, e, m << shift_dist); 569 } 570 } 571 572 /** 573 * \brief Replaces the N-bit unsigned integer pointed to by 'm_out' by the 574 * 2s-complement of itself, where N = 'size_words' * 32. Argument 'm_out' 575 * points to a 'size_words'-long array of 32-bit elements that concatenate in 576 * the platform's normal endian order to form an N-bit integer. 577 * 578 * From softfloat_negXM() 579 */ 580 static inline void _mesa_neg_x_m(uint8_t size_words,uint32_t * m_out)581 _mesa_neg_x_m(uint8_t size_words, uint32_t *m_out) 582 { 583 unsigned index, last_index; 584 uint8_t carry; 585 uint32_t word; 586 587 index = index_word_lo(size_words); 588 last_index = index_word_hi(size_words); 589 carry = 1; 590 for (;;) { 591 word = ~m_out[index] + carry; 592 m_out[index] = word; 593 if (index == last_index) 594 break; 595 index += word_incr; 596 if (word) 597 carry = 0; 598 } 599 } 600 601 /** 602 * \brief Adds the two N-bit integers pointed to by 'a' and 'b', where N = 603 * 'size_words' * 32. The addition is modulo 2^N, so any carry out is 604 * lost. The N-bit sum is stored at the location pointed to by 'm_out'. Each 605 * of 'a', 'b', and 'm_out' points to a 'size_words'-long array of 32-bit 606 * elements that concatenate in the platform's normal endian order to form an 607 * N-bit integer. 608 * 609 * From softfloat_addM() 610 */ 611 static inline void _mesa_add_m(uint8_t size_words,const uint32_t * a,const uint32_t * b,uint32_t * m_out)612 _mesa_add_m(uint8_t size_words, const uint32_t *a, const uint32_t *b, uint32_t *m_out) 613 { 614 unsigned index, last_index; 615 uint8_t carry; 616 uint32_t a_word, word; 617 618 index = index_word_lo(size_words); 619 last_index = index_word_hi(size_words); 620 carry = 0; 621 for (;;) { 622 a_word = a[index]; 623 word = a_word + b[index] + carry; 624 m_out[index] = word; 625 if (index == last_index) 626 break; 627 if (word != a_word) 628 carry = (word < a_word); 629 index += word_incr; 630 } 631 } 632 633 /** 634 * \brief Subtracts the two N-bit integers pointed to by 'a' and 'b', where N = 635 * 'size_words' * 32. The subtraction is modulo 2^N, so any borrow out (carry 636 * out) is lost. The N-bit difference is stored at the location pointed to by 637 * 'm_out'. Each of 'a', 'b', and 'm_out' points to a 'size_words'-long array 638 * of 32-bit elements that concatenate in the platform's normal endian order 639 * to form an N-bit integer. 640 * 641 * From softfloat_subM() 642 */ 643 static inline void _mesa_sub_m(uint8_t size_words,const uint32_t * a,const uint32_t * b,uint32_t * m_out)644 _mesa_sub_m(uint8_t size_words, const uint32_t *a, const uint32_t *b, uint32_t *m_out) 645 { 646 unsigned index, last_index; 647 uint8_t borrow; 648 uint32_t a_word, b_word; 649 650 index = index_word_lo(size_words); 651 last_index = index_word_hi(size_words); 652 borrow = 0; 653 for (;;) { 654 a_word = a[index]; 655 b_word = b[index]; 656 m_out[index] = a_word - b_word - borrow; 657 if (index == last_index) 658 break; 659 borrow = borrow ? (a_word <= b_word) : (a_word < b_word); 660 index += word_incr; 661 } 662 } 663 664 /* Calculate a - b but rounding to zero. 665 * 666 * Notice that this mainly differs from the original Berkeley SoftFloat 3e 667 * implementation in that we don't really treat NaNs, Zeroes nor the 668 * signalling flags. Any NaN is good for us and the sign of the Zero is not 669 * important. 670 * 671 * From f64_sub() 672 */ 673 double _mesa_double_sub_rtz(double a,double b)674 _mesa_double_sub_rtz(double a, double b) 675 { 676 const di_type a_di = {a}; 677 uint64_t a_flt_m = a_di.u & 0x0fffffffffffff; 678 uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff; 679 uint64_t a_flt_s = (a_di.u >> 63) & 0x1; 680 const di_type b_di = {b}; 681 uint64_t b_flt_m = b_di.u & 0x0fffffffffffff; 682 uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff; 683 uint64_t b_flt_s = (b_di.u >> 63) & 0x1; 684 int64_t s, e, m = 0; 685 int64_t m_diff = 0; 686 unsigned shift_dist = 0; 687 688 s = a_flt_s; 689 690 const int64_t exp_diff = a_flt_e - b_flt_e; 691 692 /* Handle special cases */ 693 694 if (a_flt_s != b_flt_s) { 695 return _mesa_double_add_rtz(a, -b); 696 } else if ((a_flt_e == 0) && (a_flt_m == 0)) { 697 /* 'a' is zero, return '-b' */ 698 return -b; 699 } else if ((b_flt_e == 0) && (b_flt_m == 0)) { 700 /* 'b' is zero, return 'a' */ 701 return a; 702 } else if (a_flt_e == 0x7ff && a_flt_m != 0) { 703 /* 'a' is a NaN, return NaN */ 704 return a; 705 } else if (b_flt_e == 0x7ff && b_flt_m != 0) { 706 /* 'b' is a NaN, return NaN */ 707 return b; 708 } else if (a_flt_e == 0x7ff && a_flt_m == 0) { 709 if (b_flt_e == 0x7ff && b_flt_m == 0) { 710 /* Inf - Inf = NaN */ 711 di_type result; 712 e = 0x7ff; 713 result.u = (s << 63) + (e << 52) + 0x1; 714 return result.f; 715 } 716 /* Inf - x = Inf */ 717 return a; 718 } else if (b_flt_e == 0x7ff && b_flt_m == 0) { 719 /* x - Inf = -Inf */ 720 return -b; 721 } else if (exp_diff == 0) { 722 m_diff = a_flt_m - b_flt_m; 723 724 if (m_diff == 0) 725 return 0; 726 if (a_flt_e) 727 --a_flt_e; 728 if (m_diff < 0) { 729 s = !s; 730 m_diff = -m_diff; 731 } 732 733 shift_dist = _mesa_count_leading_zeros64(m_diff) - 11; 734 e = a_flt_e - shift_dist; 735 if (e < 0) { 736 shift_dist = a_flt_e; 737 e = 0; 738 } 739 740 di_type result; 741 result.u = (s << 63) + (e << 52) + (m_diff << shift_dist); 742 return result.f; 743 } else if (exp_diff < 0) { 744 a_flt_m <<= 10; 745 b_flt_m <<= 10; 746 s = !s; 747 748 a_flt_m += (a_flt_e) ? 0x4000000000000000 : a_flt_m; 749 a_flt_m = _mesa_shift_right_jam64(a_flt_m, -exp_diff); 750 b_flt_m |= 0x4000000000000000; 751 e = b_flt_e; 752 m = b_flt_m - a_flt_m; 753 } else { 754 a_flt_m <<= 10; 755 b_flt_m <<= 10; 756 757 b_flt_m += (b_flt_e) ? 0x4000000000000000 : b_flt_m; 758 b_flt_m = _mesa_shift_right_jam64(b_flt_m, exp_diff); 759 a_flt_m |= 0x4000000000000000; 760 e = a_flt_e; 761 m = a_flt_m - b_flt_m; 762 } 763 764 return _mesa_norm_round_pack_f64(s, e - 1, m); 765 } 766 767 static inline void _mesa_norm_subnormal_mantissa_f64(uint64_t m,uint64_t * exp,uint64_t * m_out)768 _mesa_norm_subnormal_mantissa_f64(uint64_t m, uint64_t *exp, uint64_t *m_out) 769 { 770 int shift_dist; 771 772 shift_dist = _mesa_count_leading_zeros64(m) - 11; 773 *exp = 1 - shift_dist; 774 *m_out = m << shift_dist; 775 } 776 777 static inline void _mesa_norm_subnormal_mantissa_f32(uint32_t m,uint32_t * exp,uint32_t * m_out)778 _mesa_norm_subnormal_mantissa_f32(uint32_t m, uint32_t *exp, uint32_t *m_out) 779 { 780 int shift_dist; 781 782 shift_dist = _mesa_count_leading_zeros32(m) - 8; 783 *exp = 1 - shift_dist; 784 *m_out = m << shift_dist; 785 } 786 787 /** 788 * \brief Multiplies 'a' and 'b' and stores the 128-bit product at the location 789 * pointed to by 'zPtr'. Argument 'zPtr' points to an array of four 32-bit 790 * elements that concatenate in the platform's normal endian order to form a 791 * 128-bit integer. 792 * 793 * From softfloat_mul64To128M() 794 */ 795 static inline void _mesa_softfloat_mul_f64_to_f128_m(uint64_t a,uint64_t b,uint32_t * m_out)796 _mesa_softfloat_mul_f64_to_f128_m(uint64_t a, uint64_t b, uint32_t *m_out) 797 { 798 uint32_t a32, a0, b32, b0; 799 uint64_t z0, mid1, z64, mid; 800 801 a32 = a >> 32; 802 a0 = a; 803 b32 = b >> 32; 804 b0 = b; 805 z0 = (uint64_t) a0 * b0; 806 mid1 = (uint64_t) a32 * b0; 807 mid = mid1 + (uint64_t) a0 * b32; 808 z64 = (uint64_t) a32 * b32; 809 z64 += (uint64_t) (mid < mid1) << 32 | mid >> 32; 810 mid <<= 32; 811 z0 += mid; 812 m_out[index_word(4, 1)] = z0 >> 32; 813 m_out[index_word(4, 0)] = z0; 814 z64 += (z0 < mid); 815 m_out[index_word(4, 3)] = z64 >> 32; 816 m_out[index_word(4, 2)] = z64; 817 } 818 819 /* Calculate a * b but rounding to zero. 820 * 821 * Notice that this mainly differs from the original Berkeley SoftFloat 3e 822 * implementation in that we don't really treat NaNs, Zeroes nor the 823 * signalling flags. Any NaN is good for us and the sign of the Zero is not 824 * important. 825 * 826 * From f64_mul() 827 */ 828 double _mesa_double_mul_rtz(double a,double b)829 _mesa_double_mul_rtz(double a, double b) 830 { 831 const di_type a_di = {a}; 832 uint64_t a_flt_m = a_di.u & 0x0fffffffffffff; 833 uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff; 834 uint64_t a_flt_s = (a_di.u >> 63) & 0x1; 835 const di_type b_di = {b}; 836 uint64_t b_flt_m = b_di.u & 0x0fffffffffffff; 837 uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff; 838 uint64_t b_flt_s = (b_di.u >> 63) & 0x1; 839 int64_t s, e, m = 0; 840 841 s = a_flt_s ^ b_flt_s; 842 843 if (a_flt_e == 0x7ff) { 844 if (a_flt_m != 0) { 845 /* 'a' is a NaN, return NaN */ 846 return a; 847 } else if (b_flt_e == 0x7ff && b_flt_m != 0) { 848 /* 'b' is a NaN, return NaN */ 849 return b; 850 } 851 852 if (!(b_flt_e | b_flt_m)) { 853 /* Inf * 0 = NaN */ 854 di_type result; 855 e = 0x7ff; 856 result.u = (s << 63) + (e << 52) + 0x1; 857 return result.f; 858 } 859 /* Inf * x = Inf */ 860 di_type result; 861 e = 0x7ff; 862 result.u = (s << 63) + (e << 52) + 0; 863 return result.f; 864 } 865 866 if (b_flt_e == 0x7ff) { 867 if (b_flt_m != 0) { 868 /* 'b' is a NaN, return NaN */ 869 return b; 870 } 871 if (!(a_flt_e | a_flt_m)) { 872 /* 0 * Inf = NaN */ 873 di_type result; 874 e = 0x7ff; 875 result.u = (s << 63) + (e << 52) + 0x1; 876 return result.f; 877 } 878 /* x * Inf = Inf */ 879 di_type result; 880 e = 0x7ff; 881 result.u = (s << 63) + (e << 52) + 0; 882 return result.f; 883 } 884 885 if (a_flt_e == 0) { 886 if (a_flt_m == 0) { 887 /* 'a' is zero. Return zero */ 888 di_type result; 889 result.u = (s << 63) + 0; 890 return result.f; 891 } 892 _mesa_norm_subnormal_mantissa_f64(a_flt_m , &a_flt_e, &a_flt_m); 893 } 894 if (b_flt_e == 0) { 895 if (b_flt_m == 0) { 896 /* 'b' is zero. Return zero */ 897 di_type result; 898 result.u = (s << 63) + 0; 899 return result.f; 900 } 901 _mesa_norm_subnormal_mantissa_f64(b_flt_m , &b_flt_e, &b_flt_m); 902 } 903 904 e = a_flt_e + b_flt_e - 0x3ff; 905 a_flt_m = (a_flt_m | 0x0010000000000000) << 10; 906 b_flt_m = (b_flt_m | 0x0010000000000000) << 11; 907 908 uint32_t m_128[4]; 909 _mesa_softfloat_mul_f64_to_f128_m(a_flt_m, b_flt_m, m_128); 910 911 m = (uint64_t) m_128[index_word(4, 3)] << 32 | m_128[index_word(4, 2)]; 912 if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) 913 m |= 1; 914 915 if (m < 0x4000000000000000) { 916 --e; 917 m <<= 1; 918 } 919 920 return _mesa_roundtozero_f64(s, e, m); 921 } 922 923 924 /** 925 * \brief Calculate a * b + c but rounding to zero. 926 * 927 * Notice that this mainly differs from the original Berkeley SoftFloat 3e 928 * implementation in that we don't really treat NaNs, Zeroes nor the 929 * signalling flags. Any NaN is good for us and the sign of the Zero is not 930 * important. 931 * 932 * From f64_mulAdd() 933 */ 934 double _mesa_double_fma_rtz(double a,double b,double c)935 _mesa_double_fma_rtz(double a, double b, double c) 936 { 937 const di_type a_di = {a}; 938 uint64_t a_flt_m = a_di.u & 0x0fffffffffffff; 939 uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff; 940 uint64_t a_flt_s = (a_di.u >> 63) & 0x1; 941 const di_type b_di = {b}; 942 uint64_t b_flt_m = b_di.u & 0x0fffffffffffff; 943 uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff; 944 uint64_t b_flt_s = (b_di.u >> 63) & 0x1; 945 const di_type c_di = {c}; 946 uint64_t c_flt_m = c_di.u & 0x0fffffffffffff; 947 uint64_t c_flt_e = (c_di.u >> 52) & 0x7ff; 948 uint64_t c_flt_s = (c_di.u >> 63) & 0x1; 949 int64_t s, e, m = 0; 950 951 c_flt_s ^= 0; 952 s = a_flt_s ^ b_flt_s ^ 0; 953 954 if (a_flt_e == 0x7ff) { 955 if (a_flt_m != 0) { 956 /* 'a' is a NaN, return NaN */ 957 return a; 958 } else if (b_flt_e == 0x7ff && b_flt_m != 0) { 959 /* 'b' is a NaN, return NaN */ 960 return b; 961 } else if (c_flt_e == 0x7ff && c_flt_m != 0) { 962 /* 'c' is a NaN, return NaN */ 963 return c; 964 } 965 966 if (!(b_flt_e | b_flt_m)) { 967 /* Inf * 0 + y = NaN */ 968 di_type result; 969 e = 0x7ff; 970 result.u = (s << 63) + (e << 52) + 0x1; 971 return result.f; 972 } 973 974 if ((c_flt_e == 0x7ff && c_flt_m == 0) && (s != c_flt_s)) { 975 /* Inf * x - Inf = NaN */ 976 di_type result; 977 e = 0x7ff; 978 result.u = (s << 63) + (e << 52) + 0x1; 979 return result.f; 980 } 981 982 /* Inf * x + y = Inf */ 983 di_type result; 984 e = 0x7ff; 985 result.u = (s << 63) + (e << 52) + 0; 986 return result.f; 987 } 988 989 if (b_flt_e == 0x7ff) { 990 if (b_flt_m != 0) { 991 /* 'b' is a NaN, return NaN */ 992 return b; 993 } else if (c_flt_e == 0x7ff && c_flt_m != 0) { 994 /* 'c' is a NaN, return NaN */ 995 return c; 996 } 997 998 if (!(a_flt_e | a_flt_m)) { 999 /* 0 * Inf + y = NaN */ 1000 di_type result; 1001 e = 0x7ff; 1002 result.u = (s << 63) + (e << 52) + 0x1; 1003 return result.f; 1004 } 1005 1006 if ((c_flt_e == 0x7ff && c_flt_m == 0) && (s != c_flt_s)) { 1007 /* x * Inf - Inf = NaN */ 1008 di_type result; 1009 e = 0x7ff; 1010 result.u = (s << 63) + (e << 52) + 0x1; 1011 return result.f; 1012 } 1013 1014 /* x * Inf + y = Inf */ 1015 di_type result; 1016 e = 0x7ff; 1017 result.u = (s << 63) + (e << 52) + 0; 1018 return result.f; 1019 } 1020 1021 if (c_flt_e == 0x7ff) { 1022 if (c_flt_m != 0) { 1023 /* 'c' is a NaN, return NaN */ 1024 return c; 1025 } 1026 1027 /* x * y + Inf = Inf */ 1028 return c; 1029 } 1030 1031 if (a_flt_e == 0) { 1032 if (a_flt_m == 0) { 1033 /* 'a' is zero, return 'c' */ 1034 return c; 1035 } 1036 _mesa_norm_subnormal_mantissa_f64(a_flt_m , &a_flt_e, &a_flt_m); 1037 } 1038 1039 if (b_flt_e == 0) { 1040 if (b_flt_m == 0) { 1041 /* 'b' is zero, return 'c' */ 1042 return c; 1043 } 1044 _mesa_norm_subnormal_mantissa_f64(b_flt_m , &b_flt_e, &b_flt_m); 1045 } 1046 1047 e = a_flt_e + b_flt_e - 0x3fe; 1048 a_flt_m = (a_flt_m | 0x0010000000000000) << 10; 1049 b_flt_m = (b_flt_m | 0x0010000000000000) << 11; 1050 1051 uint32_t m_128[4]; 1052 _mesa_softfloat_mul_f64_to_f128_m(a_flt_m, b_flt_m, m_128); 1053 1054 m = (uint64_t) m_128[index_word(4, 3)] << 32 | m_128[index_word(4, 2)]; 1055 1056 int64_t shift_dist = 0; 1057 if (!(m & 0x4000000000000000)) { 1058 --e; 1059 shift_dist = -1; 1060 } 1061 1062 if (c_flt_e == 0) { 1063 if (c_flt_m == 0) { 1064 /* 'c' is zero, return 'a * b' */ 1065 if (shift_dist) 1066 m <<= 1; 1067 1068 if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) 1069 m |= 1; 1070 return _mesa_roundtozero_f64(s, e - 1, m); 1071 } 1072 _mesa_norm_subnormal_mantissa_f64(c_flt_m , &c_flt_e, &c_flt_m); 1073 } 1074 c_flt_m = (c_flt_m | 0x0010000000000000) << 10; 1075 1076 uint32_t c_flt_m_128[4]; 1077 int64_t exp_diff = e - c_flt_e; 1078 if (exp_diff < 0) { 1079 e = c_flt_e; 1080 if ((s == c_flt_s) || (exp_diff < -1)) { 1081 shift_dist -= exp_diff; 1082 if (shift_dist) { 1083 m = _mesa_shift_right_jam64(m, shift_dist); 1084 } 1085 } else { 1086 if (!shift_dist) { 1087 _mesa_short_shift_right_m(4, m_128, 1, m_128); 1088 } 1089 } 1090 } else { 1091 if (shift_dist) 1092 _mesa_add_m(4, m_128, m_128, m_128); 1093 if (!exp_diff) { 1094 m = (uint64_t) m_128[index_word(4, 3)] << 32 1095 | m_128[index_word(4, 2)]; 1096 } else { 1097 c_flt_m_128[index_word(4, 3)] = c_flt_m >> 32; 1098 c_flt_m_128[index_word(4, 2)] = c_flt_m; 1099 c_flt_m_128[index_word(4, 1)] = 0; 1100 c_flt_m_128[index_word(4, 0)] = 0; 1101 _mesa_shift_right_jam_m(4, c_flt_m_128, exp_diff, c_flt_m_128); 1102 } 1103 } 1104 1105 if (s == c_flt_s) { 1106 if (exp_diff <= 0) { 1107 m += c_flt_m; 1108 } else { 1109 _mesa_add_m(4, m_128, c_flt_m_128, m_128); 1110 m = (uint64_t) m_128[index_word(4, 3)] << 32 1111 | m_128[index_word(4, 2)]; 1112 } 1113 if (m & 0x8000000000000000) { 1114 e++; 1115 m = _mesa_short_shift_right_jam64(m, 1); 1116 } 1117 } else { 1118 if (exp_diff < 0) { 1119 s = c_flt_s; 1120 if (exp_diff < -1) { 1121 m = c_flt_m - m; 1122 if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) { 1123 m = (m - 1) | 1; 1124 } 1125 if (!(m & 0x4000000000000000)) { 1126 --e; 1127 m <<= 1; 1128 } 1129 return _mesa_roundtozero_f64(s, e - 1, m); 1130 } else { 1131 c_flt_m_128[index_word(4, 3)] = c_flt_m >> 32; 1132 c_flt_m_128[index_word(4, 2)] = c_flt_m; 1133 c_flt_m_128[index_word(4, 1)] = 0; 1134 c_flt_m_128[index_word(4, 0)] = 0; 1135 _mesa_sub_m(4, c_flt_m_128, m_128, m_128); 1136 } 1137 } else if (!exp_diff) { 1138 m -= c_flt_m; 1139 if (!m && !m_128[index_word(4, 1)] && !m_128[index_word(4, 0)]) { 1140 /* Return zero */ 1141 di_type result; 1142 result.u = (s << 63) + 0; 1143 return result.f; 1144 } 1145 m_128[index_word(4, 3)] = m >> 32; 1146 m_128[index_word(4, 2)] = m; 1147 if (m & 0x8000000000000000) { 1148 s = !s; 1149 _mesa_neg_x_m(4, m_128); 1150 } 1151 } else { 1152 _mesa_sub_m(4, m_128, c_flt_m_128, m_128); 1153 if (1 < exp_diff) { 1154 m = (uint64_t) m_128[index_word(4, 3)] << 32 1155 | m_128[index_word(4, 2)]; 1156 if (!(m & 0x4000000000000000)) { 1157 --e; 1158 m <<= 1; 1159 } 1160 if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) 1161 m |= 1; 1162 return _mesa_roundtozero_f64(s, e - 1, m); 1163 } 1164 } 1165 1166 shift_dist = 0; 1167 m = (uint64_t) m_128[index_word(4, 3)] << 32 1168 | m_128[index_word(4, 2)]; 1169 if (!m) { 1170 shift_dist = 64; 1171 m = (uint64_t) m_128[index_word(4, 1)] << 32 1172 | m_128[index_word(4, 0)]; 1173 } 1174 shift_dist += _mesa_count_leading_zeros64(m) - 1; 1175 if (shift_dist) { 1176 e -= shift_dist; 1177 _mesa_shift_left_m(4, m_128, shift_dist, m_128); 1178 m = (uint64_t) m_128[index_word(4, 3)] << 32 1179 | m_128[index_word(4, 2)]; 1180 } 1181 } 1182 1183 if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) 1184 m |= 1; 1185 return _mesa_roundtozero_f64(s, e - 1, m); 1186 } 1187 1188 1189 /** 1190 * \brief Calculate a * b + c but rounding to zero. 1191 * 1192 * Notice that this mainly differs from the original Berkeley SoftFloat 3e 1193 * implementation in that we don't really treat NaNs, Zeroes nor the 1194 * signalling flags. Any NaN is good for us and the sign of the Zero is not 1195 * important. 1196 * 1197 * From f32_mulAdd() 1198 */ 1199 float _mesa_float_fma_rtz(float a,float b,float c)1200 _mesa_float_fma_rtz(float a, float b, float c) 1201 { 1202 const fi_type a_fi = {a}; 1203 uint32_t a_flt_m = a_fi.u & 0x07fffff; 1204 uint32_t a_flt_e = (a_fi.u >> 23) & 0xff; 1205 uint32_t a_flt_s = (a_fi.u >> 31) & 0x1; 1206 const fi_type b_fi = {b}; 1207 uint32_t b_flt_m = b_fi.u & 0x07fffff; 1208 uint32_t b_flt_e = (b_fi.u >> 23) & 0xff; 1209 uint32_t b_flt_s = (b_fi.u >> 31) & 0x1; 1210 const fi_type c_fi = {c}; 1211 uint32_t c_flt_m = c_fi.u & 0x07fffff; 1212 uint32_t c_flt_e = (c_fi.u >> 23) & 0xff; 1213 uint32_t c_flt_s = (c_fi.u >> 31) & 0x1; 1214 int32_t s, e, m = 0; 1215 1216 c_flt_s ^= 0; 1217 s = a_flt_s ^ b_flt_s ^ 0; 1218 1219 if (a_flt_e == 0xff) { 1220 if (a_flt_m != 0) { 1221 /* 'a' is a NaN, return NaN */ 1222 return a; 1223 } else if (b_flt_e == 0xff && b_flt_m != 0) { 1224 /* 'b' is a NaN, return NaN */ 1225 return b; 1226 } else if (c_flt_e == 0xff && c_flt_m != 0) { 1227 /* 'c' is a NaN, return NaN */ 1228 return c; 1229 } 1230 1231 if (!(b_flt_e | b_flt_m)) { 1232 /* Inf * 0 + y = NaN */ 1233 fi_type result; 1234 e = 0xff; 1235 result.u = (s << 31) + (e << 23) + 0x1; 1236 return result.f; 1237 } 1238 1239 if ((c_flt_e == 0xff && c_flt_m == 0) && (s != c_flt_s)) { 1240 /* Inf * x - Inf = NaN */ 1241 fi_type result; 1242 e = 0xff; 1243 result.u = (s << 31) + (e << 23) + 0x1; 1244 return result.f; 1245 } 1246 1247 /* Inf * x + y = Inf */ 1248 fi_type result; 1249 e = 0xff; 1250 result.u = (s << 31) + (e << 23) + 0; 1251 return result.f; 1252 } 1253 1254 if (b_flt_e == 0xff) { 1255 if (b_flt_m != 0) { 1256 /* 'b' is a NaN, return NaN */ 1257 return b; 1258 } else if (c_flt_e == 0xff && c_flt_m != 0) { 1259 /* 'c' is a NaN, return NaN */ 1260 return c; 1261 } 1262 1263 if (!(a_flt_e | a_flt_m)) { 1264 /* 0 * Inf + y = NaN */ 1265 fi_type result; 1266 e = 0xff; 1267 result.u = (s << 31) + (e << 23) + 0x1; 1268 return result.f; 1269 } 1270 1271 if ((c_flt_e == 0xff && c_flt_m == 0) && (s != c_flt_s)) { 1272 /* x * Inf - Inf = NaN */ 1273 fi_type result; 1274 e = 0xff; 1275 result.u = (s << 31) + (e << 23) + 0x1; 1276 return result.f; 1277 } 1278 1279 /* x * Inf + y = Inf */ 1280 fi_type result; 1281 e = 0xff; 1282 result.u = (s << 31) + (e << 23) + 0; 1283 return result.f; 1284 } 1285 1286 if (c_flt_e == 0xff) { 1287 if (c_flt_m != 0) { 1288 /* 'c' is a NaN, return NaN */ 1289 return c; 1290 } 1291 1292 /* x * y + Inf = Inf */ 1293 return c; 1294 } 1295 1296 if (a_flt_e == 0) { 1297 if (a_flt_m == 0) { 1298 /* 'a' is zero, return 'c' */ 1299 return c; 1300 } 1301 _mesa_norm_subnormal_mantissa_f32(a_flt_m , &a_flt_e, &a_flt_m); 1302 } 1303 1304 if (b_flt_e == 0) { 1305 if (b_flt_m == 0) { 1306 /* 'b' is zero, return 'c' */ 1307 return c; 1308 } 1309 _mesa_norm_subnormal_mantissa_f32(b_flt_m , &b_flt_e, &b_flt_m); 1310 } 1311 1312 e = a_flt_e + b_flt_e - 0x7e; 1313 a_flt_m = (a_flt_m | 0x00800000) << 7; 1314 b_flt_m = (b_flt_m | 0x00800000) << 7; 1315 1316 uint64_t m_64 = (uint64_t) a_flt_m * b_flt_m; 1317 if (m_64 < 0x2000000000000000) { 1318 --e; 1319 m_64 <<= 1; 1320 } 1321 1322 if (c_flt_e == 0) { 1323 if (c_flt_m == 0) { 1324 /* 'c' is zero, return 'a * b' */ 1325 m = _mesa_short_shift_right_jam64(m_64, 31); 1326 return _mesa_round_f32(s, e - 1, m, true); 1327 } 1328 _mesa_norm_subnormal_mantissa_f32(c_flt_m , &c_flt_e, &c_flt_m); 1329 } 1330 c_flt_m = (c_flt_m | 0x00800000) << 6; 1331 1332 int16_t exp_diff = e - c_flt_e; 1333 if (s == c_flt_s) { 1334 if (exp_diff <= 0) { 1335 e = c_flt_e; 1336 m = c_flt_m + _mesa_shift_right_jam64(m_64, 32 - exp_diff); 1337 } else { 1338 m_64 += _mesa_shift_right_jam64((uint64_t) c_flt_m << 32, exp_diff); 1339 m = _mesa_short_shift_right_jam64(m_64, 32); 1340 } 1341 if (m < 0x40000000) { 1342 --e; 1343 m <<= 1; 1344 } 1345 } else { 1346 uint64_t c_flt_m_64 = (uint64_t) c_flt_m << 32; 1347 if (exp_diff < 0) { 1348 s = c_flt_s; 1349 e = c_flt_e; 1350 m_64 = c_flt_m_64 - _mesa_shift_right_jam64(m_64, -exp_diff); 1351 } else if (!exp_diff) { 1352 m_64 -= c_flt_m_64; 1353 if (!m_64) { 1354 /* Return zero */ 1355 fi_type result; 1356 result.u = (s << 31) + 0; 1357 return result.f; 1358 } 1359 if (m_64 & 0x8000000000000000) { 1360 s = !s; 1361 m_64 = -m_64; 1362 } 1363 } else { 1364 m_64 -= _mesa_shift_right_jam64(c_flt_m_64, exp_diff); 1365 } 1366 int8_t shift_dist = _mesa_count_leading_zeros64(m_64) - 1; 1367 e -= shift_dist; 1368 shift_dist -= 32; 1369 if (shift_dist < 0) { 1370 m = _mesa_short_shift_right_jam64(m_64, -shift_dist); 1371 } else { 1372 m = (uint32_t) m_64 << shift_dist; 1373 } 1374 } 1375 1376 return _mesa_round_f32(s, e, m, true); 1377 } 1378 1379 1380 /** 1381 * \brief Converts from 64bits to 32bits float and rounds according to 1382 * instructed. 1383 * 1384 * From f64_to_f32() 1385 */ 1386 float _mesa_double_to_f32(double val,bool rtz)1387 _mesa_double_to_f32(double val, bool rtz) 1388 { 1389 const di_type di = {val}; 1390 uint64_t flt_m = di.u & 0x0fffffffffffff; 1391 uint64_t flt_e = (di.u >> 52) & 0x7ff; 1392 uint64_t flt_s = (di.u >> 63) & 0x1; 1393 int32_t s, e, m = 0; 1394 1395 s = flt_s; 1396 1397 if (flt_e == 0x7ff) { 1398 if (flt_m != 0) { 1399 /* 'val' is a NaN, return NaN */ 1400 fi_type result; 1401 e = 0xff; 1402 m = 0x1; 1403 result.u = (s << 31) + (e << 23) + m; 1404 return result.f; 1405 } 1406 1407 /* 'val' is Inf, return Inf */ 1408 fi_type result; 1409 e = 0xff; 1410 result.u = (s << 31) + (e << 23) + m; 1411 return result.f; 1412 } 1413 1414 if (!(flt_e | flt_m)) { 1415 /* 'val' is zero, return zero */ 1416 fi_type result; 1417 e = 0; 1418 result.u = (s << 31) + (e << 23) + m; 1419 return result.f; 1420 } 1421 1422 m = _mesa_short_shift_right_jam64(flt_m, 22); 1423 if ( ! (flt_e | m) ) { 1424 /* 'val' is denorm, return zero */ 1425 fi_type result; 1426 e = 0; 1427 result.u = (s << 31) + (e << 23) + m; 1428 return result.f; 1429 } 1430 1431 return _mesa_round_f32(s, flt_e - 0x381, m | 0x40000000, rtz); 1432 } 1433 1434 1435 /** 1436 * \brief Converts from 32bits to 16bits float and rounds the result to zero. 1437 * 1438 * From f32_to_f16() 1439 */ 1440 uint16_t _mesa_float_to_half_rtz_slow(float val)1441 _mesa_float_to_half_rtz_slow(float val) 1442 { 1443 const fi_type fi = {val}; 1444 const uint32_t flt_m = fi.u & 0x7fffff; 1445 const uint32_t flt_e = (fi.u >> 23) & 0xff; 1446 const uint32_t flt_s = (fi.u >> 31) & 0x1; 1447 int16_t s, e, m = 0; 1448 1449 s = flt_s; 1450 1451 if (flt_e == 0xff) { 1452 if (flt_m != 0) { 1453 /* 'val' is a NaN, return NaN */ 1454 e = 0x1f; 1455 /* Retain the top bits of a NaN to make sure that the quiet/signaling 1456 * status stays the same. 1457 */ 1458 m = flt_m >> 13; 1459 if (!m) 1460 m = 1; 1461 return (s << 15) + (e << 10) + m; 1462 } 1463 1464 /* 'val' is Inf, return Inf */ 1465 e = 0x1f; 1466 return (s << 15) + (e << 10) + m; 1467 } 1468 1469 if (!(flt_e | flt_m)) { 1470 /* 'val' is zero, return zero */ 1471 e = 0; 1472 return (s << 15) + (e << 10) + m; 1473 } 1474 1475 m = flt_m >> 9 | ((flt_m & 0x1ff) != 0); 1476 if ( ! (flt_e | m) ) { 1477 /* 'val' is denorm, return zero */ 1478 e = 0; 1479 return (s << 15) + (e << 10) + m; 1480 } 1481 1482 return _mesa_roundtozero_f16(s, flt_e - 0x71, m | 0x4000); 1483 } 1484