/* alpha_fpi.c - Alpha IEEE floating point simulator

   Copyright (c) 2003-2006, Robert M Supnik

   Permission is hereby granted, free of charge, to any person obtaining a
   copy of this software and associated documentation files (the "Software"),
   to deal in the Software without restriction, including without limitation
   the rights to use, copy, modify, merge, publish, distribute, sublicense,
   and/or sell copies of the Software, and to permit persons to whom the
   Software is furnished to do so, subject to the following conditions:

   The above copyright notice and this permission notice shall be included in
   all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
   ROBERT M SUPNIK BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
   IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

   Except as contained in this notice, the name of Robert M Supnik shall not be
   used in advertising or otherwise to promote the sale, use or other dealings
   in this Software without prior written authorization from Robert M Supnik.

   This module contains the instruction simulators for

   - single precision floating point, S
   - double precision floating point, T

   Portions of this module (specifically, the convert floating to integer
   routine and the square root routine) are a derivative work from SoftFloat,
   written by John Hauser. SoftFloat includes the following license terms:

   Written by John R. Hauser. This work was made possible in part by the
   International Computer Science Institute, located at Suite 600, 1947 Center
   Street, Berkeley, California 94704. Funding was partially provided by the
   National Science Foundation under grant MIP-9311980. The original version
   of this code was written as part of a project to build a fixed-point vector
   processor in collaboration with the University of California at Berkeley,
   overseen by Profs. Nelson Morgan and John Wawrzynek. More information
   is available through the Web page 'http://www.cs.berkeley.edu/~jhauser/
   arithmetic/SoftFloat.html'.

   THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
   been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
   RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
   AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
   COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
   EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
   INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
   OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.

   Derivative works are acceptable, even for commercial purposes, so long as
   (1) the source code for the derivative work includes prominent notice that
   the work is derivative, and (2) the source code includes prominent notice with
   these four paragraphs for those parts of this code that are retained.
*/

#include "alpha_defs.h"

#define UFT_ZERO        0                               /* unpacked: zero */
#define UFT_FIN         1                               /* finite */
#define UFT_DENORM      2                               /* denormal */
#define UFT_INF         3                               /* infinity */
#define UFT_NAN         4                               /* not a number */

#define Q_FINITE(x)     ((x) <= UFT_FIN)                /* finite */
#define Q_SUI(x)        (((x) & I_FTRP) == I_FTRP_SVI)

/* Register format constants */

#define QNAN            0x0008000000000000              /* quiet NaN flag */
#define CQNAN           0xFFF8000000000000              /* canonical quiet NaN */
#define FPZERO          0x0000000000000000              /* plus zero (fp) */
#define FMZERO          0x8000000000000000              /* minus zero (fp) */
#define FPINF           0x7FF0000000000000              /* plus infinity (fp) */
#define FMINF           0xFFF0000000000000              /* minus infinity (fp) */
#define FPMAX           0x7FEFFFFFFFFFFFFF              /* plus MAX (fp) */
#define FMMAX           0xFFEFFFFFFFFFFFFF              /* minus MAX (fp) */
#define IPMAX           0x7FFFFFFFFFFFFFFF              /* plus MAX (int) */
#define IMMAX           0x8000000000000000              /* minus MAX (int) */
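
/* These constants are in register (T) format: bit 63 is the sign, bits
   62:52 the exponent, bits 51:0 the fraction. QNAN is the most significant
   fraction bit, which distinguishes quiet NaNs (bit set) from signaling
   NaNs, and CQNAN is the canonical quiet NaN returned for invalid
   operations such as inf - inf or 0/0. */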

/* Unpacked rounding constants */

#define UF_SRND         0x0000008000000000              /* S normal round */
#define UF_SINF         0x000000FFFFFFFFFF              /* S infinity round */
#define UF_TRND         0x0000000000000400              /* T normal round */
#define UF_TINF         0x00000000000007FF              /* T infinity round */

extern t_uint64 FR[32];
extern uint32 fpcr;
extern jmp_buf save_env;

t_bool ieee_unpack (t_uint64 op, UFP *r, uint32 ir);
void ieee_norm (UFP *r);
t_uint64 ieee_rpack (UFP *r, uint32 ir, uint32 dp);
void ieee_trap (uint32 trap, uint32 instenb, uint32 fpcrdsb, uint32 ir);
int32 ieee_fcmp (t_uint64 a, t_uint64 b, uint32 ir, uint32 signal_nan);
t_uint64 ieee_cvtst (t_uint64 op, uint32 ir);
t_uint64 ieee_cvtts (t_uint64 op, uint32 ir);
t_uint64 ieee_cvtif (t_uint64 val, uint32 ir, uint32 dp);
t_uint64 ieee_cvtfi (t_uint64 op, uint32 ir);
t_uint64 ieee_fadd (t_uint64 a, t_uint64 b, uint32 ir, uint32 dp, t_bool sub);
t_uint64 ieee_fmul (t_uint64 a, t_uint64 b, uint32 ir, uint32 dp);
t_uint64 ieee_fdiv (t_uint64 a, t_uint64 b, uint32 ir, uint32 dp);
uint32 estimateSqrt32 (uint32 exp, uint32 a);
t_uint64 estimateDiv128 (t_uint64 hi, t_uint64 lo, t_uint64 dvr);

extern t_uint64 uemul64 (t_uint64 a, t_uint64 b, t_uint64 *hi);
extern t_uint64 ufdiv64 (t_uint64 dvd, t_uint64 dvr, uint32 prec, uint32 *sticky);
t_uint64 fsqrt64 (t_uint64 frac, int32 exp);

/* IEEE S load */

t_uint64 op_lds (t_uint64 op)
{
uint32 exp = S_GETEXP (op);                             /* get exponent */

if (exp == S_NAN) exp = FPR_NAN;                        /* inf or NaN? */
else if (exp != 0) exp = exp + T_BIAS - S_BIAS;         /* zero or denorm? */
return (((t_uint64) (op & S_SIGN))? FPR_SIGN: 0) |      /* reg format */
    (((t_uint64) exp) << FPR_V_EXP) |
    (((t_uint64) (op & ~(S_SIGN|S_EXP))) << S_V_FRAC);
}
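
/* As a concrete example (assuming the usual IEEE biases S_BIAS = 127 and
   T_BIAS = 1023 from alpha_defs.h): loading 1.0 in S memory format
   (0x3F800000) rebiases the exponent to 127 + 1023 - 127 = 1023 and shifts
   the 23-bit fraction up into the T fraction field, giving the register
   value 0x3FF0000000000000, i.e. 1.0 in T format; op_sts below reverses
   this mapping on store. */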

/* IEEE S store */

t_uint64 op_sts (t_uint64 op)
{
uint32 sign = FPR_GETSIGN (op)? S_SIGN: 0;
uint32 frac = ((uint32) (op >> S_V_FRAC)) & M32;
uint32 exp = FPR_GETEXP (op);

if (exp == FPR_NAN) exp = S_NAN;                        /* inf or NaN? */
else if (exp != 0) exp = exp + S_BIAS - T_BIAS;         /* non-zero? */
exp = (exp & S_M_EXP) << S_V_EXP;
return (t_uint64) (sign | exp | (frac & ~(S_SIGN|S_EXP)));
}

/* IEEE floating operate */

void ieee_fop (uint32 ir)
{
UFP a, b;
uint32 ftpa, ftpb, fnc, ra, rb, rc;
t_uint64 res;

fnc = I_GETFFNC (ir);                                   /* get function */
ra = I_GETRA (ir);                                      /* get registers */
rb = I_GETRB (ir);
rc = I_GETRC (ir);
switch (fnc) {                                          /* case on func */

    case 0x00:                                          /* ADDS */
        res = ieee_fadd (FR[ra], FR[rb], ir, DT_S, 0);
        break;

    case 0x01:                                          /* SUBS */
        res = ieee_fadd (FR[ra], FR[rb], ir, DT_S, 1);
        break;

    case 0x02:                                          /* MULS */
        res = ieee_fmul (FR[ra], FR[rb], ir, DT_S);
        break;

    case 0x03:                                          /* DIVS */
        res = ieee_fdiv (FR[ra], FR[rb], ir, DT_S);
        break;

    case 0x20:                                          /* ADDT */
        res = ieee_fadd (FR[ra], FR[rb], ir, DT_T, 0);
        break;

    case 0x21:                                          /* SUBT */
        res = ieee_fadd (FR[ra], FR[rb], ir, DT_T, 1);
        break;

    case 0x22:                                          /* MULT */
        res = ieee_fmul (FR[ra], FR[rb], ir, DT_T);
        break;

    case 0x23:                                          /* DIVT */
        res = ieee_fdiv (FR[ra], FR[rb], ir, DT_T);
        break;

    case 0x24:                                          /* CMPTUN */
        ftpa = ieee_unpack (FR[ra], &a, ir);            /* unpack */
        ftpb = ieee_unpack (FR[rb], &b, ir);
        if ((ftpa == UFT_NAN) || (ftpb == UFT_NAN))     /* if NaN, T */
            res = FP_TRUE;
        else res = 0;
        break;

    case 0x25:                                          /* CMPTEQ */
        if (ieee_fcmp (FR[ra], FR[rb], ir, 0) == 0) res = FP_TRUE;
        else res = 0;
        break;

    case 0x26:                                          /* CMPTLT */
        if (ieee_fcmp (FR[ra], FR[rb], ir, 1) < 0) res = FP_TRUE;
        else res = 0;
        break;

    case 0x27:                                          /* CMPTLE */
        if (ieee_fcmp (FR[ra], FR[rb], ir, 1) <= 0) res = FP_TRUE;
        else res = 0;
        break;

    case 0x2C:                                          /* CVTST, CVTTS */
        if (ir & 0x2000) res = ieee_cvtst (FR[rb], ir); /* CVTST */
        else res = ieee_cvtts (FR[rb], ir);             /* CVTTS */
        break;

    case 0x2F:                                          /* CVTTQ */
        res = ieee_cvtfi (FR[rb], ir);
        break;

    case 0x3C:                                          /* CVTQS */
        res = ieee_cvtif (FR[rb], ir, DT_S);
        break;

    case 0x3E:                                          /* CVTQT */
        res = ieee_cvtif (FR[rb], ir, DT_T);
        break;

    default:
        if ((ir & I_FSRC) == I_FSRC_X) ABORT (EXC_RSVI);
        res = FR[rc];
        break;
        }

if (rc != 31) FR[rc] = res & M64;
return;
}

/* IEEE S to T convert - LDS doesn't handle denorms correctly */

t_uint64 ieee_cvtst (t_uint64 op, uint32 ir)
{
UFP b;
uint32 ftpb;

ftpb = ieee_unpack (op, &b, ir);                        /* unpack; norm dnorm */
if (ftpb == UFT_DENORM) {                               /* denormal? */
    b.exp = b.exp + T_BIAS - S_BIAS;                    /* change 0 exp to T */
    return ieee_rpack (&b, ir, DT_T);                   /* round, pack */
    }
else return op;                                         /* identity */
}

/* IEEE T to S convert */

t_uint64 ieee_cvtts (t_uint64 op, uint32 ir)
{
UFP b;
uint32 ftpb;

ftpb = ieee_unpack (op, &b, ir);                        /* unpack */
if (Q_FINITE (ftpb)) return ieee_rpack (&b, ir, DT_S);  /* finite? round, pack */
if (ftpb == UFT_NAN) return (op | QNAN);                /* nan? cvt to quiet */
if (ftpb == UFT_INF) return op;                         /* inf? unchanged */
return 0;                                               /* denorm? 0 */
}

/* IEEE floating compare

   - Take care of NaNs
   - Force -0 to +0
   - Then normal compare will work (even on inf and denorms) */
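
/* For example, comparing a = -2.0 with b = -4.0: the signs match, the
   exponents differ, and the test (a.sign ^ (a.exp < b.exp)) evaluates to
   1 ^ 1 = 0, so the routine returns +1 (a greater than b), the correct
   ordering for two negative values. */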

int32 ieee_fcmp (t_uint64 s1, t_uint64 s2, uint32 ir, uint32 trap_nan)
{
UFP a, b;
uint32 ftpa, ftpb;

ftpa = ieee_unpack (s1, &a, ir);
ftpb = ieee_unpack (s2, &b, ir);
if ((ftpa == UFT_NAN) || (ftpb == UFT_NAN)) {           /* NaN involved? */
    if (trap_nan) ieee_trap (TRAP_INV, 1, FPCR_INVD, ir);
    return +1;                                          /* force failure */
    }
if (ftpa == UFT_ZERO) a.sign = 0;                       /* only +0 allowed */
if (ftpb == UFT_ZERO) b.sign = 0;
if (a.sign != b.sign) return (a.sign? -1: +1);          /* unequal signs? */
if (a.exp != b.exp) return ((a.sign ^ (a.exp < b.exp))? -1: +1);
if (a.frac != b.frac) return ((a.sign ^ (a.frac < b.frac))? -1: +1);
return 0;
}

/* IEEE integer to floating convert */

t_uint64 ieee_cvtif (t_uint64 val, uint32 ir, uint32 dp)
{
UFP a;

if (val == 0) return 0;                                 /* 0? return +0 */
if (val & FPR_SIGN) {                                   /* < 0? */
    a.sign = 1;                                         /* set sign */
    val = NEG_Q (val);                                  /* |val| */
    }
else a.sign = 0;
a.exp = 63 + T_BIAS;                                    /* set exp */
a.frac = val;                                           /* set frac */
ieee_norm (&a);                                         /* normalize */
return ieee_rpack (&a, ir, dp);                         /* round and pack */
}

/* IEEE floating to integer convert - rounding code from SoftFloat
   The Alpha architecture specifies return of the low order bits of
   the true result, whereas the IEEE standard specifies the return
   of the maximum plus or minus value */
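
/* For example, CVTTQ of 2.5 with round-to-nearest: the integer part is 2 and
   the discarded fraction is exactly one half, so the sticky test first rounds
   the result up to 3 and the "round nearest hack" below then clears the low
   bit, producing 2, i.e. ties round to even. */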

t_uint64 ieee_cvtfi (t_uint64 op, uint32 ir)
{
UFP a;
t_uint64 sticky;
uint32 rndm, ftpa, ovf;
int32 ubexp;

ftpa = ieee_unpack (op, &a, ir);                        /* unpack */
if (!Q_FINITE (ftpa)) {                                 /* inf, NaN, dnorm? */
    ieee_trap (TRAP_INV, 1, FPCR_INVD, ir);             /* inv operation */
    return 0;
    }
if (ftpa == UFT_ZERO) return 0;                         /* zero? */
ovf = 0;                                                /* assume no ovflo */
ubexp = a.exp - T_BIAS;                                 /* unbiased exp */
if (ubexp < 0) {                                        /* < 1? */
    if (ubexp == -1) sticky = a.frac;                   /* [.5,1)? */
    else sticky = 1;                                    /* (0,.5) */
    a.frac = 0;
    }
else if (ubexp < UF_V_NM) {                             /* in range? */
    sticky = (a.frac << (64 - (UF_V_NM - ubexp))) & M64;
    a.frac = a.frac >> (UF_V_NM - ubexp);               /* result */
    }
else if (ubexp == UF_V_NM) sticky = 0;                  /* at limit of range? */
else {
    if ((ubexp - UF_V_NM) > 63) a.frac = 0;             /* out of range */
    else a.frac = (a.frac << (ubexp - UF_V_NM)) & M64;
    ovf = 1;                                            /* overflow */
    sticky = 0;                                         /* no rounding */
    }
rndm = I_GETFRND (ir);                                  /* get round mode */
if (((rndm == I_FRND_N) && (sticky & Q_SIGN)) ||        /* nearest? */
    ((rndm == I_FRND_P) && !a.sign && sticky) ||        /* +inf and +? */
    ((rndm == I_FRND_M) && a.sign && sticky)) {         /* -inf and -? */
    a.frac = (a.frac + 1) & M64;
    if (a.frac == 0) ovf = 1;                           /* overflow? */
    if ((rndm == I_FRND_N) && (sticky == Q_SIGN))       /* round nearest hack */
        a.frac = a.frac & ~1;
    }
if (a.frac > (a.sign? IMMAX: IPMAX)) ovf = 1;           /* overflow? */
if (ovf) ieee_trap (TRAP_IOV, ir & I_FTRP_V, 0, 0);     /* overflow trap */
if (ovf || sticky)                                      /* ovflo or round? */
    ieee_trap (TRAP_INE, Q_SUI (ir), FPCR_INED, ir);
return (a.sign? NEG_Q (a.frac): a.frac);
}

/* IEEE floating add

   - Take care of NaNs and infinites
   - Test for zero (fast exit)
   - Sticky logic for floating add
     > If result normalized, sticky in right place
     > If result carries out, renormalize, retain sticky
   - Sticky logic for floating subtract
     > If shift < guard, no sticky bits; 64b result is exact
       If shift <= 1, result may require extensive normalization,
       but there are no sticky bits to worry about
     > If shift >= guard, there is a sticky bit,
       but normalization is at most 1 place, sticky bit is retained
       for rounding purposes (but not in low order bit) */
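
/* For example, adding 1.0 + 2^-70 in T format: the exponent difference
   exceeds 63, so the smaller fraction collapses to a single sticky bit.
   The sum is then 1.0 plus that sticky bit, ieee_rpack rounds it back to
   exactly 1.0 under round-to-nearest, and only the inexact trap is
   signaled. */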

t_uint64 ieee_fadd (t_uint64 s1, t_uint64 s2, uint32 ir, uint32 dp, t_bool sub)
{
UFP a, b, t;
uint32 ftpa, ftpb;
uint32 sticky, rndm;
int32 ediff;

ftpa = ieee_unpack (s1, &a, ir);                        /* unpack operands */
ftpb = ieee_unpack (s2, &b, ir);
if (ftpb == UFT_NAN) return s2 | QNAN;                  /* B = NaN? quiet B */
if (ftpa == UFT_NAN) return s1 | QNAN;                  /* A = NaN? quiet A */
if (sub) b.sign = b.sign ^ 1;                           /* sign of B */
if (ftpb == UFT_INF) {                                  /* B = inf? */
    if ((ftpa == UFT_INF) && (a.sign ^ b.sign)) {       /* eff sub of inf? */
        ieee_trap (TRAP_INV, 1, FPCR_INVD, ir);         /* inv op trap */
        return CQNAN;                                   /* canonical NaN */
        }
    return (sub? (s2 ^ FPR_SIGN): s2);                  /* return B */
    }
if (ftpa == UFT_INF) return s1;                         /* A = inf? ret A */
rndm = I_GETFRND (ir);                                  /* inst round mode */
if (rndm == I_FRND_D) rndm = FPCR_GETFRND (fpcr);       /* dynamic? use FPCR */
if (ftpa == UFT_ZERO) {                                 /* A = 0? */
    if (ftpb != UFT_ZERO) a = b;                        /* B != 0? result is B */
    else if (a.sign != b.sign)                          /* both 0, subtract? */
        a.sign = (rndm == I_FRND_M);                    /* +0 unless RM */
    }
else if (ftpb != UFT_ZERO) {                            /* s2 != 0? */
    if ((a.exp < b.exp) ||                              /* s1 < s2? swap */
        ((a.exp == b.exp) && (a.frac < b.frac))) {
        t = a;
        a = b;
        b = t;
        }
    ediff = a.exp - b.exp;                              /* exp diff */
    if (ediff > 63) b.frac = 1;                         /* >63? retain sticky */
    else if (ediff) {                                   /* [1,63]? shift */
        sticky = ((b.frac << (64 - ediff)) & M64)? 1: 0;    /* lost bits */
        b.frac = ((b.frac >> ediff) & M64) | sticky;
        }
    if (a.sign ^ b.sign) {                              /* eff sub? */
        a.frac = (a.frac - b.frac) & M64;               /* subtract fractions */
        if (a.frac == 0) {                              /* result 0? */
            a.exp = 0;
            a.sign = (rndm == I_FRND_M);                /* +0 unless RM */
            }
        else ieee_norm (&a);                            /* normalize */
        }
    else {                                              /* eff add */
        a.frac = (a.frac + b.frac) & M64;               /* add frac */
        if (a.frac < b.frac) {                          /* chk for carry */
            a.frac = UF_NM | (a.frac >> 1) |            /* shift in carry */
                (a.frac & 1);                           /* retain sticky */
            a.exp = a.exp + 1;                          /* skip norm */
            }
        }
    }                                                   /* end else if */
return ieee_rpack (&a, ir, dp);                         /* round and pack */
}

/* IEEE floating multiply

   - Take care of NaNs and infinites
   - Test for zero operands (fast exit)
   - 64b x 64b fraction multiply, yielding 128b result
   - Normalize (at most 1 bit)
   - Insert "sticky" bit in low order fraction, for rounding

   Because IEEE fractions have a range of [1,2), the result can have a range
   of [1,4). Results in the range of [1,2) appear to be denormalized by one
   place, when in fact they are correct. Results in the range of [2,4) appear
   to be correct, when in fact they are 2X larger. This problem is taken
   care of in the result exponent calculation. */
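
/* For example, 1.5 * 1.5 = 2.25 lies in [2,4): the high half of the 128b
   product already has its top bit set, so ieee_norm leaves it alone and the
   "+ 1" in the exponent sum below supplies the doubling. By contrast,
   1.25 * 1.25 = 1.5625 lies in [1,2): the product appears shifted right by
   one place, and ieee_norm shifts it back while cancelling the extra "+ 1". */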

t_uint64 ieee_fmul (t_uint64 s1, t_uint64 s2, uint32 ir, uint32 dp)
{
UFP a, b;
uint32 ftpa, ftpb;
t_uint64 resl;

ftpa = ieee_unpack (s1, &a, ir);                        /* unpack operands */
ftpb = ieee_unpack (s2, &b, ir);
if (ftpb == UFT_NAN) return s2 | QNAN;                  /* B = NaN? quiet B */
if (ftpa == UFT_NAN) return s1 | QNAN;                  /* A = NaN? quiet A */
a.sign = a.sign ^ b.sign;                               /* sign of result */
if ((ftpa == UFT_ZERO) || (ftpb == UFT_ZERO)) {         /* zero operand? */
    if ((ftpa == UFT_INF) || (ftpb == UFT_INF)) {       /* 0 * inf? */
        ieee_trap (TRAP_INV, 1, FPCR_INVD, ir);         /* inv op trap */
        return CQNAN;                                   /* canonical NaN */
        }
    return (a.sign? FMZERO: FPZERO);                    /* return signed 0 */
    }
if (ftpb == UFT_INF) return (a.sign? FMINF: FPINF);     /* B = inf? */
if (ftpa == UFT_INF) return (a.sign? FMINF: FPINF);     /* A = inf? */
a.exp = a.exp + b.exp + 1 - T_BIAS;                     /* add exponents */
resl = uemul64 (a.frac, b.frac, &a.frac);               /* multiply fracs */
ieee_norm (&a);                                         /* normalize */
a.frac = a.frac | (resl? 1: 0);                         /* sticky bit */
return ieee_rpack (&a, ir, dp);                         /* round and pack */
}

/* Floating divide

   - Take care of NaNs and infinites
   - Check for zero cases
   - Divide fractions (55b to develop a rounding bit)
   - Set sticky bit if remainder non-zero

   Because IEEE fractions have a range of [1,2), the result can have a range
   of (.5,2). Results in the range of [1,2) are correct. Results in the
   range of (.5,1) need to be normalized by one place. */
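
/* For example, 1.0 / 1.5 = 0.666..., which falls in (.5,1): ieee_norm shifts
   the quotient fraction left one place and decrements the exponent, giving
   1.333... * 2^-1. A quotient such as 3.0 / 2.0 = 1.5 is already in [1,2)
   and needs no adjustment. Any nonzero remainder reported by ufdiv64 comes
   back as the sticky flag and is ORed into the low fraction bit for rounding. */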

t_uint64 ieee_fdiv (t_uint64 s1, t_uint64 s2, uint32 ir, uint32 dp)
{
UFP a, b;
uint32 ftpa, ftpb, sticky;

ftpa = ieee_unpack (s1, &a, ir);
ftpb = ieee_unpack (s2, &b, ir);
if (ftpb == UFT_NAN) return s2 | QNAN;                  /* B = NaN? quiet B */
if (ftpa == UFT_NAN) return s1 | QNAN;                  /* A = NaN? quiet A */
a.sign = a.sign ^ b.sign;                               /* sign of result */
if (ftpb == UFT_INF) {                                  /* B = inf? */
    if (ftpa == UFT_INF) {                              /* inf/inf? */
        ieee_trap (TRAP_INV, 1, FPCR_INVD, ir);         /* inv op trap */
        return CQNAN;                                   /* canonical NaN */
        }
    return (a.sign? FMZERO: FPZERO);                    /* !inf/inf, ret 0 */
    }
if (ftpa == UFT_INF)                                    /* A = inf? */
    return (a.sign? FMINF: FPINF);                      /* return inf */
if (ftpb == UFT_ZERO) {                                 /* B = 0? */
    if (ftpa == UFT_ZERO) {                             /* 0/0? */
        ieee_trap (TRAP_INV, 1, FPCR_INVD, ir);         /* inv op trap */
        return CQNAN;                                   /* canonical NaN */
        }
    ieee_trap (TRAP_DZE, 1, FPCR_DZED, ir);             /* div by 0 trap */
    return (a.sign? FMINF: FPINF);                      /* return inf */
    }
if (ftpa == UFT_ZERO) return (a.sign? FMZERO: FPZERO);  /* A = 0? */
a.exp = a.exp - b.exp + T_BIAS;                         /* unbiased exp */
a.frac = a.frac >> 1;                                   /* allow 1 bit left */
b.frac = b.frac >> 1;
a.frac = ufdiv64 (a.frac, b.frac, 55, &sticky);         /* divide */
ieee_norm (&a);                                         /* normalize */
a.frac = a.frac | sticky;                               /* insert sticky */
return ieee_rpack (&a, ir, dp);                         /* round and pack */
}

/* IEEE floating square root

   - Take care of NaNs, +infinite, zero
   - Check for negative operand
   - Compute result exponent
   - Compute sqrt of fraction */

t_uint64 ieee_sqrt (uint32 ir, uint32 dp)
{
t_uint64 op;
uint32 ftpb;
UFP b;

op = FR[I_GETRB (ir)];                                  /* get F[rb] */
ftpb = ieee_unpack (op, &b, ir);                        /* unpack */
if (ftpb == UFT_NAN) return op | QNAN;                  /* NaN? */
if ((ftpb == UFT_ZERO) ||                               /* zero? */
    ((ftpb == UFT_INF) && !b.sign)) return op;          /* +infinity? */
if (b.sign) {                                           /* minus? */
    ieee_trap (TRAP_INV, 1, FPCR_INVD, ir);             /* signal inv op */
    return CQNAN;
    }
b.exp = ((b.exp - T_BIAS) >> 1) + T_BIAS - 1;           /* result exponent */
b.frac = fsqrt64 (b.frac, b.exp);                       /* result fraction */
return ieee_rpack (&b, ir, dp);                         /* round and pack */
}

/* Support routines */

t_bool ieee_unpack (t_uint64 op, UFP *r, uint32 ir)
{
r->sign = FPR_GETSIGN (op);                             /* get sign */
r->exp = FPR_GETEXP (op);                               /* get exponent */
r->frac = FPR_GETFRAC (op);                             /* get fraction */
if (r->exp == 0) {                                      /* exponent = 0? */
    if (r->frac == 0) return UFT_ZERO;                  /* frac = 0? then true 0 */
    if (fpcr & FPCR_DNZ) {                              /* denorms to 0? */
        r->frac = 0;                                    /* clear fraction */
        return UFT_ZERO;
        }
    r->frac = r->frac << FPR_GUARD;                     /* guard fraction */
    ieee_norm (r);                                      /* normalize dnorm */
    ieee_trap (TRAP_INV, 1, FPCR_INVD, ir);             /* signal inv op */
    return UFT_DENORM;
    }
if (r->exp == FPR_NAN) {                                /* exponent = max? */
    if (r->frac == 0) return UFT_INF;                   /* frac = 0? then inf */
    if (!(r->frac & QNAN))                              /* signaling NaN? */
        ieee_trap (TRAP_INV, 1, FPCR_INVD, ir);         /* signal inv op */
    return UFT_NAN;
    }
r->frac = (r->frac | FPR_HB) << FPR_GUARD;              /* ins hidden bit, guard */
return UFT_FIN;                                         /* finite */
}

/* Normalize - input must be zero, finite, or denorm */

void ieee_norm (UFP *r)
{
int32 i;
static t_uint64 normmask[5] = {
    0xc000000000000000, 0xf000000000000000, 0xff00000000000000,
    0xffff000000000000, 0xffffffff00000000
    };
static int32 normtab[6] = { 1, 2, 4, 8, 16, 32 };

r->frac = r->frac & M64;
if (r->frac == 0) {                                     /* if fraction = 0 */
    r->sign = 0;
    r->exp = 0;                                         /* result is 0 */
    return;
    }
while ((r->frac & UF_NM) == 0) {                        /* normalized? */
    for (i = 0; i < 5; i++) {                           /* find first 1 */
        if (r->frac & normmask[i]) break;
        }
    r->frac = r->frac << normtab[i];                    /* shift frac */
    r->exp = r->exp - normtab[i];                       /* decr exp */
    }
return;
}

/* Round and pack

   Much of the treachery of the IEEE standard is buried here
   - Rounding modes (chopped, +infinity, nearest, -infinity)
   - Inexact (set if there are any rounding bits, regardless of rounding)
   - Overflow (result is infinite if rounded, max if not)
   - Underflow (no denorms!)

   Underflow handling is particularly complicated
   - Result is always 0
   - UNF and INE are always set in FPCR
   - If /U is set,
     o If /S is clear, trap
     o If /S is set, UNFD is set, but UNFZ is clear, ignore UNFD and
       trap, because the hardware cannot produce denormals
     o If /S is set, UNFD is set, and UNFZ is set, do not trap
   - If /SUI is set, and INED is clear, trap */
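
/* Rounding detail: for a T result the guard field is the low 11 bits of the
   unpacked fraction (UF_TINF = 0x7FF), and round-to-nearest adds UF_TRND =
   0x400, exactly half of the destination LSB; if the guard bits were exactly
   at that halfway point, the low bit of the packed result is cleared below,
   which yields round-to-nearest-even. The S constants UF_SRND/UF_SINF play
   the same roles 29 bits higher, matching the 29 fewer fraction bits in S
   format. */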

t_uint64 ieee_rpack (UFP *r, uint32 ir, uint32 dp)
{
static const t_uint64 stdrnd[2] = { UF_SRND, UF_TRND };
static const t_uint64 infrnd[2] = { UF_SINF, UF_TINF };
static const int32 expmax[2] = { T_BIAS - S_BIAS + S_M_EXP - 1, T_M_EXP - 1 };
static const int32 expmin[2] = { T_BIAS - S_BIAS, 0 };
t_uint64 rndadd, rndbits, res;
uint32 rndm;

if (r->frac == 0)                                       /* result 0? */
    return ((t_uint64) r->sign << FPR_V_SIGN);
rndm = I_GETFRND (ir);                                  /* inst round mode */
if (rndm == I_FRND_D) rndm = FPCR_GETFRND (fpcr);       /* dynamic? use FPCR */
rndbits = r->frac & infrnd[dp];                         /* isolate round bits */
if (rndm == I_FRND_N) rndadd = stdrnd[dp];              /* round to nearest? */
else if (((rndm == I_FRND_P) && !r->sign) ||            /* round to inf and */
    ((rndm == I_FRND_M) && r->sign))                    /* right sign? */
    rndadd = infrnd[dp];
else rndadd = 0;
r->frac = (r->frac + rndadd) & M64;                     /* round */
if ((r->frac & UF_NM) == 0) {                           /* carry out? */
    r->frac = (r->frac >> 1) | UF_NM;                   /* renormalize */
    r->exp = r->exp + 1;
    }
if (rndbits)                                            /* inexact? */
    ieee_trap (TRAP_INE, Q_SUI (ir), FPCR_INED, ir);    /* set inexact */
if (r->exp > expmax[dp]) {                              /* ovflo? */
    ieee_trap (TRAP_OVF, 1, FPCR_OVFD, ir);             /* set overflow trap */
    ieee_trap (TRAP_INE, Q_SUI (ir), FPCR_INED, ir);    /* set inexact */
    if (rndadd)                                         /* did we round? */
        return (r->sign? FMINF: FPINF);                 /* return infinity */
    return (r->sign? FMMAX: FPMAX);                     /* no, return max */
    }
if (r->exp <= expmin[dp]) {                             /* underflow? */
    ieee_trap (TRAP_UNF, ir & I_FTRP_U,                 /* set underflow trap */
        (fpcr & FPCR_UNDZ)? FPCR_UNFD: 0, ir);          /* (dsbl only if UNFZ set) */
    ieee_trap (TRAP_INE, Q_SUI (ir), FPCR_INED, ir);    /* set inexact */
    return 0;                                           /* underflow to +0 */
    }
res = (((t_uint64) r->sign) << FPR_V_SIGN) |            /* form result */
    (((t_uint64) r->exp) << FPR_V_EXP) |
    ((r->frac >> FPR_GUARD) & FPR_FRAC);
if ((rndm == I_FRND_N) && (rndbits == stdrnd[dp]))      /* nearest and halfway? */
    res = res & ~1;                                     /* clear lo bit */
return res;
}

/* IEEE arithmetic trap - only one can be set at a time! */

void ieee_trap (uint32 trap, uint32 instenb, uint32 fpcrdsb, uint32 ir)
{
fpcr = fpcr | (trap << 19);                             /* FPCR to trap summ offset */
if ((instenb == 0) ||                                   /* not enabled in inst? ignore */
    ((ir & I_FTRP_S) && (fpcr & fpcrdsb))) return;      /* /S and disabled? ignore */
arith_trap (trap, ir);                                  /* set Alpha trap */
return;
}

/* Fraction square root routine - code from SoftFloat */

t_uint64 fsqrt64 (t_uint64 asig, int32 exp)
{
t_uint64 zsig, remh, reml, t;
uint32 sticky = 0;

zsig = estimateSqrt32 (exp, (uint32) (asig >> 32));

/* Calculate the final answer in two steps. First, do one iteration of
   Newton's approximation. The divide-by-2 is accomplished by clever
   positioning of the operands. Then, check the bits just below the
   (double precision) rounding bit to see if they are close to zero
   (that is, the rounding bits are close to midpoint). If so, make
   sure that the result^2 is <below> the input operand */

asig = asig >> ((exp & 1)? 3: 2);                       /* leave 2b guard */
zsig = estimateDiv128 (asig, 0, zsig << 32) + (zsig << 30 );
if ((zsig & 0x1FF) <= 5) {                              /* close to even? */
    reml = uemul64 (zsig, zsig, &remh);                 /* result^2 */
    remh = asig - remh - (reml? 1:0);                   /* arg - result^2 */
    reml = NEG_Q (reml);
    while (Q_GETSIGN (remh) != 0) {                     /* if arg < result^2 */
        zsig = zsig - 1;                                /* decr result */
        t = (zsig << 1) | 1;                            /* incr result^2 */
        reml = reml + t;                                /* and retest */
        remh = remh + (zsig >> 63) + ((reml < t)? 1: 0);
        }
    if ((remh | reml) != 0 ) sticky = 1;                /* not exact? */
    }
return zsig;
}

| 716 | /* Estimate 32b SQRT
|
| 717 |
|
| 718 | Calculate an approximation to the square root of the 32-bit significand given
|
| 719 | by 'a'. Considered as an integer, 'a' must be at least 2^31. If bit 0 of
|
| 720 | 'exp' (the least significant bit) is 1, the integer returned approximates
|
| 721 | 2^31*sqrt('a'/2^31), where 'a' is considered an integer. If bit 0 of 'exp'
|
| 722 | is 0, the integer returned approximates 2^31*sqrt('a'/2^30). In either
|
| 723 | case, the approximation returned lies strictly within +/-2 of the exact
|
| 724 | value. */
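
/* Sketch of the method, as read from the code below: the sqrtOdd/sqrtEven
   tables seed an initial guess from fraction bits <30:27>, one explicit
   Newton iteration refines that guess, and the final return expression
   ((a << 31) / z) + (z >> 1) is effectively one more Newton step for the
   square root, carried out in fixed point. */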

uint32 estimateSqrt32 (uint32 exp, uint32 a)
{
uint32 index, z;
static const uint32 sqrtOdd[] = {
    0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
    0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
    };
static const uint32 sqrtEven[] = {
    0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
    0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
    };

index = (a >> 27) & 0xF;                                /* bits<30:27> */
if (exp & 1) {                                          /* odd exp? */
    z = 0x4000 + (a >> 17) - sqrtOdd[index];            /* initial guess */
    z = ((a / z) << 14) + (z << 15);                    /* Newton iteration */
    a = a >> 1;
    }
else {
    z = 0x8000 + (a >> 17) - sqrtEven[index];           /* initial guess */
    z = (a / z) + z;                                    /* Newton iteration */
    z = (z >= 0x20000) ? 0xFFFF8000: (z << 15);
    if (z <= a) z = (a >> 1) | 0x80000000;
    }
return (uint32) ((((((t_uint64) a) << 31) / ((t_uint64) z)) + (z >> 1)) & M32);
}

/* Estimate 128b/64b unsigned divide - returns an approximate 64b quotient */

t_uint64 estimateDiv128 (t_uint64 a0, t_uint64 a1, t_uint64 b)
{
t_uint64 b0, b1;
t_uint64 rem0, rem1, term0, term1;
t_uint64 z;

if (b <= a0) return 0xFFFFFFFFFFFFFFFF;
b0 = b >> 32;
z = ((b0 << 32) <= a0)? 0xFFFFFFFF00000000: ((a0 / b0) << 32);
term1 = uemul64 (b, z, &term0);
rem0 = a0 - term0 - (a1 < term1);
rem1 = a1 - term1;
while (Q_GETSIGN (rem0)) {
    z = z - ((t_uint64) 0x100000000);
    b1 = b << 32;
    rem1 = b1 + rem1;
    rem0 = b0 + rem0 + (rem1 < b1);
    }
rem0 = (rem0 << 32) | (rem1 >> 32);
z |= (((b0 << 32) <= rem0)? 0xFFFFFFFF : (rem0 / b0));
return z;
}