Blame - alpha/alpha_fpi.c - emulators/simh - Rivoreo Source Code Repositories

blob: ecf132118622bbb6785a6ad46278667622a6a04e [file] [log] [blame] [raw]

Mark Pizzolato	fffad7c	2012-03-19 16:05:24 -0700	[diff] [blame]	1	/* alpha_fpi.c - Alpha IEEE floating point simulator
				2
				3	Copyright (c) 2003-2006, Robert M Supnik
				4
				5	Permission is hereby granted, free of charge, to any person obtaining a
				6	copy of this software and associated documentation files (the "Software"),
				7	to deal in the Software without restriction, including without limitation
				8	the rights to use, copy, modify, merge, publish, distribute, sublicense,
				9	and/or sell copies of the Software, and to permit persons to whom the
				10	Software is furnished to do so, subject to the following conditions:
				11
				12	The above copyright notice and this permission notice shall be included in
				13	all copies or substantial portions of the Software.
				14
				15	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				16	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				17	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
				18	ROBERT M SUPNIK BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
				19	IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
				20	CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
				21
				22	Except as contained in this notice, the name of Robert M Supnik shall not be
				23	used in advertising or otherwise to promote the sale, use or other dealings
				24	in this Software without prior written authorization from Robert M Supnik.
				25
				26	This module contains the instruction simulators for
				27
				28	- single precision floating point, S
				29	- double precision floating point, T
				30
				31	Portions of this module (specifically, the convert floating to integer
				32	routine and the square root routine) are a derivative work from SoftFloat,
				33	written by John Hauser. SoftFloat includes the following license terms:
				34
				35	Written by John R. Hauser. This work was made possible in part by the
				36	International Computer Science Institute, located at Suite 600, 1947 Center
				37	Street, Berkeley, California 94704. Funding was partially provided by the
				38	National Science Foundation under grant MIP-9311980. The original version
				39	of this code was written as part of a project to build a fixed-point vector
				40	processor in collaboration with the University of California at Berkeley,
				41	overseen by Profs. Nelson Morgan and John Wawrzynek. More information
				42	is available through the Web page 'http://www.cs.berkeley.edu/~jhauser/
				43	arithmetic/SoftFloat.html'.
				44
				45	THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
				46	been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
				47	RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
				48	AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
				49	COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
				50	EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
				51	INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
				52	OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
				53
				54	Derivative works are acceptable, even for commercial purposes, so long as
				55	(1) the source code for the derivative work includes prominent notice that
				56	the work is derivative, and (2) the source code includes prominent notice with
				57	these four paragraphs for those parts of this code that are retained.
				58	*/
				59
				60	#include "alpha_defs.h"
				61
				62	#define UFT_ZERO 0 /* unpacked: zero */
				63	#define UFT_FIN 1 /* finite */
				64	#define UFT_DENORM 2 /* denormal */
				65	#define UFT_INF 3 /* infinity */
				66	#define UFT_NAN 4 /* not a number */
				67
				68	#define Q_FINITE(x) ((x) <= UFT_FIN) /* finite */
				69	#define Q_SUI(x) (((x) & I_FTRP) == I_FTRP_SVI)
				70
				71	/* Register format constants */
				72
				73	#define QNAN 0x0008000000000000 /* quiet NaN flag */
				74	#define CQNAN 0xFFF8000000000000 /* canonical quiet NaN */
				75	#define FPZERO 0x0000000000000000 /* plus zero (fp) */
				76	#define FMZERO 0x8000000000000000 /* minus zero (fp) */
				77	#define FPINF 0x7FF0000000000000 /* plus infinity (fp) */
				78	#define FMINF 0xFFF0000000000000 /* minus infinity (fp) */
				79	#define FPMAX 0x7FEFFFFFFFFFFFFF /* plus MAX (fp) */
				80	#define FMMAX 0xFFEFFFFFFFFFFFFF /* minus MAX (fp) */
				81	#define IPMAX 0x7FFFFFFFFFFFFFFF /* plus MAX (int) */
				82	#define IMMAX 0x8000000000000000 /* minus MAX (int) */
				83
				84	/* Unpacked rounding constants */
				85
				86	#define UF_SRND 0x0000008000000000 /* S normal round */
				87	#define UF_SINF 0x000000FFFFFFFFFF /* S infinity round */
				88	#define UF_TRND 0x0000000000000400 /* T normal round */
				89	#define UF_TINF 0x00000000000007FF /* T infinity round */
				90
				91	extern t_uint64 FR[32];
				92	extern uint32 fpcr;
				93	extern jmp_buf save_env;
				94
				95	t_bool ieee_unpack (t_uint64 op, UFP *r, uint32 ir);
				96	void ieee_norm (UFP *r);
				97	t_uint64 ieee_rpack (UFP *r, uint32 ir, uint32 dp);
				98	void ieee_trap (uint32 trap, uint32 instenb, uint32 fpcrdsb, uint32 ir);
				99	int32 ieee_fcmp (t_uint64 a, t_uint64 b, uint32 ir, uint32 signal_nan);
				100	t_uint64 ieee_cvtst (t_uint64 op, uint32 ir);
				101	t_uint64 ieee_cvtts (t_uint64 op, uint32 ir);
				102	t_uint64 ieee_cvtif (t_uint64 val, uint32 ir, uint32 dp);
				103	t_uint64 ieee_cvtfi (t_uint64 op, uint32 ir);
				104	t_uint64 ieee_fadd (t_uint64 a, t_uint64 b, uint32 ir, uint32 dp, t_bool sub);
				105	t_uint64 ieee_fmul (t_uint64 a, t_uint64 b, uint32 ir, uint32 dp);
				106	t_uint64 ieee_fdiv (t_uint64 a, t_uint64 b, uint32 ir, uint32 dp);
				107	uint32 estimateSqrt32 (uint32 exp, uint32 a);
				108	t_uint64 estimateDiv128 (t_uint64 hi, t_uint64 lo, t_uint64 dvr);
				109
				110	extern t_uint64 uemul64 (t_uint64 a, t_uint64 b, t_uint64 *hi);
				111	extern t_uint64 ufdiv64 (t_uint64 dvd, t_uint64 dvr, uint32 prec, uint32 *sticky);
				112	t_uint64 fsqrt64 (t_uint64 frac, int32 exp);
				113
				114	/* IEEE S load */
				115
				116	t_uint64 op_lds (t_uint64 op)
				117	{
				118	uint32 exp = S_GETEXP (op); /* get exponent */
				119
				120	if (exp == S_NAN) exp = FPR_NAN; /* inf or NaN? */
				121	else if (exp != 0) exp = exp + T_BIAS - S_BIAS; /* zero or denorm? */
				122	return (((t_uint64) (op & S_SIGN))? FPR_SIGN: 0) \| /* reg format */
				123	(((t_uint64) exp) << FPR_V_EXP) \|
				124	(((t_uint64) (op & ~(S_SIGN\|S_EXP))) << S_V_FRAC);
				125	}
				126
				127	/* IEEE S store */
				128
				129	t_uint64 op_sts (t_uint64 op)
				130	{
				131	uint32 sign = FPR_GETSIGN (op)? S_SIGN: 0;
				132	uint32 frac = ((uint32) (op >> S_V_FRAC)) & M32;
				133	uint32 exp = FPR_GETEXP (op);
				134
Mark Pizzolato	66dba79	2015-03-30 10:24:24 -0700	[diff] [blame]	135	if (exp == FPR_NAN) exp = S_NAN; /* inf or NaN? */
				136	else if (exp != 0) exp = exp + S_BIAS - T_BIAS; /* non-zero? */
Mark Pizzolato	fffad7c	2012-03-19 16:05:24 -0700	[diff] [blame]	137	exp = (exp & S_M_EXP) << S_V_EXP;
				138	return (t_uint64) (sign \| exp \| (frac & ~(S_SIGN\|S_EXP)));
				139	}
				140
				141	/* IEEE floating operate */
				142
				143	void ieee_fop (uint32 ir)
				144	{
				145	UFP a, b;
				146	uint32 ftpa, ftpb, fnc, ra, rb, rc;
				147	t_uint64 res;
				148
				149	fnc = I_GETFFNC (ir); /* get function */
				150	ra = I_GETRA (ir); /* get registers */
				151	rb = I_GETRB (ir);
				152	rc = I_GETRC (ir);
				153	switch (fnc) { /* case on func */
				154
				155	case 0x00: /* ADDS */
				156	res = ieee_fadd (FR[ra], FR[rb], ir, DT_S, 0);
				157	break;
				158
				159	case 0x01: /* SUBS */
				160	res = ieee_fadd (FR[ra], FR[rb], ir, DT_S, 1);
				161	break;
				162
				163	case 0x02: /* MULS */
				164	res = ieee_fmul (FR[ra], FR[rb], ir, DT_S);
				165	break;
				166
				167	case 0x03: /* DIVS */
				168	res = ieee_fdiv (FR[ra], FR[rb], ir, DT_S);
				169	break;
				170
				171	case 0x20: /* ADDT */
				172	res = ieee_fadd (FR[ra], FR[rb], ir, DT_T, 0);
				173	break;
				174
				175	case 0x21: /* SUBT */
				176	res = ieee_fadd (FR[ra], FR[rb], ir, DT_T, 1);
				177	break;
				178
				179	case 0x22: /* MULT */
				180	res = ieee_fmul (FR[ra], FR[rb], ir, DT_T);
				181	break;
				182
				183	case 0x23: /* DIVT */
				184	res = ieee_fdiv (FR[ra], FR[rb], ir, DT_T);
				185	break;
				186
				187	case 0x24: /* CMPTUN */
				188	ftpa = ieee_unpack (FR[ra], &a, ir); /* unpack */
				189	ftpb = ieee_unpack (FR[rb], &b, ir);
				190	if ((ftpa == UFT_NAN) \|\| (ftpb == UFT_NAN)) /* if NaN, T */
				191	res = FP_TRUE;
				192	else res = 0;
				193	break;
				194
				195	case 0x25: /* CMPTEQ */
				196	if (ieee_fcmp (FR[ra], FR[rb], ir, 0) == 0) res = FP_TRUE;
				197	else res = 0;
				198	break;
				199
				200	case 0x26: /* CMPTLT */
				201	if (ieee_fcmp (FR[ra], FR[rb], ir, 1) < 0) res = FP_TRUE;
				202	else res = 0;
				203	break;
				204
				205	case 0x27: /* CMPTLE */
				206	if (ieee_fcmp (FR[ra], FR[rb], ir, 1) <= 0) res = FP_TRUE;
				207	else res = 0;
				208	break;
				209
				210	case 0x2C: /* CVTST, CVTTS */
				211	if (ir & 0x2000) res = ieee_cvtst (FR[rb], ir); /* CVTST */
				212	else res = ieee_cvtts (FR[rb], ir); /* CVTTS */
				213	break;
				214
				215	case 0x2F: /* CVTTQ */
				216	res = ieee_cvtfi (FR[rb], ir);
				217	break;
				218
				219	case 0x3C: /* CVTQS */
				220	res = ieee_cvtif (FR[rb], ir, DT_S);
				221	break;
				222
				223	case 0x3E: /* CVTQT */
				224	res = ieee_cvtif (FR[rb], ir, DT_T);
				225	break;
				226
				227	default:
				228	if ((ir & I_FSRC) == I_FSRC_X) ABORT (EXC_RSVI);
				229	res = FR[rc];
				230	break;
				231	}
				232
				233	if (rc != 31) FR[rc] = res & M64;
				234	return;
				235	}
				236
				237	/* IEEE S to T convert - LDS doesn't handle denorms correctly */
				238
				239	t_uint64 ieee_cvtst (t_uint64 op, uint32 ir)
				240	{
				241	UFP b;
				242	uint32 ftpb;
				243
				244	ftpb = ieee_unpack (op, &b, ir); /* unpack; norm dnorm */
				245	if (ftpb == UFT_DENORM) { /* denormal? */
				246	b.exp = b.exp + T_BIAS - S_BIAS; /* change 0 exp to T */
				247	return ieee_rpack (&b, ir, DT_T); /* round, pack */
				248	}
				249	else return op; /* identity */
				250	}
				251
				252	/* IEEE T to S convert */
				253
				254	t_uint64 ieee_cvtts (t_uint64 op, uint32 ir)
				255	{
				256	UFP b;
				257	uint32 ftpb;
				258
				259	ftpb = ieee_unpack (op, &b, ir); /* unpack */
				260	if (Q_FINITE (ftpb)) return ieee_rpack (&b, ir, DT_S); /* finite? round, pack */
				261	if (ftpb == UFT_NAN) return (op \| QNAN); /* nan? cvt to quiet */
				262	if (ftpb == UFT_INF) return op; /* inf? unchanged */
				263	return 0; /* denorm? 0 */
				264	}
				265
				266	/* IEEE floating compare
				267
				268	- Take care of NaNs
				269	- Force -0 to +0
				270	- Then normal compare will work (even on inf and denorms) */
				271
				272	int32 ieee_fcmp (t_uint64 s1, t_uint64 s2, uint32 ir, uint32 trap_nan)
				273	{
				274	UFP a, b;
				275	uint32 ftpa, ftpb;
				276
				277	ftpa = ieee_unpack (s1, &a, ir);
				278	ftpb = ieee_unpack (s2, &b, ir);
				279	if ((ftpa == UFT_NAN) \|\| (ftpb == UFT_NAN)) { /* NaN involved? */
				280	if (trap_nan) ieee_trap (TRAP_INV, 1, FPCR_INVD, ir);
				281	return +1; /* force failure */
				282	}
				283	if (ftpa == UFT_ZERO) a.sign = 0; /* only +0 allowed */
				284	if (ftpb == UFT_ZERO) b.sign = 0;
				285	if (a.sign != b.sign) return (a.sign? -1: +1); /* unequal signs? */
				286	if (a.exp != b.exp) return ((a.sign ^ (a.exp < b.exp))? -1: +1);
				287	if (a.frac != b.frac) return ((a.sign ^ (a.frac < b.frac))? -1: +1);
				288	return 0;
				289	}
				290
				291	/* IEEE integer to floating convert */
				292
				293	t_uint64 ieee_cvtif (t_uint64 val, uint32 ir, uint32 dp)
				294	{
				295	UFP a;
				296
				297	if (val == 0) return 0; /* 0? return +0 */
				298	if (val & FPR_SIGN) { /* < 0? */
				299	a.sign = 1; /* set sign */
				300	val = NEG_Q (val); /* \|val\| */
				301	}
				302	else a.sign = 0;
				303	a.exp = 63 + T_BIAS; /* set exp */
				304	a.frac = val; /* set frac */
				305	ieee_norm (&a); /* normalize */
				306	return ieee_rpack (&a, ir, dp); /* round and pack */
				307	}
				308
				309	/* IEEE floating to integer convert - rounding code from SoftFloat
				310	The Alpha architecture specifies return of the low order bits of
				311	the true result, whereas the IEEE standard specifies the return
				312	of the maximum plus or minus value */
				313
				314	t_uint64 ieee_cvtfi (t_uint64 op, uint32 ir)
				315	{
				316	UFP a;
				317	t_uint64 sticky;
				318	uint32 rndm, ftpa, ovf;
				319	int32 ubexp;
				320
				321	ftpa = ieee_unpack (op, &a, ir); /* unpack */
				322	if (!Q_FINITE (ftpa)) { /* inf, NaN, dnorm? */
				323	ieee_trap (TRAP_INV, 1, FPCR_INVD, ir); /* inv operation */
				324	return 0;
				325	}
				326	if (ftpa == UFT_ZERO) return 0; /* zero? */
				327	ovf = 0; /* assume no ovflo */
				328	ubexp = a.exp - T_BIAS; /* unbiased exp */
				329	if (ubexp < 0) { /* < 1? */
				330	if (ubexp == -1) sticky = a.frac; /* [.5,1)? */
				331	else sticky = 1; /* (0,.5) */
				332	a.frac = 0;
				333	}
				334	else if (ubexp < UF_V_NM) { /* in range? */
				335	sticky = (a.frac << (64 - (UF_V_NM - ubexp))) & M64;
				336	a.frac = a.frac >> (UF_V_NM - ubexp); /* result */
				337	}
				338	else if (ubexp == UF_V_NM) sticky = 0; /* at limit of range? */
				339	else {
				340	if ((ubexp - UF_V_NM) > 63) a.frac = 0; /* out of range */
				341	else a.frac = (a.frac << (ubexp - UF_V_NM)) & M64;
				342	ovf = 1; /* overflow */
				343	sticky = 0; /* no rounding */
				344	}
				345	rndm = I_GETFRND (ir); /* get round mode */
				346	if (((rndm == I_FRND_N) && (sticky & Q_SIGN)) \|\| /* nearest? */
				347	((rndm == I_FRND_P) && !a.sign && sticky) \|\| /* +inf and +? */
				348	((rndm == I_FRND_M) && a.sign && sticky)) { /* -inf and -? */
				349	a.frac = (a.frac + 1) & M64;
				350	if (a.frac == 0) ovf = 1; /* overflow? */
				351	if ((rndm == I_FRND_N) && (sticky == Q_SIGN)) /* round nearest hack */
				352	a.frac = a.frac & ~1;
				353	}
				354	if (a.frac > (a.sign? IMMAX: IPMAX)) ovf = 1; /* overflow? */
				355	if (ovf) ieee_trap (TRAP_IOV, ir & I_FTRP_V, 0, 0); /* overflow trap */
				356	if (ovf \|\| sticky) /* ovflo or round? */
				357	ieee_trap (TRAP_INE, Q_SUI (ir), FPCR_INED, ir);
				358	return (a.sign? NEG_Q (a.frac): a.frac);
				359	}
				360
				361	/* IEEE floating add
				362
				363	- Take care of NaNs and infinites
				364	- Test for zero (fast exit)
				365	- Sticky logic for floating add
				366	> If result normalized, sticky in right place
				367	> If result carries out, renormalize, retain sticky
				368	- Sticky logic for floating subtract
				369	> If shift < guard, no sticky bits; 64b result is exact
				370	If shift <= 1, result may require extensive normalization,
				371	but there are no sticky bits to worry about
				372	> If shift >= guard, there is a sticky bit,
				373	but normalization is at most 1 place, sticky bit is retained
				374	for rounding purposes (but not in low order bit) */
				375
				376	t_uint64 ieee_fadd (t_uint64 s1, t_uint64 s2, uint32 ir, uint32 dp, t_bool sub)
				377	{
				378	UFP a, b, t;
				379	uint32 ftpa, ftpb;
				380	uint32 sticky, rndm;
				381	int32 ediff;
				382
				383	ftpa = ieee_unpack (s1, &a, ir); /* unpack operands */
				384	ftpb = ieee_unpack (s2, &b, ir);
				385	if (ftpb == UFT_NAN) return s2 \| QNAN; /* B = NaN? quiet B */
				386	if (ftpa == UFT_NAN) return s1 \| QNAN; /* A = NaN? quiet A */
				387	if (sub) b.sign = b.sign ^ 1; /* sign of B */
				388	if (ftpb == UFT_INF) { /* B = inf? */
				389	if ((ftpa == UFT_INF) && (a.sign ^ b.sign)) { /* eff sub of inf? */
				390	ieee_trap (TRAP_INV, 1, FPCR_INVD, ir); /* inv op trap */
				391	return CQNAN; /* canonical NaN */
				392	}
				393	return (sub? (s2 ^ FPR_SIGN): s2); /* return B */
				394	}
				395	if (ftpa == UFT_INF) return s1; /* A = inf? ret A */
				396	rndm = I_GETFRND (ir); /* inst round mode */
				397	if (rndm == I_FRND_D) rndm = FPCR_GETFRND (fpcr); /* dynamic? use FPCR */
				398	if (ftpa == UFT_ZERO) { /* A = 0? */
				399	if (ftpb != UFT_ZERO) a = b; /* B != 0? result is B */
				400	else if (a.sign != b.sign) /* both 0, subtract? */
				401	a.sign = (rndm == I_FRND_M); /* +0 unless RM */
				402	}
				403	else if (ftpb != UFT_ZERO) { /* s2 != 0? */
				404	if ((a.exp < b.exp) \|\| /* s1 < s2? swap */
				405	((a.exp == b.exp) && (a.frac < b.frac))) {
				406	t = a;
				407	a = b;
				408	b = t;
				409	}
				410	ediff = a.exp - b.exp; /* exp diff */
				411	if (ediff > 63) b.frac = 1; /* >63? retain sticky */
				412	else if (ediff) { /* [1,63]? shift */
				413	sticky = ((b.frac << (64 - ediff)) & M64)? 1: 0; /* lost bits */
				414	b.frac = ((b.frac >> ediff) & M64) \| sticky;
				415	}
				416	if (a.sign ^ b.sign) { /* eff sub? */
				417	a.frac = (a.frac - b.frac) & M64; /* subtract fractions */
				418	if (a.frac == 0) { /* result 0? */
				419	a.exp = 0;
				420	a.sign = (rndm == I_FRND_M); /* +0 unless RM */
				421	}
				422	else ieee_norm (&a); /* normalize */
				423	}
				424	else { /* eff add */
				425	a.frac = (a.frac + b.frac) & M64; /* add frac */
				426	if (a.frac < b.frac) { /* chk for carry */
				427	a.frac = UF_NM \| (a.frac >> 1) \| /* shift in carry */
				428	(a.frac & 1); /* retain sticky */
				429	a.exp = a.exp + 1; /* skip norm */
				430	}
				431	}
				432	} /* end else if */
				433	return ieee_rpack (&a, ir, dp); /* round and pack */
				434	}
				435
				436	/* IEEE floating multiply
				437
				438	- Take care of NaNs and infinites
				439	- Test for zero operands (fast exit)
				440	- 64b x 64b fraction multiply, yielding 128b result
				441	- Normalize (at most 1 bit)
				442	- Insert "sticky" bit in low order fraction, for rounding
				443
				444	Because IEEE fractions have a range of [1,2), the result can have a range
				445	of [1,4). Results in the range of [1,2) appear to be denormalized by one
				446	place, when in fact they are correct. Results in the range of [2,4) appear
				447	to be in correct, when in fact they are 2X larger. This problem is taken
				448	care of in the result exponent calculation. */
				449
				450	t_uint64 ieee_fmul (t_uint64 s1, t_uint64 s2, uint32 ir, uint32 dp)
				451	{
				452	UFP a, b;
				453	uint32 ftpa, ftpb;
				454	t_uint64 resl;
				455
				456	ftpa = ieee_unpack (s1, &a, ir); /* unpack operands */
				457	ftpb = ieee_unpack (s2, &b, ir);
				458	if (ftpb == UFT_NAN) return s2 \| QNAN; /* B = NaN? quiet B */
				459	if (ftpa == UFT_NAN) return s1 \| QNAN; /* A = NaN? quiet A */
				460	a.sign = a.sign ^ b.sign; /* sign of result */
				461	if ((ftpa == UFT_ZERO) \|\| (ftpb == UFT_ZERO)) { /* zero operand? */
				462	if ((ftpa == UFT_INF) \|\| (ftpb == UFT_INF)) { /* 0 * inf? */
				463	ieee_trap (TRAP_INV, 1, FPCR_INVD, ir); /* inv op trap */
				464	return CQNAN; /* canonical NaN */
				465	}
				466	return (a.sign? FMZERO: FPZERO); /* return signed 0 */
				467	}
				468	if (ftpb == UFT_INF) return (a.sign? FMINF: FPINF); /* B = inf? */
				469	if (ftpa == UFT_INF) return (a.sign? FMINF: FPINF); /* A = inf? */
				470	a.exp = a.exp + b.exp + 1 - T_BIAS; /* add exponents */
				471	resl = uemul64 (a.frac, b.frac, &a.frac); /* multiply fracs */
				472	ieee_norm (&a); /* normalize */
				473	a.frac = a.frac \| (resl? 1: 0); /* sticky bit */
				474	return ieee_rpack (&a, ir, dp); /* round and pack */
				475	}
				476
				477	/* Floating divide
				478
				479	- Take care of NaNs and infinites
				480	- Check for zero cases
				481	- Divide fractions (55b to develop a rounding bit)
				482	- Set sticky bit if remainder non-zero
				483
				484	Because IEEE fractions have a range of [1,2), the result can have a range
				485	of (.5,2). Results in the range of [1,2) are correct. Results in the
				486	range of (.5,1) need to be normalized by one place. */
				487
				488	t_uint64 ieee_fdiv (t_uint64 s1, t_uint64 s2, uint32 ir, uint32 dp)
				489	{
				490	UFP a, b;
				491	uint32 ftpa, ftpb, sticky;
				492
				493	ftpa = ieee_unpack (s1, &a, ir);
				494	ftpb = ieee_unpack (s2, &b, ir);
				495	if (ftpb == UFT_NAN) return s2 \| QNAN; /* B = NaN? quiet B */
				496	if (ftpa == UFT_NAN) return s1 \| QNAN; /* A = NaN? quiet A */
				497	a.sign = a.sign ^ b.sign; /* sign of result */
				498	if (ftpb == UFT_INF) { /* B = inf? */
				499	if (ftpa == UFT_INF) { /* inf/inf? */
				500	ieee_trap (TRAP_INV, 1, FPCR_INVD, ir); /* inv op trap */
				501	return CQNAN; /* canonical NaN */
				502	}
				503	return (a.sign? FMZERO: FPZERO); /* !inf/inf, ret 0 */
				504	}
				505	if (ftpa == UFT_INF) /* A = inf? */
				506	return (a.sign? FMINF: FPINF); /* return inf */
				507	if (ftpb == UFT_ZERO) { /* B = 0? */
				508	if (ftpa == UFT_ZERO) { /* 0/0? */
				509	ieee_trap (TRAP_INV, 1, FPCR_INVD, ir); /* inv op trap */
				510	return CQNAN; /* canonical NaN */
				511	}
				512	ieee_trap (TRAP_DZE, 1, FPCR_DZED, ir); /* div by 0 trap */
				513	return (a.sign? FMINF: FPINF); /* return inf */
				514	}
				515	if (ftpa == UFT_ZERO) return (a.sign? FMZERO: FPZERO); /* A = 0? */
				516	a.exp = a.exp - b.exp + T_BIAS; /* unbiased exp */
				517	a.frac = a.frac >> 1; /* allow 1 bit left */
				518	b.frac = b.frac >> 1;
				519	a.frac = ufdiv64 (a.frac, b.frac, 55, &sticky); /* divide */
				520	ieee_norm (&a); /* normalize */
				521	a.frac = a.frac \| sticky; /* insert sticky */
				522	return ieee_rpack (&a, ir, dp); /* round and pack */
				523	}
				524
				525	/* IEEE floating square root
				526
				527	- Take care of NaNs, +infinite, zero
				528	- Check for negative operand
				529	- Compute result exponent
				530	- Compute sqrt of fraction */
				531
				532	t_uint64 ieee_sqrt (uint32 ir, uint32 dp)
				533	{
				534	t_uint64 op;
				535	uint32 ftpb;
				536	UFP b;
				537
				538	op = FR[I_GETRB (ir)]; /* get F[rb] */
				539	ftpb = ieee_unpack (op, &b, ir); /* unpack */
				540	if (ftpb == UFT_NAN) return op \| QNAN; /* NaN? */
				541	if ((ftpb == UFT_ZERO) \|\| /* zero? */
				542	((ftpb == UFT_INF) && !b.sign)) return op; /* +infinity? */
				543	if (b.sign) { /* minus? */
				544	ieee_trap (TRAP_INV, 1, FPCR_INVD, ir); /* signal inv op */
				545	return CQNAN;
				546	}
				547	b.exp = ((b.exp - T_BIAS) >> 1) + T_BIAS - 1; /* result exponent */
				548	b.frac = fsqrt64 (b.frac, b.exp); /* result fraction */
				549	return ieee_rpack (&b, ir, dp); /* round and pack */
				550	}
				551
				552	/* Support routines */
				553
				554	t_bool ieee_unpack (t_uint64 op, UFP *r, uint32 ir)
				555	{
				556	r->sign = FPR_GETSIGN (op); /* get sign */
				557	r->exp = FPR_GETEXP (op); /* get exponent */
				558	r->frac = FPR_GETFRAC (op); /* get fraction */
				559	if (r->exp == 0) { /* exponent = 0? */
				560	if (r->frac == 0) return UFT_ZERO; /* frac = 0? then true 0 */
				561	if (fpcr & FPCR_DNZ) { /* denorms to 0? */
				562	r->frac = 0; /* clear fraction */
				563	return UFT_ZERO;
				564	}
				565	r->frac = r->frac << FPR_GUARD; /* guard fraction */
				566	ieee_norm (r); /* normalize dnorm */
				567	ieee_trap (TRAP_INV, 1, FPCR_INVD, ir); /* signal inv op */
				568	return UFT_DENORM;
				569	}
				570	if (r->exp == FPR_NAN) { /* exponent = max? */
				571	if (r->frac == 0) return UFT_INF; /* frac = 0? then inf */
				572	if (!(r->frac & QNAN)) /* signaling NaN? */
				573	ieee_trap (TRAP_INV, 1, FPCR_INVD, ir); /* signal inv op */
				574	return UFT_NAN;
				575	}
				576	r->frac = (r->frac \| FPR_HB) << FPR_GUARD; /* ins hidden bit, guard */
				577	return UFT_FIN; /* finite */
				578	}
				579
				580	/* Normalize - input must be zero, finite, or denorm */
				581
				582	void ieee_norm (UFP *r)
				583	{
				584	int32 i;
				585	static t_uint64 normmask[5] = {
				586	0xc000000000000000, 0xf000000000000000, 0xff00000000000000,
				587	0xffff000000000000, 0xffffffff00000000
				588	};
				589	static int32 normtab[6] = { 1, 2, 4, 8, 16, 32 };
				590
				591	r->frac = r->frac & M64;
				592	if (r->frac == 0) { /* if fraction = 0 */
				593	r->sign = 0;
				594	r->exp = 0; /* result is 0 */
				595	return;
				596	}
				597	while ((r->frac & UF_NM) == 0) { /* normalized? */
				598	for (i = 0; i < 5; i++) { /* find first 1 */
				599	if (r->frac & normmask[i]) break;
				600	}
				601	r->frac = r->frac << normtab[i]; /* shift frac */
				602	r->exp = r->exp - normtab[i]; /* decr exp */
				603	}
				604	return;
				605	}
				606
				607	/* Round and pack
				608
				609	Much of the treachery of the IEEE standard is buried here
				610	- Rounding modes (chopped, +infinity, nearest, -infinity)
				611	- Inexact (set if there are any rounding bits, regardless of rounding)
				612	- Overflow (result is infinite if rounded, max if not)
				613	- Underflow (no denorms!)
				614
				615	Underflow handling is particularly complicated
				616	- Result is always 0
				617	- UNF and INE are always set in FPCR
				618	- If /U is set,
				619	o If /S is clear, trap
				620	o If /S is set, UNFD is set, but UNFZ is clear, ignore UNFD and
				621	trap, because the hardware cannot produce denormals
				622	o If /S is set, UNFD is set, and UNFZ is set, do not trap
				623	- If /SUI is set, and INED is clear, trap */
				624
				625	t_uint64 ieee_rpack (UFP *r, uint32 ir, uint32 dp)
				626	{
				627	static const t_uint64 stdrnd[2] = { UF_SRND, UF_TRND };
				628	static const t_uint64 infrnd[2] = { UF_SINF, UF_TINF };
				629	static const int32 expmax[2] = { T_BIAS - S_BIAS + S_M_EXP - 1, T_M_EXP - 1 };
				630	static const int32 expmin[2] = { T_BIAS - S_BIAS, 0 };
				631	t_uint64 rndadd, rndbits, res;
				632	uint32 rndm;
				633
Mark Pizzolato	66dba79	2015-03-30 10:24:24 -0700	[diff] [blame]	634	if (r->frac == 0) /* result 0? */
				635	return ((t_uint64) r->sign << FPR_V_SIGN);
Mark Pizzolato	fffad7c	2012-03-19 16:05:24 -0700	[diff] [blame]	636	rndm = I_GETFRND (ir); /* inst round mode */
				637	if (rndm == I_FRND_D) rndm = FPCR_GETFRND (fpcr); /* dynamic? use FPCR */
				638	rndbits = r->frac & infrnd[dp]; /* isolate round bits */
				639	if (rndm == I_FRND_N) rndadd = stdrnd[dp]; /* round to nearest? */
				640	else if (((rndm == I_FRND_P) && !r->sign) \|\| /* round to inf and */
				641	((rndm == I_FRND_M) && r->sign)) /* right sign? */
				642	rndadd = infrnd[dp];
				643	else rndadd = 0;
				644	r->frac = (r->frac + rndadd) & M64; /* round */
				645	if ((r->frac & UF_NM) == 0) { /* carry out? */
				646	r->frac = (r->frac >> 1) \| UF_NM; /* renormalize */
				647	r->exp = r->exp + 1;
				648	}
				649	if (rndbits) /* inexact? */
				650	ieee_trap (TRAP_INE, Q_SUI (ir), FPCR_INED, ir); /* set inexact */
				651	if (r->exp > expmax[dp]) { /* ovflo? */
				652	ieee_trap (TRAP_OVF, 1, FPCR_OVFD, ir); /* set overflow trap */
				653	ieee_trap (TRAP_INE, Q_SUI (ir), FPCR_INED, ir); /* set inexact */
				654	if (rndadd) /* did we round? */
				655	return (r->sign? FMINF: FPINF); /* return infinity */
				656	return (r->sign? FMMAX: FPMAX); /* no, return max */
				657	}
				658	if (r->exp <= expmin[dp]) { /* underflow? */
				659	ieee_trap (TRAP_UNF, ir & I_FTRP_U, /* set underflow trap */
				660	(fpcr & FPCR_UNDZ)? FPCR_UNFD: 0, ir); /* (dsbl only if UNFZ set) */
				661	ieee_trap (TRAP_INE, Q_SUI (ir), FPCR_INED, ir); /* set inexact */
				662	return 0; /* underflow to +0 */
				663	}
				664	res = (((t_uint64) r->sign) << FPR_V_SIGN) \| /* form result */
				665	(((t_uint64) r->exp) << FPR_V_EXP) \|
				666	((r->frac >> FPR_GUARD) & FPR_FRAC);
				667	if ((rndm == I_FRND_N) && (rndbits == stdrnd[dp])) /* nearest and halfway? */
				668	res = res & ~1; /* clear lo bit */
				669	return res;
				670	}
				671
				672	/* IEEE arithmetic trap - only one can be set at a time! */
				673
				674	void ieee_trap (uint32 trap, uint32 instenb, uint32 fpcrdsb, uint32 ir)
				675	{
				676	fpcr = fpcr \| (trap << 19); /* FPCR to trap summ offset */
				677	if ((instenb == 0) \|\| /* not enabled in inst? ignore */
				678	((ir & I_FTRP_S) && (fpcr & fpcrdsb))) return; /* /S and disabled? ignore */
				679	arith_trap (trap, ir); /* set Alpha trap */
				680	return;
				681	}
				682
				683	/* Fraction square root routine - code from SoftFloat */
				684
				685	t_uint64 fsqrt64 (t_uint64 asig, int32 exp)
				686	{
				687	t_uint64 zsig, remh, reml, t;
				688	uint32 sticky = 0;
				689
				690	zsig = estimateSqrt32 (exp, (uint32) (asig >> 32));
				691
				692	/* Calculate the final answer in two steps. First, do one iteration of
				693	Newton's approximation. The divide-by-2 is accomplished by clever
				694	positioning of the operands. Then, check the bits just below the
				695	(double precision) rounding bit to see if they are close to zero
				696	(that is, the rounding bits are close to midpoint). If so, make
				697	sure that the result^2 is <below> the input operand */
				698
				699	asig = asig >> ((exp & 1)? 3: 2); /* leave 2b guard */
				700	zsig = estimateDiv128 (asig, 0, zsig << 32) + (zsig << 30 );
				701	if ((zsig & 0x1FF) <= 5) { /* close to even? */
				702	reml = uemul64 (zsig, zsig, &remh); /* result^2 */
				703	remh = asig - remh - (reml? 1:0); /* arg - result^2 */
				704	reml = NEG_Q (reml);
				705	while (Q_GETSIGN (remh) != 0) { /* if arg < result^2 */
				706	zsig = zsig - 1; /* decr result */
				707	t = (zsig << 1) \| 1; /* incr result^2 */
				708	reml = reml + t; /* and retest */
				709	remh = remh + (zsig >> 63) + ((reml < t)? 1: 0);
				710	}
				711	if ((remh \| reml) != 0 ) sticky = 1; /* not exact? */
				712	}
				713	return zsig;
				714	}
				715
				716	/* Estimate 32b SQRT
				717
				718	Calculate an approximation to the square root of the 32-bit significand given
				719	by 'a'. Considered as an integer, 'a' must be at least 2^31. If bit 0 of
				720	'exp' (the least significant bit) is 1, the integer returned approximates
				721	2^31*sqrt('a'/2^31), where 'a' is considered an integer. If bit 0 of 'exp'
				722	is 0, the integer returned approximates 2^31*sqrt('a'/2^30). In either
				723	case, the approximation returned lies strictly within +/-2 of the exact
				724	value. */
				725
				726	uint32 estimateSqrt32 (uint32 exp, uint32 a)
				727	{
				728	uint32 index, z;
				729	static const uint32 sqrtOdd[] = {
				730	0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
				731	0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
				732	};
				733	static const uint32 sqrtEven[] = {
				734	0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
				735	0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
				736	};
				737
				738	index = (a >> 27) & 0xF; /* bits<30:27> */
				739	if (exp & 1) { /* odd exp? */
				740	z = 0x4000 + (a >> 17) - sqrtOdd[index]; /* initial guess */
				741	z = ((a / z) << 14) + (z << 15); /* Newton iteration */
				742	a = a >> 1;
				743	}
				744	else {
				745	z = 0x8000 + (a >> 17) - sqrtEven[index]; /* initial guess */
				746	z = (a / z) + z; /* Newton iteration */
				747	z = (z >= 0x20000) ? 0xFFFF8000: (z << 15);
				748	if (z <= a) z = (a >> 1) \| 0x80000000;
				749	}
				750	return (uint32) ((((((t_uint64) a) << 31) / ((t_uint64) z)) + (z >> 1)) & M32);
				751	}
				752
				753	/* Estimate 128b unsigned divide */
				754
				755	t_uint64 estimateDiv128 (t_uint64 a0, t_uint64 a1, t_uint64 b)
				756	{
				757	t_uint64 b0, b1;
				758	t_uint64 rem0, rem1, term0, term1;
				759	t_uint64 z;
				760
				761	if (b <= a0) return 0xFFFFFFFFFFFFFFFF;
				762	b0 = b >> 32;
				763	z = ((b0 << 32) <= a0)? 0xFFFFFFFF00000000: ((a0 / b0) << 32);
				764	term1 = uemul64 (b, z, &term0);
				765	rem0 = a0 - term0 - (a1 < term1);
				766	rem1 = a1 - term1;
				767	while (Q_GETSIGN (rem0)) {
				768	z = z - ((t_uint64) 0x100000000);
				769	b1 = b << 32;
				770	rem1 = b1 + rem1;
				771	rem0 = b0 + rem0 + (rem1 < b1);
				772	}
				773	rem0 = (rem0 << 32) \| (rem1 >> 32);
				774	z \|= (((b0 << 32) <= rem0)? 0xFFFFFFFF : (rem0 / b0));
				775	return z;
				776	}