| /* hp2100_fp1.c: HP 1000 multiple-precision floating point routines | |
| Copyright (c) 2005-2008, J. David Bryan | |
| Permission is hereby granted, free of charge, to any person obtaining a | |
| copy of this software and associated documentation files (the "Software"), | |
| to deal in the Software without restriction, including without limitation | |
| the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
| and/or sell copies of the Software, and to permit persons to whom the | |
| Software is furnished to do so, subject to the following conditions: | |
| The above copyright notice and this permission notice shall be included in | |
| all copies or substantial portions of the Software. | |
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
| THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER | |
| IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |
| CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
| Except as contained in this notice, the name of the author shall not be used | |
| in advertising or otherwise to promote the sale, use or other dealings in | |
| this Software without prior written authorization from the author. | |
| 08-Jun-08 JDB Quieted bogus gcc warning in fp_exec | |
| 10-May-08 JDB Fixed uninitialized return in fp_accum when setting | |
| 19-Mar-08 JDB Reworked "complement" to avoid inlining bug in gcc-4.x | |
| 01-Dec-06 JDB Reworked into generalized multiple-precision ops for FPP | |
| 12-Oct-06 JDB Altered x_trun for F-Series FFP compatibility | |
| Added F-Series ..TCM FFP helpers | |
| Primary references: | |
| - HP 1000 M/E/F-Series Computers Engineering and Reference Documentation | |
| (92851-90001, Mar-1981) | |
| - HP 1000 M/E/F-Series Computers Technical Reference Handbook | |
| (5955-0282, Mar-1980) | |
| - DOS/RTE Relocatable Library Reference Manual | |
| (24998-90001, Oct-1981) | |
| This module implements multiple-precision floating-point operations to | |
| support the 1000 F-Series hardware Floating Point Processor. It employs | |
| 64-bit integer arithmetic for speed and simplicity of implementation. The | |
| host compiler must support 64-bit integers, and the HAVE_INT64 symbol must be | |
| defined during compilation. If this symbol is not defined, then FPP support | |
| is not available. | |
| HP 2100/1000 computers used a proprietary floating-point format. The 2100 | |
| had optional firmware that provided two-word floating-point add, subtract, | |
| multiply, and divide, as well as single-integer fix and float. The 1000-M/E | |
| provided the same two-word firmware operations as standard equipment. | |
| Three-word extended-precision instructions for the 2100 and 1000-M/E were | |
| provided by the optional Fast FORTRAN Processor firmware. | |
| The 1000-F substituted a hardware floating point processor for the firmware | |
| in previous machines. In addition to the two- and three-word formats, the | |
| F-Series introduced a four-word double-precision format. A five-word format | |
| that gained extra exponent range by unpacking the exponent from the mantissa | |
| was also available, although this capability was not documented in the user | |
| manual. In addition, the FPP improved the accuracy of floating-point | |
| calculations, as the firmware versions sacrificed a few bits of precision to | |
| gain speed. Consequently, operations on the F-Series may return results that | |
| differ slightly from the same operations on the M/E-Series or the 2100. | |
| F-Series units after date code 1920 also provided two-word double-integer | |
| instructions in firmware, as well as double-integer fix and float operations. | |
| The original 32-bit floating-point format is as follows: | |
| 15 14 0 | |
| +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | |
| |MS| mantissa high | : M | |
| +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | |
| | mantissa low | exponent |XS| : M + 1 | |
| +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | |
| 15 8 7 1 0 | |
| Both the 23-bit mantissa and the 7-bit exponent are in twos-complement form. The | |
| exponent sign bit has been rotated into the LSB of the second word. | |
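| As an illustration (a sketch equivalent to the TO_EXP macro defined later in | |
| this module, not taken from the HP documentation), the signed exponent may be | |
| recovered from the second word "w" as follows: | |
| exponent = (w >> 1) & 0x7F; (extract the 7-bit magnitude) | |
| if (w & 1) (the XS bit is set, so the exponent is negative) | |
| exponent = exponent | ~0x7F; (sign-extend to a negative value) | |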
| The extended-precision floating-point format is a 48-bit extension of the | |
| 32-bit format used for single precision. A packed extended-precision value | |
| consists of a 39-bit mantissa and a 7-bit exponent. The format is as | |
| follows: | |
| 15 14 0 | |
| +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | |
| |MS| mantissa high | : M | |
| +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | |
| | mantissa middle | : M + 1 | |
| +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | |
| | mantissa low | exponent |XS| : M + 2 | |
| +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | |
| 15 8 7 1 0 | |
| The double-precision floating-point format is similar to the 48-bit | |
| extended-precision format, although with a 55-bit mantissa: | |
| 15 14 0 | |
| +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | |
| |MS| mantissa high | : M | |
| +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | |
| | mantissa middle high | : M + 1 | |
| +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | |
| | mantissa middle low | : M + 2 | |
| +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | |
| | mantissa low | exponent |XS| : M + 3 | |
| +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | |
| 15 8 7 1 0 | |
| The FPP also supports a special five-word expanded-exponent format: | |
| 15 14 0 | |
| +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | |
| |MS| mantissa high | : M | |
| +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | |
| | mantissa middle high | : M + 1 | |
| +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | |
| | mantissa middle low | : M + 2 | |
| +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | |
| | mantissa low | : M + 3 | |
| +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | |
| | exponent |XS| : M + 4 | |
| +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | |
| 15 8 7 1 0 | |
| The exponent is a full 16-bit twos-complement value, but the allowed range is | |
| only 10 bits, i.e., -512 to +511. | |
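| Under simulation (see the "pack" and "unpack" routines below), the fifth word | |
| is formed by shifting the exponent left one place and rotating its sign into | |
| the LSB, i.e., fpk[4] = (exponent << 1) | (exponent < 0 ? 1 : 0), and is | |
| decoded by the reverse rotation. | |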
| In a normalized value, the sign and MSB of the mantissa differ. Zero is | |
| represented by all words = 0. | |
| Internally, unpacked floating-point values are contained in a structure | |
| having a signed 64-bit mantissa and a signed 32-bit exponent. Mantissas are | |
| left-justified with the unused bits masked to zero. Exponents are | |
| right-justified. The precision is indicated by the value of a structure | |
| field. | |
| HP terminology for the three-word floating-point format is inconsistent. Some | |
| documents refer to it as "double precision," while others use "extended | |
| precision." The instruction mnemonics begin with "X" (e.g., .XADD), | |
| suggesting the extended-precision term. | |
| HP apparently intended that the four-word double-precision format would be | |
| called "triple-precision," as the instruction mnemonics begin with "T" (e.g., | |
| ".TADD" for the four-word add instruction). The source files for the | |
| software simulations of these instructions for the M/E-Series also explicitly | |
| refer to "triple precision math." However, the engineering documentation and | |
| the F-Series reference manual both use the double-precision term. | |
| This module adopts the single/extended/double terminology and uses the | |
| initial letters of the instructions (F/X/T) to indicate the precision used. | |
| The FPP hardware consisted of two circuit boards that interfaced to the main | |
| CPU via the Microprogrammable Processor Port (MPP) that had been introduced | |
| with the 1000 E-Series. One board contained argument registers and ALUs, | |
| split into separate mantissa and exponent parts. The other contained a state | |
| machine sequencer. FPP results were copied automatically to the argument | |
| registers in addition to being available over the MPP, so that chained | |
| operations could be executed from these "accumulators" without reloading. | |
| The FPP operated independently of the CPU. An opcode, specifying one of the | |
| six operations (add, subtract, multiply, divide, fix, or float), was sent to | |
| the FPP, and a start command was given. Operands of appropriate precision | |
| were then supplied to the FPP. Once the operands were received, the FPP | |
| would execute and set a flag when the operation was complete. The result | |
| would then be retrieved from the FPP. The floating-point instruction | |
| firmware in the CPU initiated the desired FPP operation and handled operand | |
| reads from and result writes to main memory. | |
| Under simulation, "fp_exec" provides the six arithmetic operations analogous | |
| to FPP execution. The remainder of the functions are helpers that were | |
| provided by firmware in the 1000-F but that can reuse code needed to simulate | |
| the FPP hardware. As with the hardware, "fp_exec" retains the last result | |
| in an internal accumulator that may be referenced in subsequent operations. | |
| NOTE: this module also provides the floating-point support for the firmware | |
| single-precision 1000-M/E base set and extended-precision FFP instructions. | |
| Because the firmware and hardware implementations returned slightly different | |
| results, particularly with respect to round-off, conditional checks are | |
| implemented in the arithmetic routines. In some cases, entirely different | |
| algorithms are used to ensure fidelity with the real machines. Functionally, | |
| this means that the 2100/1000-M/E and 1000-F floating-point diagnostics are | |
| not interchangeable, and failures are to be expected if a diagnostic is run | |
| on the wrong machine. | |
| */ | |
| #include "hp2100_defs.h" | |
| #include "hp2100_cpu.h" | |
| #include "hp2100_cpu1.h" | |
| #include "hp2100_fp1.h" | |
| #if defined (HAVE_INT64) /* we need int64 support */ | |
| /* Field widths. */ | |
| #define IN_W_SIGN 1 | |
| #define IN_W_SMAGN 15 | |
| #define IN_W_DMAGN 31 | |
| #define FP_W_MSIGN 1 | |
| #define FP_W_FMANT 23 | |
| #define FP_W_XMANT 39 | |
| #define FP_W_TMANT 55 | |
| #define FP_W_EMANT 55 | |
| #define FP_W_EXPANDEXP 9 | |
| #define FP_W_EXP 7 | |
| #define FP_W_ESIGN 1 | |
| /* Starting bit numbers. */ | |
| #define IN_V_SIGN (64 - IN_W_SIGN) | |
| #define IN_V_SNUM (64 - IN_W_SIGN - IN_W_SMAGN) | |
| #define IN_V_DNUM (64 - IN_W_SIGN - IN_W_DMAGN) | |
| #define FP_V_FNUM (64 - FP_W_MSIGN - FP_W_FMANT - FP_W_EXP - FP_W_ESIGN) | |
| #define FP_V_XNUM (64 - FP_W_MSIGN - FP_W_XMANT - FP_W_EXP - FP_W_ESIGN) | |
| #define FP_V_TNUM (64 - FP_W_MSIGN - FP_W_TMANT - FP_W_EXP - FP_W_ESIGN) | |
| #define FP_V_ENUM (64 - FP_W_MSIGN - FP_W_EMANT - FP_W_EXP - FP_W_ESIGN) | |
| #define FP_V_MSIGN (64 - FP_W_MSIGN) | |
| #define FP_V_FMANT (64 - FP_W_MSIGN - FP_W_FMANT) | |
| #define FP_V_XMANT (64 - FP_W_MSIGN - FP_W_XMANT) | |
| #define FP_V_TMANT (64 - FP_W_MSIGN - FP_W_TMANT) | |
| #define FP_V_EMANT (64 - FP_W_MSIGN - FP_W_EMANT) | |
| #define FP_V_EXP 1 | |
| #define FP_V_ESIGN 0 | |
| /* Right-aligned field masks. */ | |
| #define IN_M_SIGN (((t_uint64) 1 << IN_W_SIGN) - 1) | |
| #define IN_M_SMAGN (((t_uint64) 1 << IN_W_SMAGN) - 1) | |
| #define IN_M_DMAGN (((t_uint64) 1 << IN_W_DMAGN) - 1) | |
| #define FP_M_MSIGN (((t_uint64) 1 << FP_W_MSIGN) - 1) | |
| #define FP_M_FMANT (((t_uint64) 1 << FP_W_FMANT) - 1) | |
| #define FP_M_XMANT (((t_uint64) 1 << FP_W_XMANT) - 1) | |
| #define FP_M_TMANT (((t_uint64) 1 << FP_W_TMANT) - 1) | |
| #define FP_M_EMANT (((t_uint64) 1 << FP_W_EMANT) - 1) | |
| #define FP_M_EXPANDEXP ((1 << FP_W_EXPANDEXP) - 1) | |
| #define FP_M_EXP ((1 << FP_W_EXP) - 1) | |
| #define FP_M_ESIGN ((1 << FP_W_ESIGN) - 1) | |
| /* In-place field masks. */ | |
| #define IN_SIGN (IN_M_SIGN << IN_V_SIGN) | |
| #define IN_SMAGN (IN_M_SMAGN << IN_V_SNUM) | |
| #define IN_DMAGN (IN_M_DMAGN << IN_V_DNUM) | |
| #define FP_MSIGN (FP_M_MSIGN << FP_V_MSIGN) | |
| #define FP_FMANT (FP_M_FMANT << FP_V_FMANT) | |
| #define FP_XMANT (FP_M_XMANT << FP_V_XMANT) | |
| #define FP_TMANT (FP_M_TMANT << FP_V_TMANT) | |
| #define FP_EMANT (FP_M_EMANT << FP_V_EMANT) | |
| #define FP_EXP (FP_M_EXP << FP_V_EXP) | |
| #define FP_ESIGN (FP_M_ESIGN << FP_V_ESIGN) | |
| /* In-place record masks. */ | |
| #define IN_SSMAGN (IN_SIGN | IN_SMAGN) | |
| #define IN_SDMAGN (IN_SIGN | IN_DMAGN) | |
| #define FP_SFMANT (FP_MSIGN | FP_FMANT) | |
| #define FP_SXMANT (FP_MSIGN | FP_XMANT) | |
| #define FP_STMANT (FP_MSIGN | FP_TMANT) | |
| #define FP_SEMANT (FP_MSIGN | FP_EMANT) | |
| #define FP_SEXP (FP_ESIGN | FP_EXP) | |
| /* Minima and maxima. */ | |
| #define FP_ONEHALF ((t_int64) 1 << (FP_V_MSIGN - 1)) /* mantissa = 0.5 */ | |
| #define FP_MAXPMANT ((t_int64) FP_EMANT) /* maximum pos mantissa */ | |
| #define FP_MAXNMANT ((t_int64) FP_MSIGN) /* maximum neg mantissa */ | |
| #define FP_MAXPEXP (FP_M_EXPANDEXP) /* maximum pos expanded exponent */ | |
| #define FP_MAXNEXP (-(FP_MAXPEXP + 1)) /* maximum neg expanded exponent */ | |
| /* Floating-point helpers. */ | |
| #define DENORM(x) ((((x) ^ (x) << 1) & FP_MSIGN) == 0) | |
| #define TO_EXP(e) (int8) ((e >> FP_V_EXP & FP_M_EXP) | \ | |
| (e & FP_M_ESIGN ? ~FP_M_EXP : 0)) | |
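| /* DENORM is true when the mantissa sign bit and the bit below it are equal, | |
| i.e., when the value is not in normal form. TO_EXP extracts the 7-bit | |
| exponent field from a packed exponent/sign word and sign-extends it | |
| according to the XS bit, yielding a signed 8-bit exponent. */ | |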
| /* Property constants. */ | |
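| /* These tables are indexed by OPSIZE value; the casts in "fp_prec" below | |
| imply the ordering in_s = 0, in_d = 1, fp_f = 2, fp_x = 3, fp_t = 4, and | |
| fp_e = 5, so entries 0-1 describe the integer operand sizes and entries | |
| 2-5 the floating-point precisions. */ | |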
| static const t_int64 p_half_lsb[6] = { ((t_int64) 1 << IN_V_SNUM) - 1, /* different than FP! */ | |
| ((t_int64) 1 << IN_V_DNUM) - 1, /* different than FP! */ | |
| (t_int64) 1 << (FP_V_FMANT - 1), | |
| (t_int64) 1 << (FP_V_XMANT - 1), | |
| (t_int64) 1 << (FP_V_TMANT - 1), | |
| (t_int64) 1 << (FP_V_EMANT - 1) }; | |
| static const t_int64 n_half_lsb[6] = { 0, | |
| 0, | |
| ((t_int64) 1 << (FP_V_FMANT - 1)) - 1, | |
| ((t_int64) 1 << (FP_V_XMANT - 1)) - 1, | |
| ((t_int64) 1 << (FP_V_TMANT - 1)) - 1, | |
| ((t_int64) 1 << (FP_V_EMANT - 1)) - 1 }; | |
| static const uint32 op_start[6] = { IN_V_SNUM, | |
| IN_V_DNUM, | |
| FP_V_FMANT, | |
| FP_V_XMANT, | |
| FP_V_TMANT, | |
| FP_V_EMANT }; | |
| static const t_int64 mant_mask[6] = { IN_SSMAGN, | |
| IN_SDMAGN, | |
| FP_SFMANT, | |
| FP_SXMANT, | |
| FP_STMANT, | |
| FP_SEMANT }; | |
| static const uint32 op_bits[6] = { IN_W_SMAGN, | |
| IN_W_DMAGN, | |
| FP_W_FMANT + FP_W_MSIGN, | |
| FP_W_XMANT + FP_W_MSIGN, | |
| FP_W_TMANT + FP_W_MSIGN, | |
| FP_W_EMANT + FP_W_MSIGN }; | |
| static const t_int64 op_mask[6] = { ~(t_int64) 0 << IN_V_SNUM, | |
| ~(t_int64) 0 << IN_V_DNUM, | |
| ~(t_int64) 0 << FP_V_FNUM, | |
| ~(t_int64) 0 << FP_V_XNUM, | |
| ~(t_int64) 0 << FP_V_TNUM, | |
| ~(t_int64) 0 << FP_V_ENUM }; | |
| static const uint32 int_p_max[2] = { IN_M_SMAGN, | |
| IN_M_DMAGN }; | |
| /* Internal unpacked floating-point representation. */ | |
| typedef struct { | |
| t_int64 mantissa; | |
| int32 exponent; | |
| OPSIZE precision; | |
| } FPU; | |
| /* Low-level helper routines. */ | |
| /* Arithmetic shift right for mantissa only. | |
| Returns TRUE if any one-bits are shifted out (for F-series only). | |
| */ | |
| static t_bool asr (FPU *operand, int32 shift) | |
| { | |
| t_uint64 mask; | |
| t_bool bits_lost; | |
| if (UNIT_CPU_MODEL == UNIT_1000_F) { /* F-series? */ | |
| mask = ((t_uint64) 1 << shift) - 1; /* mask for lost bits */ | |
| bits_lost = ((operand->mantissa & mask) != 0); /* flag if any lost */ | |
| } | |
| else | |
| bits_lost = FALSE; | |
| operand->mantissa = operand->mantissa >> shift; /* mantissa is int, so ASR */ | |
| return bits_lost; | |
| } | |
| /* Logical shift right for mantissa and exponent. | |
| Shifts mantissa and corrects exponent for mantissa overflow. | |
| Returns TRUE if any one-bits are shifted out (for F-series only). | |
| */ | |
| static t_bool lsrx (FPU *operand, int32 shift) | |
| { | |
| t_uint64 mask; | |
| t_bool bits_lost; | |
| if (UNIT_CPU_MODEL == UNIT_1000_F) { /* F-series? */ | |
| mask = ((t_uint64) 1 << shift) - 1; /* mask for lost bits */ | |
| bits_lost = ((operand->mantissa & mask) != 0); /* flag if any lost */ | |
| } | |
| else | |
| bits_lost = FALSE; | |
| operand->mantissa = (t_uint64) operand->mantissa >> shift; /* uint, so LSR */ | |
| operand->exponent = operand->exponent + shift; /* correct exponent */ | |
| return bits_lost; | |
| } | |
| /* Unpack an operand into a long integer. | |
| Returns a left-aligned integer or mantissa. Does not mask to precision; this | |
| should be done subsequently if desired. | |
| */ | |
| static t_int64 unpack_int (OP packed, OPSIZE precision) | |
| { | |
| uint32 i; | |
| t_uint64 unpacked = 0; | |
| if (precision == in_s) | |
| unpacked = (t_uint64) packed.word << 48; /* unpack single integer */ | |
| else if (precision == in_d) | |
| unpacked = (t_uint64) packed.dword << 32; /* unpack double integer */ | |
| else { | |
| if (precision == fp_e) /* five word operand? */ | |
| precision = fp_t; /* only four mantissa words */ | |
| for (i = 0; i < 4; i++) /* unpack fp 2 to 4 words */ | |
| if (i < TO_COUNT (precision)) | |
| unpacked = unpacked << 16 | packed.fpk[i]; | |
| else | |
| unpacked = unpacked << 16; | |
| } | |
| return (t_int64) unpacked; | |
| } | |
| /* Unpack a packed operand. | |
| The packed value is split into separate mantissa and exponent variables. The | |
| multiple words of the mantissa are concatenated into a single 64-bit signed | |
| value, and the exponent is shifted with recovery of the sign. | |
| */ | |
| static FPU unpack (OP packed, OPSIZE precision) | |
| { | |
| FPU unpacked; | |
| unpacked.precision = precision; /* set value's precision */ | |
| unpacked.mantissa = /* unpack and mask mantissa */ | |
| unpack_int (packed, precision) & mant_mask[precision]; | |
| switch (precision) { | |
| case fp_f: | |
| case fp_x: | |
| case fp_t: | |
| unpacked.exponent = /* unpack exponent from correct word */ | |
| TO_EXP (packed.fpk[(uint32) precision - 1]); | |
| break; | |
| case fp_e: | |
| unpacked.exponent = /* unpack expanded exponent */ | |
| (int16) (packed.fpk[4] >> FP_V_EXP | /* rotate sign into place */ | |
| (packed.fpk[4] & 1 ? SIGN : 0)); | |
| break; | |
| case fp_a: /* no action for value in accum */ | |
| case in_s: /* integers don't use exponent */ | |
| case in_d: /* integers don't use exponent */ | |
| default: | |
| unpacked.exponent = 0; | |
| break; | |
| } | |
| return unpacked; | |
| } | |
| /* Pack a long integer into an operand. */ | |
| static OP pack_int (t_int64 unpacked, OPSIZE precision) | |
| { | |
| int32 i; | |
| OP packed; | |
| if (precision == in_s) | |
| packed.word = (uint16) (unpacked >> 48) & DMASK; /* pack single integer */ | |
| else if (precision == in_d) | |
| packed.dword = (uint32) (unpacked >> 32) & DMASK32; /* pack double integer */ | |
| else { | |
| if (precision == fp_e) /* five word operand? */ | |
| precision = fp_t; /* only four mantissa words */ | |
| for (i = 3; i >= 0; i--) { /* pack fp 2 to 4 words */ | |
| packed.fpk[i] = (uint16) unpacked & DMASK; | |
| unpacked = unpacked >> 16; | |
| } | |
| } | |
| return packed; | |
| } | |
| /* Pack an unpacked floating-point number. | |
| The 64-bit mantissa is split into the appropriate number of 16-bit words. | |
| The exponent is rotated to incorporate the sign bit and merged into the | |
| appropriate word. | |
| */ | |
| static OP pack (FPU unpacked) | |
| { | |
| OP packed; | |
| uint8 exp; | |
| packed = pack_int (unpacked.mantissa, unpacked.precision); /* pack mantissa */ | |
| exp = ((uint8) unpacked.exponent << FP_V_EXP) | /* rotate exponent */ | |
| ((unpacked.exponent < 0) << FP_V_ESIGN); | |
| switch (unpacked.precision) { /* merge exponent into correct word */ | |
| case in_s: /* no action for integers */ | |
| case in_d: | |
| break; | |
| case fp_f: /* merge into last word */ | |
| case fp_x: | |
| case fp_t: | |
| packed.fpk[(uint32) unpacked.precision - 1] = | |
| (packed.fpk[(uint32) unpacked.precision - 1] & ~FP_SEXP) | exp; | |
| break; | |
| case fp_e: /* place in separate word */ | |
| packed.fpk[4] = ((uint16) unpacked.exponent << FP_V_EXP) | | |
| ((unpacked.exponent < 0) << FP_V_ESIGN); | |
| break; | |
| case fp_a: /* no action for value in accum */ | |
| break; | |
| } | |
| return packed; | |
| } | |
| /* Normalize an unpacked floating-point number. | |
| Floating-point numbers are in normal form if the sign bit and the MSB of the | |
| mantissa differ. Unnormalized numbers are shifted as needed with appropriate | |
| exponent modification. | |
| */ | |
| static void normalize (FPU *unpacked) | |
| { | |
| if (unpacked->mantissa) /* non-zero? */ | |
| while (DENORM (unpacked->mantissa)) { /* normal form? */ | |
| unpacked->exponent = unpacked->exponent - 1; /* no, so left shift */ | |
| unpacked->mantissa = unpacked->mantissa << 1; | |
| } | |
| else | |
| unpacked->exponent = 0; /* clean for zero */ | |
| return; | |
| } | |
| /* Round an unpacked floating-point number and check for overflow. | |
| An unpacked floating-point number is rounded by adding one-half of the LSB | |
| value, maintaining symmetry around zero. If rounding resulted in a mantissa | |
| overflow, the result is logically shifted to the right with an appropriate | |
| exponent modification. Finally, the result is checked for exponent underflow | |
| or overflow, and the appropriate approximation (zero or infinity) is | |
| returned. | |
| Rounding in hardware involves a special mantissa extension register that | |
| holds three "guard" bits and one "sticky" bit. These represent the value of | |
| bits right-shifted out of the mantissa register. Under simulation, we track | |
| such right-shifts and utilize the lower eight bits of the 64-bit mantissa | |
| value to simulate the extension register. | |
| Overflow depends on whether the FPP expanded-exponent form is being used | |
| (this expands the exponent range by two bits). If overflow is detected, the | |
| value representing infinity is dependent on whether the operation is on | |
| behalf of the Fast FORTRAN Processor. The F-Series FPP returns positive | |
| infinity on both positive and negative overflow for all precisions. The 2100 | |
| and M/E-Series FFPs return negative infinity on negative overflow of | |
| extended-precision values. Single-precision overflows on these machines | |
| always return positive infinity. | |
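| Under simulation, "infinity" is represented by the largest-magnitude mantissa | |
| of the appropriate sign (FP_MAXPMANT or FP_MAXNMANT) paired with the largest | |
| standard-range exponent (+127). | |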
| The number to be rounded must be normalized upon entry. | |
| */ | |
| static uint32 roundovf (FPU *unpacked, t_bool expand) | |
| { | |
| uint32 overflow; | |
| t_bool sign; | |
| sign = (unpacked->mantissa < 0); /* save mantissa sign */ | |
| if (sign) /* round and mask the number */ | |
| unpacked->mantissa = | |
| (unpacked->mantissa + n_half_lsb[unpacked->precision]) & | |
| mant_mask[unpacked->precision]; | |
| else | |
| unpacked->mantissa = | |
| (unpacked->mantissa + p_half_lsb[unpacked->precision]) & | |
| mant_mask[unpacked->precision]; | |
| if (sign != (unpacked->mantissa < 0)) /* mantissa overflow? */ | |
| lsrx (unpacked, 1); /* correct by shifting */ | |
| else | |
| normalize (unpacked); /* renorm may be needed */ | |
| if (unpacked->mantissa == 0) { /* result zero? */ | |
| unpacked->mantissa = 0; /* return zero */ | |
| unpacked->exponent = 0; | |
| overflow = 0; /* with overflow clear */ | |
| } | |
| else if (unpacked->exponent < /* result underflow? */ | |
| (FP_MAXNEXP >> (expand ? 0 : 2))) { | |
| unpacked->mantissa = 0; /* return zero */ | |
| unpacked->exponent = 0; | |
| overflow = 1; /* and set overflow */ | |
| } | |
| else if (unpacked->exponent > /* result overflow? */ | |
| (FP_MAXPEXP >> (expand ? 0 : 2))) { | |
| if (sign && /* negative value? */ | |
| (unpacked->precision == fp_x) && /* extended precision? */ | |
| (UNIT_CPU_MODEL != UNIT_1000_F)) { /* not F-series? */ | |
| unpacked->mantissa = FP_MAXNMANT; /* return negative infinity */ | |
| unpacked->exponent = FP_MAXPEXP & FP_M_EXP; | |
| } | |
| else { | |
| unpacked->mantissa = FP_MAXPMANT; /* return positive infinity */ | |
| unpacked->exponent = FP_MAXPEXP & FP_M_EXP; | |
| } | |
| overflow = 1; /* and set overflow */ | |
| } | |
| else | |
| overflow = 0; /* value is in range */ | |
| return overflow; | |
| } | |
| /* Normalize, round, and pack an unpacked floating-point number. */ | |
| static uint32 nrpack (OP *packed, FPU unpacked, t_bool expand) | |
| { | |
| uint32 overflow; | |
| normalize (&unpacked); /* normalize for rounding */ | |
| overflow = roundovf (&unpacked, expand); /* round and check for overflow */ | |
| *packed = pack (unpacked); /* pack result */ | |
| return overflow; | |
| } | |
| /* Low-level arithmetic routines. */ | |
| /* Complement an unpacked number. */ | |
| static void complement (FPU *result) | |
| { | |
| if (result->mantissa == FP_MAXNMANT) { /* maximum negative? */ | |
| result->mantissa = FP_ONEHALF; /* complement of -1.0 * 2 ^ n */ | |
| result->exponent = result->exponent + 1; /* is 0.5 * 2 ^ (n + 1) */ | |
| } | |
| else | |
| result->mantissa = -result->mantissa; /* negate mantissa */ | |
| return; | |
| } | |
| /* Add two unpacked numbers. | |
| The mantissas are first aligned if necessary by scaling the smaller of the | |
| two operands. If the magnitude of the difference between the exponents is | |
| greater than the number of significant bits, then the smaller number has been | |
| scaled to zero (swamped), and so the sum is simply the larger operand. | |
| Otherwise, the sum is computed and checked for overflow, which has occurred if | |
| the signs of the operands are the same but differ from that of the result. | |
| Scaling and renormalization are performed if overflow occurred. | |
| */ | |
| static void add (FPU *sum, FPU augend, FPU addend) | |
| { | |
| int32 magn; | |
| t_bool bits_lost; | |
| if (augend.mantissa == 0) | |
| *sum = addend; /* X + 0 = X */ | |
| else if (addend.mantissa == 0) | |
| *sum = augend; /* 0 + X = X */ | |
| else { | |
| magn = augend.exponent - addend.exponent; /* difference exponents */ | |
| if (magn > 0) { /* addend smaller? */ | |
| *sum = augend; /* preset augend */ | |
| bits_lost = asr (&addend, magn); /* align addend */ | |
| } | |
| else { /* augend smaller? */ | |
| *sum = addend; /* preset addend */ | |
| magn = -magn; /* make difference positive */ | |
| bits_lost = asr (&augend, magn); /* align augend */ | |
| } | |
| if (magn <= (int32) op_bits[augend.precision]) { /* value swamped? */ | |
| sum->mantissa = /* no, add mantissas */ | |
| addend.mantissa + augend.mantissa; | |
| if (((addend.mantissa < 0) == (augend.mantissa < 0)) && /* mantissa overflow? */ | |
| ((addend.mantissa < 0) != (sum->mantissa < 0))) { | |
| bits_lost = bits_lost | lsrx (sum, 1); /* restore value */ | |
| sum->mantissa = /* restore sign */ | |
| sum->mantissa | (addend.mantissa & FP_MSIGN); | |
| } | |
| if (bits_lost) /* any bits lost? */ | |
| sum->mantissa = sum->mantissa | 1; /* include one for rounding */ | |
| } | |
| } | |
| return; | |
| } | |
| /* Multiply two unpacked numbers. | |
| The single-precision firmware (FMP) operates differently from the firmware | |
| extended-precision (.XMPY) and the hardware multiplies of any precision. | |
| Firmware implementations form 16-bit x 16-bit = 32-bit partial products and | |
| sum them to form the result. The hardware uses a series of shifts and adds. | |
| This means that firmware FMP and hardware FMP return slightly different | |
| values, as may be seen by attempting to run the firmware FMP diagnostic on | |
| the FPP. | |
| The FMP microcode calls a signed multiply routine to calculate three partial | |
| products (all but LSB * LSB). Because the LSBs are unsigned, i.e., all bits | |
| significant, the two MSB * LSB products are calculated using LSB/2. The | |
| unsigned right-shift ensures a positive LSB with no significant bits lost, | |
| because the lower eight bits are unused (they held the vacated exponent). In | |
| order to sum the partial products, the LSB of the result of MSB * MSB is also | |
| right-shifted before addition. Note, though, that this loses a significant | |
| bit. After summation, the result is left-shifted to correct for the original | |
| right shifts. | |
| The .XMPY microcode negates both operands as necessary to produce positive | |
| values and then forms six of the nine 16-bit x 16-bit = 32-bit unsigned | |
| multiplications required for a full 96-bit product. Given a 48-bit | |
| multiplicand "a1a2a3" and a 48-bit multiplier "b1b2b3", the firmware performs | |
| these calculations to develop a 48-bit product: | |
| a1 a2 a3 | |
| +-------+-------+-------+ | |
| b1 b2 b3 | |
| +-------+-------+-------+ | |
| _________________________ | |
| a1 * b3 [p1] | |
| +-------+-------+ | |
| a2 * b2 [p2] | |
| +-------+-------+ | |
| a1 * b2 [p3] | |
| +-------+-------+ | |
| a3 * b1 [p4] | |
| +-------+-------+ | |
| a2 * b1 [p5] | |
| +-------+-------+ | |
| a1 * b1 [p6] | |
| +-------+-------+ | |
| _________________________________ | |
| product | |
| +-------+-------+-------+ | |
| The least-significant words of partial products [p1], [p2], and [p4] are used | |
| only to develop a carry bit into the 48-bit sum. The product is complemented | |
| as necessary to restore the sign. | |
| The basic FPP hardware algorithm scans the multiplier and adds a shifted copy | |
| of the multiplicand whenever a one-bit is detected. To avoid successive adds | |
| when a string of ones is encountered (because adds are more expensive than | |
| shifts), the hardware instead adds the multiplicand shifted by N+1+P and | |
| subtracts the multiplicand shifted by P to obtain the equivalent value with a | |
| maximum of two operations. | |
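| For example (not from the HP documentation), a run of ones occupying | |
| multiplier bit positions P through N+P contributes (2 ** (N+P+1) - 2 ** P) | |
| times the multiplicand, so one add and one subtract replace N+1 successive | |
| adds. | |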
| Instead of implementing either the .XMPY firmware algorithm or the hardware | |
| shift-and-add algorithm directly, it is more efficient under simulation to | |
| use 32 x 32 = 64-bit multiplications, thereby reducing the number required | |
| from six to four (64-bit "ahal" x 64-bit "bhbl"): | |
| ah al | |
| +-------+-------+ | |
| bh bl | |
| +-------+-------+ | |
| _________________ | |
| al * bl [ll] | |
| +-------+-------+ | |
| ah * bl [hl] | |
| +-------+-------+ | |
| al * bh [lh] | |
| +-------+-------+ | |
| ah * bh [hh] | |
| +-------+-------+ | |
| _________________________________ | |
| product | |
| +-------+-------+ | |
| However, the FMP algorithm is implemented directly from the microcode to | |
| preserve the fidelity of the simulation, i.e., to lose the same amount | |
| of precision. | |
| */ | |
| static void multiply (FPU *product, FPU multiplicand, FPU multiplier) | |
| { | |
| uint32 ah, al, bh, bl, sign = 0; | |
| t_uint64 hh, hl, lh, ll, carry; | |
| int16 ch, cl, dh, dl; | |
| t_bool firmware; | |
| product->precision = multiplicand.precision; /* set precision */ | |
| if ((multiplicand.mantissa == 0) || /* 0 * X = 0 */ | |
| (multiplier.mantissa == 0)) /* X * 0 = 0 */ | |
| product->mantissa = product->exponent = 0; | |
| else { | |
| firmware = (UNIT_CPU_MODEL != UNIT_1000_F); /* set firmware flag */ | |
| if (!firmware || (product->precision != fp_f)) { /* hardware? */ | |
| if (multiplicand.mantissa < 0) { /* negative? */ | |
| complement (&multiplicand); /* complement operand */ | |
| sign = ~sign; /* track sign */ | |
| } | |
| if (multiplier.mantissa < 0) { /* negative? */ | |
| complement (&multiplier); /* complement operand */ | |
| sign = ~sign; /* track sign */ | |
| } | |
| } | |
| product->exponent = /* compute exponent */ | |
| multiplicand.exponent + multiplier.exponent + 1; | |
| ah = (uint32) (multiplicand.mantissa >> 32); /* split multiplicand */ | |
| al = (uint32) (multiplicand.mantissa & DMASK32); /* into high and low parts */ | |
| bh = (uint32) (multiplier.mantissa >> 32); /* split multiplier */ | |
| bl = (uint32) (multiplier.mantissa & DMASK32); /* into high and low parts */ | |
| if (firmware && (product->precision == fp_f)) { /* single-precision firmware? */ | |
| ch = (int16) (ah >> 16) & DMASK; /* split 32-bit multiplicand */ | |
| cl = (int16) (ah & 0xfffe); /* into high and low parts */ | |
| dh = (int16) (bh >> 16) & DMASK; /* split 32-bit multiplier */ | |
| dl = (int16) (bh & 0xfffe); /* into high and low parts */ | |
| hh = (t_uint64) (((int32) ch * dh) & ~1); /* form cross products */ | |
| hl = (t_uint64) (((t_int64) ch * (t_int64) (uint16) dl + | |
| (t_int64) dh * (t_int64) (uint16) cl) & | |
| 0xfffffffffffe0000); | |
| product->mantissa = (t_uint64) (((t_int64) hh << 32) + /* sum partials */ | |
| ((t_int64) hl << 16)); | |
| } | |
| else { | |
| hh = ((t_uint64) ah * bh); /* form four cross products */ | |
| hl = ((t_uint64) ah * bl); /* using 32 x 32 = */ | |
| lh = ((t_uint64) al * bh); /* 64-bit multiplies */ | |
| ll = ((t_uint64) al * bl); | |
| carry = ((ll >> 32) + (uint32) hl + (uint32) lh) >> 32; /* form carry */ | |
| product->mantissa = hh + (hl >> 32) + (lh >> 32) + carry; /* sum partials */ | |
| if (sign) /* negate if required */ | |
| complement (product); | |
| } | |
| } | |
| return; | |
| } | |
| /* Divide two unpacked numbers. | |
| As with multiply, the single-precision firmware (FDV) operates differently | |
| from the firmware extended-precision (.XDIV) and the hardware divisions of | |
| any precision. Firmware implementations utilize a "divide and correct" | |
| algorithm, wherein the quotient is estimated and then corrected by comparing | |
| the dividend to the product of the quotient and the divisor. The hardware | |
| uses a series of shifts and subtracts. This means that firmware FDV and | |
| hardware FDV once again return slightly different values. | |
| Under simulation, the classic divide-and-correct method is employed, using | |
| 64-bit / 32-bit = 32-bit divisions. This involves dividing the 64-bit | |
| dividend "a1a2a3a4" by the first 32-bit digit "b1b2" of the 64-bit divisor | |
| "b1b2b3b4". The resulting 32-bit quotient is ... | |
| The microcoded single-precision division avoids overflows by right-shifting | |
| some values, which leads to a loss of precision in the LSBs. We duplicate | |
| the firmware algorithm here to preserve the fidelity of the simulation. | |
| */ | |
| static void divide (FPU *quotient, FPU dividend, FPU divisor) | |
| { | |
| uint32 sign = 0; | |
| t_int64 bh, bl, r1, r0, p1, p0; | |
| t_uint64 q, q1, q0; | |
| t_bool firmware; | |
| int32 ah, div, cp; | |
| int16 dh, dl, pq1, pq2, cq; | |
| quotient->precision = dividend.precision; /* set precision */ | |
| if (divisor.mantissa == 0) { /* division by zero? */ | |
| if (dividend.mantissa < 0) | |
| quotient->mantissa = FP_MSIGN; /* return minus infinity */ | |
| else | |
| quotient->mantissa = ~FP_MSIGN; /* or plus infinity */ | |
| quotient->exponent = FP_MAXPEXP + 1; | |
| } | |
| else if (dividend.mantissa == 0) /* dividend zero? */ | |
| quotient->mantissa = quotient->exponent = 0; /* yes; result is zero */ | |
| else { | |
| firmware = (UNIT_CPU_MODEL != UNIT_1000_F); /* set firmware flag */ | |
| if (!firmware || (quotient->precision != fp_f)) { /* hardware or FFP? */ | |
| if (dividend.mantissa < 0) { /* negative? */ | |
| complement (÷nd); /* complement operand */ | |
| sign = ~sign; /* track sign */ | |
| } | |
| if (divisor.mantissa < 0) { /* negative? */ | |
| complement (&divisor); /* complement operand */ | |
| sign = ~sign; /* track sign */ | |
| } | |
| } | |
| quotient->exponent = /* division subtracts exponents */ | |
| dividend.exponent - divisor.exponent; | |
| bh = divisor.mantissa >> 32; /* split divisor */ | |
| bl = divisor.mantissa & DMASK32; /* into high and low parts */ | |
| if (firmware && (quotient->precision == fp_f)) { /* single-precision firmware? */ | |
| quotient->exponent = quotient->exponent + 1; /* fix exponent */ | |
| ah = (int32) (dividend.mantissa >> 32); /* split dividend */ | |
| dh = (int16) (bh >> 16); /* split divisor again */ | |
| dl = (int16) bh; | |
| div = ah >> 2; /* ASR 2 to prevent overflow */ | |
| pq1 = div / dh; /* form first partial quotient */ | |
| div = ((div % dh) & ~1) << 15; /* ASR 1, move rem to upper */ | |
| pq2 = div / dh; /* form second partial quotient */ | |
| div = (uint16) dl << 13; /* move divisor LSB to upper, LSR 3 */ | |
| cq = div / dh; /* form correction quotient */ | |
| cp = -cq * pq1; /* and correction product */ | |
| cp = (((cp >> 14) & ~3) + (int32) pq2) << 1; /* add corr prod and 2nd partial quo */ | |
| quotient->mantissa = /* add 1st partial quo and align */ | |
| (t_uint64) (((int32) pq1 << 16) + cp) << 32; | |
| } | |
| else { /* hardware or FFP */ | |
| q1 = (t_uint64) (dividend.mantissa / bh); /* form 1st trial quotient */ | |
| r1 = dividend.mantissa % bh; /* and remainder */ | |
| p1 = (r1 << 24) - (bl >> 8) * q1; /* calculate correction */ | |
| while (p1 < 0) { /* correction needed? */ | |
| q1 = q1 - 1; /* trial quotient too large */ | |
| p1 = p1 + (divisor.mantissa >> 8); /* increase remainder */ | |
| } | |
| q0 = (t_uint64) ((p1 << 8) / bh); /* form 2nd trial quotient */ | |
| r0 = (p1 << 8) % bh; /* and remainder */ | |
| p0 = (r0 << 24) - (bl >> 8) * q0; /* calculate correction */ | |
| while (p0 < 0) { /* correction needed? */ | |
| q0 = q0 - 1; /* trial quotient too large */ | |
| p0 = p0 + (divisor.mantissa >> 8); /* increase remainder */ | |
| } | |
| q = (q1 << 32) + q0; /* sum quotient digits */ | |
| if (q1 & 0xffffffff00000000) { /* did we lose MSB? */ | |
| q = (q >> 1) | 0x8000000000000000; /* shift right and replace bit */ | |
| quotient->exponent = quotient->exponent + 1;/* bump exponent for shift */ | |
| } | |
| if (q & 0x8000000000000000) /* lose normalization? */ | |
| q = q >> 1; /* correct */ | |
| quotient->mantissa = (t_int64) q; | |
| } | |
| if (sign) | |
| complement (quotient); /* negate if required */ | |
| } | |
| return; | |
| } | |
| /* Fix an unpacked number. | |
| A floating-point value is converted to an integer. The desired precision of | |
| the result (single or double integer) must be set before calling. | |
| Values less than 0.5 (i.e., with negative exponents) underflow to zero. If | |
| the value exceeds the specified integer range, the maximum integer value is | |
| returned and overflow is set. Otherwise, the floating-point value is | |
| right-shifted to zero the exponent. The result is then rounded. | |
| */ | |
| static uint32 fix (FPU *result, FPU operand) | |
| { | |
| uint32 overflow; | |
| t_bool bits_lost; | |
| if (operand.exponent < 0) { /* value < 0.5? */ | |
| result->mantissa = 0; /* result rounds to zero */ | |
| overflow = 0; /* clear for underflow */ | |
| } | |
| else if (operand.exponent > /* value > integer size? */ | |
| (int32) op_bits[result->precision]) { | |
| result->mantissa = /* return max int value */ | |
| (t_uint64) int_p_max[result->precision] << | |
| op_start[result->precision]; | |
| overflow = 1; /* and set overflow */ | |
| } | |
| else { /* value in range */ | |
| bits_lost = asr (&operand, /* shift to zero exponent */ | |
| op_bits[result->precision] - operand.exponent); | |
| if (operand.mantissa < 0) { /* value negative? */ | |
| if (bits_lost) /* bits lost? */ | |
| operand.mantissa = operand.mantissa | 1; /* include one for rounding */ | |
| operand.mantissa = operand.mantissa + /* round result */ | |
| p_half_lsb[result->precision]; | |
| } | |
| result->mantissa = operand.mantissa & /* mask to precision */ | |
| op_mask[result->precision]; | |
| overflow = 0; | |
| } | |
| result->exponent = 0; /* tidy up for integer value */ | |
| return overflow; | |
| } | |
| /* Float an integer to an unpacked number. | |
| An integer is converted to a floating-point value. The desired precision of | |
| the result must be set before calling. | |
| Conversion is simply a matter of copying the integer value, setting an | |
| exponent that reflects the right-aligned position of the bits, and | |
| normalizing. | |
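| As a worked example (derived from the code below, not from the HP | |
| documentation): floating the single integer 1 produces a mantissa of 1 | |
| left-justified at bit 48 and an exponent of 15 (IN_W_SMAGN); normalization | |
| then shifts the mantissa left 14 places and reduces the exponent to 1, | |
| yielding 0.5 * 2 ** 1 = 1.0. | |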
| */ | |
| static void ffloat (FPU *result, FPU operand) | |
| { | |
| result->mantissa = operand.mantissa; /* set value */ | |
| result->exponent = op_bits[operand.precision]; /* set exponent */ | |
| normalize (result); /* normalize */ | |
| return; | |
| } | |
| /* High-level floating-point routines. */ | |
| /* Determine operand precisions. | |
| The precisions of the operands and result are determined by decoding an | |
| operation opcode and returned to the caller. Pass NULL for both of the | |
| operands if only the result precision is wanted. Pass NULL for the result if | |
| only the operand precisions are wanted. | |
| Implementation note: | |
| 1. gcc-4.3.0 complains at -O3 that operand_l/r may not be initialized. | |
| Because of the mask, the switch statement covers all cases, but gcc | |
| doesn't realize this. The "default" case is redundant but eliminates the | |
| warning. | |
| */ | |
| void fp_prec (uint16 opcode, OPSIZE *operand_l, OPSIZE *operand_r, OPSIZE *result) | |
| { | |
| OPSIZE fp_size, int_size; | |
| fp_size = (OPSIZE) ((opcode & 0003) + 2); /* fp_f, fp_x, fp_t, fp_e */ | |
| int_size = (OPSIZE) ((opcode & 0004) >> 2); /* in_s, in_d */ | |
| if (operand_l && operand_r) { /* want operand precisions? */ | |
| switch (opcode & 0120) { /* mask out opcode bit 5 */ | |
| case 0000: /* add/mpy */ | |
| case 0020: /* sub/div */ | |
| *operand_l = fp_size; /* assume first op is fp */ | |
| if (opcode & 0004) /* operand internal? */ | |
| *operand_r = fp_a; /* second op is accum */ | |
| else | |
| *operand_r = fp_size; /* second op is fp */ | |
| break; | |
| case 0100: /* fix/accum as integer */ | |
| *operand_l = fp_size; /* first op is fp */ | |
| *operand_r = fp_a; /* second op is always null */ | |
| break; | |
| case 0120: /* flt/accum as float */ | |
| default: /* keeps compiler quiet for uninit warning */ | |
| *operand_l = int_size; /* first op is integer */ | |
| *operand_r = fp_a; /* second op is always null */ | |
| break; | |
| } | |
| if (opcode & 0010) /* operand internal? */ | |
| *operand_l = fp_a; /* first op is accum */ | |
| } | |
| if (result) /* want result precision? */ | |
| if ((opcode & 0120) == 0100) /* fix? */ | |
| *result = int_size; /* result is integer */ | |
| else /* all others */ | |
| *result = fp_size; /* result is fp */ | |
| return; | |
| } | |
| /* Floating Point Processor executor. | |
| The executor simulates the MPP interface between the CPU and the FPP. The | |
| operation to be performed is specified by the supplied opcode, which conforms | |
| to the FPP hardware interface, as follows: | |
| Bits Value Action | |
| ---- ----- ---------------------------------------------- | |
| 7 0 Exponent range is standard (+/-127) | |
| 1 Exponent range is expanded (+/-511) | |
| 6-4 000 Add | |
| 001 Subtract | |
| 010 Multiply | |
| 011 Divide | |
| 100 Fix | |
| 101 Float | |
| 110 (diagnostic) | |
| 111 (diagnostic) | |
| 3 0 Left operand is supplied | |
| 1 Left operand in accumulator | |
| 2 0 Right operand is supplied (ADD/SUB/MPY/DIV) | |
| Single integer operation (FIX/FLT) | |
| 1 Right operand in accumulator (ADD/SUB/MPY/DIV) | |
| Double integer operation (FIX/FLT) | |
| 1-0 00 2-word operation | |
| 01 3-word operation | |
| 10 4-word operation | |
| 11 5-word operation | |
| If the opcode specifies that the left (or right) operand is in the | |
| accumulator, then the value supplied for that parameter is not used. All | |
| results are automatically left in the accumulator. If the result is not | |
| needed externally, then NULL may be passed for the result parameter. | |
| To support accumulator set/get operations under simulation, the opcode is | |
| expanded to include a special mode, indicated by bit 15 = 1. In this mode, | |
| if the result parameter is NULL, then the accumulator is set from the value | |
| passed as operand_l. If the result parameter is not null, then the | |
| accumulator value is returned as the result, and operand_l is ignored. The | |
| precision of the operation is performed as specified by the OPSIZE value | |
| passed in bits 2-0 of the opcode. | |
| The function returns 1 if the operation overflows and 0 if not. | |
| */ | |
| uint32 fp_exec (uint16 opcode, OP *result, OP operand_l, OP operand_r) | |
| { | |
| static FPU accumulator; | |
| FPU uoperand_l, uoperand_r; | |
| OPSIZE op_l_prec, op_r_prec, rslt_prec; | |
| uint32 overflow; | |
| if (opcode & SIGN) { /* accumulator mode? */ | |
| rslt_prec = (OPSIZE) (opcode & 0017); /* get operation precision */ | |
| if (result) { /* get accumulator? */ | |
| op_l_prec = accumulator.precision; /* save accum prec temp */ | |
| accumulator.precision = rslt_prec; /* set desired precision */ | |
| *result = pack (accumulator); /* pack accumulator */ | |
| accumulator.precision = op_l_prec; /* restore correct prec */ | |
| } | |
| else /* set accumulator */ | |
| accumulator = unpack (operand_l, rslt_prec); /* unpack from operand */ | |
| return 0; /* no overflow from accum ops */ | |
| } | |
| fp_prec (opcode, &op_l_prec, &op_r_prec, &rslt_prec); /* calc precs from opcode */ | |
| if (op_l_prec == fp_a) /* left operand in accum? */ | |
| uoperand_l = accumulator; /* copy it */ | |
| else /* operand supplied */ | |
| uoperand_l = unpack (operand_l, op_l_prec); /* unpack from parameter */ | |
| if (op_r_prec == fp_a) /* right operand in accum? */ | |
| uoperand_r = accumulator; /* copy it */ | |
| else /* operand supplied */ | |
| uoperand_r = unpack (operand_r, op_r_prec); /* unpack from parameter */ | |
| switch (opcode & 0160) { /* dispatch operation */ | |
| case 0000: /* add */ | |
| add (&accumulator, uoperand_l, uoperand_r); | |
| break; | |
| case 0020: /* subtract */ | |
| complement (&uoperand_r); | |
| add (&accumulator, uoperand_l, uoperand_r); | |
| break; | |
| case 0040: /* multiply */ | |
| multiply (&accumulator, uoperand_l, uoperand_r); | |
| break; | |
| case 0060: /* divide */ | |
| divide (&accumulator, uoperand_l, uoperand_r); | |
| break; | |
| case 0100: /* fix */ | |
| accumulator.precision = rslt_prec; | |
| overflow = fix (&accumulator, uoperand_l); | |
| if (result) /* result wanted? */ | |
| *result = pack_int (accumulator.mantissa, /* pack integer */ | |
| rslt_prec); | |
| return overflow; | |
| case 0120: /* float */ | |
| accumulator.precision = rslt_prec; | |
| ffloat (&accumulator, uoperand_l); | |
| if (result) /* result wanted? */ | |
| *result = pack (accumulator); /* pack FP (FLT does not round) */ | |
| return 0; | |
| case 0140: /* (diagnostic) */ | |
| case 0160: /* (diagnostic) */ | |
| return 0; | |
| } | |
| if (UNIT_CPU_MODEL != UNIT_1000_F) /* firmware implementation? */ | |
| accumulator.mantissa = accumulator.mantissa & /* mask to precision */ | |
| op_mask[accumulator.precision]; | |
| normalize (&accumulator); /* normalize */ | |
| overflow = roundovf (&accumulator, opcode & 0200); /* round and check for overflow */ | |
| if (result) /* result wanted? */ | |
| *result = pack (accumulator); /* pack result */ | |
| return overflow; | |
| } | |
| /* Set or get accumulator at desired precision. | |
| This function provides access to the FPP accumulator. In hardware, the | |
| accumulator may be read at a given precision by sending the FPP an opcode | |
| encoded with the desired precision and then reading words from the FPP | |
| /without/ initiating the operation, i.e., without starting the processor. | |
| Under simulation, pass this function a NULL operand and the desired | |
| precision to read the accumulator. Pass a pointer to an operand and the | |
| desired precision to set the accumulator; the value returned in this case is | |
| not meaningful. | |
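| As an illustration (hypothetical variable names): | |
| fp_accum (&operand, fp_x); (set the accumulator from a three-word value) | |
| result = fp_accum (NULL, fp_x); (read it back at the same precision) | |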
| */ | |
| OP fp_accum (const OP *operand, OPSIZE precision) | |
| { | |
| OP result = NOP; | |
| uint16 opcode = (uint16) precision | SIGN; /* add special mode bit */ | |
| if (operand) | |
| fp_exec (opcode, NULL, *operand, NOP); /* set accum */ | |
| else | |
| fp_exec (opcode, &result, NOP, NOP); /* get accum */ | |
| return result; | |
| } | |
| /* Pack an unpacked floating-point number. | |
| An unpacked mantissa is passed as a "packed" number with an unused exponent. | |
| The mantissa and separately-passed exponent are packed into the in-memory | |
| floating-point format. Note that all bits are significant in the mantissa | |
| (no masking is done). | |
| */ | |
| uint32 fp_pack (OP *result, OP mantissa, int32 exponent, OPSIZE precision) | |
| { | |
| FPU unpacked; | |
| unpacked.mantissa = unpack_int (mantissa, precision); /* unpack mantissa */ | |
| unpacked.exponent = exponent; /* set exponent */ | |
| unpacked.precision = precision; /* set precision */ | |
| *result = pack (unpacked); /* pack them */ | |
| return 0; | |
| } | |
| /* Normalize, round, and pack an unpacked floating-point number. | |
| An unpacked mantissa is passed as a "packed" number with an unused exponent. | |
| The mantissa and separately-passed exponent are normalized, rounded, and | |
| packed into the in-memory floating-point format. Note that all bits are | |
| significant in the mantissa (no masking is done). | |
| */ | |
| uint32 fp_nrpack (OP *result, OP mantissa, int32 exponent, OPSIZE precision) | |
| { | |
| FPU unpacked; | |
| unpacked.mantissa = unpack_int (mantissa, precision); /* unpack mantissa */ | |
| unpacked.exponent = exponent; /* set exponent */ | |
| unpacked.precision = precision; /* set precision */ | |
| return nrpack (result, unpacked, FALSE); /* norm/rnd/pack them */ | |
| } | |
| /* Unpack a packed floating-point number. | |
| A floating-point number, packed into the in-memory format, is unpacked into | |
| separate mantissa and exponent values. The unpacked mantissa is returned in | |
| a "packed" structure with an exponent of zero. Mantissa or exponent may be | |
| null if that part isn't wanted. | |
| */ | |
| uint32 fp_unpack (OP *mantissa, int32 *exponent, OP packed, OPSIZE precision) | |
| { | |
| FPU unpacked; | |
| unpacked = unpack (packed, precision); /* unpack mantissa and exponent */ | |
| if (exponent) /* exponent wanted? */ | |
| *exponent = unpacked.exponent; /* return exponent */ | |
| if (mantissa) /* mantissa wanted? */ | |
| *mantissa = pack_int (unpacked.mantissa, fp_t); /* return full-size mantissa */ | |
| return 0; | |
| } | |
| /* Complement an unpacked mantissa. | |
| An unpacked mantissa is passed as a "packed" number with a zero exponent. | |
| The exponent increment, i.e., either zero or one, depending on whether a | |
| renormalization was required, is returned. Note that all bits are | |
| significant in the mantissa. | |
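| The increment is one only when the mantissa is the maximum negative value | |
| (-1.0); its complement (+1.0) cannot be represented, so "complement" above | |
| returns 0.5 with the exponent incremented. | |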
| */ | |
| uint32 fp_ucom (OP *mantissa, OPSIZE precision) | |
| { | |
| FPU unpacked; | |
| unpacked.mantissa = unpack_int (*mantissa, precision); /* unpack mantissa */ | |
| unpacked.exponent = 0; /* clear undefined exponent */ | |
| unpacked.precision = precision; /* set precision */ | |
| complement (&unpacked); /* negate it */ | |
| *mantissa = pack_int (unpacked.mantissa, precision); /* replace mantissa */ | |
| return (uint32) unpacked.exponent; /* return exponent increment */ | |
| } | |
| /* Complement a floating-point number. */ | |
| uint32 fp_pcom (OP *packed, OPSIZE precision) | |
| { | |
| FPU unpacked; | |
| unpacked = unpack (*packed, precision); /* unpack the number */ | |
| complement (&unpacked); /* negate it */ | |
| return nrpack (packed, unpacked, FALSE); /* and norm/rnd/pack */ | |
| } | |
| /* Truncate a floating-point number. */ | |
| uint32 fp_trun (OP *result, OP source, OPSIZE precision) | |
| { | |
| t_bool bits_lost; | |
| FPU unpacked; | |
| FPU one = { FP_ONEHALF, 1, fp_t }; /* 0.5 * 2 ** 1 = 1.0 */ | |
| OP zero = { { 0, 0, 0, 0, 0 } }; /* 0.0 */ | |
| t_uint64 mask = mant_mask[precision] & ~FP_MSIGN; | |
| unpacked = unpack (source, precision); | |
| if (unpacked.exponent < 0) /* number < 0.5? */ | |
| *result = zero; /* return 0 */ | |
| else if (unpacked.exponent >= (int32) op_bits[precision]) /* no fractional bits? */ | |
| *result = source; /* already integer */ | |
| else { | |
| mask = (mask >> unpacked.exponent) & mask; /* mask fractional bits */ | |
| bits_lost = ((unpacked.mantissa & mask) != 0); /* flag if bits lost */ | |
| unpacked.mantissa = unpacked.mantissa & ~mask; /* mask off fraction */ | |
| if ((unpacked.mantissa < 0) && bits_lost) /* negative? */ | |
| add (&unpacked, unpacked, one); /* truncate toward zero */ | |
| nrpack (result, unpacked, FALSE); /* (overflow cannot occur) */ | |
| } | |
| return 0; /* clear overflow on return */ | |
| } | |
| /* Convert a floating-point number from one precision to another. */ | |
| uint32 fp_cvt (OP *result, OPSIZE source_precision, OPSIZE dest_precision) | |
| { | |
| FPU unpacked; | |
| unpacked = unpack (*result, source_precision); | |
| unpacked.precision = dest_precision; | |
| return nrpack (result, unpacked, FALSE); /* norm/rnd/pack */ | |
| } | |
| #endif /* end of int64 support */ |