{File name: LispFPTBase.mc
Last edited by cal 21-Oct-83 11:57:59
Last edited by Sturgis 15-Sep-83 16:40:30
Description: Floating point code for Lisp {modified from Cedar fpt code}
Created by H. Sturgis
}
{***********************************************************************************************
De normalization code
enter with T, ~Q holding the 32 bit quantity, L0 prepared for return, TT holding shift count
(note that Q is negated)
contents of TT is destroyed
De normalized result is in T, Q
Q[15] is a sticky bit
***********************************************************************************************}
{DeNorm: shifts the pair T,Q right by the count in TT, one place per pass, and records in the low-order bit of Q whether a 1 bit was encountered in that position while shifting. Q is held complemented on entry and is un-complemented just before the return through L0.}
{NOTE(review): branch targets are read here as BRANCH[condition false, condition true], which is what the existing comments imply; confirm against the assembler manual.}
{DeNormC1 and DeNormC2 are empty instructions that only advance to c3, so a caller can enter from whichever cycle it is in.}
DeNormC1: c1;
DeNormC2: c2;
DeNormC3:
{turn the shift count into an up-counter: the loops below add 1 before each test and stop when it reaches zero}
TT ← -TT - 1, c3;
{loop A: used while the sticky bit is still 0}
DeNormA:
TT ← TT + 1, NZeroBr, c1;
{counter exhausted: leave via DeNormA1; otherwise test the low-order bit of the complemented Q before it is shifted}
Ybus ← Q and 1, ZeroBr, BRANCH[DeNormA1, $], c2;
{shift one place; if the tested complemented bit was 0 (true bit 1) continue in loop B, else stay in loop A}
T ← DRShift1 T, SE ← 0, BRANCH[DeNormA, DeNormB], c3;
DeNormA1: {sticky bit is 0}
{restore true Q, discard the pending low-bit test, and dispatch on L0 for the return}
Q ← ~Q, L0Disp, CANCELBR[$], c3;
RET[DeNormRets], c1;
{sticky bit will be 1}
{loop B: same counting and shifting as loop A, but without further bit tests}
DeNormB:
TT ← TT + 1, NZeroBr, c1;
BRANCH[DeNormB1, $], c2;
T ← DRShift1 T, SE ← 0, GOTO[DeNormB], c3;
DeNormB1: {sticky bit is 1}
Q ← ~Q, L0Disp, c3;
{force the low-order bit of the restored Q to 1 while returning}
Q ← Q or 1, RET[DeNormRets], c1;
{****************************************************************************
multiply code
This procedure calls unpack2 to break the arguments up into components
Sign1 Exp1 HighHalf1 LowHalf1
Sign2 Exp2 HighHalf2 LowHalf2
where the high halves have 16 bits and the lowhalves have 8 bits
We compute four products to be added together to form a 48 bit product
LowHalf1*LowHalf2 | B | A |
HighHalf1*LowHalf2 | E | C |
LowHalf1*HighHalf2 | F | D |
HighHalf1*HighHalf2 | H | G |
48 bit product | Z | Y | X | W |
}
{the arguments are now unpacked, and a ZeroBr is pending if either are 0}
{first we add the exponents and xor the signs}
{note: in the case of 1*1, the result fraction will have first sign bit at x1, thus the result exponent must be 128 so that subsequent normalization will reduce the exponent to 127, the correct value for 1. This is obtained by adding exponents and subtracting 126.}
{FptMul: multiply entry, placed at the L2.FptMul slot of FptPrepRet. Reads uExp1/uExp2, uSign1/uSign2 and the four fraction halves; leaves the result exponent in uExp1, the result sign in uSign1, the upper product words in uHighHalf1 (Z) and uLowHalf1 (Y), and a non-zero uStickyBit when the discarded lower product bits were non-zero; exits to RePackC2.}
{NOTE(review): the zero-operand branch mentioned in the comment preceding this code is not consumed by any instruction visible here; confirm how it is handled.}
{result exponent = uExp1 + uExp2 - 126}
T ← uExp1, c2, at[L2.FptMul, 10, FptPrepRet];
TT ← uExp2, c3;
T ← T + TT, c1;
T ← T - 126'd, c2;
uExp1 ← T, c3;
{result sign = uSign1 xor uSign2}
TT ← uSign1, c1;
Q ← uSign2, c2;
uSign1 ← TT xor Q, c3;
{now compute uLowHalf1*uLowHalf2}
{both low halves are rotated by 8 and stored back rotated; later steps rely on that (see the "already rotated" notes)}
T ← uLowHalf1, c1;
T ← T LRot8, c2;
uLowHalf1 ← Q ← T, c3;
TOS ← uLowHalf2, c1;
TOS ← TOS LRot8, c2;
uLowHalf2 ← Rx ← TOS, c3;
{first loop call: initial product 0, multiplier in Q, multiplicand in Rx, 8 multiplier bits (TT = 7)}
T ← 0, L0 ← L0.mult2, c1;
TT ← 7, CALL[FptMultLoopC3], c2;
{return point for L0.mult2: setting the low 8 bits of Q and complementing leaves only the complemented upper 8 bits, i.e. the low product byte}
TT ← 0FF or Q, c1, at[L0.mult2, 10, FptMultLoopRets];
TT ← ~TT, c2;
uStickyBit ← TT, {A(=W)} c3;
{at this point, uStickyBit is non zero iff A(=W) is non zero, and T contains B}
{now compute T+uHighHalf1*uLowHalf2, which is <E+carry,B+C+carry>}
{TT parks the running product B while T is used to load Q and Rx}
TT ← T, c1;
T ← uLowHalf2, {already rotated} c2;
Q ← T, c3;
T ← uHighHalf1, L0 ← L0.mult3, c1;
Rx ← T, c2;
T ← TT, c3;
TT ← 7, CALL[FptMultLoopC2], c1;
{return point for L0.mult3: the high product word is saved in uLowHalf2, which is reused as scratch from here on}
uLowHalf2 ← T, {E+carry} c1, at[L0.mult3, 10, FptMultLoopRets];
{recover the low product byte: un-complement Q, rotate it down, and mask to 8 bits}
Rx ← ~Q, c2;
Rx ← Rx LRot8, c3;
TT ← Rx and 377'b, {B+C+Carry} c1;
{now compute TT+uLowHalf1*uHighHalf2, which is <F+carry, B+C+D+carry>}
T ← uLowHalf1, {already rotated} c2;
Q ← T, c3;
T ← uHighHalf2, L0 ← L0.mult4, c1;
Rx ← T, c2;
T ← TT, c3;
TT ← 7, CALL[FptMultLoopC2], c1;
{return point for L0.mult4: the high product word is saved in uLowHalf1, also reused as scratch}
uLowHalf1 ← T, {F+carry} c1, at[L0.mult4, 10, FptMultLoopRets];
Rx ← ~Q, c2;
Rx ← Rx LRot8, c3;
Q ← Rx and 377'b, {B+C+D+Carry(=X)} c1;
{fold X into the sticky indication}
Q ← Q or uStickyBit, {T#0 iff X#0 or W#0} c2;
uStickyBit ← Q, c3;
{now compute uLowHalf1(=F+carry)+uLowHalf2(=E+carry)+uHighHalf1*uHighHalf2 = <H+carry, E+F+G+carry> = <Z,Y>}
Q ← uHighHalf1, c1;
Rx ← uHighHalf2, c2;
T ← uLowHalf1, {F+carry} c3;
TT ← uLowHalf2, {E+carry} c1;
{the carry out of this 16 bit add is remembered in uLowHalf1 as 0 or -1 and applied after the loop returns}
T ← T + TT, CarryBr, L0 ← L0.mult5, c2;
{16 multiplier bits this time (TT = 15)}
TT ← 15'd, BRANCH[mull0, mull1], c3;
mull1: uLowHalf1 ← ~TOS xor TOS, {uLowHalf1←-1, save the carry}
CALL[FptMultLoopC2], c1;
mull0: uLowHalf1 ← 0, {save the carry}CALL[FptMultLoopC2], c1;
{return point for L0.mult5: subtracting the saved 0 or -1 adds the carry into the high word}
TT ← uLowHalf1, c1, at[L0.mult5, 10, FptMultLoopRets];
T ← T-TT, {include the carry} c2;
uHighHalf1 ← T, {Z} c3;
{Q holds the low word complemented, so store its complement as Y and go repack}
uLowHalf1 ← ~Q, {Y} GOTO[RePackC2], c1;
{****************************************************************************
fpt multiply loop
at entry:
TT holds bit count-1 (n-1) of multiplier (n must be at least 2)
i.e. for an 8 bit multiplier, enter with 7 in TT
Q holds multiplier, right adjusted
T holds initial product
Rx holds multiplicand
can be called from any cycle, uses an integral number of clicks
at end:
high 16 bits of product is in T
low n bits of product are complemented in the top n bits of Q
(bottom 16-n bits of Q holds top 16-n bits of original multiplier)
returns via L0
time: n clicks
****************************************************************************}
{Shift-and-add multiply loop. Each pass tests the low-order bit of the multiplier in Q, adds either 0 or Rx to T, and shifts with DARShift1; TT counts the passes down. The last pass is done separately at FptMultLoopEnd so that the L0 dispatch for the return can be issued alongside it.}
{NOTE(review): branch targets are read here as BRANCH[condition false, condition true]; confirm against the assembler manual.}
{FptMultLoopC2 and FptMultLoopC3 are empty instructions that let a caller enter from c2 or c3 and fall into the loop at c1.}
FptMultLoopC2: c2;
FptMultLoopC3: c3;
{test the current multiplier bit}
FptMultLoopC1: [] ← Q and 1, NZeroBr, c1;
{count this pass, and choose the add-0 or add-Rx step from the bit just tested}
TT ← TT-1, ZeroBr,
BRANCH[FptMultLoop0, FptMultLoop1], c2;
{multiplier bit was 0: shift without adding; go to the final pass once TT has reached zero}
FptMultLoop0: T ← DARShift1(T+0),
BRANCH[FptMultLoopC1, FptMultLoopEnd], c3;
{multiplier bit was 1: add the multiplicand, then shift; same exit test}
FptMultLoop1: T ← DARShift1(T+Rx),
BRANCH[FptMultLoopC1, FptMultLoopEnd], c3;
{final pass: same bit test, with the return dispatch on L0 issued in the second cycle}
FptMultLoopEnd: [] ← Q and 1, NZeroBr, c1;
L0Disp, BRANCH[FptMultLoopEnd0, FptMultLoopEnd1], c2;
FptMultLoopEnd0: T ← DARShift1(T+0), RET[FptMultLoopRets], c3;
FptMultLoopEnd1: T ← DARShift1(T+Rx), RET[FptMultLoopRets], c3;
{issues remaining:
divide by zero trap (now forces infinity)
one might be able to unroll the division loop
at the moment, the remainder is corrected after first sixteen subtractions, need not do so if one were more careful
adjust starting cycle of division loop?
move L0 disp earlier in loop exit sequence?
}
{****************************************************************************
division code
This procedure calls unpack2 to break the arguments up into components
Sign1 Exp1 HighHalf1 LowHalf1
Sign2 Exp2 HighHalf2 LowHalf2
where the high halves have 16 bits and the lowhalves have 8 bits
****************************************************************************}
{the arguments are now unpacked}
{FptDiv: divide entry, placed at the L2.FptDiv slot of FptPrepRet. Leaves the result exponent in uExp1, the result sign in uSign1, the first 16 quotient bits in uHighHalf1 and the remaining bits plus a sticky bit in uLowHalf1, then exits to RePackC1. A divisor whose two halves are both zero goes to FPTrapsC3 instead.}
{NOTE(review): branch targets are read here as BRANCH[condition false, condition true]; confirm against the assembler manual.}
{result exponent = uExp1 - uExp2 + 127}
T ← uExp1, c2, at[L2.FptDiv, 10, FptPrepRet];
TT ← uExp2, c3;
T ← T - TT, c1;
T ← T + 127'd, {re bias exponent} c2;
uExp1 ← T, c3;
{result sign = uSign1 xor uSign2}
TT ← uSign1, c1;
Q ← uSign2, c2;
uSign1 ← TT xor Q, c3;
{divCount and divResult are used as working registers below; their previous contents are saved here and restored on both exits}
uDivCount ← divCount, {save a non temporary R reg} c1;
uDivResult ← divResult, {save a non temporary R reg} c2;
{now load the operands, right shifted by 1 bit to allow room to shift left during the divide loop, also test for zero divisor}
divisorHigh ← uHighHalf2, ZeroBr, c3;
{FptDivA is taken when the high half is zero, FptDivB otherwise; the low half is tested in the same instruction}
Q ← uLowHalf2, ZeroBr, BRANCH[FptDivB, FptDivA], c1;
{high half zero: a zero low half as well means divide by zero}
FptDivA: Q ← ~Q, BRANCH[$, FptDivZero], c2;
divisorHigh ← DRShift1 divisorHigh, SE ← 0,
GOTO[FptDivC], c3;
{high half non zero: the low-half test is not needed and is cancelled}
FptDivB: Q ← ~Q, CANCELBR[$], c2;
divisorHigh ← DRShift1 divisorHigh, SE ← 0,
GOTO[FptDivC], c3;
{Q was complemented before the shift; complementing again yields the shifted divisor low word}
FptDivC: divisorLow ← ~Q, c1;
{load the dividend into T,Q and shift it right the same way}
T ← uHighHalf1, c2;
Q ← ~ uLowHalf1, c3;
T ← DRShift1 T, SE ← 0, c1;
Q ← ~Q, L0 ← L0.div2, c2;
{as per Ed Taft's Dorado code, we will do a total of 26 iterations, 24 for quotient bits, + 2 guard bits. They are done by first doing 16 iterations, then 10 iterations}
divCount ← 16'd, CALL[FptDivLoopC1], c3;
{return point for L0.div2: keep the first 16 quotient bits}
uHighHalf1 ← divResult, c2, at[L0.div2, 10, FptDivLoopRets];
T ← DLShift1 T, SE ← 1, {puts a 0 into Q.15} L0 ← L0.div3, {remainder did not get its last left shift}
c3;
divCount ← 10'd, CALL[FptDivLoopC2], c1;
{return point for L0.div3: move the 10 new quotient bits toward the top of the word with a rotate by 4 followed by two left shifts}
divResult ← divResult LRot4, c2, at[L0.div3, 10, FptDivLoopRets];
{test whether any remainder is left in T,Q}
[] ← T or Q, NZeroBr, c3;
divResult ← LShift1 (divResult), SE ← 0, BRANCH[FptDivD, FptDivE], c1;
{now we install the correct sticky bit}
{the second shift brings in 0 when the remainder was zero, 1 otherwise}
FptDivD: divResult ← LShift1 (divResult), SE ← 0, GOTO[FptDivF], c2;
FptDivE: divResult ← LShift1 (divResult), SE ← 1, GOTO[FptDivF], c2;
FptDivF: uLowHalf1 ← divResult , GOTO[FptDivExit], c3;
{divide by zero}
{restore the borrowed registers, then continue at FPTrapsC3, which is defined outside this section}
FptDivZero: divCount ← uDivCount, {restore a non temporary R reg} c3;
divResult ← uDivResult, {restore a non temporary R reg} c1;
GOTO[FPTrapsC3], c2;
{normal exit: restore the borrowed registers, clear uStickyBit, and go repack}
FptDivExit: divCount ← uDivCount, {restore a non temporary R reg} c1;
divResult ← uDivResult, {restore a non temporary R reg} c2;
uStickyBit ← 0, GOTO[RePackC1], c3;
{***********************************************************************************************
divide loop
dividend in <T,Q>
divisor in <divisorHigh, divisorLow>
bit count in divCount; performs that many subtractions
return point in L0
will accumulate bits in divResult
<T,Q> will hold the remainder
***********************************************************************************************}
{Divide loop. Each step either subtracts the divisor from T,Q (while the remainder is positive) or adds it (while the remainder is negative), shifts T,Q left, and shifts one quotient bit into divResult: 1 on the way into a subtract step, 0 on the way into an add step. divCount is decremented once per step and the loop leaves when it reaches zero. If the last step left a negative remainder, the divisor is added back once before returning through L0.}
{NOTE(review): branch targets are read here as BRANCH[condition false, condition true]; confirm against the assembler manual.}
{FptDivLoopC1 and FptDivLoopC2 are empty instructions that let a caller enter from c1 or c2.}
FptDivLoopC1: c1;
FptDivLoopC2: c2;
FptDivLoopC3:
{start with an empty quotient; the first step is a subtraction and enters past the quotient shift}
divResult ← 0, GOTO[FptDivSubA], c3;
{in this part of the loop the contents of <T,Q> are positive}
FptDivSub: {delay} c2;
{record a 1 quotient bit for the previous step}
divResult ← LShift1 (divResult), SE ← 1, c3;
{low-word subtract; its carry selects whether the high-word subtract needs the extra -1}
FptDivSubA: Q ← Q-divisorLow, CarryBr, c1;
divCount ← divCount-1, ZeroBr,
BRANCH[FptDivSub1, FptDivSub0], c2;
{high-word subtract; leave the loop if the count just reached zero}
FptDivSub0: T ← T-divisorHigh, CarryBr, BRANCH[$, FptDivSubExit1], c3;
{shift the remainder left; the carry from the high-word subtract selects the subtract or add path for the next step}
T ← DLShift1 T, SE ← 1, {puts a 0 into Q.15}
BRANCH[FptDivAdd, FptDivSub], c1;
FptDivSub1: T ← T-divisorHigh-1, CarryBr, BRANCH[$ , FptDivSubExit2], c3;
T ← DLShift1 T, SE ← 1, {puts a 0 into Q.15}
BRANCH[FptDivAdd, FptDivSub], c1;
{exit after a subtract step: the same carry test chooses the final quotient bit, without shifting the remainder}
FptDivSubExit1: BRANCH[FptDivAddExit, FptDivSubExit], c1;
FptDivSubExit2: BRANCH[FptDivAddExit, FptDivSubExit], c1;
FptDivSubExit: divResult ← LShift1 (divResult), SE ← 1, GOTO[FptDivLoopExit], c2;
{in this part of the loop the contents of <T,Q> are negative (in 2's complement)}
FptDivAdd: {delay} c2;
{record a 0 quotient bit for the previous step}
divResult ← LShift1 (divResult), SE ← 0, c3;
{low-word add; its carry selects whether the high-word add needs the extra +1}
Q ← Q+divisorLow, CarryBr, c1;
divCount ← divCount-1, ZeroBr,
BRANCH[FptDivAdd0, FptDivAdd1], c2;
FptDivAdd0: T ← T+divisorHigh, CarryBr, BRANCH[$, FptDivAddExit1], c3;
T ← DLShift1 T, SE ← 1, {puts a 0 into Q.15}
BRANCH[FptDivAdd, FptDivSub], c1;
FptDivAdd1: T ← T+divisorHigh+1, CarryBr, BRANCH[$, FptDivAddExit2], c3;
T ← DLShift1 T, SE ← 1, {puts a 0 into Q.15}
BRANCH[FptDivAdd, FptDivSub], c1;
{exit after an add step: same choice of final quotient bit as above}
FptDivAddExit1: BRANCH[FptDivAddExit, FptDivSubExit], c1;
FptDivAddExit2: BRANCH[FptDivAddExit, FptDivSubExit], c1;
{we have subtracted one too many times, so add back in to get correct remainder}
FptDivAddExit: divResult ← LShift1 (divResult), SE ← 0, c2;
Q ← Q + divisorLow, CarryBr, c3;
{delay} BRANCH[FptDivAX0, FptDivAX1], c1;
{propagate the carry from the low-word add into the high word}
FptDivAX1: T ← T + divisorHigh + 1, GOTO[FptDivLoopExit], c2;
FptDivAX0: T ← T + divisorHigh, GOTO[FptDivLoopExit], c2;
{common return: dispatch on L0 to the caller's slot in FptDivLoopRets}
FptDivLoopExit: L0Disp, c3;
RET[FptDivLoopRets], c1;
{issues remaining:
efficiency: The choice of where the arguments are at unNormed is bad; by some juggling of arg1 and arg2 one should be able to reduce some of the costs of the three de-norm cases.
}
{****************************************************************************
sub code
****************************************************************************}
{the arguments are now unpacked}
{FptSub: subtract entry, placed at the L2.FptSub slot of FptPrepRet. Inverts the low-order bit of uSign2 and continues at add1, so subtraction is performed as addition of the second argument with its sign changed.}
T ← uSign2, c2, at[L2.FptSub, 10, FptPrepRet];
T ← T xor 1, {change sign} c3;
uSign2 ← T,GOTO[add1], c1;
{****************************************************************************
add code
****************************************************************************}
{the arguments are now unpacked, subtract enters here}
{add1: add entry, placed at the L2.FptAdd slot of FptPrepRet and also reached from the subtract code. Compares the exponents and arranges the operands so that the fraction to be shifted is in T,Q, the other fraction is in <uHighHalf2, uLowHalf2>, the sign belonging to that other fraction is in uSign1, and the nominal exponent is in uExp1.}
{NOTE(review): branch targets are read here as BRANCH[condition false, condition true]; confirm against the assembler manual.}
add1: T ← uExp1, c2, at[L2.FptAdd, 10, FptPrepRet];
TT ← uExp2, c3;
{TT = uExp1 - uExp2; a negative result selects unNorm1, and the zero test below picks out the equal-exponent case}
TT ← T - TT, NegBr, c1;
Ybus ← TT, ZeroBr, BRANCH[unNorm2, unNorm1], c2;
{Exp2 > Exp1, so we have to shift fractional part of arg1 to the right, TT has negative of shift count}
{arg1's fraction goes to T,Q; the two signs are exchanged and uExp2 becomes the nominal exponent. The zero test is not needed on this path and is cancelled.}
unNorm1: Q ← uLowHalf1, CANCELBR[$], c3;
Rx ← uSign2, c1;
T ← uSign1, c2;
uSign2 ← T, c3;
uSign1 ← Rx, c1;
Rx ← uExp2, c2;
uExp1 ← Rx, c3;
T ← uHighHalf1, c1;
{make the shift count positive}
TT ← -TT, GOTO[unNorm], c2;
{Exp1 >= Exp2, so we may have to shift fract part of arg2, TT has shift count}
{equal exponents go to unNormNeither; otherwise arg2's fraction is loaded into T,Q and arg1's fraction is stored into <uHighHalf2, uLowHalf2>}
unNorm2: Q ← uHighHalf1, BRANCH[$, unNormNeither], c3;
T ← uHighHalf2, c1;
uHighHalf2 ← Q, c2;
Rx ← uLowHalf1, c3;
Q ← uLowHalf2, c1;
uLowHalf2 ← Rx, GOTO[unNorm], c2;
{equal exponents: no shift. arg1's fraction is loaded into T,Q, arg2's fraction stays in place, and the signs are exchanged so that uSign1 goes with the fraction left in <uHighHalf2, uLowHalf2>}
unNormNeither:
T ← uSign1, c1;
Q ← uSign2, c2;
uSign1 ← Q, c3;
uSign2 ← T, c1;
T ← uHighHalf1, c2;
Q ← uLowHalf1, GOTO[unNormedC1], c3;
{The fractional part of the argument with smaller exponent is in T,Q; the amount of shift is in TT, uSign1 contains sign of argument with larger exponent (the nominal sign), uExp1 contains nominal exponent, uSign2 contains other sign}
{The fractional part of the argument with the higher exponent is in <uHighHalf2, uLowhalf2>}
{unNorm: right-shifts the fraction in T,Q by the count in TT, choosing among three cases by comparing the count with 20'b and 40'b. Counts of 40'b or more reduce the fraction to just a sticky bit; counts from 20'b up to 40'b first move the high word into the low word; smaller counts go straight to the DeNorm shifter. The DeNorm calls return to the L0.add1 slot of DeNormRets.}
{NOTE(review): branch targets are read here as BRANCH[condition false, condition true]; confirm against the assembler manual.}
unNorm: Rx ← 20'b, c3;
{TT becomes count - 20'b; negative means fewer than 20'b places}
TT ← TT - Rx, NegBr, c1;
{second comparison, count - 40'b, computed on the bus only so that TT keeps count - 20'b}
Ybus ← TT - Rx , NegBr, BRANCH[$, unNormLt20], c2;
{test whether the fraction has any non zero bit, used only in the 40'b-or-more case}
Ybus ← T or Q, NZeroBr, BRANCH[$, unNormLt40], c3;
{total shift >= 40'b bits}
{every bit is shifted out: T becomes 0 and Q becomes 1 when the fraction was non zero, 0 otherwise}
T ← 0, BRANCH[unNormZ, unNormNZ], c1;
unNormZ: Q ← 0, GOTO[unNormedC3], c2;
unNormNZ: Q ← 1, GOTO[unNormedC3], c2;
{40'b > total indicated shift >= 20'b, TT holds "indicated shift - 20'b"}
{the old low word is dropped entirely: test it for non zero and discard the pending whole-fraction test}
unNormLt40:
Ybus ← Q, NZeroBr, CANCELBR[$], c1;
BRANCH[unNormLt40A, unNormLt40B], c2;
{move the high word down into Q, setting its low-order bit when the dropped word was non zero}
unNormLt40A:
Q ← T , GOTO[unNormLt40C], c3;
unNormLt40B:
Q ← T or 1, GOTO[unNormLt40C], c3;
{shift the rest of the way: DeNorm expects Q complemented and the remaining count in TT}
unNormLt40C:
T ← 0, L0 ← L0.add1, c1;
Q ← ~Q, CALL[DeNormC3], c2;
{20'b > total indicated shift , TT holds "indicated shift -20'b", Rx contains 20'b}
{add 20'b back to recover the real count, discard the pending test, and call DeNorm with Q complemented}
unNormLt20:
TT ← TT+Rx, CANCELBR[$], c3;
L0 ← L0.add1, c1;
Q ← ~Q, CALL[DeNormC3], c2;
{T,Q holds the shifted version of the argument with the small exponent, <uHighHalf2, uLowHalf2> contains the other argument. uSign1 contains sign of argument in <uHighHalf2, uLowHalf2> (the nominal sign), uExp1 contains nominal exponent, uSign2 contains other sign)}
{unNormed: combines the aligned fraction in T,Q with the fraction in <uHighHalf2, uLowHalf2>. Equal signs add, unequal signs subtract T,Q from the stored fraction. The result is left in uHighHalf1 and uLowHalf1 with uStickyBit cleared, and control passes to RePackC1, or to RePackC2 with uSign1 forced to 0 when the result is zero. The instruction at unNormed is also the L0.add1 return slot of DeNormRets.}
{NOTE(review): branch targets are read here as BRANCH[condition false, condition true]; confirm against the assembler manual.}
{unNormedC3 and unNormedC1 are delay instructions that let the no-shift paths enter in step}
unNormedC3: {delay} c3;
unNormedC1: {delay} c1;
unNormed: Rx ← uSign1, c2, at[L0.add1, 10, DeNormRets];
TT ← uSign2, c3;
{differing signs select the subtract path}
[] ← Rx xor TT, NZeroBr, c1;
TT ← uLowHalf2, BRANCH[addUnNormed, subUnNormed], c2;
{equal signs, so add}
{low words first; the carry selects whether the high-word add includes +1}
addUnNormed:
Q ← TT + Q, CarryBr, c3;
TT ← uHighHalf2, BRANCH[addUnNormedA, addUnNormedB], c1;
addUnNormedA:
T ← TT+T, CarryBr, GOTO[addUnNormedC], c2;
addUnNormedB:
T ← TT+T+1, CarryBr, GOTO[addUnNormedC], c2;
{a carry out of the high word goes to addUnNormedD; the low-order bit of Q is tested in the same instruction for use there}
addUnNormedC:
Ybus ← Q and 1, NZeroBr, BRANCH[$, addUnNormedD], c3;
{no carry: store the low word, drop the low-bit test, and go on with a zero test of Q pending}
uLowHalf1 ← Q, ZeroBr, CANCELBR[unNormedB], c1;
{addition overflowed, so have to shift right and adjust exponents}
{Q is complemented for the shift and complemented back afterwards}
addUnNormedD:
Q ← ~Q, BRANCH[addUnNormedE, addUnNormedF], c1;
addUnNormedE: {Q ← Q or 0} GOTO[addUnNormedG], c2;
{NOTE(review): Q holds the complement here, so clearing this bit presumably sets the same bit of the true value, keeping the low-order 1 from being lost in the shift; the "or 22" in the comment below looks like it was meant to be "or 2" - confirm}
addUnNormedF: Q ← Q and ~2, {Q ← Q or 22} GOTO[addUnNormedG], c2;
addUnNormedG:
T ← DRShift1 T, SE ← 1, c3;
Q ← ~Q, c1;
{account for the right shift by adding 1 to the exponent}
Rx ← uExp1, c2;
Rx ← Rx+1, c3;
uExp1 ← Rx, c1;
{delay} c2;
{delay} GOTO[unNormedA], c3;
{non equal signs, so subtract shifted fraction}
{carryBr seems to take on a subtraction if there is no borrow}
{low words first; a borrow selects the high-word subtract that includes -1}
subUnNormed:
Q ← TT - Q, CarryBr, c3;
TT ← uHighHalf2, BRANCH[subUnNormedB, subUnNormedA], c1;
subUnNormedA:
T ← TT - T, CarryBr, GOTO[subUnNormedC], c2;
subUnNormedB:
T ← TT - T - 1, CarryBr, GOTO[subUnNormedC], c2;
{a borrow out of the high word goes to subUnNormedD; otherwise store the low word and go on with a zero test of Q pending}
subUnNormedC:
{delay} BRANCH[subUnNormedD, $], c3;
uLowHalf1 ← Q, ZeroBr, GOTO[unNormedB], c1;
{last subtraction produced overflow, so use opposite sign (from uSign2), and subtract tentative result from 0 to produce a sign magnitude result}
{negate the two-word value: low word first, and its carry selects the form of the high-word negate}
subUnNormedD:
Q ← 0 - Q, CarryBr, c1;
TT ← uSign2, BRANCH[subUnNormedD1, subUnNormedD0], c2;
subUnNormedD1: T ← 0 - T - 1, GOTO[subUnNormedE], c3;
subUnNormedD0: T ← 0 - T, GOTO[subUnNormedE], c3;
subUnNormedE:
uSign1 ← TT, c1;
{delay} c2;
{delay} c3;
{entry for the overflow and sign-flip paths: store the low word with no zero test}
unNormedA:
uLowHalf1 ← Q, GOTO[$], c1;
{store the high word and test it for non zero; the branch here uses whatever zero test the previous instruction left pending}
unNormedB:
uHighHalf1 ← T, NZeroBr, BRANCH[$, unNormedC], c2;
{low word not zero: the high-word test is not needed}
uStickyBit ← 0, CANCELBR[RePackC1], c3;
{low word zero: a non zero high word goes to RePackC1; otherwise the whole result is zero}
unNormedC:
uStickyBit ← 0, BRANCH[$, RePackC1], c3;
uSign1 ← 0, {force positive on zero}, GOTO[RePackC2], c1;
{issues remaining:
most negative long integer
most negative integer
}
{****************************************************************************
compare code
****************************************************************************}
{the arguments are now unpacked}
{FptCmp: compare entry, placed at the L2.FptCmp slot of FptPrepRet. Sets TOS to KTval on the Cmp1High outcome and to 0 on the Cmp2High, equal, and both-zero outcomes, then goes to cmpExit3. With equal signs the fields are compared in order exponent, high half, low half, and the first difference decides; the shared sign then picks the outcome. With unequal signs the outcome depends on uSign2 alone, unless every field of both arguments is zero.}
{NOTE(review): branch targets are read here as BRANCH[condition false, condition true]; confirm against the assembler manual. The CmpAbs1High and CmpAbs2High label names suggest which argument has the larger magnitude, but that depends on the carry convention for subtraction - confirm.}
Rx ← uSign2, c2, at[L2.FptCmp, 10, FptPrepRet];
TT ← uSign1, c3;
{differing signs go to cmpNeSigns}
[] ← Rx xor TT, NZeroBr, c1;
TT ← uExp1, BRANCH[$, cmpNeSigns], c2;
{compare exponents: T = uExp2 - uExp1}
T ← uExp2, c3;
T ← T-TT, CarryBr, c1;
{the carry test decides first; the non zero test on the difference decides in the next instruction}
[] ← T, NZeroBr, BRANCH[CmpAbs1High3, $], c2;
TT ← uHighHalf1, BRANCH[$, CmpAbs2High1], c3;
{exponents equal: compare the high halves the same way}
T ← uHighHalf2, c1;
T ← T - TT, CarryBr, c2;
[] ← T, NZeroBr, BRANCH[CmpAbs1High1, $], c3;
TT ← uLowHalf1, BRANCH[$, CmpAbs2High2], c1;
{high halves equal: compare the low halves the same way}
T ← uLowHalf2, c2;
T ← T - TT, CarryBr, c3;
[] ← T, NZeroBr, BRANCH[CmpAbs1High2, $], c1;
{everything equal gives TOS = 0 and exits directly}
TOS ← 0, BRANCH[cmpExit3, CmpAbs2High3], c2;
{signs are unequal, Rx contains uSign2}
{OR together both exponents and all four fraction halves to detect the case where both arguments are zero}
cmpNeSigns:
T ← uExp2, c3;
T ← T or TT, c1;
TT ← uHighHalf2, c2;
T ← T or TT, c3;
TT ← uHighHalf1, c1;
T ← T or TT, c2;
TT ← uLowHalf2, c3;
T ← T or TT, c1;
TT ← uLowHalf1, c2;
T ← T or TT, ZeroBr, c3;
BRANCH[$, CmpZeros], c1;
GOTO[CmpAbs2High3], c2;
{unequal signs, but arguments are both zero}
CmpZeros: TOS ← 0, GOTO[cmpExit3], c2;
{the CmpAbs1High and CmpAbs2High entries are numbered by the cycle in which they are entered; each falls or jumps through delays to a c3 instruction that tests uSign2 in Rx}
CmpAbs1High1: {delay} CANCELBR[$], c1;
GOTO[CmpAbs1High3], c2;
CmpAbs1High2: {delay} CANCELBR[$], c2;
[] ← Rx, NZeroBr, GOTO[CmpNEa], c3;
CmpAbs1High3: [] ← Rx, NZeroBr, CANCELBR[CmpNEb], c3;
CmpAbs2High1: {delay} c1;
CmpAbs2High2: {delay} c2;
{next inst also covers unequal sign case, Rx holding uSign2}
{this test uses ZeroBr where the CmpAbs1High entries use NZeroBr, so a given sign leads to the opposite outcome}
CmpAbs2High3: [] ← Rx, ZeroBr, GOTO[CmpNEa], c3;
{CmpNEa and CmpNEb are the same branch, provided twice for the two ways of arriving}
CmpNEa: BRANCH[Cmp1High, Cmp2High], c1;
CmpNEb: BRANCH[Cmp1High, Cmp2High], c1;
Cmp1High: TOS ← KTval, GOTO[cmpExit3], c2;
Cmp2High: TOS ← 0, GOTO[cmpExit3], c2;
{ E N D }