FloatOpsImpl.mesa
Copyright Ó 1988, 1991 by Xerox Corporation. All rights reserved.
Russ Atkinson (RRA) December 27, 1990 10:37 pm PST
Bryan Lyles October 2, 1988 4:06:46 pm PDT
Michael Plass, August 27, 1991 4:03 pm PDT
Willie-s, August 6, 1991 12:51 pm PDT
DIRECTORY
Basics,
Basics32,
FloatOps,
Real,
RealFns USING [Power];
FloatOpsImpl: <<CEDAR>> PROGRAM
IMPORTS Basics32, Real, RealFns
EXPORTS FloatOps
= BEGIN
NOTE: This module is really all safe, but to coerce mimosa into punting bounds and sign checks, it is coded as if it were TRUSTED. - mfp (mebbe rra knows a better way)
checking: BOOL = FALSE;
princOps: BOOL = BITS[WORD] = 16;
Types & constants
signStart: NAT = IF princOps THEN 16 ELSE 0;
signEnd: NAT = signStart;
expStart: NAT = signEnd+1;
expEnd: NAT = expStart+BITS[ExpRange]-1;
m1Start: NAT = expEnd+1;
m1End: NAT = m1Start+6;
m2Start: NAT = IF princOps THEN 0 ELSE 16;
m2End: NAT = m2Start+15;
Float: TYPE = FloatOps.Float;
PosZero: Float = Float[sign: FALSE, exp: 0, m1: 0, m2: 0];
NegZero: Float = Float[sign: TRUE, exp: 0, m1: 0, m2: 0];
ExpRange: TYPE = INTEGER[0..377B];
nanExpon: ExpRange = ExpRange.LAST;
maxExpon: ExpRange = nanExpon.PRED;
hiddenBit: Float = [m2: 0, sign: FALSE, exp: 1, m1: 0];
hiddenBitAsCard: CARD32 = LOOPHOLE[hiddenBit, CARD32];
signBit: Float = [m2: 0, sign: TRUE, exp: 0, m1: 0];
signBitAsCard: CARD32 = LOOPHOLE[signBit, CARD32];
maxExponDelta: ExpRange = 24;
expBias: NAT = 127;
This is the exponent for 1.0
All powers of two with normalized exponents have m1 = 0 and m2 = 0
FloatAnd: PROC [x: Float, y: Float] RETURNS [Float] = INLINE {
RETURN [LOOPHOLE[Basics32.BITAND[LOOPHOLE[x, CARD32], LOOPHOLE[y, CARD32]]]];
};
QuickAbs: PROC [x: Float] RETURNS [Float] = INLINE {
RETURN [LOOPHOLE[(LOOPHOLE[x, CARD32]*2)/2]];
};
QuickGt: PROC [x, y: Float] RETURNS [BOOL] = INLINE {
Either x or y must be non-negative (and not -0.0)
RETURN [LOOPHOLE[x, INT32] > LOOPHOLE[y, INT32]];
};
QuickGe: PROC [x, y: Float] RETURNS [BOOL] = INLINE {
Either x or y must be non-negative (and not -0.0)
RETURN [LOOPHOLE[x, INT32] >= LOOPHOLE[y, INT32]];
};
QuickLt: PROC [x, y: Float] RETURNS [BOOL] = INLINE {
Either x or y must be non-negative (and not -0.0)
RETURN [LOOPHOLE[x, INT32] <= LOOPHOLE[y, INT32]];
};
QuickLe: PROC [x, y: Float] RETURNS [BOOL] = INLINE {
Either x or y must be non-negative (and not -0.0)
RETURN [LOOPHOLE[x, INT32] < LOOPHOLE[y, INT32]];
};
QuickEq: PROC [x, y: Float] RETURNS [BOOL] = INLINE {
Both x and y must be non-negative (and not -0.0)
RETURN [LOOPHOLE[x, INT32] = LOOPHOLE[y, INT32]];
};
ExternalNames
ExternalNames: PROC [] = TRUSTED MACHINE CODE {
"^ExternalNames\n";
"SoftSub XR←RealSub\n";
"SoftAdd XR←RealAdd\n";
"SoftMul XR←RealMul\n";
"SoftDiv XR←RealDiv\n";
"FloatInt XR𡤏loatInt\n";
"FloatCard XR𡤏loatCard\n";
"SoftFix XR←REAL32𡤏ix\n";
"SoftRnd XR←REAL32←Round\n";
"SoftFloor XR←REAL32𡤏loor\n";
"SoftCeiling XR←REAL32�iling\n";
"SoftAbs XR←RealAbs\n";
"RealMin XR←RealMin\n";
"RealMax XR←RealMax\n";
SoftCmp
"SoftGt XR←RealGt\n";
"SoftGe XR←RealGe\n";
"SoftEq XR←RealEq\n";
"SoftLt XR←RealLt\n";
"SoftLe XR←RealLe\n";
"SignedPwr XR←SignedPwr\n";
"UnsignedPwr XR←UnsignedPwr\n";
"RealPwr XR←RealPwr\n";
"SoftNeg XR←RealNeg\n";
};
Operations
SoftSub: PUBLIC SAFE PROC [r0, r1: Float] RETURNS [Float] = TRUSTED {
sMask: CARD32 ¬ LOOPHOLE[Float[m2: 0, sign: TRUE, exp: 0, m1: 0]];
r1 ¬ LOOPHOLE[Basics32.BITXOR[LOOPHOLE[r1, CARD32], sMask]];
This is the only difference between Add and Sub
IF Basics32.BITOR[LOOPHOLE[r0, CARD32], sMask]
< Basics32.BITOR[LOOPHOLE[r1, CARD32], sMask] THEN {
temp: Float ¬ r0;
r0 ¬ r1;
r1 ¬ temp;
};
At this point r0 holds the real with largest absolute value.
{
guard: CARD32 ¬ 0;
r0s: CARD32 = Basics32.BITAND[LOOPHOLE[r0, CARD32], sMask];
r1s: CARD32 = Basics32.BITAND[LOOPHOLE[r1, CARD32], sMask];
resExp: NAT ¬ r0.exp;
dExp: NAT ¬ resExp-r1.exp;
SELECT TRUE FROM
resExp = nanExpon => RETURN [RaiseFloatError[nan]];
dExp > maxExponDelta => RETURN [r0];
ENDCASE => {
mnr: CARD32 ¬ hiddenBitAsCard;
mMask: CARD32 ¬ hiddenBitAsCard-1;
r0m: CARD32 ¬ Basics32.BITAND[LOOPHOLE[r0, CARD32], mMask];
mant: CARD32 ¬ Basics32.BITAND[LOOPHOLE[r1, CARD32], mMask] + mnr;
IF dExp = resExp THEN {
r1.exp = 0 => denormalized number
IF resExp > maxExponDelta THEN RETURN [r0];
mant ¬ Basics32.BITAND[mant, mMask];
IF resExp = 0 THEN {
Both are denormalized (no adjustment necessary).
IF r0s = r1s
THEN mant ¬ r0m + mant
ELSE mant ¬ r0m - mant;
RETURN [LOOPHOLE[mant + r0s]];
};
mant ¬ mant + mant;
};
r0m ¬ Basics32.BITOR[r0m, mnr];
IF dExp # 0 THEN {
guard ¬ Basics32.BITLSHIFT[mant, 32-dExp];
mant ¬ Basics32.BITRSHIFT[mant, dExp];
};
IF r0s = r1s
THEN {
Addition, which is cheaper
mant ¬ mant + r0m;
IF mant >= mnr+mnr THEN {
Have to renormalize this puppy
guard ¬ Basics32.BITRSHIFT[guard, 1] + Basics32.BITLSHIFT[mant, 32-1];
mant ¬ Basics32.BITRSHIFT[mant, 1];
resExp ¬ resExp + 1;
};
}
ELSE {
Subtraction, which is more expensive
mant ¬ r0m - mant;
IF guard # 0 THEN {mant ¬ mant - 1; guard ¬ 0 - guard};
IF mant < mnr THEN {
In this case the subtraction has changed the exponent.
shift: NAT ¬ 0;
temp: CARD32 ¬ 12;
IF mant = 0 THEN RETURN [PosZero];
IF Basics32.BITRSHIFT[mant, 12] # 0 THEN shift ¬ 12;
IF Basics32.BITRSHIFT[mant, shift+8] # 0 THEN shift ¬ shift+8;
IF Basics32.BITRSHIFT[mant, shift+4] # 0 THEN shift ¬ shift+4;
IF Basics32.BITRSHIFT[mant, shift+2] # 0 THEN shift ¬ shift+2;
IF Basics32.BITRSHIFT[mant, shift+1] # 0 THEN shift ¬ shift+1;
shift ¬ 23-shift;
mant ¬ Basics32.BITLSHIFT[mant, shift] + Basics32.BITRSHIFT[guard, 32-shift];
guard ¬ Basics32.BITLSHIFT[guard, shift];
resExp ¬ resExp - shift;
IF resExp <= 0 THEN {
This sucker is going to be denormalized
shift: NAT ¬ 1-resExp;
guard ¬ Basics32.BITRSHIFT[guard, shift] + Basics32.BITLSHIFT[mant, 32-shift];
mant ¬ Basics32.BITRSHIFT[mant, shift];
resExp ¬ 0;
};
};
};
IF LOOPHOLE[guard, INT32] < 0 THEN {
We probably have to round
IF LOOPHOLE[guard, INT32] = INT32.FIRST
THEN mant ¬ mant + Basics32.BITAND[mant, 1]
ELSE mant ¬ mant + 1;
resExp ¬ resExp + BYTE[Basics32.BITRSHIFT[mant, 24]];
Can only add 0 or 1 (on carry from the above add)
};
IF resExp > maxExpon THEN RETURN [RaiseFloatError[overflow]];
RETURN [LOOPHOLE[Basics32.BITAND[mant, mMask] + resExp*hiddenBitAsCard + r0s]];
};
};
};
SoftAdd: PUBLIC SAFE PROC [r0, r1: Float] RETURNS [Float] = TRUSTED {
sMask: CARD32 ¬ LOOPHOLE[Float[m2: 0, sign: TRUE, exp: 0, m1: 0]];
IF Basics32.BITOR[LOOPHOLE[r0, CARD32], sMask]
< Basics32.BITOR[LOOPHOLE[r1, CARD32], sMask] THEN {
temp: Float ¬ r0;
r0 ¬ r1;
r1 ¬ temp;
};
At this point r0 holds the real with largest absolute value.
{
guard: CARD32 ¬ 0;
r0s: CARD32 = Basics32.BITAND[LOOPHOLE[r0, CARD32], sMask];
r1s: CARD32 = Basics32.BITAND[LOOPHOLE[r1, CARD32], sMask];
resExp: NAT ¬ r0.exp;
dExp: NAT ¬ resExp-r1.exp;
SELECT TRUE FROM
resExp = nanExpon => RETURN [RaiseFloatError[nan]];
dExp > maxExponDelta => RETURN [r0];
ENDCASE => {
mnr: CARD32 ¬ hiddenBitAsCard;
mMask: CARD32 ¬ hiddenBitAsCard-1;
r0m: CARD32 ¬ Basics32.BITAND[LOOPHOLE[r0, CARD32], mMask];
mant: CARD32 ¬ Basics32.BITAND[LOOPHOLE[r1, CARD32], mMask] + mnr;
IF dExp = resExp THEN {
r1.exp = 0 => denormalized number
IF resExp > maxExponDelta THEN RETURN [r0];
mant ¬ Basics32.BITAND[mant, mMask];
IF resExp = 0 THEN {
Both are denormalized (no adjustment necessary).
IF r0s = r1s
THEN mant ¬ r0m + mant
ELSE mant ¬ r0m - mant;
RETURN [LOOPHOLE[mant + r0s]];
};
mant ¬ mant + mant;
};
r0m ¬ Basics32.BITOR[r0m, mnr];
IF dExp # 0 THEN {
guard ¬ Basics32.BITLSHIFT[mant, 32-dExp];
mant ¬ Basics32.BITRSHIFT[mant, dExp];
};
IF r0s = r1s
THEN {
Addition, which is cheaper
mant ¬ mant + r0m;
IF mant >= mnr+mnr THEN {
Have to renormalize this puppy
guard ¬ Basics32.BITRSHIFT[guard, 1] + Basics32.BITLSHIFT[mant, 32-1];
mant ¬ Basics32.BITRSHIFT[mant, 1];
resExp ¬ resExp + 1;
};
}
ELSE {
Subtraction, which is more expensive
mant ¬ r0m - mant;
IF guard # 0 THEN {mant ¬ mant - 1; guard ¬ 0 - guard};
IF mant < mnr THEN {
In this case the subtraction has changed the exponent.
shift: NAT ¬ 0;
temp: CARD32 ¬ 12;
IF mant = 0 THEN RETURN [PosZero];
IF Basics32.BITRSHIFT[mant, 12] # 0 THEN shift ¬ 12;
IF Basics32.BITRSHIFT[mant, shift+8] # 0 THEN shift ¬ shift+8;
IF Basics32.BITRSHIFT[mant, shift+4] # 0 THEN shift ¬ shift+4;
IF Basics32.BITRSHIFT[mant, shift+2] # 0 THEN shift ¬ shift+2;
IF Basics32.BITRSHIFT[mant, shift+1] # 0 THEN shift ¬ shift+1;
shift ¬ 23-shift;
mant ¬ Basics32.BITLSHIFT[mant, shift] + Basics32.BITRSHIFT[guard, 32-shift];
guard ¬ Basics32.BITLSHIFT[guard, shift];
resExp ¬ resExp - shift;
IF resExp <= 0 THEN {
This sucker is going to be denormalized
shift: NAT ¬ 1-resExp;
guard ¬ Basics32.BITRSHIFT[guard, shift] + Basics32.BITLSHIFT[mant, 32-shift];
mant ¬ Basics32.BITRSHIFT[mant, shift];
resExp ¬ 0;
};
};
};
IF LOOPHOLE[guard, INT32] < 0 THEN {
We probably have to round
IF LOOPHOLE[guard, INT32] = INT32.FIRST
THEN mant ¬ mant + Basics32.BITAND[mant, 1]
ELSE mant ¬ mant + 1;
resExp ¬ resExp + BYTE[Basics32.BITRSHIFT[mant, 24]];
};
IF resExp > maxExpon THEN RETURN [RaiseFloatError[overflow]];
RETURN [LOOPHOLE[Basics32.BITAND[mant, mMask] + resExp*hiddenBitAsCard + r0s]];
};
};
};
SoftMul: PUBLIC SAFE PROC [r0, r1: Float] RETURNS [Float] = TRUSTED {
sign: CARD32 = Basics32.BITAND[signBitAsCard,
Basics32.BITXOR[LOOPHOLE[r0, CARD32], LOOPHOLE[r1, CARD32]]];
r0exp: INTEGER = r0.exp;
r1exp: INTEGER = r1.exp;
resExp: INTEGER ¬ r0exp+r1exp - expBias;
mMask: CARD32 ¬ hiddenBitAsCard-1;
Note: leave this a variable so it will go into a register
r0Mant: CARD32 ¬ Basics32.BITAND[mMask, LOOPHOLE[r0, CARD32]];
r1Mant: CARD32 ¬ Basics32.BITAND[mMask, LOOPHOLE[r1, CARD32]];
mant: CARD32 ¬ 0;
guard: CARD32 ¬ 0;
IF r0exp = nanExpon THEN RETURN [RaiseFloatError[nan]];
IF r1exp = nanExpon THEN RETURN [RaiseFloatError[nan]];
IF r0exp = 0 THEN {
Denormalized or 0.0
IF r0Mant = 0 THEN RETURN [LOOPHOLE[sign]];
DO
r0Mant ¬ r0Mant + r0Mant;
IF r0Mant >= hiddenBitAsCard THEN EXIT;
resExp ¬ resExp - 1;
ENDLOOP;
};
IF r1exp = 0 THEN {
Denormalized or 0.0
IF r1Mant = 0 THEN RETURN [LOOPHOLE[sign]];
DO
r1Mant ¬ r1Mant + r1Mant;
IF r1Mant >= hiddenBitAsCard THEN EXIT;
resExp ¬ resExp - 1;
ENDLOOP;
};
r0Mant ¬ Basics32.BITOR[r0Mant, hiddenBitAsCard];
r1Mant ¬ Basics32.BITOR[r1Mant, hiddenBitAsCard];
Do the multiplication
Initially: hiddenBitAsCard <= r0Mant, r1Mant <= hiddenBitAsCard*2-1
We can get by with just 3 multiplies, since we only have two 24-bit numbers
{
umul: PROC [x: BYTE] RETURNS [CARD32] = INLINE {
RETURN [IF x = 0 THEN 0 ELSE x*r1Mant];
};
pLo: CARD32 = umul[Basics32.BITAND[r0Mant, 255]];
pMid: CARD32 = umul[Basics32.BITAND[Basics32.BITRSHIFT[r0Mant, 8], 255]];
pHi: CARD32 = Basics32.BITRSHIFT[r0Mant, 16] * r1Mant;
sumLo: CARD32 = Basics32.BITLSHIFT[Basics32.BITAND[377B, pHi], 16]
+ Basics32.BITLSHIFT[Basics32.BITAND[177777B, pMid], 8]
+ Basics32.BITAND[77777777B, pLo];
mant ¬ Basics32.BITRSHIFT[pHi, 8]
+ Basics32.BITRSHIFT[pMid, 16]
+ Basics32.BITRSHIFT[pLo, 24]
+ Basics32.BITRSHIFT[sumLo, 24];
guard ¬ Basics32.BITLSHIFT[sumLo, 8];
IF mant < hiddenBitAsCard
THEN {
mant ¬ mant + mant + Basics32.BITRSHIFT[guard, 32-1];
guard ¬ guard + guard;
IF checking AND mant < hiddenBitAsCard THEN ERROR;
}
ELSE {
resExp ¬ resExp + 1;
IF checking AND mant >= hiddenBitAsCard*2 THEN ERROR;
};
};
IF resExp <= 0 THEN {
The resulting number is denormalized (or zero)
shift: NAT ¬ 1-resExp;
IF shift > 24 THEN RETURN [LOOPHOLE[sign]] ELSE {
guardOut: CARD32 ¬ Basics32.BITLSHIFT[guard, 32-shift];
guard ¬ Basics32.BITRSHIFT[guard, shift] + Basics32.BITLSHIFT[mant, 32-shift];
IF guardOut # 0 THEN guard ¬ Basics32.BITOR[guard, 1];
mant ¬ Basics32.BITRSHIFT[mant, shift];
resExp ¬ 0;
};
};
IF LOOPHOLE[guard, INT32] < 0 THEN {
We have to round the result, which is why we held onto the guard bits
IF guard = signBitAsCard
THEN mant ¬ mant + Basics32.BITAND[mant, 1]
ELSE mant ¬ mant + 1;
resExp ¬ resExp + BYTE[Basics32.BITRSHIFT[mant, 24]];
};
IF resExp > maxExpon THEN RETURN [RaiseFloatError[overflow]];
RETURN [LOOPHOLE[Basics32.BITAND[mant, mMask] + Basics32.BITLSHIFT[resExp, 23] + sign]];
};
SoftDiv: PUBLIC SAFE PROC [r0, r1: Float] RETURNS [Float] = TRUSTED {
guard: CARD32 ¬ 0;
sign: CARD32 = Basics32.BITAND[signBitAsCard,
Basics32.BITXOR[LOOPHOLE[r0, CARD32], LOOPHOLE[r1, CARD32]]];
r0exp: NAT = r0.exp;
r1exp: NAT = r1.exp;
resExp: INTEGER ¬ (r0exp - r1exp) + expBias;
mMask: CARD32 ¬ hiddenBitAsCard-1;
Note: leave this a variable so it will go into a register
r0Mant: CARD32 ¬ Basics32.BITAND[mMask, LOOPHOLE[r0, CARD32]];
r1Mant: CARD32 ¬ Basics32.BITAND[mMask, LOOPHOLE[r1, CARD32]];
mant: CARD32 ¬ 0;
IF r0exp = nanExpon THEN RETURN [RaiseFloatError[nan]];
IF r1exp = nanExpon THEN RETURN [RaiseFloatError[nan]];
IF r1exp = 0 THEN {
Denormalized or 0.0
IF r1Mant = 0 THEN RETURN [RaiseFloatError[divByZero]];
DO
r1Mant ¬ r1Mant + r1Mant;
IF r1Mant >= hiddenBitAsCard THEN EXIT;
resExp ¬ resExp - 1;
ENDLOOP;
};
IF r0exp = 0 THEN {
Denormalized or 0.0
IF r0Mant = 0 THEN RETURN [LOOPHOLE[sign]];
DO
r0Mant ¬ r0Mant + r0Mant;
IF r0Mant >= hiddenBitAsCard THEN EXIT;
resExp ¬ resExp - 1;
ENDLOOP;
};
r0Mant ¬ Basics32.BITOR[r0Mant, hiddenBitAsCard];
r1Mant ¬ Basics32.BITOR[r1Mant, hiddenBitAsCard];
{
bit: CARD32 ¬ hiddenBitAsCard;
mant ¬ hiddenBitAsCard;
IF r0Mant < r1Mant THEN {
r0Mant ¬ r0Mant + r0Mant;
resExp ¬ resExp - 1;
};
r0Mant ¬ r0Mant - r1Mant;
IF r0Mant = 0 THEN GO TO earlyExit;
DO
r0Mant ¬ r0Mant + r0Mant;
IF bit = 1 THEN {
IF r0Mant >= r1Mant THEN {
Have to round
IF r0Mant = r1Mant
THEN mant ¬ mant + Basics32.BITAND[mant, 1]
ELSE mant ¬ mant + 1;
IF mant = hiddenBitAsCard+hiddenBitAsCard THEN {
mant ¬ hiddenBitAsCard;
resExp ¬ resExp + 1;
};
};
EXIT;
};
bit ¬ Basics32.BITRSHIFT[bit, 1];
IF r0Mant >= r1Mant THEN {
mant ¬ mant + bit;
r0Mant ¬ r0Mant - r1Mant;
IF r0Mant = 0 THEN GO TO earlyExit;
};
ENDLOOP;
EXITS earlyExit => {};
};
IF resExp <= 0 THEN {
The resulting number is denormalized (or zero)
DO
guard ¬ Basics32.BITOR[
Basics32.BITAND[guard, 1],
Basics32.BITRSHIFT[guard, 1] + Basics32.BITLSHIFT[mant, 32-1]];
mant ¬ Basics32.BITRSHIFT[mant, 1];
IF resExp = 0 THEN EXIT;
resExp ¬ resExp + 1;
ENDLOOP;
};
IF LOOPHOLE[guard, INT32] < 0 THEN {
We have to round the result, which is why we held onto the guard bits
IF guard = signBitAsCard
THEN mant ¬ mant + Basics32.BITAND[mant, 1]
ELSE mant ¬ mant + 1;
resExp ¬ resExp + BYTE[Basics32.BITRSHIFT[mant, 24]];
};
IF resExp > maxExpon THEN RETURN [RaiseFloatError[overflow]];
RETURN [LOOPHOLE[Basics32.BITAND[mant, mMask] + Basics32.BITLSHIFT[resExp, 23] + sign]];
};
FloatInt: PUBLIC SAFE PROC [i: INT32] RETURNS [Float] ~ TRUSTED {
sign: CARD32 ¬ 0;
c: CARD32 ¬ LOOPHOLE[i];
shift: NAT ¬ 0;
temp: CARD32 ¬ 0;
guard: CARD32;
exp: NAT;
mMask: CARD32 ¬ hiddenBitAsCard-1;
IF i = 0 THEN RETURN[LOOPHOLE[PosZero]];
IF i < 0 THEN {c ¬ -i; sign ¬ signBitAsCard};
okay now determine the leading bit. Remember that this is unsigned
IF Basics32.BITRSHIFT[c, 16] # 0 THEN shift ¬ 16;
IF Basics32.BITRSHIFT[c, shift+8] # 0 THEN shift ¬ shift+8;
IF Basics32.BITRSHIFT[c, shift+4] # 0 THEN shift ¬ shift+4;
IF Basics32.BITRSHIFT[c, shift+2] # 0 THEN shift ¬ shift+2;
IF Basics32.BITRSHIFT[c, shift+1] # 0 THEN shift ¬ shift+1;
exp ¬ shift+expBias;
IF shift <= 23
THEN {
bits fit in mantisa completely, no rounding needed
temp ¬ Basics32.BITLSHIFT[c, 23-shift];
}
ELSE {
going to have to round
temp ¬ Basics32.BITRSHIFT[c, shift-23];
guard ¬ Basics32.BITLSHIFT[c, 32-shift];
IF LOOPHOLE[guard, INT32] < 0 THEN {
We may have to round
IF LOOPHOLE[guard, INT32] = INT32.FIRST
THEN temp ¬ temp + Basics32.BITAND[temp, 1]
ELSE temp ¬ temp + 1;
exp ¬ exp + BYTE[Basics32.BITRSHIFT[temp, 24]];
};
};
RETURN [LOOPHOLE[Basics32.BITAND[temp, mMask] + exp*hiddenBitAsCard+sign]];
};
FloatCard: PUBLIC SAFE PROC [c: CARD32] RETURNS [Float] ~ TRUSTED {
shift: NAT ¬ 0;
temp: CARD32 ¬ 0;
exp: NAT;
guard: CARD32;
mMask: CARD32 ¬ hiddenBitAsCard-1;
IF c = 0 THEN RETURN[LOOPHOLE[PosZero]];
okay now determine the leading bit. Remember that this is unsigned
IF Basics32.BITRSHIFT[c, 16] # 0 THEN shift ¬ 16;
IF Basics32.BITRSHIFT[c, shift+8] # 0 THEN shift ¬ shift+8;
IF Basics32.BITRSHIFT[c, shift+4] # 0 THEN shift ¬ shift+4;
IF Basics32.BITRSHIFT[c, shift+2] # 0 THEN shift ¬ shift+2;
IF Basics32.BITRSHIFT[c, shift+1] # 0 THEN shift ¬ shift+1;
exp ¬ expBias+shift;
IF shift <= 23
THEN {
bits fit in mantisa completely, no rounding needed
temp ¬ Basics32.BITLSHIFT[c, 23-shift];
}
ELSE {
going to have to round
temp ¬ Basics32.BITRSHIFT[c, shift-23];
guard ¬ Basics32.BITLSHIFT[c, 32-shift];
IF LOOPHOLE[guard, INT32] < 0 THEN {
We may have to round
IF LOOPHOLE[guard, INT32] = INT32.FIRST
THEN temp ¬ temp + Basics32.BITAND[temp, 1]
ELSE temp ¬ temp + 1;
exp ¬ exp + BYTE[Basics32.BITRSHIFT[temp, 24]];
};
};
RETURN [LOOPHOLE[Basics32.BITAND[temp, mMask] + exp*hiddenBitAsCard]];
};
SoftFix: PUBLIC SAFE PROC [c: Float] RETURNS [INT32] ~ TRUSTED {
exponent: NAT ¬ c.exp;
newInt: INT32;
neg0: Float ¬ NegZero;
IF c = neg0 OR c = PosZero THEN RETURN[0];
IF exponent = nanExpon THEN RETURN[LOOPHOLE[RaiseFloatError[nan]]];
IF exponent > expBias+31 THEN RETURN[LOOPHOLE[RaiseFloatError[overflow]]];
the semantics of Fix (truncation) allows us to ignore numbers less than 1.0
IF exponent < expBias THEN RETURN[0];
since we previously checked for zero we have to add in the hidden bit
newInt ¬ c.m2 + Basics32.BITLSHIFT[c.m1, 16] + hiddenBitAsCard;
newInt ¬ (IF exponent >= expBias+maxExponDelta
THEN Basics32.BITLSHIFT[newInt, exponent-expBias-23]
ELSE
Basics32.BITRSHIFT[newInt, 23-(exponent-expBias)]);
RETURN[IF LOOPHOLE[c, INT32] < 0 THEN -newInt ELSE newInt];
};
SoftRnd: PUBLIC SAFE PROC [c: Float] RETURNS [INT32] ~ TRUSTED {
half: Float ¬ Float[sign: FALSE, exp: expBias-1, m1: 0, m2: 0];
abs: Float = QuickAbs[c];
fix: INT32 ¬ SoftFix[abs];
flt: Float = FloatInt[fix];
delta: Float = SoftSub[abs, flt];
SELECT TRUE FROM
QuickLt[delta, half] => {};
QuickGt[delta, half] => fix ¬ fix + 1;
(LOOPHOLE[fix, CARD32] MOD 2) = 1 => fix ¬ fix + 1;
ENDCASE;
IF abs # c THEN fix ¬ - fix;
RETURN [fix];
};
SoftFloor: PUBLIC SAFE PROC [c: REAL] RETURNS [INT32] ~ TRUSTED {
temp: INT32 = SoftFix[LOOPHOLE[c, Float]];
IF c >= 0 THEN RETURN [temp];
IF c = temp THEN RETURN [temp] ELSE RETURN [temp-1];
};
SoftCeiling: PUBLIC SAFE PROC [c: REAL] RETURNS [INT32] ~ TRUSTED {
temp: INT32 = SoftFix[LOOPHOLE[c, Float]];
IF c <= 0 THEN RETURN [temp];
IF c = temp THEN RETURN [temp] ELSE RETURN [temp+1];
};
SoftNeg: PUBLIC SAFE PROC [a: Float] RETURNS [Float] ~ TRUSTED {
IF a.exp = nanExpon THEN RETURN [RaiseFloatError[nan]];
RETURN [LOOPHOLE[Basics32.BITXOR[LOOPHOLE[a, CARD32], signBitAsCard], Float]];
};
SoftAbs: PUBLIC SAFE PROC [a: Float] RETURNS [Float] ~ TRUSTED {
bogus: CARD32 = LOOPHOLE[Float[sign: FALSE, exp: nanExpon, m1: 0, m2: 0]];
c: CARD32 ¬ (LOOPHOLE[a, CARD32]*2)/2;
IF c >= bogus THEN RETURN [RaiseFloatError[nan]];
RETURN [LOOPHOLE[c]];
};
RealMin: PUBLIC SAFE PROC [a, b: Float] RETURNS [Float] ~ TRUSTED {
neg0: Float ¬ NegZero;
nanLow: Float ¬ Float[sign: FALSE, exp: nanExpon, m1: 0, m2: 0];
IF a = neg0 THEN a ¬ PosZero;
IF b = neg0 THEN b ¬ PosZero;
IF FloatAnd[a, nanLow] = nanLow OR FloatAnd[b, nanLow] = nanLow THEN
RETURN [RaiseFloatError[nan]];
IF LOOPHOLE[FloatAnd[a, b], INT32] < 0 THEN
RETURN [(IF LOOPHOLE[a, INT32] > LOOPHOLE[b, INT32] THEN a ELSE b)];
IF LOOPHOLE[a, INT32] > LOOPHOLE[b, INT32] THEN RETURN[b] ELSE RETURN[a];
};
RealMax: PUBLIC SAFE PROC [a, b: Float] RETURNS [Float] ~ TRUSTED {
neg0: Float ¬ NegZero;
nanLow: Float ¬ Float[sign: FALSE, exp: nanExpon, m1: 0, m2: 0];
IF a = neg0 THEN a ¬ PosZero;
IF b = neg0 THEN b ¬ PosZero;
IF FloatAnd[a, nanLow] = nanLow OR FloatAnd[b, nanLow] = nanLow THEN
RETURN [RaiseFloatError[nan]];
IF LOOPHOLE[FloatAnd[a, b], INT32] < 0 THEN
RETURN [(IF LOOPHOLE[a, INT32] > LOOPHOLE[b, INT32] THEN b ELSE a)];
IF LOOPHOLE[a, INT32] > LOOPHOLE[b, INT32] THEN RETURN [a] ELSE RETURN [b];
};
SoftGt: PUBLIC SAFE PROC [r0, r1: Float] RETURNS [BOOL] = TRUSTED {
neg0: Float ¬ NegZero;
nanLow: Float ¬ Float[sign: FALSE, exp: nanExpon, m1: 0, m2: 0];
IF r0 = neg0 THEN r0 ¬ PosZero;
IF r1 = neg0 THEN r1 ¬ PosZero;
IF FloatAnd[r0, nanLow] = nanLow OR FloatAnd[r1, nanLow] = nanLow THEN {
[] ¬ RaiseFloatError[nan];
RETURN [FALSE];
};
IF LOOPHOLE[FloatAnd[r0, r1], INT32] < 0 THEN
RETURN[LOOPHOLE[r0, INT32] < LOOPHOLE[r1, INT32]];
RETURN [LOOPHOLE[r0, INT32] > LOOPHOLE[r1, INT32]];
};
SoftGe: PUBLIC SAFE PROC [r0, r1: Float] RETURNS [BOOL] = TRUSTED {
neg0: Float ¬ NegZero;
nanLow: Float ¬ Float[sign: FALSE, exp: nanExpon, m1: 0, m2: 0];
IF r0 = neg0 THEN r0 ¬ PosZero;
IF r1 = neg0 THEN r1 ¬ PosZero;
IF FloatAnd[r0, nanLow] = nanLow OR FloatAnd[r1, nanLow] = nanLow THEN {
[] ¬ RaiseFloatError[nan];
RETURN [FALSE];
};
IF LOOPHOLE[FloatAnd[r0, r1], INT32] < 0 THEN
RETURN[LOOPHOLE[r0, INT32] <= LOOPHOLE[r1, INT32]];
RETURN [LOOPHOLE[r0, INT32] >= LOOPHOLE[r1, INT32]];
};
SoftEq: PUBLIC SAFE PROC [r0, r1: Float] RETURNS [BOOL] = TRUSTED {
neg0: Float ¬ NegZero;
nanLow: Float ¬ Float[sign: FALSE, exp: nanExpon, m1: 0, m2: 0];
IF r0 = neg0 THEN r0 ¬ PosZero;
IF r1 = neg0 THEN r1 ¬ PosZero;
IF FloatAnd[r0, nanLow] = nanLow OR FloatAnd[r1, nanLow] = nanLow THEN {
[] ¬ RaiseFloatError[nan];
RETURN [FALSE];
};
RETURN [LOOPHOLE[r0, INT32] = LOOPHOLE[r1, INT32]];
};
SoftCmp: PUBLIC SAFE PROC [r0, r1: Float] RETURNS [Basics.PartialComparison] = TRUSTED {
neg0: Float ¬ NegZero;
nanLow: Float ¬ Float[sign: FALSE, exp: nanExpon, m1: 0, m2: 0];
IF r0 = neg0 THEN r0 ¬ PosZero;
IF r1 = neg0 THEN r1 ¬ PosZero;
IF FloatAnd[r0, nanLow] = nanLow OR FloatAnd[r0, nanLow] = nanLow THEN
RETURN [incomparable];
IF LOOPHOLE[FloatAnd[r0, r1], INT32] < 0 THEN {
IF LOOPHOLE[r0, INT32] < LOOPHOLE[r1, INT32] THEN RETURN [greater];
IF LOOPHOLE[r1, INT32] < LOOPHOLE[r0, INT32] THEN RETURN [less];
RETURN [equal];
};
IF LOOPHOLE[r0, INT32] < LOOPHOLE[r1, INT32] THEN RETURN [less];
IF LOOPHOLE[r1, INT32] < LOOPHOLE[r0, INT32] THEN RETURN [greater];
RETURN [equal];
};
SoftLt: PUBLIC SAFE PROC [r0, r1: Float] RETURNS [BOOL] = TRUSTED {
neg0: Float ¬ NegZero;
nanLow: Float ¬ Float[sign: FALSE, exp: nanExpon, m1: 0, m2: 0];
IF r0 = neg0 THEN r0 ¬ PosZero;
IF r1 = neg0 THEN r1 ¬ PosZero;
IF FloatAnd[r0, nanLow] = nanLow OR FloatAnd[r1, nanLow] = nanLow THEN {
[] ¬ RaiseFloatError[nan];
RETURN [FALSE];
};
IF LOOPHOLE[FloatAnd[r0, r1], INT32] < 0 THEN
RETURN[LOOPHOLE[r0, INT32] > LOOPHOLE[r1, INT32]];
RETURN [LOOPHOLE[r0, INT32] < LOOPHOLE[r1, INT32]];
};
SoftLe: PUBLIC SAFE PROC [r0, r1: Float] RETURNS [BOOL] = TRUSTED {
neg0: Float ¬ NegZero;
nanLow: Float ¬ Float[sign: FALSE, exp: nanExpon, m1: 0, m2: 0];
IF r0 = neg0 THEN r0 ¬ PosZero;
IF r1 = neg0 THEN r1 ¬ PosZero;
IF FloatAnd[r0, nanLow] = nanLow OR FloatAnd[r1, nanLow] = nanLow THEN {
[] ¬ RaiseFloatError[nan];
RETURN [FALSE];
};
IF LOOPHOLE[FloatAnd[r0, r1], INT32] < 0 THEN
RETURN[LOOPHOLE[r0, INT32] >= LOOPHOLE[r1, INT32]];
RETURN [LOOPHOLE[r0, INT32] <= LOOPHOLE[r1, INT32]];
};
SignedPwr: PUBLIC SAFE PROC [base, exp: INT32] RETURNS [INT32] ~ TRUSTED {
p: INT32 ¬ 1;
b: INT32 ¬ base;
e: INT32 ¬ exp;
SELECT b FROM
0 => {IF e <= 0 THEN RETURN[LOOPHOLE[RaiseFloatError[other]]]; RETURN[0];};
1 => RETURN[1];
-1 => IF Basics.OddInt[e] THEN RETURN[-1] ELSE RETURN[1];
ENDCASE => NULL;
IF e < 0 THEN RETURN[0];
invariant: p * b**e = base**exp
WHILE TRUE DO
IF Basics.OddInt[e] THEN { p ¬ p*b; e ¬ e-1;};
IF e ~= 0
THEN { b ¬ b*b; e ¬ e/2; }
ELSE {
IF base ~= 0 THEN RETURN[p];
RETURN[LOOPHOLE[RaiseFloatError[other]]];
};
ENDLOOP;
RETURN[0]; -- keep compiler happy
};
UnsignedPwr: PUBLIC SAFE PROC [base, exp: CARD32] RETURNS [CARD32] ~ TRUSTED {
p: CARD32 ¬ 1;
b: CARD32 ¬ base;
e: CARD32 ¬ exp;
invariant: p * b**e = base**exp
WHILE TRUE DO
IF Basics.OddCard[e] THEN { p ¬ p*b; e ¬ e-1;};
IF e ~= 0
THEN { b ¬ b*b; e ¬ e/2; }
ELSE {
IF base ~= 0 THEN RETURN[p];
RETURN[LOOPHOLE[RaiseFloatError[other]]];
};
ENDLOOP;
RETURN[0]; -- keep compiler happy
};
RealPwr: PUBLIC SAFE PROC [base, exp: REAL32] RETURNS [REAL32] ~ TRUSTED {
RETURN[RealFns.Power[base, exp]];
};
Error handling
FloatErrorKind: TYPE = {none, nan, overflow, underflow, divByZero, other};
floatErrorCount: CARD ¬ 0;
floatErrorFlags: Real.ExceptionFlags;
SetExceptionFlag: PROC
[which: FloatErrorKind] RETURNS [fault: Real.ExceptionFlags ¬ Real.NoExceptions] ~ {
SELECT which FROM
none => NULL;
nan => fault[invalidOperation] ¬ TRUE;
overflow => fault[overflow] ¬ TRUE;
underflow => fault[underflow] ¬ TRUE;
divByZero => fault[divisionByZero] ¬ TRUE;
other => fault[invalidOperation] ¬ TRUE;
ENDCASE => ERROR;
};
RaiseFloatError: PROC [kind: FloatErrorKind] RETURNS [Float] = {
The reason to have a separate routine to raise the errors is that it is more efficient to do so, and gives us a single place to intercept (or count) errors.
floatErrorCount ¬ floatErrorCount + 1;
IF SIGNAL Real.RealException[SetExceptionFlag[kind], NIL] THEN ERROR;
RETURN [PosZero];
};
GetStickyFlags: PROC RETURNS [Real.ExceptionFlags] ~ {
RETURN [floatErrorFlags];
};
SetStickyFlags: PROC
[new: Real.ExceptionFlags ¬ Real.NoExceptions] RETURNS [old: Real.ExceptionFlags] ~ {
old ¬ floatErrorFlags;
floatErrorFlags ¬ new;
};
ExternalNames[];
END.