UnsafeTileImpl.mesa
Copyright Ó 1988, 1991 by Xerox Corporation. All rights reserved.
Russ Atkinson (RRA) October 10, 1988 9:24:30 pm PDT
Michael Plass, August 5, 1991 10:20 am PDT
Willie-s, May 24, 1991 2:02 pm PDT
DIRECTORY Basics, UnsafeTile;
UnsafeTileImpl: PROGRAM
IMPORTS Basics
EXPORTS UnsafeTile
= BEGIN OPEN Basics;
checking: BOOL = TRUE;
bpu: NAT = BITS[UNIT];
bpw: NAT = BITS[WORD];
upw: NAT = UNITS[WORD];
DstFunc: TYPE = UnsafeTile.DstFunc;
BitOffset: TYPE = CARDINAL[0..bpw);
BitCount: TYPE = CARDINAL[0..bpw];
Arg: TYPE = LONG POINTER TO UnsafeTile.ArgRecord;
BitsPtr: TYPE = LONG POINTER TO RawBits;
RawBits: TYPE = RECORD[PACKED SEQUENCE COMPUTED CARDINAL OF BIT];
WordPtr: TYPE = LONG POINTER TO WORD;
Op: PUBLIC UNSAFE PROC [arg: Arg] = UNCHECKED {
fSizeTile: NAT = arg.fSizeTile;
IF checking THEN {
Check the argument block for consistency
IF arg.firstBit >= fSizeTile THEN BadAssertion[];
IF arg.phase >= fSizeTile THEN BadAssertion[];
IF arg.firstLine >= arg.sSizeTile THEN BadAssertion[];
};
IF arg.fSize = 0 OR arg.sSize = 0 THEN RETURN;
IF fSizeTile <= bpw THEN {
We have some fast cases when the tile line fits in a single word
IF BITAND[fSizeTile, fSizeTile-1] = 0
THEN Fast1[arg]
The fastest case is when the src tile size is 2**N bits
ELSE Fast2[arg];
The medium case is when the src tile size is <= bpw bits
RETURN;
};
Fast3[arg];
};
Fast1: PUBLIC UNSAFE PROC [arg: Arg] = UNCHECKED {
tileLine: NAT ¬ arg.firstLine;
firstBit: BitOffset ¬ arg.firstBit;
srcBpl: NAT = arg.srcBpl;
srcBit: BitOffset ¬ arg.srcBit;
src: WordPtr ¬ arg.srcWord;
dst: WordPtr ¬ arg.dstWord;
dstBit: BitOffset ¬ arg.dstBit;
fSize: NAT = arg.fSize;
fSizeTile: BitCount = arg.fSizeTile;
sSizeTile: NAT = arg.sSizeTile;
sSize: NAT ¬ arg.sSize;
sw: WORD;
invert: WORD = arg.srcInvert;
FetchSource: PROC = INLINE {
This routine fetches a source word into sw, even if the source straddles a word boundary. We assume that 0 < fSizeTile <= bpw and that fSizeTile = 2**N for some N. If fSizeTile < bpw, then the source tile is replicated throughout sw.
k: BitCount ¬ fSizeTile;
sw ¬ BITXOR[src­, invert];
IF srcBit # 0 THEN {
source word needs to be left-justified (and may straddle a boundary)
sw ¬ BITLSHIFT[sw, srcBit];
IF srcBit+k > bpw THEN
sw ¬ sw + BITRSHIFT[BITXOR[(src+upw)­, invert], bpw-srcBit];
};
IF k < bpw THEN {
Replicate the tile throughout the word
sw ¬ BITAND[sw, BITLSHIFT[WORD.LAST, bpw-k]];
DO
sw ¬ sw + BITRSHIFT[sw, k];
k ¬ k + k;
IF k = bpw THEN EXIT;
ENDLOOP;
};
IF firstBit # 0 THEN sw ¬ BITLSHIFT[sw, firstBit] + BITRSHIFT[sw, bpw-firstBit];
The source tile needs to be rotated to get the first bit left-justified
};
DoFastLine1: PROC [df: DstFunc] = INLINE {
This routine merges the src word with the destination for one dst line
rem: NAT ¬ fSize;
dwp: WordPtr ¬ dst;
IF dstBit # 0 THEN {
The first word is partial, and the src tile is misaligned
dw: WORD ¬ dwp­;
mask: WORD ¬ BITRSHIFT[WORD.LAST, dstBit];
rw: WORD = BITRSHIFT[sw, dstBit];
rem ¬ rem + dstBit;
IF rem < bpw THEN mask ¬ BITXOR[mask, BITRSHIFT[WORD.LAST, rem]];
SELECT df FROM
null => dwp­ ¬ BITAND[dw, BITNOT[mask]] + BITAND[rw, mask];
and => dwp­ ¬ BITAND[dw, BITNOT[mask]] + BITAND[BITAND[dw, rw], mask];
or => dwp­ ¬ BITAND[dw, BITNOT[mask]] + BITAND[BITOR[dw, rw], mask];
xor => dwp­ ¬ BITAND[dw, BITNOT[mask]] + BITAND[BITXOR[dw, rw], mask];
ENDCASE;
IF rem <= bpw THEN GO TO done;
In this case the whole thing was done in one store
sw ¬ BITLSHIFT[sw, bpw-dstBit] + rw;
rem ¬ rem - bpw;
dwp ¬ dwp + upw;
};
WHILE rem >= bpw*2 DO
Store 2 words at a time
SELECT df FROM
null => {dwp­ ¬ sw; (dwp+upw)­ ¬ sw};
and => {dwp­ ¬ BITAND[dwp­, sw]; (dwp+upw)­ ¬ BITAND[(dwp+upw)­, sw]};
or => {dwp­ ¬ BITOR[dwp­, sw]; (dwp+upw)­ ¬ BITOR[(dwp+upw)­, sw]};
xor => {dwp­ ¬ BITXOR[dwp­, sw]; (dwp+upw)­ ¬ BITXOR[(dwp+upw)­, sw]};
ENDCASE;
rem ¬ rem - bpw*2;
dwp ¬ dwp + upw*2;
ENDLOOP;
IF rem >= bpw THEN {
Store a single word
SELECT df FROM
null => dwp­ ¬ sw;
and => dwp­ ¬ BITAND[dwp­, sw];
or => dwp­ ¬ BITOR[dwp­, sw];
ENDCASE => dwp­ ¬ BITXOR[dwp­, sw];
rem ¬ rem - bpw;
dwp ¬ dwp + upw;
};
IF rem # 0 THEN {
Store the last partial word
mask: WORD = BITRSHIFT[WORD.LAST, rem];
dw: WORD = dwp­;
SELECT df FROM
null => dwp­ ¬ BITAND[dw, mask] + BITAND[sw, BITNOT[mask]];
and => dwp­ ¬ BITAND[dw, mask] + BITAND[BITAND[dw, sw], BITNOT[mask]];
or => dwp­ ¬ BITAND[dw, mask] + BITAND[BITOR[dw, sw], BITNOT[mask]];
xor => dwp­ ¬ BITAND[dw, mask] + BITAND[BITXOR[dw, sw], BITNOT[mask]];
ENDCASE;
};
EXITS done => {};
};
BumpDst: PROC = INLINE {
This proc bumps the dst variables to the next line
next: NAT = dstBit + arg.dstBpl;
dstBit ¬ next MOD bpw;
dst ¬ dst + NAT[next-dstBit] / bpu;
};
BumpSrc: PROC = INLINE {
This proc bumps the src variables to the next line
next: NAT = srcBit + srcBpl;
srcBit ¬ next MOD bpw;
src ¬ src + NAT[next-srcBit] / bpu;
tileLine ¬ tileLine+1;
IF tileLine = sSizeTile THEN {
Take advantage of the fact that fSizeTile = 2**N
firstBit ¬ BITAND[firstBit + (fSizeTile-arg.phase), fSizeTile-1];
tileLine ¬ 0;
srcBit ¬ arg.srcBit;
src ¬ arg.srcWord;
};
};
WithFunction: PROC [df: DstFunc] = INLINE {
DO
FetchSource[];
DoFastLine1[df];
IF sSize = 1 THEN EXIT;
sSize ¬ sSize - 1;
BumpDst[];
BumpSrc[];
ENDLOOP;
};
IF checking AND BITAND[fSizeTile, fSizeTile-1] # 0 THEN BadAssertion[];
Make sure that the caller obeys the conventions
IF sSize # 0 THEN {
IF tileLine # 0 THEN {
We don't start at the beginning of the source tile
next: NAT ¬ srcBit + tileLine*srcBpl;
srcBit ¬ next MOD bpw;
src ¬ src + NAT[next-srcBit]/bpu;
};
Now to generate the special cases based on the dst function
SELECT arg.dstFunc FROM
null => WithFunction[null];
and => WithFunction[and];
or => WithFunction[or];
ENDCASE => WithFunction[xor];
};
};
Fast2: PUBLIC UNSAFE PROC [arg: Arg] = UNCHECKED {
This is a moderately fast for when the tile can fit into a single word, but need not be a power of two bits.
BumpDst: PROC = INLINE {
This proc bumps the dst variables to the next line
next: NAT = dstBit + arg.dstBpl;
dstBit ¬ next MOD bpw;
dst ¬ dst + NAT[next-dstBit] / bpu;
};
BumpSrc: PROC = INLINE {
This proc bumps the src variables to the next line
next: NAT = srcBit + arg.srcBpl;
srcBit ¬ next MOD bpw;
src ¬ src + NAT[next-srcBit] / bpu;
tileLine ¬ tileLine+1;
IF tileLine = arg.sSizeTile THEN {
phase: NAT = arg.phase;
IF firstBit < phase
THEN firstBit ¬ firstBit + (fSizeTile-phase)
ELSE firstBit ¬ firstBit - phase;
tileLine ¬ 0;
srcBit ¬ arg.srcBit;
src ¬ arg.srcWord;
};
};
GetSrc: PROC = INLINE {
Fetches the next src tile line into sw, replicating it as necessary to enlarge the tile for efficiency, and complements the pattern as necessary. The resulting tile is left-justified in sw. Garbage bits must be masked off.
comp: BitOffset = bpw-fSizeTile;
sw ¬ BITXOR[src­, invert];
IF srcBit # 0 THEN {
lim: NAT = srcBit+fSizeTile;
sw ¬ BITLSHIFT[sw, srcBit];
IF lim > bpw THEN
sw ¬ sw + BITRSHIFT[BITXOR[(src+upw)­, invert], bpw-srcBit];
};
IF fSizeTile <= bpw/2
THEN sw ¬ BITRSHIFT[sw, comp] * replicatorMultArray[fSizeTile]
This replicates the tile and masks off garbage
ELSE sw ¬ BITAND[sw, BITLSHIFT[WORD.LAST, comp]];
};
tileLine: NAT ¬ arg.firstLine;
firstBit: BitOffset ¬ arg.firstBit;
srcBit: BitOffset ¬ arg.srcBit;
src: WordPtr ¬ arg.srcWord;
dst: WordPtr ¬ arg.dstWord;
dstBit: BitOffset ¬ arg.dstBit;
fSizeTile: NAT = arg.fSizeTile;
tBits: NAT = replicatorBitsArray[fSizeTile];
sSize: NAT ¬ arg.sSize;
invert: WORD = arg.srcInvert;
df: DstFunc = arg.dstFunc;
sw: WORD ¬ 0;  -- initialized to keep compiler happy
IF checking AND fSizeTile > bpw THEN BadAssertion[];
Make sure that the caller obeys the conventions
IF sSize # 0 AND arg.fSize # 0 THEN {
IF tileLine # 0 THEN {
We don't start at the beginning of the source tile
next: NAT ¬ srcBit + tileLine*arg.srcBpl;
srcBit ¬ next MOD bpw;
src ¬ src + NAT[next-srcBit]/bpu;
};
DO
Each iteration handles one dst line
rem: NAT ¬ arg.fSize; -- remaining bits in dst line
xBits: NAT ¬ bpw-dstBit; -- bits to move
dwp: WordPtr ¬ dst; -- dst word ptr within line
shift: NAT ¬ firstBit;
dShift: NAT ¬ dstBit;
DoFastLine2: PROC [df: DstFunc] = INLINE {
DO
Each iteration fetches and stores just one dst word
tw: WORD ¬ BITLSHIFT[sw, shift];
valid: NAT ¬ tBits - shift;
IF rem < xBits THEN xBits ¬ rem;
WHILE valid < xBits DO
tw ¬ BITRSHIFT[sw, valid] + tw;
valid ¬ valid + tBits;
ENDLOOP;
PutFieldInline[dwp, dShift, tw, xBits, df];
IF rem = xBits THEN EXIT;
shift ¬ shift + xBits;
WHILE shift >= tBits DO shift ¬ shift - tBits; ENDLOOP;
dwp ¬ dwp + upw;
dShift ¬ 0;
rem ¬ rem - xBits;
xBits ¬ bpw;
IF rem < bpw THEN xBits ¬ rem;
ENDLOOP;
};
GetSrc[];
SELECT df FROM
null => DoFastLine2[null];
and => DoFastLine2[and];
or => DoFastLine2[or];
ENDCASE => DoFastLine2[xor];
IF sSize = 1 THEN EXIT;
sSize ¬ sSize - 1;
BumpSrc[];
BumpDst[];
ENDLOOP;
};
};
Fast3: PUBLIC PROC [arg: Arg] = {
This is a general-purpose routine, capable of handling all valid argument blocks, but it is (more-or-less) expected to be called for fSizeTile > bpw.
BumpDst: PROC = INLINE {
This proc bumps the dst variables to the next line
next: NAT = dstBit + arg.dstBpl;
dstBit ¬ next MOD bpw;
dst ¬ dst + NAT[next-dstBit] / bpu;
};
BumpSrc: PROC = INLINE {
This proc bumps the src variables to the next line
next: NAT = srcBit + arg.srcBpl;
srcBit ¬ next MOD bpw;
src ¬ src + NAT[next-srcBit] / bpu;
tileLine ¬ tileLine+1;
IF tileLine = arg.sSizeTile THEN {
phase: NAT = arg.phase;
IF firstBit < phase
THEN firstBit ¬ firstBit + (fSizeTile-phase)
ELSE firstBit ¬ firstBit - phase;
tileLine ¬ 0;
srcBit ¬ arg.srcBit;
src ¬ arg.srcWord;
};
};
tileLine: NAT ¬ arg.firstLine;
firstBit: NAT ¬ arg.firstBit;
srcBit: BitOffset ¬ arg.srcBit;
src: WordPtr ¬ arg.srcWord;
dst: WordPtr ¬ arg.dstWord;
dstBit: BitOffset ¬ arg.dstBit;
fSizeTile: NAT = arg.fSizeTile;
sSize: NAT ¬ arg.sSize;
IF sSize # 0 AND arg.fSize # 0 THEN {
IF tileLine # 0 THEN {
We don't start at the beginning of the source tile
next: NAT = srcBit + tileLine*arg.srcBpl;
srcBit ¬ next MOD bpw;
src ¬ src + NAT[next-srcBit]/bpu;
};
DO
Iterate through each dst line.
DoLine[arg, dst, dstBit, src, srcBit, firstBit];
IF sSize = 1 THEN EXIT;
sSize ¬ sSize - 1;
BumpSrc[];
BumpDst[];
ENDLOOP;
};
};
DoLine: PROC [arg: Arg, dwp: WordPtr, dstPos: BitOffset,
src: WordPtr, srcBit: BitOffset, firstBit: NAT] = {
Do a single dst line by repeatedly fetching fields from the source and storing them (with a function) to the destination. Each field is at most one word, and we try to get on a dst word boundary to speed things up, even though that may lead to double fetching from the src.
invert: WORD = arg.srcInvert;
dstRem: NAT ¬ arg.fSize;
fSizeTile: NAT = arg.fSizeTile;
srcRem: NAT ¬ fSizeTile-firstBit;
srcPos: BitOffset ¬ (firstBit+srcBit) MOD bpw;
swp: WordPtr ¬ src + NAT[(firstBit+srcBit) - srcPos] / bpu;
WithFunction: PROC [df: DstFunc] = --INLINE-- {
DO
next: NAT;
sw: WORD ¬ BITXOR[swp­, invert];
xBits: BitCount ¬ bpw-dstPos;
IF dstRem < xBits THEN xBits ¬ dstRem;
IF srcRem < xBits
THEN {
We have run off the end of the src tile line, so we peek around the corner to get some more bits from the start of the tile. We don't update the src variables, because that will be done after the transfer.
IF srcPos # 0 THEN {
source word needs to be left-justified (and may straddle a boundary)
sw ¬ BITLSHIFT[sw, srcPos];
IF srcPos+srcRem > bpw THEN
sw ¬ sw + BITRSHIFT[BITXOR[(swp+upw)­, invert], bpw-srcPos];
};
sw ¬ BITAND[sw, BITLSHIFT[WORD.LAST, bpw-srcRem]]
+ BITRSHIFT[BITLSHIFT[BITXOR[src­, invert], srcBit], srcRem];
next ¬ bpw-srcBit;
IF fSizeTile < next THEN next ¬ fSizeTile;
next now holds the number of valid bits from src^
next ¬ srcRem + next;
next now holds the number of valid bits in sw
IF next < xBits THEN xBits ¬ next;
Just in case we did not get enough for a full word
}
ELSE {
There are enough bits in the current src tile line
IF srcPos # 0 THEN {
source word needs to be left-justified (and may straddle a boundary)
sw ¬ BITLSHIFT[sw, srcPos];
IF srcPos+xBits > bpw THEN
sw ¬ sw + BITRSHIFT[BITXOR[(swp+upw)­, invert], bpw-srcPos];
};
};
IF xBits = bpw
THEN {
Ready to deal with full dst words
SELECT df FROM
null => dwp­ ¬ sw;
and => dwp­ ¬ BITAND[dwp­, sw];
or => dwp­ ¬ BITOR[dwp­, sw];
ENDCASE => dwp­ ¬ BITXOR[dwp­, sw];
IF xBits = dstRem THEN EXIT;
dwp ¬ dwp + upw;
dstRem ¬ dstRem - xBits;
IF srcRem > xBits THEN {
swp ¬ swp + upw;
srcRem ¬ srcRem - xBits;
LOOP;
};
}
ELSE {
Must deal with sub-word fields
PutFieldInline[dwp, dstPos, sw, xBits, df];
IF xBits = dstRem THEN EXIT;
Bump the dst to the next field
next ¬ dstPos+xBits;
dstPos ¬ next MOD bpw;
dwp ¬ dwp + NAT[next - dstPos] / bpu;
dstRem ¬ dstRem - xBits;
Bump the src to the next field
IF srcRem > xBits THEN {
next ¬ srcPos+xBits;
srcPos ¬ next MOD bpw;
swp ¬ swp + NAT[next - srcPos] / bpu;
srcRem ¬ srcRem - xBits;
LOOP;
};
};
IF srcRem < xBits THEN {
We have wrapped and already consumed some
next ¬ xBits - srcRem;
srcRem ¬ fSizeTile - next;
next ¬ srcBit + next;
srcPos ¬ next MOD bpw;
swp ¬ src + (next-srcPos) / bpu;
IF srcRem # 0 THEN LOOP;
};
Simple wraparound of the src tile
swp ¬ src;
srcPos ¬ srcBit;
srcRem ¬ fSizeTile;
ENDLOOP;
};
SELECT arg.dstFunc FROM
null => WithFunction[null];
and => WithFunction[and];
or => WithFunction[or];
ENDCASE => WithFunction[xor];
};
PutWordInline: PROC [dst: WordPtr, sw: WORD, df: DstFunc] = INLINE {
SELECT df FROM
null => dst­ ¬ sw;
and => dst­ ¬ BITAND[dst­, sw];
or => dst­ ¬ BITOR[dst­, sw];
ENDCASE => dst­ ¬ BITXOR[dst­, sw];
};
PutFieldInline: PROC
[dst: WordPtr, dstMod: BitOffset, w: WORD, bits: BitCount, df: DstFunc] = INLINE {
SELECT TRUE FROM
dstMod # 0 => {
The destination is not left-justified
dstLim: NAT = dstMod + bits;
mask: WORD ¬ BITRSHIFT[WORD.LAST, dstMod];
dstW: WORD ¬ dst­;
tw: WORD ¬ BITRSHIFT[w, dstMod];
IF dstLim < bpw THEN mask ¬ mask - BITRSHIFT[mask, bits];
SELECT df FROM
null => {};
and => tw ¬ BITAND[dstW, tw];
or => tw ¬ BITOR[dstW, tw];
ENDCASE => tw ¬ BITXOR[dstW, tw];
dst­ ¬ BITAND[dstW, BITNOT[mask]] + BITAND[tw, mask];
IF dstLim > bpw THEN {
The destination lies in two words, so store the second
mask ¬ BITRSHIFT[WORD.LAST, dstLim MOD bpw];
dstW ¬ (dst+upw)­;
tw ¬ BITLSHIFT[w, bpw-dstMod];
SELECT df FROM
null => {};
and => tw ¬ BITAND[dstW, tw];
or => tw ¬ BITOR[dstW, tw];
ENDCASE => tw ¬ BITXOR[dstW, tw];
(dst+upw)­ ¬ BITAND[tw, BITNOT[mask]] + BITAND[dstW, mask];
};
};
bits # bpw => {
dstMod = 0, but this field is partial
mask: WORD = BITRSHIFT[WORD.LAST, bits];
dstW: WORD = dst­;
tw: WORD ¬ w;
SELECT df FROM
null => {};
and => tw ¬ BITAND[dstW, tw];
or => tw ¬ BITOR[dstW, tw];
ENDCASE => tw ¬ BITXOR[dstW, tw];
dst­ ¬ BITAND[dstW, mask] + BITAND[tw, BITNOT[mask]];
};
ENDCASE =>
easily digested
SELECT df FROM
null => dst­ ¬ w;
and => dst­ ¬ BITAND[dst­, w];
or => dst­ ¬ BITOR[dst­, w];
ENDCASE => dst­ ¬ BITXOR[dst­, w];
};
DumbOp: PUBLIC UNSAFE PROC [arg: Arg] = UNCHECKED {
This is the exceedingly dumb bit at a time routine. It is simple enough that it can be used to check the faster versions.
tileLine: NAT ¬ arg.firstLine;
adjustedFirstBit: NAT ¬ arg.firstBit;
src: WordPtr = LOOPHOLE[arg.srcWord];
dst: BitsPtr = LOOPHOLE[arg.dstWord];
dstBpl: NAT = arg.dstBpl;
dstBit: NAT ¬ arg.dstBit;
srcBpl: NAT = arg.srcBpl;
phase: NAT = arg.phase;
fSize: NAT = arg.fSize;
fSizeTile: NAT = arg.fSizeTile;
sSizeTile: NAT = arg.sSizeTile;
sSize: NAT ¬ arg.sSize;
srcBit: NAT ¬ arg.srcBit+srcBpl*tileLine;
invert: WORD = arg.srcInvert;
df: DstFunc = arg.dstFunc;
IF checking THEN {
Check the argument block for consistency
IF arg.firstBit >= fSizeTile THEN BadAssertion[];
IF arg.phase >= fSizeTile THEN BadAssertion[];
IF arg.firstLine >= arg.sSizeTile THEN BadAssertion[];
};
IF arg.fSize = 0 OR arg.sSize = 0 THEN RETURN;
FOR s: NAT IN [0..arg.sSize) DO
dstLim: NAT = dstBit+fSize;
tileBit: NAT ¬ adjustedFirstBit;
dBit: NAT ¬ dstBit;
DO
sBit: NAT ¬ srcBit+tileBit;
mod: BitOffset ¬ sBit MOD bpw;
sw: WORD ¬ BITXOR[(src+(sBit - mod) / bpu)­, invert];
sb: BIT ¬ BITRSHIFT[BITLSHIFT[sw, mod], bpw - 1];
SELECT df FROM
null => dst[dBit] ¬ sb;
and => dst[dBit] ¬ BITAND[dst[dBit], sb];
or => dst[dBit] ¬ BITOR[dst[dBit], sb];
xor => dst[dBit] ¬ BITXOR[dst[dBit], sb];
ENDCASE => BadAssertion[];
dBit ¬ dBit + 1;
IF dBit = dstLim THEN EXIT;
tileBit ¬ tileBit + 1;
IF tileBit >= fSizeTile THEN tileBit ¬ 0;
ENDLOOP;
dstBit ¬ dstBit + dstBpl;
srcBit ¬ srcBit + srcBpl;
tileLine ¬ tileLine+1;
IF tileLine = sSizeTile THEN {
IF adjustedFirstBit < phase
THEN adjustedFirstBit ¬ adjustedFirstBit + (fSizeTile-phase)
ELSE adjustedFirstBit ¬ adjustedFirstBit - phase;
tileLine ¬ 0;
srcBit ¬ arg.srcBit;
};
ENDLOOP;
};
Funny way to compute Raster Op wordwise
FunnyOp: PROC [dw: WORD, sw: WORD, op: DstFunc] RETURNS [WORD] = {
Benchmark for forming tw (6 ops after mask formation)
nullMask: WORD = IF op = null THEN WORD.LAST ELSE 0;
andMask: WORD = IF op = and THEN WORD.LAST ELSE 0;
orMask: WORD = IF op = or THEN WORD.LAST ELSE 0;
xorMask: WORD = IF op = xor THEN WORD.LAST ELSE 0;
m1: WORD ¬ IF op = and OR op = or THEN WORD.LAST ELSE 0;
andMask+orMask
m2: WORD ¬ IF op # and THEN WORD.LAST ELSE 0;
nullMask+orMask+xorMask
m3: WORD ¬ IF op = or OR op = xor THEN WORD.LAST ELSE 0;
orMask+xorMask
Notes: if sw is known outside of a loop then w1 and w2 can be moved outside of the loop, saving two operations. If dw is known outside of a loop then w3 can be moved outside of the loop, saving one operation (but maybe we can switch this).
w1: WORD = BITAND[sw, m1];
null: zeros, and: sw, or: sw, xor: zeros
w2: WORD = BITAND[sw, m2];
null: sw, and: zeros, or: sw, xor: sw
w3: WORD = BITAND[dw, m3];
null: zeros, and: zeros, or: dw, xor: dw
w4: WORD = BITXOR[w2, w3];
null: sw, and: zeros, or: BITXOR[sw, dw], xor: BITXOR[sw, dw]
w5: WORD = BITAND[dw, w1];
null: zeros, and: BITAND[sw, dw], or: BITAND[sw, dw], xor: zeros
w6: WORD = BITOR[w4, w5];
null: sw, and: BITAND[sw, dw], or: BITOR[sw, dw], xor: BITXOR[sw, dw]
RETURN [w6];
};
FunWithMasks: PROC [dw, sw, m1, m2, m3: WORD] RETURNS [WORD] = INLINE {
RETURN [BITOR[BITXOR[BITAND[sw, m2], BITAND[dw, m3]], BITAND[dw, BITAND[sw, m1]]]];
};
Initialization
mask1Array: ARRAY DstFunc OF WORD = [
null: 0, and: WORD.LAST, or: WORD.LAST, xor: 0];
mask2Array: ARRAY DstFunc OF WORD = [
null: WORD.LAST, and: 0, or: WORD.LAST, xor: WORD.LAST];
mask3Array: ARRAY DstFunc OF WORD = [
null: 0, and: 0, or: WORD.LAST, xor: WORD.LAST];
replicatorMultArray: ReplicatorArray ¬ ALL[0];
replicatorBitsArray: ReplicatorArray ¬ ALL[0];
ReplicatorArray: TYPE = ARRAY [0..bpw] OF WORD;
BadAssertion: PROC = {
ERROR AssertionFailed;
};
AssertionFailed: ERROR = CODE;
InitReplicatorArray: PROC = {
replicatorMultArray[0] ¬ 0;
replicatorBitsArray[0] ¬ 0;
replicatorMultArray[1] ¬ WORD.LAST;
replicatorBitsArray[1] ¬ bpw;
replicatorMultArray[bpw] ¬ 1;
replicatorBitsArray[bpw] ¬ 1;
FOR i: NAT IN [2..bpw) DO
t: WORD ¬ BITRSHIFT[WORD.LAST, i-1]-BITRSHIFT[WORD.LAST, i];
tt: WORD ¬ 0;
WHILE t # 0 DO tt ¬ tt + t; t ¬ BITRSHIFT[t, i]; ENDLOOP;
replicatorMultArray[i] ¬ tt;
replicatorBitsArray[i] ¬ bpw - (bpw MOD i);
ENDLOOP;
};
InitReplicatorArray[];
END.