-- RasterOpForwardImpl.meta
-- Copyright © 1988, 1989, 1990, 1991, 1992 by Xerox Corporation. All rights reserved.
-- Michael Plass, October 1, 1991 4:37 pm PDT
-- Russ Atkinson (RRA) March 1, 1990 4:03:19 pm PST
-- Willie-s, June 13, 1991 4:28 pm PDT
DIRECTORY Basics, RasterBasics, RasterOp;
RasterOpForwardImpl: PROGRAM
IMPORTS Basics
EXPORTS RasterOp
~ BEGIN OPEN Basics;
-- Number of bits in one machine word.
bpw: CARDINAL ~ BITS[WORD];
-- Bit offset within a word: 0 up to but not including bpw.
BitOff: TYPE = [0..bpw);
-- Count of bits within a word, a full word included: 0 through bpw.
BitCount: TYPE = [0..bpw];
-- Local short names for types declared in RasterBasics and Basics.
DstFunc: TYPE ~ RasterBasics.DstFunc;
SrcFunc: TYPE ~ RasterBasics.SrcFunc;
BitAddress: TYPE ~ RasterBasics.BitAddress;
RawWords: TYPE ~ Basics.RawWords;
-- Pointer types used by the transfer loops below.
RawWordsPtr: TYPE ~ LONG POINTER TO RawWords;
WordPtr: TYPE = LONG POINTER TO WORD;
-- Procs
WordsForBits: PROC [bits: CARDINAL] RETURNS [CARDINAL] ~ INLINE {
  -- Number of words needed to hold the given number of bits (bits/bpw rounded up).
  padded: CARDINAL ~ bits+(bpw-1);
  RETURN [padded/bpw]
  };
WordFloorUnitsForBits: PROC [bits: CARDINAL] RETURNS [CARDINAL] ~ INLINE {
  -- Address units spanned by the whole words contained in the given number of bits;
  -- any partial trailing word is dropped (bits/bpw rounded down, times UNITS[WORD]).
  wholeWords: CARDINAL ~ bits/bpw;
  unitsPerWord: CARDINAL ~ UNITS[WORD];
  RETURN [wholeWords*unitsPerWord]
  };
CombineUnderMask: PROC [mask, onesSrc, zerosSrc: WORD] RETURNS [WORD] ~ INLINE {
  -- Bitwise select: each result bit comes from onesSrc where mask is 1 and from zerosSrc where mask is 0.
  pickedFromOnes: WORD ~ BITAND[onesSrc, mask];
  pickedFromZeros: WORD ~ BITAND[zerosSrc, BITNOT[mask]];
  RETURN [BITOR[pickedFromOnes, pickedFromZeros]]
  };
rightJustifiedOnes: ARRAY BitCount OF WORD ~ InitRightJustifiedOnes[];
  -- rightJustifiedOnes[n] has its n low-order bits set and every higher bit clear
InitRightJustifiedOnes: PROC RETURNS [a: ARRAY BitCount OF WORD] ~ {
  -- Fills the table in increasing order of n, extending the run of low-order ones by one bit per entry.
  ones: WORD ¬ 0;
  FOR k: CARDINAL IN BitCount DO
    a[k] ¬ ones;
    ones ¬ 2*ones+1;
    ENDLOOP;
  };
RightJustifiedZeros: PROC [n: BitCount] RETURNS [WORD] ~ INLINE {
  -- Complement of rightJustifiedOnes[n]: the n low-order bits are clear and every higher bit is set.
  lowOnes: WORD ~ rightJustifiedOnes[n];
  RETURN [BITNOT[lowOnes]]
  };
Forward0: PROC [dst: BitAddress, src: BitAddress, dstBpl, srcBpl, sSize, fSize: CARDINAL] ~ {
  -- Copies a rectangle of bits: each destination bit is replaced by the corresponding source bit.
  -- dst, src: bit addresses (word pointer plus bit offset) of the first bit of the first line.
  -- dstBpl, srcBpl: distance in bits from one line to the next in destination and source.
  -- sSize: number of lines; fSize: number of bits transferred on each line.
  -- Lines are visited in increasing address order and words left to right within a line.
  -- An empty rectangle (sSize = 0 or fSize = 0) transfers nothing.
  MF: PROC [d, s, mask: WORD] RETURNS [WORD] ~ INLINE {
    -- Result takes the source bit s where mask is 1 and keeps the destination bit d elsewhere.
    RETURN [
      BITXOR[BITAND[BITXOR[s, d], mask], d]
      ]
    };
  DoUniformLines: PROC [count: CARDINAL, dstPtr: WordPtr, dstBit: BitOff, srcPtr: WordPtr, srcBit: BitOff] ~ {
    -- Transfers count lines that all start at the same bit offsets (dstBit, srcBit),
    -- so the edge masks and the shift amount are computed once for all of them.
    -- Every line loop below is bottom-tested, so a count of 0 behaves like 1; callers must pass count >= 1.
    dRast: CARD ~ WordFloorUnitsForBits[dstBpl];
    sRast: CARD ~ WordFloorUnitsForBits[srcBpl];
      -- address units from one line to the next, destination and source
    ndw: CARDINAL ~ WordsForBits[dstBit + fSize];
      -- number of destination words per (this) line
    lMask: WORD ~ rightJustifiedOnes[bpw-dstBit];
      -- mask for the leftmost dest word (ones where bits are to go)
    rMask: WORD ~ RightJustifiedZeros[
      (LOOPHOLE[bpw-dstBit-fSize, CARDINAL]) MOD bpw];
      -- mask for the rightmost dest word
    lSA: BitOff ~ (LOOPHOLE[srcBit-dstBit, CARDINAL]) MOD bpw; -- left shift amount
    w: WORD ¬ 0; -- source word, aligned with destination
    dstLine: RawWordsPtr ¬ NIL; -- destination line word address
    srcLine: RawWordsPtr ¬ NIL; -- source line word address
    NextLine: PROC ~ INLINE {
      -- steps the line counter and both line pointers on to the following line
      count ¬ count - 1;
      dstPtr ¬ dstPtr + dRast;
      srcPtr ¬ srcPtr + sRast;
      };
    Inner0: PROC = INLINE {
      -- Aligned case is simpler: source and destination share a bit offset, so words move unshifted
      fetchLastWord: BOOL ~ TRUE;
        -- true if last source word needs to be fetched (always so when aligned)
      FetchNextOff: PROC [wordOffset: CARDINAL] ~ INLINE {
        -- fetches the word at the given offset, no pointer change
        w ¬ srcLine[wordOffset];
        };
      FetchLast: PROC [wordOffset: CARDINAL, fetch: BOOL] ~ INLINE {
        -- fetches the final source word on a line; fetch is ignored here and is kept only
        -- so that calls read the same as in Inner1
        w ¬ srcLine[wordOffset];
        };
      BBLineSetup: PROC ~ INLINE {
        -- points the line cursors at the start of the current line
        dstLine ¬ LOOPHOLE[dstPtr];
        srcLine ¬ LOOPHOLE[srcPtr];
        };
      SELECT ndw FROM
        1 => {
          -- Special encoding of one destination word case for speed: both edge masks apply to the same word
          bothMask: WORD ~ BITAND[lMask, rMask];
          DO
            BBLineSetup[];
            FetchLast[0, TRUE];
            dstLine[0] ¬ MF[dstLine[0], w, bothMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP
          };
        2 => {
          DO
            BBLineSetup[];
            FetchNextOff[0];
            dstLine[0] ¬ MF[dstLine[0], w, lMask];
            FetchLast[1, fetchLastWord];
            dstLine[1] ¬ MF[dstLine[1], w, rMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP;
          };
        3 => {
          DO
            BBLineSetup[];
            FetchNextOff[0];
            dstLine[0] ¬ MF[dstLine[0], w, lMask];
            FetchNextOff[1];
            dstLine[1] ¬ w;
            FetchLast[2, fetchLastWord];
            dstLine[2] ¬ MF[dstLine[2], w, rMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP;
          };
        4 => {
          DO
            BBLineSetup[];
            FetchNextOff[0];
            dstLine[0] ¬ MF[dstLine[0], w, lMask];
            FetchNextOff[1];
            dstLine[1] ¬ w;
            FetchNextOff[2];
            dstLine[2] ¬ w;
            FetchLast[3, fetchLastWord];
            dstLine[3] ¬ MF[dstLine[3], w, rMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP;
          };
        ENDCASE => {
          -- General case, ndw >= 5: masked first word, unmasked middle words, masked last word
          checkUnroll: [2..256] = 4;
            -- unroll factor of the middle word loop (presumably declared so the compiler range checks it)
          DO
            nw: CARDINAL ¬ LOOPHOLE[ndw-2, CARDINAL];
              -- middle destination words still to be written on this line
            BBLineSetup[];
            FetchNextOff[0];
            dstLine[0] ¬ MF[dstLine[0], w, lMask];
            dstLine ¬ dstLine+SIZE[WORD];
            srcLine ¬ srcLine+SIZE[WORD];
            WHILE nw >= 4 DO
              FetchNextOff[0];
              dstLine[0] ¬ w;
              FetchNextOff[1];
              dstLine[1] ¬ w;
              FetchNextOff[2];
              dstLine[2] ¬ w;
              FetchNextOff[3];
              dstLine[3] ¬ w;
              dstLine ¬ dstLine+SIZE[WORD]*4;
              srcLine ¬ srcLine+SIZE[WORD]*4;
              nw ¬ nw - 4;
              ENDLOOP;
            IF nw >= 2 THEN {
              FetchNextOff[0];
              dstLine[0] ¬ w;
              FetchNextOff[1];
              dstLine[1] ¬ w;
              dstLine ¬ dstLine+SIZE[WORD]*2;
              srcLine ¬ srcLine+SIZE[WORD]*2;
              nw ¬ nw - 2;
              };
            IF nw = 1 THEN {
              -- One trailing word
              FetchNextOff[0];
              dstLine[0] ¬ w;
              dstLine ¬ dstLine+SIZE[WORD];
              srcLine ¬ srcLine+SIZE[WORD];
              };
            FetchLast[0, fetchLastWord];
            dstLine[0] ¬ MF[dstLine[0], w, rMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP;
          };
      };
    Inner1: PROC = INLINE {
      -- Unaligned case: each destination word is assembled from two adjacent source words
      hi: WORD; -- left unshifted source word
      lo: WORD ¬ 0; -- right unshifted source word
      rSA: BitOff = bpw - lSA;
        -- amount to shift source words right to line them up
      nsw: CARDINAL = WordsForBits[srcBit + fSize];
        -- number of source words per (this) line
      fetchLastWord: BOOL = IF srcBit >= dstBit THEN (nsw>ndw) ELSE (nsw>=ndw);
        -- true if last source word needs to be fetched
      FetchNext: PROC ~ INLINE {
        -- fetches the next aligned source bits, and bumps source pointer
        hi ¬ lo;
        lo ¬ srcLine[0];
        srcLine ¬ srcLine+SIZE[WORD];
        w ¬ BITLSHIFT[hi, lSA]+BITRSHIFT[lo, rSA]
        };
      FetchNextOff: PROC [wordOffset: CARDINAL] ~ INLINE {
        -- fetches the next word at the given offset, no pointer change
        hi ¬ lo;
        lo ¬ srcLine[wordOffset];
        w ¬ BITLSHIFT[hi, lSA]+BITRSHIFT[lo, rSA]
        };
      FetchLast: PROC [wordOffset: CARDINAL, fetch: BOOL] ~ INLINE {
        -- fetches the final source bits on a line, avoiding a spurious fetch
        w ¬ BITLSHIFT[lo, lSA];
        IF fetch THEN w ¬ w + BITRSHIFT[srcLine[wordOffset], rSA];
        };
      BBLineSetup: PROC ~ INLINE {
        -- points the line cursors at the current line and, when the source starts further
        -- into its word than the destination does, preloads the first source word
        dstLine ¬ LOOPHOLE[dstPtr];
        srcLine ¬ LOOPHOLE[srcPtr];
        IF srcBit >= dstBit THEN FetchNext[];
        };
      SELECT ndw FROM
        1 => {
          -- Special encoding of one destination word case for speed;
          -- the test of fetchLastWord is hoisted out of the line loop
          bothMask: WORD ~ BITAND[lMask, rMask];
          IF fetchLastWord
            THEN
              DO
                BBLineSetup[];
                FetchLast[0, TRUE];
                dstLine[0] ¬ MF[dstLine[0], w, bothMask];
                IF count <= 1 THEN EXIT;
                NextLine[];
                ENDLOOP
            ELSE
              DO
                BBLineSetup[];
                FetchLast[0, FALSE];
                dstLine[0] ¬ MF[dstLine[0], w, bothMask];
                IF count <= 1 THEN EXIT;
                NextLine[];
                ENDLOOP
          };
        2 => {
          DO
            BBLineSetup[];
            FetchNextOff[0];
            dstLine[0] ¬ MF[dstLine[0], w, lMask];
            FetchLast[1, fetchLastWord];
            dstLine[1] ¬ MF[dstLine[1], w, rMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP;
          };
        3 => {
          DO
            BBLineSetup[];
            FetchNextOff[0];
            dstLine[0] ¬ MF[dstLine[0], w, lMask];
            FetchNextOff[1];
            dstLine[1] ¬ w;
            FetchLast[2, fetchLastWord];
            dstLine[2] ¬ MF[dstLine[2], w, rMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP;
          };
        4 => {
          DO
            BBLineSetup[];
            FetchNextOff[0];
            dstLine[0] ¬ MF[dstLine[0], w, lMask];
            FetchNextOff[1];
            dstLine[1] ¬ w;
            FetchNextOff[2];
            dstLine[2] ¬ w;
            FetchLast[3, fetchLastWord];
            dstLine[3] ¬ MF[dstLine[3], w, rMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP;
          };
        ENDCASE => {
          -- General case, ndw >= 5: masked first word, unmasked middle words, masked last word
          checkUnroll: [2..256] = 4;
            -- unroll factor of the middle word loop (presumably declared so the compiler range checks it)
          DO
            nw: CARDINAL ¬ LOOPHOLE[ndw-2, CARDINAL];
              -- middle destination words still to be written on this line
            BBLineSetup[];
            FetchNextOff[0];
            dstLine[0] ¬ MF[dstLine[0], w, lMask];
            dstLine ¬ dstLine+SIZE[WORD];
            srcLine ¬ srcLine+SIZE[WORD];
            WHILE nw >= 4 DO
              FetchNextOff[0];
              dstLine[0] ¬ w;
              FetchNextOff[1];
              dstLine[1] ¬ w;
              FetchNextOff[2];
              dstLine[2] ¬ w;
              FetchNextOff[3];
              dstLine[3] ¬ w;
              dstLine ¬ dstLine+SIZE[WORD]*4;
              srcLine ¬ srcLine+SIZE[WORD]*4;
              nw ¬ nw - 4;
              ENDLOOP;
            IF nw >= 2 THEN {
              FetchNextOff[0];
              dstLine[0] ¬ w;
              FetchNextOff[1];
              dstLine[1] ¬ w;
              dstLine ¬ dstLine+SIZE[WORD]*2;
              srcLine ¬ srcLine+SIZE[WORD]*2;
              nw ¬ nw - 2;
              };
            IF nw = 1 THEN {
              -- One trailing word
              FetchNextOff[0];
              dstLine[0] ¬ w;
              dstLine ¬ dstLine+SIZE[WORD];
              srcLine ¬ srcLine+SIZE[WORD];
              };
            FetchLast[0, fetchLastWord];
            dstLine[0] ¬ MF[dstLine[0], w, rMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP;
          };
      };
    IF lSA = 0 THEN { Inner0[] } ELSE { Inner1[] };
    };
  -- Both dimensions must be nonzero. With fSize = 0 and a word-aligned destination ndw would be 0
  -- and the general case would wrap ndw-2; with sSize = 0 the line counters would wrap or a line would still be written.
  IF sSize # 0 AND fSize # 0 THEN {
    IF dstBpl MOD bpw = 0 AND srcBpl MOD bpw = 0
      THEN {
        -- don't need to do whole setup for each line
        DoUniformLines[sSize, dst.word, dst.bit, src.word, src.bit];
        }
      ELSE {
        -- need setup for every line, because the bit offsets change from line to line
        DO
          DoUniformLines[1, dst.word, dst.bit, src.word, src.bit];
          IF (sSize ¬ sSize - 1) = 0 THEN EXIT;
          dst.word ¬ dst.word + WordFloorUnitsForBits[(dst.bit+dstBpl)];
          dst.bit ¬ CARDINAL[(dst.bit+dstBpl)] MOD bpw;
          src.word ¬ src.word + WordFloorUnitsForBits[(src.bit+srcBpl)];
          src.bit ¬ CARDINAL[(src.bit+srcBpl)] MOD bpw;
          ENDLOOP;
        };
    };
  };
Forward1: PROC [dst: BitAddress, src: BitAddress, dstBpl, srcBpl, sSize, fSize: CARDINAL] ~ {
  -- Copies a rectangle of bits inverted: each destination bit is replaced by the complement of the corresponding source bit.
  -- dst, src: bit addresses (word pointer plus bit offset) of the first bit of the first line.
  -- dstBpl, srcBpl: distance in bits from one line to the next in destination and source.
  -- sSize: number of lines; fSize: number of bits transferred on each line.
  -- Lines are visited in increasing address order and words left to right within a line.
  -- An empty rectangle (sSize = 0 or fSize = 0) transfers nothing.
  MF: PROC [d, s, mask: WORD] RETURNS [WORD] ~ INLINE {
    -- Result takes the complemented source bit (NOT s) where mask is 1 and keeps the destination bit d elsewhere.
    RETURN [
      BITXOR[BITAND[BITXOR[BITNOT[s], d], mask], d]
      ]
    };
  DoUniformLines: PROC [count: CARDINAL, dstPtr: WordPtr, dstBit: BitOff, srcPtr: WordPtr, srcBit: BitOff] ~ {
    -- Transfers count lines that all start at the same bit offsets (dstBit, srcBit),
    -- so the edge masks and the shift amount are computed once for all of them.
    -- Every line loop below is bottom-tested, so a count of 0 behaves like 1; callers must pass count >= 1.
    dRast: CARD ~ WordFloorUnitsForBits[dstBpl];
    sRast: CARD ~ WordFloorUnitsForBits[srcBpl];
      -- address units from one line to the next, destination and source
    ndw: CARDINAL ~ WordsForBits[dstBit + fSize];
      -- number of destination words per (this) line
    lMask: WORD ~ rightJustifiedOnes[bpw-dstBit];
      -- mask for the leftmost dest word (ones where bits are to go)
    rMask: WORD ~ RightJustifiedZeros[
      (LOOPHOLE[bpw-dstBit-fSize, CARDINAL]) MOD bpw];
      -- mask for the rightmost dest word
    lSA: BitOff ~ (LOOPHOLE[srcBit-dstBit, CARDINAL]) MOD bpw; -- left shift amount
    w: WORD ¬ 0; -- source word, aligned with destination
    dstLine: RawWordsPtr ¬ NIL; -- destination line word address
    srcLine: RawWordsPtr ¬ NIL; -- source line word address
    NextLine: PROC ~ INLINE {
      -- steps the line counter and both line pointers on to the following line
      count ¬ count - 1;
      dstPtr ¬ dstPtr + dRast;
      srcPtr ¬ srcPtr + sRast;
      };
    Inner0: PROC = INLINE {
      -- Aligned case is simpler: source and destination share a bit offset, so words move unshifted
      fetchLastWord: BOOL ~ TRUE;
        -- true if last source word needs to be fetched (always so when aligned)
      FetchNextOff: PROC [wordOffset: CARDINAL] ~ INLINE {
        -- fetches the word at the given offset, no pointer change
        w ¬ srcLine[wordOffset];
        };
      FetchLast: PROC [wordOffset: CARDINAL, fetch: BOOL] ~ INLINE {
        -- fetches the final source word on a line; fetch is ignored here and is kept only
        -- so that calls read the same as in Inner1
        w ¬ srcLine[wordOffset];
        };
      BBLineSetup: PROC ~ INLINE {
        -- points the line cursors at the start of the current line
        dstLine ¬ LOOPHOLE[dstPtr];
        srcLine ¬ LOOPHOLE[srcPtr];
        };
      SELECT ndw FROM
        1 => {
          -- Special encoding of one destination word case for speed: both edge masks apply to the same word
          bothMask: WORD ~ BITAND[lMask, rMask];
          DO
            BBLineSetup[];
            FetchLast[0, TRUE];
            dstLine[0] ¬ MF[dstLine[0], w, bothMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP
          };
        2 => {
          DO
            BBLineSetup[];
            FetchNextOff[0];
            dstLine[0] ¬ MF[dstLine[0], w, lMask];
            FetchLast[1, fetchLastWord];
            dstLine[1] ¬ MF[dstLine[1], w, rMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP;
          };
        3 => {
          DO
            BBLineSetup[];
            FetchNextOff[0];
            dstLine[0] ¬ MF[dstLine[0], w, lMask];
            FetchNextOff[1];
            dstLine[1] ¬ BITNOT[w];
            FetchLast[2, fetchLastWord];
            dstLine[2] ¬ MF[dstLine[2], w, rMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP;
          };
        4 => {
          DO
            BBLineSetup[];
            FetchNextOff[0];
            dstLine[0] ¬ MF[dstLine[0], w, lMask];
            FetchNextOff[1];
            dstLine[1] ¬ BITNOT[w];
            FetchNextOff[2];
            dstLine[2] ¬ BITNOT[w];
            FetchLast[3, fetchLastWord];
            dstLine[3] ¬ MF[dstLine[3], w, rMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP;
          };
        ENDCASE => {
          -- General case, ndw >= 5: masked first word, unmasked middle words, masked last word
          checkUnroll: [2..256] = 4;
            -- unroll factor of the middle word loop (presumably declared so the compiler range checks it)
          DO
            nw: CARDINAL ¬ LOOPHOLE[ndw-2, CARDINAL];
              -- middle destination words still to be written on this line
            BBLineSetup[];
            FetchNextOff[0];
            dstLine[0] ¬ MF[dstLine[0], w, lMask];
            dstLine ¬ dstLine+SIZE[WORD];
            srcLine ¬ srcLine+SIZE[WORD];
            WHILE nw >= 4 DO
              FetchNextOff[0];
              dstLine[0] ¬ BITNOT[w];
              FetchNextOff[1];
              dstLine[1] ¬ BITNOT[w];
              FetchNextOff[2];
              dstLine[2] ¬ BITNOT[w];
              FetchNextOff[3];
              dstLine[3] ¬ BITNOT[w];
              dstLine ¬ dstLine+SIZE[WORD]*4;
              srcLine ¬ srcLine+SIZE[WORD]*4;
              nw ¬ nw - 4;
              ENDLOOP;
            IF nw >= 2 THEN {
              FetchNextOff[0];
              dstLine[0] ¬ BITNOT[w];
              FetchNextOff[1];
              dstLine[1] ¬ BITNOT[w];
              dstLine ¬ dstLine+SIZE[WORD]*2;
              srcLine ¬ srcLine+SIZE[WORD]*2;
              nw ¬ nw - 2;
              };
            IF nw = 1 THEN {
              -- One trailing word
              FetchNextOff[0];
              dstLine[0] ¬ BITNOT[w];
              dstLine ¬ dstLine+SIZE[WORD];
              srcLine ¬ srcLine+SIZE[WORD];
              };
            FetchLast[0, fetchLastWord];
            dstLine[0] ¬ MF[dstLine[0], w, rMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP;
          };
      };
    Inner1: PROC = INLINE {
      -- Unaligned case: each destination word is assembled from two adjacent source words
      hi: WORD; -- left unshifted source word
      lo: WORD ¬ 0; -- right unshifted source word
      rSA: BitOff = bpw - lSA;
        -- amount to shift source words right to line them up
      nsw: CARDINAL = WordsForBits[srcBit + fSize];
        -- number of source words per (this) line
      fetchLastWord: BOOL = IF srcBit >= dstBit THEN (nsw>ndw) ELSE (nsw>=ndw);
        -- true if last source word needs to be fetched
      FetchNext: PROC ~ INLINE {
        -- fetches the next aligned source bits, and bumps source pointer
        hi ¬ lo;
        lo ¬ srcLine[0];
        srcLine ¬ srcLine+SIZE[WORD];
        w ¬ BITLSHIFT[hi, lSA]+BITRSHIFT[lo, rSA]
        };
      FetchNextOff: PROC [wordOffset: CARDINAL] ~ INLINE {
        -- fetches the next word at the given offset, no pointer change
        hi ¬ lo;
        lo ¬ srcLine[wordOffset];
        w ¬ BITLSHIFT[hi, lSA]+BITRSHIFT[lo, rSA]
        };
      FetchLast: PROC [wordOffset: CARDINAL, fetch: BOOL] ~ INLINE {
        -- fetches the final source bits on a line, avoiding a spurious fetch
        w ¬ BITLSHIFT[lo, lSA];
        IF fetch THEN w ¬ w + BITRSHIFT[srcLine[wordOffset], rSA];
        };
      BBLineSetup: PROC ~ INLINE {
        -- points the line cursors at the current line and, when the source starts further
        -- into its word than the destination does, preloads the first source word
        dstLine ¬ LOOPHOLE[dstPtr];
        srcLine ¬ LOOPHOLE[srcPtr];
        IF srcBit >= dstBit THEN FetchNext[];
        };
      SELECT ndw FROM
        1 => {
          -- Special encoding of one destination word case for speed;
          -- the test of fetchLastWord is hoisted out of the line loop
          bothMask: WORD ~ BITAND[lMask, rMask];
          IF fetchLastWord
            THEN
              DO
                BBLineSetup[];
                FetchLast[0, TRUE];
                dstLine[0] ¬ MF[dstLine[0], w, bothMask];
                IF count <= 1 THEN EXIT;
                NextLine[];
                ENDLOOP
            ELSE
              DO
                BBLineSetup[];
                FetchLast[0, FALSE];
                dstLine[0] ¬ MF[dstLine[0], w, bothMask];
                IF count <= 1 THEN EXIT;
                NextLine[];
                ENDLOOP
          };
        2 => {
          DO
            BBLineSetup[];
            FetchNextOff[0];
            dstLine[0] ¬ MF[dstLine[0], w, lMask];
            FetchLast[1, fetchLastWord];
            dstLine[1] ¬ MF[dstLine[1], w, rMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP;
          };
        3 => {
          DO
            BBLineSetup[];
            FetchNextOff[0];
            dstLine[0] ¬ MF[dstLine[0], w, lMask];
            FetchNextOff[1];
            dstLine[1] ¬ BITNOT[w];
            FetchLast[2, fetchLastWord];
            dstLine[2] ¬ MF[dstLine[2], w, rMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP;
          };
        4 => {
          DO
            BBLineSetup[];
            FetchNextOff[0];
            dstLine[0] ¬ MF[dstLine[0], w, lMask];
            FetchNextOff[1];
            dstLine[1] ¬ BITNOT[w];
            FetchNextOff[2];
            dstLine[2] ¬ BITNOT[w];
            FetchLast[3, fetchLastWord];
            dstLine[3] ¬ MF[dstLine[3], w, rMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP;
          };
        ENDCASE => {
          -- General case, ndw >= 5: masked first word, unmasked middle words, masked last word
          checkUnroll: [2..256] = 4;
            -- unroll factor of the middle word loop (presumably declared so the compiler range checks it)
          DO
            nw: CARDINAL ¬ LOOPHOLE[ndw-2, CARDINAL];
              -- middle destination words still to be written on this line
            BBLineSetup[];
            FetchNextOff[0];
            dstLine[0] ¬ MF[dstLine[0], w, lMask];
            dstLine ¬ dstLine+SIZE[WORD];
            srcLine ¬ srcLine+SIZE[WORD];
            WHILE nw >= 4 DO
              FetchNextOff[0];
              dstLine[0] ¬ BITNOT[w];
              FetchNextOff[1];
              dstLine[1] ¬ BITNOT[w];
              FetchNextOff[2];
              dstLine[2] ¬ BITNOT[w];
              FetchNextOff[3];
              dstLine[3] ¬ BITNOT[w];
              dstLine ¬ dstLine+SIZE[WORD]*4;
              srcLine ¬ srcLine+SIZE[WORD]*4;
              nw ¬ nw - 4;
              ENDLOOP;
            IF nw >= 2 THEN {
              FetchNextOff[0];
              dstLine[0] ¬ BITNOT[w];
              FetchNextOff[1];
              dstLine[1] ¬ BITNOT[w];
              dstLine ¬ dstLine+SIZE[WORD]*2;
              srcLine ¬ srcLine+SIZE[WORD]*2;
              nw ¬ nw - 2;
              };
            IF nw = 1 THEN {
              -- One trailing word
              FetchNextOff[0];
              dstLine[0] ¬ BITNOT[w];
              dstLine ¬ dstLine+SIZE[WORD];
              srcLine ¬ srcLine+SIZE[WORD];
              };
            FetchLast[0, fetchLastWord];
            dstLine[0] ¬ MF[dstLine[0], w, rMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP;
          };
      };
    IF lSA = 0 THEN { Inner0[] } ELSE { Inner1[] };
    };
  -- Both dimensions must be nonzero. With fSize = 0 and a word-aligned destination ndw would be 0
  -- and the general case would wrap ndw-2; with sSize = 0 the line counters would wrap or a line would still be written.
  IF sSize # 0 AND fSize # 0 THEN {
    IF dstBpl MOD bpw = 0 AND srcBpl MOD bpw = 0
      THEN {
        -- don't need to do whole setup for each line
        DoUniformLines[sSize, dst.word, dst.bit, src.word, src.bit];
        }
      ELSE {
        -- need setup for every line, because the bit offsets change from line to line
        DO
          DoUniformLines[1, dst.word, dst.bit, src.word, src.bit];
          IF (sSize ¬ sSize - 1) = 0 THEN EXIT;
          dst.word ¬ dst.word + WordFloorUnitsForBits[(dst.bit+dstBpl)];
          dst.bit ¬ CARDINAL[(dst.bit+dstBpl)] MOD bpw;
          src.word ¬ src.word + WordFloorUnitsForBits[(src.bit+srcBpl)];
          src.bit ¬ CARDINAL[(src.bit+srcBpl)] MOD bpw;
          ENDLOOP;
        };
    };
  };
Forward2: PROC [dst: BitAddress, src: BitAddress, dstBpl, srcBpl, sSize, fSize: CARDINAL] ~ {
  -- ANDs a rectangle of source bits into the destination: each destination bit becomes (destination AND source).
  -- dst, src: bit addresses (word pointer plus bit offset) of the first bit of the first line.
  -- dstBpl, srcBpl: distance in bits from one line to the next in destination and source.
  -- sSize: number of lines; fSize: number of bits transferred on each line.
  -- Lines are visited in increasing address order and words left to right within a line.
  -- An empty rectangle (sSize = 0 or fSize = 0) transfers nothing.
  MF: PROC [d, s, mask: WORD] RETURNS [WORD] ~ INLINE {
    -- Result is (d AND s) where mask is 1 and the unchanged destination bit d elsewhere.
    RETURN [
      BITXOR[BITAND[BITXOR[BITAND[d, s], d], mask], d]
      ]
    };
  DoUniformLines: PROC [count: CARDINAL, dstPtr: WordPtr, dstBit: BitOff, srcPtr: WordPtr, srcBit: BitOff] ~ {
    -- Transfers count lines that all start at the same bit offsets (dstBit, srcBit),
    -- so the edge masks and the shift amount are computed once for all of them.
    -- Every line loop below is bottom-tested, so a count of 0 behaves like 1; callers must pass count >= 1.
    dRast: CARD ~ WordFloorUnitsForBits[dstBpl];
    sRast: CARD ~ WordFloorUnitsForBits[srcBpl];
      -- address units from one line to the next, destination and source
    ndw: CARDINAL ~ WordsForBits[dstBit + fSize];
      -- number of destination words per (this) line
    lMask: WORD ~ rightJustifiedOnes[bpw-dstBit];
      -- mask for the leftmost dest word (ones where bits are to go)
    rMask: WORD ~ RightJustifiedZeros[
      (LOOPHOLE[bpw-dstBit-fSize, CARDINAL]) MOD bpw];
      -- mask for the rightmost dest word
    lSA: BitOff ~ (LOOPHOLE[srcBit-dstBit, CARDINAL]) MOD bpw; -- left shift amount
    w: WORD ¬ 0; -- source word, aligned with destination
    dstLine: RawWordsPtr ¬ NIL; -- destination line word address
    srcLine: RawWordsPtr ¬ NIL; -- source line word address
    NextLine: PROC ~ INLINE {
      -- steps the line counter and both line pointers on to the following line
      count ¬ count - 1;
      dstPtr ¬ dstPtr + dRast;
      srcPtr ¬ srcPtr + sRast;
      };
    Inner0: PROC = INLINE {
      -- Aligned case is simpler: source and destination share a bit offset, so words move unshifted
      fetchLastWord: BOOL ~ TRUE;
        -- true if last source word needs to be fetched (always so when aligned)
      FetchNextOff: PROC [wordOffset: CARDINAL] ~ INLINE {
        -- fetches the word at the given offset, no pointer change
        w ¬ srcLine[wordOffset];
        };
      FetchLast: PROC [wordOffset: CARDINAL, fetch: BOOL] ~ INLINE {
        -- fetches the final source word on a line; fetch is ignored here and is kept only
        -- so that calls read the same as in Inner1
        w ¬ srcLine[wordOffset];
        };
      BBLineSetup: PROC ~ INLINE {
        -- points the line cursors at the start of the current line
        dstLine ¬ LOOPHOLE[dstPtr];
        srcLine ¬ LOOPHOLE[srcPtr];
        };
      SELECT ndw FROM
        1 => {
          -- Special encoding of one destination word case for speed: both edge masks apply to the same word
          bothMask: WORD ~ BITAND[lMask, rMask];
          DO
            BBLineSetup[];
            FetchLast[0, TRUE];
            dstLine[0] ¬ MF[dstLine[0], w, bothMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP
          };
        2 => {
          DO
            BBLineSetup[];
            FetchNextOff[0];
            dstLine[0] ¬ MF[dstLine[0], w, lMask];
            FetchLast[1, fetchLastWord];
            dstLine[1] ¬ MF[dstLine[1], w, rMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP;
          };
        3 => {
          DO
            BBLineSetup[];
            FetchNextOff[0];
            dstLine[0] ¬ MF[dstLine[0], w, lMask];
            FetchNextOff[1];
            dstLine[1] ¬ BITAND[dstLine[1], w];
            FetchLast[2, fetchLastWord];
            dstLine[2] ¬ MF[dstLine[2], w, rMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP;
          };
        4 => {
          DO
            BBLineSetup[];
            FetchNextOff[0];
            dstLine[0] ¬ MF[dstLine[0], w, lMask];
            FetchNextOff[1];
            dstLine[1] ¬ BITAND[dstLine[1], w];
            FetchNextOff[2];
            dstLine[2] ¬ BITAND[dstLine[2], w];
            FetchLast[3, fetchLastWord];
            dstLine[3] ¬ MF[dstLine[3], w, rMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP;
          };
        ENDCASE => {
          -- General case, ndw >= 5: masked first word, unmasked middle words, masked last word
          checkUnroll: [2..256] = 4;
            -- unroll factor of the middle word loop (presumably declared so the compiler range checks it)
          DO
            nw: CARDINAL ¬ LOOPHOLE[ndw-2, CARDINAL];
              -- middle destination words still to be written on this line
            BBLineSetup[];
            FetchNextOff[0];
            dstLine[0] ¬ MF[dstLine[0], w, lMask];
            dstLine ¬ dstLine+SIZE[WORD];
            srcLine ¬ srcLine+SIZE[WORD];
            WHILE nw >= 4 DO
              FetchNextOff[0];
              dstLine[0] ¬ BITAND[dstLine[0], w];
              FetchNextOff[1];
              dstLine[1] ¬ BITAND[dstLine[1], w];
              FetchNextOff[2];
              dstLine[2] ¬ BITAND[dstLine[2], w];
              FetchNextOff[3];
              dstLine[3] ¬ BITAND[dstLine[3], w];
              dstLine ¬ dstLine+SIZE[WORD]*4;
              srcLine ¬ srcLine+SIZE[WORD]*4;
              nw ¬ nw - 4;
              ENDLOOP;
            IF nw >= 2 THEN {
              FetchNextOff[0];
              dstLine[0] ¬ BITAND[dstLine[0], w];
              FetchNextOff[1];
              dstLine[1] ¬ BITAND[dstLine[1], w];
              dstLine ¬ dstLine+SIZE[WORD]*2;
              srcLine ¬ srcLine+SIZE[WORD]*2;
              nw ¬ nw - 2;
              };
            IF nw = 1 THEN {
              -- One trailing word
              FetchNextOff[0];
              dstLine[0] ¬ BITAND[dstLine[0], w];
              dstLine ¬ dstLine+SIZE[WORD];
              srcLine ¬ srcLine+SIZE[WORD];
              };
            FetchLast[0, fetchLastWord];
            dstLine[0] ¬ MF[dstLine[0], w, rMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP;
          };
      };
    Inner1: PROC = INLINE {
      -- Unaligned case: each destination word is assembled from two adjacent source words
      hi: WORD; -- left unshifted source word
      lo: WORD ¬ 0; -- right unshifted source word
      rSA: BitOff = bpw - lSA;
        -- amount to shift source words right to line them up
      nsw: CARDINAL = WordsForBits[srcBit + fSize];
        -- number of source words per (this) line
      fetchLastWord: BOOL = IF srcBit >= dstBit THEN (nsw>ndw) ELSE (nsw>=ndw);
        -- true if last source word needs to be fetched
      FetchNext: PROC ~ INLINE {
        -- fetches the next aligned source bits, and bumps source pointer
        hi ¬ lo;
        lo ¬ srcLine[0];
        srcLine ¬ srcLine+SIZE[WORD];
        w ¬ BITLSHIFT[hi, lSA]+BITRSHIFT[lo, rSA]
        };
      FetchNextOff: PROC [wordOffset: CARDINAL] ~ INLINE {
        -- fetches the next word at the given offset, no pointer change
        hi ¬ lo;
        lo ¬ srcLine[wordOffset];
        w ¬ BITLSHIFT[hi, lSA]+BITRSHIFT[lo, rSA]
        };
      FetchLast: PROC [wordOffset: CARDINAL, fetch: BOOL] ~ INLINE {
        -- fetches the final source bits on a line, avoiding a spurious fetch
        w ¬ BITLSHIFT[lo, lSA];
        IF fetch THEN w ¬ w + BITRSHIFT[srcLine[wordOffset], rSA];
        };
      BBLineSetup: PROC ~ INLINE {
        -- points the line cursors at the current line and, when the source starts further
        -- into its word than the destination does, preloads the first source word
        dstLine ¬ LOOPHOLE[dstPtr];
        srcLine ¬ LOOPHOLE[srcPtr];
        IF srcBit >= dstBit THEN FetchNext[];
        };
      SELECT ndw FROM
        1 => {
          -- Special encoding of one destination word case for speed;
          -- the test of fetchLastWord is hoisted out of the line loop
          bothMask: WORD ~ BITAND[lMask, rMask];
          IF fetchLastWord
            THEN
              DO
                BBLineSetup[];
                FetchLast[0, TRUE];
                dstLine[0] ¬ MF[dstLine[0], w, bothMask];
                IF count <= 1 THEN EXIT;
                NextLine[];
                ENDLOOP
            ELSE
              DO
                BBLineSetup[];
                FetchLast[0, FALSE];
                dstLine[0] ¬ MF[dstLine[0], w, bothMask];
                IF count <= 1 THEN EXIT;
                NextLine[];
                ENDLOOP
          };
        2 => {
          DO
            BBLineSetup[];
            FetchNextOff[0];
            dstLine[0] ¬ MF[dstLine[0], w, lMask];
            FetchLast[1, fetchLastWord];
            dstLine[1] ¬ MF[dstLine[1], w, rMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP;
          };
        3 => {
          DO
            BBLineSetup[];
            FetchNextOff[0];
            dstLine[0] ¬ MF[dstLine[0], w, lMask];
            FetchNextOff[1];
            dstLine[1] ¬ BITAND[dstLine[1], w];
            FetchLast[2, fetchLastWord];
            dstLine[2] ¬ MF[dstLine[2], w, rMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP;
          };
        4 => {
          DO
            BBLineSetup[];
            FetchNextOff[0];
            dstLine[0] ¬ MF[dstLine[0], w, lMask];
            FetchNextOff[1];
            dstLine[1] ¬ BITAND[dstLine[1], w];
            FetchNextOff[2];
            dstLine[2] ¬ BITAND[dstLine[2], w];
            FetchLast[3, fetchLastWord];
            dstLine[3] ¬ MF[dstLine[3], w, rMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP;
          };
        ENDCASE => {
          -- General case, ndw >= 5: masked first word, unmasked middle words, masked last word
          checkUnroll: [2..256] = 4;
            -- unroll factor of the middle word loop (presumably declared so the compiler range checks it)
          DO
            nw: CARDINAL ¬ LOOPHOLE[ndw-2, CARDINAL];
              -- middle destination words still to be written on this line
            BBLineSetup[];
            FetchNextOff[0];
            dstLine[0] ¬ MF[dstLine[0], w, lMask];
            dstLine ¬ dstLine+SIZE[WORD];
            srcLine ¬ srcLine+SIZE[WORD];
            WHILE nw >= 4 DO
              FetchNextOff[0];
              dstLine[0] ¬ BITAND[dstLine[0], w];
              FetchNextOff[1];
              dstLine[1] ¬ BITAND[dstLine[1], w];
              FetchNextOff[2];
              dstLine[2] ¬ BITAND[dstLine[2], w];
              FetchNextOff[3];
              dstLine[3] ¬ BITAND[dstLine[3], w];
              dstLine ¬ dstLine+SIZE[WORD]*4;
              srcLine ¬ srcLine+SIZE[WORD]*4;
              nw ¬ nw - 4;
              ENDLOOP;
            IF nw >= 2 THEN {
              FetchNextOff[0];
              dstLine[0] ¬ BITAND[dstLine[0], w];
              FetchNextOff[1];
              dstLine[1] ¬ BITAND[dstLine[1], w];
              dstLine ¬ dstLine+SIZE[WORD]*2;
              srcLine ¬ srcLine+SIZE[WORD]*2;
              nw ¬ nw - 2;
              };
            IF nw = 1 THEN {
              -- One trailing word
              FetchNextOff[0];
              dstLine[0] ¬ BITAND[dstLine[0], w];
              dstLine ¬ dstLine+SIZE[WORD];
              srcLine ¬ srcLine+SIZE[WORD];
              };
            FetchLast[0, fetchLastWord];
            dstLine[0] ¬ MF[dstLine[0], w, rMask];
            IF count <= 1 THEN EXIT;
            NextLine[];
            ENDLOOP;
          };
      };
    IF lSA = 0 THEN { Inner0[] } ELSE { Inner1[] };
    };
  -- Both dimensions must be nonzero. With fSize = 0 and a word-aligned destination ndw would be 0
  -- and the general case would wrap ndw-2; with sSize = 0 the line counters would wrap or a line would still be written.
  IF sSize # 0 AND fSize # 0 THEN {
    IF dstBpl MOD bpw = 0 AND srcBpl MOD bpw = 0
      THEN {
        -- don't need to do whole setup for each line
        DoUniformLines[sSize, dst.word, dst.bit, src.word, src.bit];
        }
      ELSE {
        -- need setup for every line, because the bit offsets change from line to line
        DO
          DoUniformLines[1, dst.word, dst.bit, src.word, src.bit];
          IF (sSize ¬ sSize - 1) = 0 THEN EXIT;
          dst.word ¬ dst.word + WordFloorUnitsForBits[(dst.bit+dstBpl)];
          dst.bit ¬ CARDINAL[(dst.bit+dstBpl)] MOD bpw;
          src.word ¬ src.word + WordFloorUnitsForBits[(src.bit+srcBpl)];
          src.bit ¬ CARDINAL[(src.bit+srcBpl)] MOD bpw;
          ENDLOOP;
        };
    };
  };
Forward3: PROC [dst: BitAddress, src: BitAddress, dstBpl, srcBpl, sSize, fSize: CARDINAL] ~ {
MF: PROC [d, s, mask: WORD] RETURNS [WORD] ~ INLINE {
RETURN [
BITXOR[BITAND[BITXOR[BITAND[d, BITNOT[ s]], d], mask], d]
]
};
DoUniformLines: PROC [count: CARDINAL, dstPtr: WordPtr, dstBit: BitOff, srcPtr: WordPtr, srcBit: BitOff] ~ {
dRast: CARD ~ WordFloorUnitsForBits[dstBpl];
sRast: CARD ~ WordFloorUnitsForBits[srcBpl];
ndw: CARDINAL ~ WordsForBits[dstBit + fSize];
number of destination words per (this) line
lMask: WORD ~ rightJustifiedOnes[bpw-dstBit];
mask for the leftmost dest word (ones where bits are to go)
rMask: WORD ~ RightJustifiedZeros[
(LOOPHOLE[bpw-dstBit-fSize, CARDINAL]) MOD bpw];
mask for the rightmost dest word
lSA: BitOff ~ (LOOPHOLE[srcBit-dstBit, CARDINAL]) MOD bpw; -- left shift amount
w: WORD ¬ 0; -- source word, aligned with destination
dstLine: RawWordsPtr ¬ NIL; -- destination line word address
srcLine: RawWordsPtr ¬ NIL; -- source line word address
Inner0: PROC = INLINE {
Aligned case is simpler
fetchLastWord: BOOL ~ TRUE;
true if last source word needs to be fetched
FetchNext: PROC ~ INLINE {
fetches the next aligned source bits, and bumps source pointer
w ¬ srcLine[0];
srcLine ¬ srcLine+SIZE[WORD];
};
FetchNextOff: PROC [wordOffset: CARDINAL] ~ INLINE {
fetches the word at the given offset, no pointer change
w ¬ srcLine[wordOffset];
};
FetchLast: PROC [wordOffset: CARDINAL, fetch: BOOL] ~ INLINE {
fetches the final source bits on a line, avoiding a spurious fetch
w ¬ srcLine[wordOffset];
};
BBLineSetup: PROC ~ INLINE {
dstLine ¬ LOOPHOLE[dstPtr];
srcLine ¬ LOOPHOLE[srcPtr];
};
SELECT ndw FROM
1 => {
Special encoding of one destination word case for speed
bothMask: WORD ~ BITAND[lMask, rMask];
IF fetchLastWord
THEN
DO
BBLineSetup[];
FetchLast[0, TRUE];
dstLine[0] ¬ MF[dstLine[0], w, bothMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP
ELSE
DO
BBLineSetup[];
FetchLast[0, FALSE];
dstLine[0] ¬ MF[dstLine[0], w, bothMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP
};
2 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchLast[1, fetchLastWord];
dstLine[1] ¬ MF[dstLine[1], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
3 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchNextOff[1];
dstLine[1] ¬ BITAND[dstLine[1], BITNOT[ w]];
FetchLast[2, fetchLastWord];
dstLine[2] ¬ MF[dstLine[2], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
4 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchNextOff[1];
dstLine[1] ¬ BITAND[dstLine[1], BITNOT[ w]];
FetchNextOff[2];
dstLine[2] ¬ BITAND[dstLine[2], BITNOT[ w]];
FetchLast[3, fetchLastWord];
dstLine[3] ¬ MF[dstLine[3], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
ENDCASE => {
checkUnroll: [2..256] = 4;
DO
nw: CARDINAL ¬ LOOPHOLE[ndw-2, CARDINAL];
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
dstLine ¬ dstLine+SIZE[WORD];
srcLine ¬ srcLine+SIZE[WORD];
WHILE nw >= 4 DO
FetchNextOff[0];
dstLine[0] ¬ BITAND[dstLine[0], BITNOT[ w]];
FetchNextOff[1];
dstLine[1] ¬ BITAND[dstLine[1], BITNOT[ w]];
FetchNextOff[2];
dstLine[2] ¬ BITAND[dstLine[2], BITNOT[ w]];
FetchNextOff[3];
dstLine[3] ¬ BITAND[dstLine[3], BITNOT[ w]];
dstLine ¬ dstLine+SIZE[WORD]*4;
srcLine ¬ srcLine+SIZE[WORD]*4;
nw ¬ nw - 4;
ENDLOOP;
IF nw >= 2 THEN {
FetchNextOff[0];
dstLine[0] ¬ BITAND[dstLine[0], BITNOT[ w]];
FetchNextOff[1];
dstLine[1] ¬ BITAND[dstLine[1], BITNOT[ w]];
dstLine ¬ dstLine+SIZE[WORD]*2;
srcLine ¬ srcLine+SIZE[WORD]*2;
nw ¬ nw - 2;
};
IF nw = 1 THEN {
One trailing word
FetchNextOff[0];
dstLine[0] ¬ BITAND[dstLine[0], BITNOT[ w]];
dstLine ¬ dstLine+SIZE[WORD];
srcLine ¬ srcLine+SIZE[WORD];
};
FetchLast[0, fetchLastWord];
dstLine[0] ¬ MF[dstLine[0], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
};
Inner1: PROC = INLINE {
hi: WORD; -- left unshifted source word
lo: WORD ¬ 0; -- right unshifted source word
rSA: BitOff = bpw - lSA;
amount to shift source words right to line them up
nsw: CARDINAL = WordsForBits[srcBit + fSize];
fetchLastWord: BOOL = IF srcBit >= dstBit THEN (nsw>ndw) ELSE (nsw>=ndw);
true if last source word needs to be fetched
FetchNext: PROC ~ INLINE {
fetches the next aligned source bits, and bumps source pointer
hi ¬ lo;
lo ¬ srcLine[0];
srcLine ¬ srcLine+SIZE[WORD];
w ¬ BITLSHIFT[hi, lSA]+BITRSHIFT[lo, rSA]
};
FetchNextOff: PROC [wordOffset: CARDINAL] ~ INLINE {
fetches the next word at the given offset, no pointer change
hi ¬ lo;
lo ¬ srcLine[wordOffset];
w ¬ BITLSHIFT[hi, lSA]+BITRSHIFT[lo, rSA]
};
FetchLast: PROC [wordOffset: CARDINAL, fetch: BOOL] ~ INLINE {
fetches the final source bits on a line, avoiding a spurious fetch
w ¬ BITLSHIFT[lo, lSA];
IF fetch THEN w ¬ w + BITRSHIFT[srcLine[wordOffset], rSA];
};
BBLineSetup: PROC ~ INLINE {
dstLine ¬ LOOPHOLE[dstPtr];
srcLine ¬ LOOPHOLE[srcPtr];
IF srcBit >= dstBit THEN FetchNext[];
};
SELECT ndw FROM
1 => {
Special encoding of one destination word case for speed
bothMask: WORD ~ BITAND[lMask, rMask];
IF fetchLastWord
THEN
DO
BBLineSetup[];
FetchLast[0, TRUE];
dstLine[0] ¬ MF[dstLine[0], w, bothMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP
ELSE
DO
BBLineSetup[];
FetchLast[0, FALSE];
dstLine[0] ¬ MF[dstLine[0], w, bothMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP
};
2 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchLast[1, fetchLastWord];
dstLine[1] ¬ MF[dstLine[1], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
3 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchNextOff[1];
dstLine[1] ¬ BITAND[dstLine[1], BITNOT[ w]];
FetchLast[2, fetchLastWord];
dstLine[2] ¬ MF[dstLine[2], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
4 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchNextOff[1];
dstLine[1] ¬ BITAND[dstLine[1], BITNOT[ w]];
FetchNextOff[2];
dstLine[2] ¬ BITAND[dstLine[2], BITNOT[ w]];
FetchLast[3, fetchLastWord];
dstLine[3] ¬ MF[dstLine[3], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
ENDCASE => {
checkUnroll: [2..256] = 4;
DO
nw: CARDINAL ¬ LOOPHOLE[ndw-2, CARDINAL];
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
dstLine ¬ dstLine+SIZE[WORD];
srcLine ¬ srcLine+SIZE[WORD];
WHILE nw >= 4 DO
FetchNextOff[0];
dstLine[0] ¬ BITAND[dstLine[0], BITNOT[ w]];
FetchNextOff[1];
dstLine[1] ¬ BITAND[dstLine[1], BITNOT[ w]];
FetchNextOff[2];
dstLine[2] ¬ BITAND[dstLine[2], BITNOT[ w]];
FetchNextOff[3];
dstLine[3] ¬ BITAND[dstLine[3], BITNOT[ w]];
dstLine ¬ dstLine+SIZE[WORD]*4;
srcLine ¬ srcLine+SIZE[WORD]*4;
nw ¬ nw - 4;
ENDLOOP;
IF nw >= 2 THEN {
FetchNextOff[0];
dstLine[0] ¬ BITAND[dstLine[0], BITNOT[ w]];
FetchNextOff[1];
dstLine[1] ¬ BITAND[dstLine[1], BITNOT[ w]];
dstLine ¬ dstLine+SIZE[WORD]*2;
srcLine ¬ srcLine+SIZE[WORD]*2;
nw ¬ nw - 2;
};
IF nw = 1 THEN {
One trailing word
FetchNextOff[0];
dstLine[0] ¬ BITAND[dstLine[0], BITNOT[ w]];
dstLine ¬ dstLine+SIZE[WORD];
srcLine ¬ srcLine+SIZE[WORD];
};
FetchLast[0, fetchLastWord];
dstLine[0] ¬ MF[dstLine[0], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
};
IF lSA = 0 THEN { Inner0[] } ELSE { Inner1[] };
};
IF BITOR[sSize, fSize] # 0 THEN {
IF dstBpl MOD bpw = 0 AND srcBpl MOD bpw = 0
THEN {
don't need to do whole setup for each line
DoUniformLines[sSize, dst.word, dst.bit, src.word, src.bit];
}
ELSE {
need setup for every line
DO
DoUniformLines[1, dst.word, dst.bit, src.word, src.bit];
IF (sSize ¬ sSize - 1) = 0 THEN EXIT;
dst.word ¬ dst.word + WordFloorUnitsForBits[(dst.bit+dstBpl)];
dst.bit ¬ CARDINAL[(dst.bit+dstBpl)] MOD bpw;
src.word ¬ src.word + WordFloorUnitsForBits[(src.bit+srcBpl)];
src.bit ¬ CARDINAL[(src.bit+srcBpl)] MOD bpw;
ENDLOOP;
};
};
};
-- Forward4: ORs a rectangle of source bits into the destination (each destination
-- bit becomes dst OR src), processing the words of each line in increasing address order.
--   dst, src: bit addresses (word pointer plus bit offset) of the first bit of the first line
--   dstBpl, srcBpl: number of bits to advance each address from one line to the next
--   sSize: number of lines to process
--   fSize: number of bits to process on each line
-- Nothing is written when either sSize or fSize is zero.
Forward4: PROC [dst: BitAddress, src: BitAddress, dstBpl, srcBpl, sSize, fSize: CARDINAL] ~ {
-- MF: the masked form of the operation, used for the partial words at the edges of a line.
-- Destination bits where mask is zero are returned unchanged.
MF: PROC [d, s, mask: WORD] RETURNS [WORD] ~ INLINE {
RETURN [
BITOR[d, BITAND[mask, s]]
]
};
-- DoUniformLines: processes count lines that all share the same starting bit offsets;
-- successive lines are reached by adding dRast and sRast to the word pointers.
-- The line loops test count after the work is done, so at least one line is always processed.
DoUniformLines: PROC [count: CARDINAL, dstPtr: WordPtr, dstBit: BitOff, srcPtr: WordPtr, srcBit: BitOff] ~ {
dRast: CARD ~ WordFloorUnitsForBits[dstBpl];
sRast: CARD ~ WordFloorUnitsForBits[srcBpl];
ndw: CARDINAL ~ WordsForBits[dstBit + fSize];
number of destination words per (this) line
lMask: WORD ~ rightJustifiedOnes[bpw-dstBit];
mask for the leftmost dest word (ones where bits are to go)
rMask: WORD ~ RightJustifiedZeros[
(LOOPHOLE[bpw-dstBit-fSize, CARDINAL]) MOD bpw];
mask for the rightmost dest word
lSA: BitOff ~ (LOOPHOLE[srcBit-dstBit, CARDINAL]) MOD bpw; -- left shift amount
w: WORD ¬ 0; -- source word, aligned with destination
dstLine: RawWordsPtr ¬ NIL; -- destination line word address
srcLine: RawWordsPtr ¬ NIL; -- source line word address
-- Inner0 is selected when lSA = 0: source words are used exactly as fetched.
Inner0: PROC = INLINE {
Aligned case is simpler
fetchLastWord: BOOL ~ TRUE;
true if last source word needs to be fetched
FetchNext: PROC ~ INLINE {
fetches the next aligned source bits, and bumps source pointer
w ¬ srcLine[0];
srcLine ¬ srcLine+SIZE[WORD];
};
FetchNextOff: PROC [wordOffset: CARDINAL] ~ INLINE {
fetches the word at the given offset, no pointer change
w ¬ srcLine[wordOffset];
};
-- In this aligned variant the fetch argument is ignored; the word is always read.
FetchLast: PROC [wordOffset: CARDINAL, fetch: BOOL] ~ INLINE {
fetches the final source bits on a line, avoiding a spurious fetch
w ¬ srcLine[wordOffset];
};
BBLineSetup: PROC ~ INLINE {
dstLine ¬ LOOPHOLE[dstPtr];
srcLine ¬ LOOPHOLE[srcPtr];
};
-- Dispatch on the number of destination words per line; 1 through 4 are unrolled by hand.
SELECT ndw FROM
1 => {
Special encoding of one destination word case for speed
bothMask: WORD ~ BITAND[lMask, rMask];
IF fetchLastWord
THEN
DO
BBLineSetup[];
FetchLast[0, TRUE];
dstLine[0] ¬ MF[dstLine[0], w, bothMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP
ELSE
DO
BBLineSetup[];
FetchLast[0, FALSE];
dstLine[0] ¬ MF[dstLine[0], w, bothMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP
};
2 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchLast[1, fetchLastWord];
dstLine[1] ¬ MF[dstLine[1], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
3 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchNextOff[1];
-- interior word: every bit is affected, so no mask is needed
dstLine[1] ¬ BITOR[dstLine[1], w];
FetchLast[2, fetchLastWord];
dstLine[2] ¬ MF[dstLine[2], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
4 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchNextOff[1];
dstLine[1] ¬ BITOR[dstLine[1], w];
FetchNextOff[2];
dstLine[2] ¬ BITOR[dstLine[2], w];
FetchLast[3, fetchLastWord];
dstLine[3] ¬ MF[dstLine[3], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
ENDCASE => {
-- General case: masked first word, nw unmasked interior words, masked last word.
checkUnroll: [2..256] = 4;
DO
nw: CARDINAL ¬ LOOPHOLE[ndw-2, CARDINAL];
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
dstLine ¬ dstLine+SIZE[WORD];
srcLine ¬ srcLine+SIZE[WORD];
-- interior words, four per iteration
WHILE nw >= 4 DO
FetchNextOff[0];
dstLine[0] ¬ BITOR[dstLine[0], w];
FetchNextOff[1];
dstLine[1] ¬ BITOR[dstLine[1], w];
FetchNextOff[2];
dstLine[2] ¬ BITOR[dstLine[2], w];
FetchNextOff[3];
dstLine[3] ¬ BITOR[dstLine[3], w];
dstLine ¬ dstLine+SIZE[WORD]*4;
srcLine ¬ srcLine+SIZE[WORD]*4;
nw ¬ nw - 4;
ENDLOOP;
-- at most three interior words remain here
IF nw >= 2 THEN {
FetchNextOff[0];
dstLine[0] ¬ BITOR[dstLine[0], w];
FetchNextOff[1];
dstLine[1] ¬ BITOR[dstLine[1], w];
dstLine ¬ dstLine+SIZE[WORD]*2;
srcLine ¬ srcLine+SIZE[WORD]*2;
nw ¬ nw - 2;
};
IF nw = 1 THEN {
One trailing word
FetchNextOff[0];
dstLine[0] ¬ BITOR[dstLine[0], w];
dstLine ¬ dstLine+SIZE[WORD];
srcLine ¬ srcLine+SIZE[WORD];
};
FetchLast[0, fetchLastWord];
dstLine[0] ¬ MF[dstLine[0], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
};
-- Inner1 is selected when lSA # 0: each aligned word w is assembled from two
-- adjacent source words, hi shifted left by lSA and lo shifted right by rSA.
Inner1: PROC = INLINE {
hi: WORD; -- left unshifted source word
lo: WORD ¬ 0; -- right unshifted source word
rSA: BitOff = bpw - lSA;
amount to shift source words right to line them up
nsw: CARDINAL = WordsForBits[srcBit + fSize];
fetchLastWord: BOOL = IF srcBit >= dstBit THEN (nsw>ndw) ELSE (nsw>=ndw);
true if last source word needs to be fetched
FetchNext: PROC ~ INLINE {
fetches the next aligned source bits, and bumps source pointer
hi ¬ lo;
lo ¬ srcLine[0];
srcLine ¬ srcLine+SIZE[WORD];
w ¬ BITLSHIFT[hi, lSA]+BITRSHIFT[lo, rSA]
};
FetchNextOff: PROC [wordOffset: CARDINAL] ~ INLINE {
fetches the next word at the given offset, no pointer change
hi ¬ lo;
lo ¬ srcLine[wordOffset];
w ¬ BITLSHIFT[hi, lSA]+BITRSHIFT[lo, rSA]
};
FetchLast: PROC [wordOffset: CARDINAL, fetch: BOOL] ~ INLINE {
fetches the final source bits on a line, avoiding a spurious fetch
w ¬ BITLSHIFT[lo, lSA];
IF fetch THEN w ¬ w + BITRSHIFT[srcLine[wordOffset], rSA];
};
BBLineSetup: PROC ~ INLINE {
dstLine ¬ LOOPHOLE[dstPtr];
srcLine ¬ LOOPHOLE[srcPtr];
-- preload lo with the first source word when the source starts at or to the right of the destination offset
IF srcBit >= dstBit THEN FetchNext[];
};
-- Dispatch on the number of destination words per line; 1 through 4 are unrolled by hand.
SELECT ndw FROM
1 => {
Special encoding of one destination word case for speed
bothMask: WORD ~ BITAND[lMask, rMask];
IF fetchLastWord
THEN
DO
BBLineSetup[];
FetchLast[0, TRUE];
dstLine[0] ¬ MF[dstLine[0], w, bothMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP
ELSE
DO
BBLineSetup[];
FetchLast[0, FALSE];
dstLine[0] ¬ MF[dstLine[0], w, bothMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP
};
2 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchLast[1, fetchLastWord];
dstLine[1] ¬ MF[dstLine[1], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
3 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchNextOff[1];
dstLine[1] ¬ BITOR[dstLine[1], w];
FetchLast[2, fetchLastWord];
dstLine[2] ¬ MF[dstLine[2], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
4 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchNextOff[1];
dstLine[1] ¬ BITOR[dstLine[1], w];
FetchNextOff[2];
dstLine[2] ¬ BITOR[dstLine[2], w];
FetchLast[3, fetchLastWord];
dstLine[3] ¬ MF[dstLine[3], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
ENDCASE => {
-- General case: masked first word, nw unmasked interior words, masked last word.
checkUnroll: [2..256] = 4;
DO
nw: CARDINAL ¬ LOOPHOLE[ndw-2, CARDINAL];
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
dstLine ¬ dstLine+SIZE[WORD];
srcLine ¬ srcLine+SIZE[WORD];
WHILE nw >= 4 DO
FetchNextOff[0];
dstLine[0] ¬ BITOR[dstLine[0], w];
FetchNextOff[1];
dstLine[1] ¬ BITOR[dstLine[1], w];
FetchNextOff[2];
dstLine[2] ¬ BITOR[dstLine[2], w];
FetchNextOff[3];
dstLine[3] ¬ BITOR[dstLine[3], w];
dstLine ¬ dstLine+SIZE[WORD]*4;
srcLine ¬ srcLine+SIZE[WORD]*4;
nw ¬ nw - 4;
ENDLOOP;
IF nw >= 2 THEN {
FetchNextOff[0];
dstLine[0] ¬ BITOR[dstLine[0], w];
FetchNextOff[1];
dstLine[1] ¬ BITOR[dstLine[1], w];
dstLine ¬ dstLine+SIZE[WORD]*2;
srcLine ¬ srcLine+SIZE[WORD]*2;
nw ¬ nw - 2;
};
IF nw = 1 THEN {
One trailing word
FetchNextOff[0];
dstLine[0] ¬ BITOR[dstLine[0], w];
dstLine ¬ dstLine+SIZE[WORD];
srcLine ¬ srcLine+SIZE[WORD];
};
FetchLast[0, fetchLastWord];
dstLine[0] ¬ MF[dstLine[0], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
};
IF lSA = 0 THEN { Inner0[] } ELSE { Inner1[] };
};
-- Empty rectangle guard: both dimensions must be nonzero.
-- With fSize = 0 and a zero destination bit offset, ndw would be 0 and the general
-- case would derive its interior word count from ndw-2; with sSize = 0 the per-line
-- loop below would decrement sSize below zero before testing it.
IF sSize # 0 AND fSize # 0 THEN {
IF dstBpl MOD bpw = 0 AND srcBpl MOD bpw = 0
THEN {
don't need to do whole setup for each line
DoUniformLines[sSize, dst.word, dst.bit, src.word, src.bit];
}
ELSE {
need setup for every line
DO
DoUniformLines[1, dst.word, dst.bit, src.word, src.bit];
IF (sSize ¬ sSize - 1) = 0 THEN EXIT;
-- advance both bit addresses by one line, renormalizing the bit offset into [0..bpw)
dst.word ¬ dst.word + WordFloorUnitsForBits[(dst.bit+dstBpl)];
dst.bit ¬ CARDINAL[(dst.bit+dstBpl)] MOD bpw;
src.word ¬ src.word + WordFloorUnitsForBits[(src.bit+srcBpl)];
src.bit ¬ CARDINAL[(src.bit+srcBpl)] MOD bpw;
ENDLOOP;
};
};
};
-- Forward5: ORs the complement of a rectangle of source bits into the destination
-- (each destination bit becomes dst OR NOT src), processing the words of each line
-- in increasing address order.
--   dst, src: bit addresses (word pointer plus bit offset) of the first bit of the first line
--   dstBpl, srcBpl: number of bits to advance each address from one line to the next
--   sSize: number of lines to process
--   fSize: number of bits to process on each line
-- Nothing is written when either sSize or fSize is zero.
Forward5: PROC [dst: BitAddress, src: BitAddress, dstBpl, srcBpl, sSize, fSize: CARDINAL] ~ {
-- MF: the masked form of the operation, used for the partial words at the edges of a line.
-- Destination bits where mask is zero are returned unchanged.
MF: PROC [d, s, mask: WORD] RETURNS [WORD] ~ INLINE {
RETURN [
BITOR[d, BITAND[mask, BITNOT[s]]]
]
};
-- DoUniformLines: processes count lines that all share the same starting bit offsets;
-- successive lines are reached by adding dRast and sRast to the word pointers.
-- The line loops test count after the work is done, so at least one line is always processed.
DoUniformLines: PROC [count: CARDINAL, dstPtr: WordPtr, dstBit: BitOff, srcPtr: WordPtr, srcBit: BitOff] ~ {
dRast: CARD ~ WordFloorUnitsForBits[dstBpl];
sRast: CARD ~ WordFloorUnitsForBits[srcBpl];
ndw: CARDINAL ~ WordsForBits[dstBit + fSize];
number of destination words per (this) line
lMask: WORD ~ rightJustifiedOnes[bpw-dstBit];
mask for the leftmost dest word (ones where bits are to go)
rMask: WORD ~ RightJustifiedZeros[
(LOOPHOLE[bpw-dstBit-fSize, CARDINAL]) MOD bpw];
mask for the rightmost dest word
lSA: BitOff ~ (LOOPHOLE[srcBit-dstBit, CARDINAL]) MOD bpw; -- left shift amount
w: WORD ¬ 0; -- source word, aligned with destination
dstLine: RawWordsPtr ¬ NIL; -- destination line word address
srcLine: RawWordsPtr ¬ NIL; -- source line word address
-- Inner0 is selected when lSA = 0: source words are used exactly as fetched.
Inner0: PROC = INLINE {
Aligned case is simpler
fetchLastWord: BOOL ~ TRUE;
true if last source word needs to be fetched
FetchNext: PROC ~ INLINE {
fetches the next aligned source bits, and bumps source pointer
w ¬ srcLine[0];
srcLine ¬ srcLine+SIZE[WORD];
};
FetchNextOff: PROC [wordOffset: CARDINAL] ~ INLINE {
fetches the word at the given offset, no pointer change
w ¬ srcLine[wordOffset];
};
-- In this aligned variant the fetch argument is ignored; the word is always read.
FetchLast: PROC [wordOffset: CARDINAL, fetch: BOOL] ~ INLINE {
fetches the final source bits on a line, avoiding a spurious fetch
w ¬ srcLine[wordOffset];
};
BBLineSetup: PROC ~ INLINE {
dstLine ¬ LOOPHOLE[dstPtr];
srcLine ¬ LOOPHOLE[srcPtr];
};
-- Dispatch on the number of destination words per line; 1 through 4 are unrolled by hand.
SELECT ndw FROM
1 => {
Special encoding of one destination word case for speed
bothMask: WORD ~ BITAND[lMask, rMask];
IF fetchLastWord
THEN
DO
BBLineSetup[];
FetchLast[0, TRUE];
dstLine[0] ¬ MF[dstLine[0], w, bothMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP
ELSE
DO
BBLineSetup[];
FetchLast[0, FALSE];
dstLine[0] ¬ MF[dstLine[0], w, bothMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP
};
2 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchLast[1, fetchLastWord];
dstLine[1] ¬ MF[dstLine[1], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
3 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchNextOff[1];
-- interior word: every bit is affected, so no mask is needed
dstLine[1] ¬ BITOR[dstLine[1], BITNOT[ w]];
FetchLast[2, fetchLastWord];
dstLine[2] ¬ MF[dstLine[2], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
4 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchNextOff[1];
dstLine[1] ¬ BITOR[dstLine[1], BITNOT[ w]];
FetchNextOff[2];
dstLine[2] ¬ BITOR[dstLine[2], BITNOT[ w]];
FetchLast[3, fetchLastWord];
dstLine[3] ¬ MF[dstLine[3], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
ENDCASE => {
-- General case: masked first word, nw unmasked interior words, masked last word.
checkUnroll: [2..256] = 4;
DO
nw: CARDINAL ¬ LOOPHOLE[ndw-2, CARDINAL];
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
dstLine ¬ dstLine+SIZE[WORD];
srcLine ¬ srcLine+SIZE[WORD];
-- interior words, four per iteration
WHILE nw >= 4 DO
FetchNextOff[0];
dstLine[0] ¬ BITOR[dstLine[0], BITNOT[ w]];
FetchNextOff[1];
dstLine[1] ¬ BITOR[dstLine[1], BITNOT[ w]];
FetchNextOff[2];
dstLine[2] ¬ BITOR[dstLine[2], BITNOT[ w]];
FetchNextOff[3];
dstLine[3] ¬ BITOR[dstLine[3], BITNOT[ w]];
dstLine ¬ dstLine+SIZE[WORD]*4;
srcLine ¬ srcLine+SIZE[WORD]*4;
nw ¬ nw - 4;
ENDLOOP;
-- at most three interior words remain here
IF nw >= 2 THEN {
FetchNextOff[0];
dstLine[0] ¬ BITOR[dstLine[0], BITNOT[ w]];
FetchNextOff[1];
dstLine[1] ¬ BITOR[dstLine[1], BITNOT[ w]];
dstLine ¬ dstLine+SIZE[WORD]*2;
srcLine ¬ srcLine+SIZE[WORD]*2;
nw ¬ nw - 2;
};
IF nw = 1 THEN {
One trailing word
FetchNextOff[0];
dstLine[0] ¬ BITOR[dstLine[0], BITNOT[ w]];
dstLine ¬ dstLine+SIZE[WORD];
srcLine ¬ srcLine+SIZE[WORD];
};
FetchLast[0, fetchLastWord];
dstLine[0] ¬ MF[dstLine[0], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
};
-- Inner1 is selected when lSA # 0: each aligned word w is assembled from two
-- adjacent source words, hi shifted left by lSA and lo shifted right by rSA.
Inner1: PROC = INLINE {
hi: WORD; -- left unshifted source word
lo: WORD ¬ 0; -- right unshifted source word
rSA: BitOff = bpw - lSA;
amount to shift source words right to line them up
nsw: CARDINAL = WordsForBits[srcBit + fSize];
fetchLastWord: BOOL = IF srcBit >= dstBit THEN (nsw>ndw) ELSE (nsw>=ndw);
true if last source word needs to be fetched
FetchNext: PROC ~ INLINE {
fetches the next aligned source bits, and bumps source pointer
hi ¬ lo;
lo ¬ srcLine[0];
srcLine ¬ srcLine+SIZE[WORD];
w ¬ BITLSHIFT[hi, lSA]+BITRSHIFT[lo, rSA]
};
FetchNextOff: PROC [wordOffset: CARDINAL] ~ INLINE {
fetches the next word at the given offset, no pointer change
hi ¬ lo;
lo ¬ srcLine[wordOffset];
w ¬ BITLSHIFT[hi, lSA]+BITRSHIFT[lo, rSA]
};
FetchLast: PROC [wordOffset: CARDINAL, fetch: BOOL] ~ INLINE {
fetches the final source bits on a line, avoiding a spurious fetch
w ¬ BITLSHIFT[lo, lSA];
IF fetch THEN w ¬ w + BITRSHIFT[srcLine[wordOffset], rSA];
};
BBLineSetup: PROC ~ INLINE {
dstLine ¬ LOOPHOLE[dstPtr];
srcLine ¬ LOOPHOLE[srcPtr];
-- preload lo with the first source word when the source starts at or to the right of the destination offset
IF srcBit >= dstBit THEN FetchNext[];
};
-- Dispatch on the number of destination words per line; 1 through 4 are unrolled by hand.
SELECT ndw FROM
1 => {
Special encoding of one destination word case for speed
bothMask: WORD ~ BITAND[lMask, rMask];
IF fetchLastWord
THEN
DO
BBLineSetup[];
FetchLast[0, TRUE];
dstLine[0] ¬ MF[dstLine[0], w, bothMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP
ELSE
DO
BBLineSetup[];
FetchLast[0, FALSE];
dstLine[0] ¬ MF[dstLine[0], w, bothMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP
};
2 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchLast[1, fetchLastWord];
dstLine[1] ¬ MF[dstLine[1], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
3 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchNextOff[1];
dstLine[1] ¬ BITOR[dstLine[1], BITNOT[ w]];
FetchLast[2, fetchLastWord];
dstLine[2] ¬ MF[dstLine[2], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
4 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchNextOff[1];
dstLine[1] ¬ BITOR[dstLine[1], BITNOT[ w]];
FetchNextOff[2];
dstLine[2] ¬ BITOR[dstLine[2], BITNOT[ w]];
FetchLast[3, fetchLastWord];
dstLine[3] ¬ MF[dstLine[3], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
ENDCASE => {
-- General case: masked first word, nw unmasked interior words, masked last word.
checkUnroll: [2..256] = 4;
DO
nw: CARDINAL ¬ LOOPHOLE[ndw-2, CARDINAL];
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
dstLine ¬ dstLine+SIZE[WORD];
srcLine ¬ srcLine+SIZE[WORD];
WHILE nw >= 4 DO
FetchNextOff[0];
dstLine[0] ¬ BITOR[dstLine[0], BITNOT[ w]];
FetchNextOff[1];
dstLine[1] ¬ BITOR[dstLine[1], BITNOT[ w]];
FetchNextOff[2];
dstLine[2] ¬ BITOR[dstLine[2], BITNOT[ w]];
FetchNextOff[3];
dstLine[3] ¬ BITOR[dstLine[3], BITNOT[ w]];
dstLine ¬ dstLine+SIZE[WORD]*4;
srcLine ¬ srcLine+SIZE[WORD]*4;
nw ¬ nw - 4;
ENDLOOP;
IF nw >= 2 THEN {
FetchNextOff[0];
dstLine[0] ¬ BITOR[dstLine[0], BITNOT[ w]];
FetchNextOff[1];
dstLine[1] ¬ BITOR[dstLine[1], BITNOT[ w]];
dstLine ¬ dstLine+SIZE[WORD]*2;
srcLine ¬ srcLine+SIZE[WORD]*2;
nw ¬ nw - 2;
};
IF nw = 1 THEN {
One trailing word
FetchNextOff[0];
dstLine[0] ¬ BITOR[dstLine[0], BITNOT[ w]];
dstLine ¬ dstLine+SIZE[WORD];
srcLine ¬ srcLine+SIZE[WORD];
};
FetchLast[0, fetchLastWord];
dstLine[0] ¬ MF[dstLine[0], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
};
IF lSA = 0 THEN { Inner0[] } ELSE { Inner1[] };
};
-- Empty rectangle guard: both dimensions must be nonzero.
-- With fSize = 0 and a zero destination bit offset, ndw would be 0 and the general
-- case would derive its interior word count from ndw-2; with sSize = 0 the per-line
-- loop below would decrement sSize below zero before testing it.
IF sSize # 0 AND fSize # 0 THEN {
IF dstBpl MOD bpw = 0 AND srcBpl MOD bpw = 0
THEN {
don't need to do whole setup for each line
DoUniformLines[sSize, dst.word, dst.bit, src.word, src.bit];
}
ELSE {
need setup for every line
DO
DoUniformLines[1, dst.word, dst.bit, src.word, src.bit];
IF (sSize ¬ sSize - 1) = 0 THEN EXIT;
-- advance both bit addresses by one line, renormalizing the bit offset into [0..bpw)
dst.word ¬ dst.word + WordFloorUnitsForBits[(dst.bit+dstBpl)];
dst.bit ¬ CARDINAL[(dst.bit+dstBpl)] MOD bpw;
src.word ¬ src.word + WordFloorUnitsForBits[(src.bit+srcBpl)];
src.bit ¬ CARDINAL[(src.bit+srcBpl)] MOD bpw;
ENDLOOP;
};
};
};
-- Forward6: XORs a rectangle of source bits into the destination (each destination
-- bit becomes dst XOR src), processing the words of each line in increasing address order.
--   dst, src: bit addresses (word pointer plus bit offset) of the first bit of the first line
--   dstBpl, srcBpl: number of bits to advance each address from one line to the next
--   sSize: number of lines to process
--   fSize: number of bits to process on each line
-- Nothing is written when either sSize or fSize is zero.
Forward6: PROC [dst: BitAddress, src: BitAddress, dstBpl, srcBpl, sSize, fSize: CARDINAL] ~ {
-- MF: the masked form of the operation, used for the partial words at the edges of a line.
-- The expression reduces to d XOR (s AND mask), so destination bits where mask is zero
-- are returned unchanged.
MF: PROC [d, s, mask: WORD] RETURNS [WORD] ~ INLINE {
RETURN [
BITXOR[BITAND[BITXOR[BITXOR[d, s], d], mask], d]
]
};
-- DoUniformLines: processes count lines that all share the same starting bit offsets;
-- successive lines are reached by adding dRast and sRast to the word pointers.
-- The line loops test count after the work is done, so at least one line is always processed.
DoUniformLines: PROC [count: CARDINAL, dstPtr: WordPtr, dstBit: BitOff, srcPtr: WordPtr, srcBit: BitOff] ~ {
dRast: CARD ~ WordFloorUnitsForBits[dstBpl];
sRast: CARD ~ WordFloorUnitsForBits[srcBpl];
ndw: CARDINAL ~ WordsForBits[dstBit + fSize];
number of destination words per (this) line
lMask: WORD ~ rightJustifiedOnes[bpw-dstBit];
mask for the leftmost dest word (ones where bits are to go)
rMask: WORD ~ RightJustifiedZeros[
(LOOPHOLE[bpw-dstBit-fSize, CARDINAL]) MOD bpw];
mask for the rightmost dest word
lSA: BitOff ~ (LOOPHOLE[srcBit-dstBit, CARDINAL]) MOD bpw; -- left shift amount
w: WORD ¬ 0; -- source word, aligned with destination
dstLine: RawWordsPtr ¬ NIL; -- destination line word address
srcLine: RawWordsPtr ¬ NIL; -- source line word address
-- Inner0 is selected when lSA = 0: source words are used exactly as fetched.
Inner0: PROC = INLINE {
Aligned case is simpler
fetchLastWord: BOOL ~ TRUE;
true if last source word needs to be fetched
FetchNext: PROC ~ INLINE {
fetches the next aligned source bits, and bumps source pointer
w ¬ srcLine[0];
srcLine ¬ srcLine+SIZE[WORD];
};
FetchNextOff: PROC [wordOffset: CARDINAL] ~ INLINE {
fetches the word at the given offset, no pointer change
w ¬ srcLine[wordOffset];
};
-- In this aligned variant the fetch argument is ignored; the word is always read.
FetchLast: PROC [wordOffset: CARDINAL, fetch: BOOL] ~ INLINE {
fetches the final source bits on a line, avoiding a spurious fetch
w ¬ srcLine[wordOffset];
};
BBLineSetup: PROC ~ INLINE {
dstLine ¬ LOOPHOLE[dstPtr];
srcLine ¬ LOOPHOLE[srcPtr];
};
-- Dispatch on the number of destination words per line; 1 through 4 are unrolled by hand.
SELECT ndw FROM
1 => {
Special encoding of one destination word case for speed
bothMask: WORD ~ BITAND[lMask, rMask];
IF fetchLastWord
THEN
DO
BBLineSetup[];
FetchLast[0, TRUE];
dstLine[0] ¬ MF[dstLine[0], w, bothMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP
ELSE
DO
BBLineSetup[];
FetchLast[0, FALSE];
dstLine[0] ¬ MF[dstLine[0], w, bothMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP
};
2 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchLast[1, fetchLastWord];
dstLine[1] ¬ MF[dstLine[1], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
3 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchNextOff[1];
-- interior word: every bit is affected, so no mask is needed
dstLine[1] ¬ BITXOR[dstLine[1], w];
FetchLast[2, fetchLastWord];
dstLine[2] ¬ MF[dstLine[2], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
4 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchNextOff[1];
dstLine[1] ¬ BITXOR[dstLine[1], w];
FetchNextOff[2];
dstLine[2] ¬ BITXOR[dstLine[2], w];
FetchLast[3, fetchLastWord];
dstLine[3] ¬ MF[dstLine[3], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
ENDCASE => {
-- General case: masked first word, nw unmasked interior words, masked last word.
checkUnroll: [2..256] = 4;
DO
nw: CARDINAL ¬ LOOPHOLE[ndw-2, CARDINAL];
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
dstLine ¬ dstLine+SIZE[WORD];
srcLine ¬ srcLine+SIZE[WORD];
-- interior words, four per iteration
WHILE nw >= 4 DO
FetchNextOff[0];
dstLine[0] ¬ BITXOR[dstLine[0], w];
FetchNextOff[1];
dstLine[1] ¬ BITXOR[dstLine[1], w];
FetchNextOff[2];
dstLine[2] ¬ BITXOR[dstLine[2], w];
FetchNextOff[3];
dstLine[3] ¬ BITXOR[dstLine[3], w];
dstLine ¬ dstLine+SIZE[WORD]*4;
srcLine ¬ srcLine+SIZE[WORD]*4;
nw ¬ nw - 4;
ENDLOOP;
-- at most three interior words remain here
IF nw >= 2 THEN {
FetchNextOff[0];
dstLine[0] ¬ BITXOR[dstLine[0], w];
FetchNextOff[1];
dstLine[1] ¬ BITXOR[dstLine[1], w];
dstLine ¬ dstLine+SIZE[WORD]*2;
srcLine ¬ srcLine+SIZE[WORD]*2;
nw ¬ nw - 2;
};
IF nw = 1 THEN {
One trailing word
FetchNextOff[0];
dstLine[0] ¬ BITXOR[dstLine[0], w];
dstLine ¬ dstLine+SIZE[WORD];
srcLine ¬ srcLine+SIZE[WORD];
};
FetchLast[0, fetchLastWord];
dstLine[0] ¬ MF[dstLine[0], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
};
-- Inner1 is selected when lSA # 0: each aligned word w is assembled from two
-- adjacent source words, hi shifted left by lSA and lo shifted right by rSA.
Inner1: PROC = INLINE {
hi: WORD; -- left unshifted source word
lo: WORD ¬ 0; -- right unshifted source word
rSA: BitOff = bpw - lSA;
amount to shift source words right to line them up
nsw: CARDINAL = WordsForBits[srcBit + fSize];
fetchLastWord: BOOL = IF srcBit >= dstBit THEN (nsw>ndw) ELSE (nsw>=ndw);
true if last source word needs to be fetched
FetchNext: PROC ~ INLINE {
fetches the next aligned source bits, and bumps source pointer
hi ¬ lo;
lo ¬ srcLine[0];
srcLine ¬ srcLine+SIZE[WORD];
w ¬ BITLSHIFT[hi, lSA]+BITRSHIFT[lo, rSA]
};
FetchNextOff: PROC [wordOffset: CARDINAL] ~ INLINE {
fetches the next word at the given offset, no pointer change
hi ¬ lo;
lo ¬ srcLine[wordOffset];
w ¬ BITLSHIFT[hi, lSA]+BITRSHIFT[lo, rSA]
};
FetchLast: PROC [wordOffset: CARDINAL, fetch: BOOL] ~ INLINE {
fetches the final source bits on a line, avoiding a spurious fetch
w ¬ BITLSHIFT[lo, lSA];
IF fetch THEN w ¬ w + BITRSHIFT[srcLine[wordOffset], rSA];
};
BBLineSetup: PROC ~ INLINE {
dstLine ¬ LOOPHOLE[dstPtr];
srcLine ¬ LOOPHOLE[srcPtr];
-- preload lo with the first source word when the source starts at or to the right of the destination offset
IF srcBit >= dstBit THEN FetchNext[];
};
-- Dispatch on the number of destination words per line; 1 through 4 are unrolled by hand.
SELECT ndw FROM
1 => {
Special encoding of one destination word case for speed
bothMask: WORD ~ BITAND[lMask, rMask];
IF fetchLastWord
THEN
DO
BBLineSetup[];
FetchLast[0, TRUE];
dstLine[0] ¬ MF[dstLine[0], w, bothMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP
ELSE
DO
BBLineSetup[];
FetchLast[0, FALSE];
dstLine[0] ¬ MF[dstLine[0], w, bothMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP
};
2 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchLast[1, fetchLastWord];
dstLine[1] ¬ MF[dstLine[1], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
3 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchNextOff[1];
dstLine[1] ¬ BITXOR[dstLine[1], w];
FetchLast[2, fetchLastWord];
dstLine[2] ¬ MF[dstLine[2], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
4 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchNextOff[1];
dstLine[1] ¬ BITXOR[dstLine[1], w];
FetchNextOff[2];
dstLine[2] ¬ BITXOR[dstLine[2], w];
FetchLast[3, fetchLastWord];
dstLine[3] ¬ MF[dstLine[3], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
ENDCASE => {
-- General case: masked first word, nw unmasked interior words, masked last word.
checkUnroll: [2..256] = 4;
DO
nw: CARDINAL ¬ LOOPHOLE[ndw-2, CARDINAL];
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
dstLine ¬ dstLine+SIZE[WORD];
srcLine ¬ srcLine+SIZE[WORD];
WHILE nw >= 4 DO
FetchNextOff[0];
dstLine[0] ¬ BITXOR[dstLine[0], w];
FetchNextOff[1];
dstLine[1] ¬ BITXOR[dstLine[1], w];
FetchNextOff[2];
dstLine[2] ¬ BITXOR[dstLine[2], w];
FetchNextOff[3];
dstLine[3] ¬ BITXOR[dstLine[3], w];
dstLine ¬ dstLine+SIZE[WORD]*4;
srcLine ¬ srcLine+SIZE[WORD]*4;
nw ¬ nw - 4;
ENDLOOP;
IF nw >= 2 THEN {
FetchNextOff[0];
dstLine[0] ¬ BITXOR[dstLine[0], w];
FetchNextOff[1];
dstLine[1] ¬ BITXOR[dstLine[1], w];
dstLine ¬ dstLine+SIZE[WORD]*2;
srcLine ¬ srcLine+SIZE[WORD]*2;
nw ¬ nw - 2;
};
IF nw = 1 THEN {
One trailing word
FetchNextOff[0];
dstLine[0] ¬ BITXOR[dstLine[0], w];
dstLine ¬ dstLine+SIZE[WORD];
srcLine ¬ srcLine+SIZE[WORD];
};
FetchLast[0, fetchLastWord];
dstLine[0] ¬ MF[dstLine[0], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
};
IF lSA = 0 THEN { Inner0[] } ELSE { Inner1[] };
};
-- Empty rectangle guard: both dimensions must be nonzero.
-- With fSize = 0 and a zero destination bit offset, ndw would be 0 and the general
-- case would derive its interior word count from ndw-2; with sSize = 0 the per-line
-- loop below would decrement sSize below zero before testing it.
IF sSize # 0 AND fSize # 0 THEN {
IF dstBpl MOD bpw = 0 AND srcBpl MOD bpw = 0
THEN {
don't need to do whole setup for each line
DoUniformLines[sSize, dst.word, dst.bit, src.word, src.bit];
}
ELSE {
need setup for every line
DO
DoUniformLines[1, dst.word, dst.bit, src.word, src.bit];
IF (sSize ¬ sSize - 1) = 0 THEN EXIT;
-- advance both bit addresses by one line, renormalizing the bit offset into [0..bpw)
dst.word ¬ dst.word + WordFloorUnitsForBits[(dst.bit+dstBpl)];
dst.bit ¬ CARDINAL[(dst.bit+dstBpl)] MOD bpw;
src.word ¬ src.word + WordFloorUnitsForBits[(src.bit+srcBpl)];
src.bit ¬ CARDINAL[(src.bit+srcBpl)] MOD bpw;
ENDLOOP;
};
};
};
Forward7: PROC [dst: BitAddress, src: BitAddress, dstBpl, srcBpl, sSize, fSize: CARDINAL] ~ {
MF: PROC [d, s, mask: WORD] RETURNS [WORD] ~ INLINE {
RETURN [
BITXOR[BITAND[BITXOR[BITXOR[d, BITNOT[ s]], d], mask], d]
]
};
DoUniformLines: PROC [count: CARDINAL, dstPtr: WordPtr, dstBit: BitOff, srcPtr: WordPtr, srcBit: BitOff] ~ {
dRast: CARD ~ WordFloorUnitsForBits[dstBpl];
sRast: CARD ~ WordFloorUnitsForBits[srcBpl];
ndw: CARDINAL ~ WordsForBits[dstBit + fSize];
number of destination words per (this) line
lMask: WORD ~ rightJustifiedOnes[bpw-dstBit];
mask for the leftmost dest word (ones where bits are to go)
rMask: WORD ~ RightJustifiedZeros[
(LOOPHOLE[bpw-dstBit-fSize, CARDINAL]) MOD bpw];
mask for the rightmost dest word
lSA: BitOff ~ (LOOPHOLE[srcBit-dstBit, CARDINAL]) MOD bpw; -- left shift amount
w: WORD ¬ 0; -- source word, aligned with destination
dstLine: RawWordsPtr ¬ NIL; -- destination line word address
srcLine: RawWordsPtr ¬ NIL; -- source line word address
Inner0: PROC = INLINE {
Aligned case is simpler
fetchLastWord: BOOL ~ TRUE;
true if last source word needs to be fetched
FetchNext: PROC ~ INLINE {
fetches the next aligned source bits, and bumps source pointer
w ¬ srcLine[0];
srcLine ¬ srcLine+SIZE[WORD];
};
FetchNextOff: PROC [wordOffset: CARDINAL] ~ INLINE {
fetches the word at the given offset, no pointer change
w ¬ srcLine[wordOffset];
};
FetchLast: PROC [wordOffset: CARDINAL, fetch: BOOL] ~ INLINE {
fetches the final source bits on a line, avoiding a spurious fetch
w ¬ srcLine[wordOffset];
};
BBLineSetup: PROC ~ INLINE {
dstLine ¬ LOOPHOLE[dstPtr];
srcLine ¬ LOOPHOLE[srcPtr];
};
SELECT ndw FROM
1 => {
Special encoding of one destination word case for speed
bothMask: WORD ~ BITAND[lMask, rMask];
IF fetchLastWord
THEN
DO
BBLineSetup[];
FetchLast[0, TRUE];
dstLine[0] ¬ MF[dstLine[0], w, bothMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP
ELSE
DO
BBLineSetup[];
FetchLast[0, FALSE];
dstLine[0] ¬ MF[dstLine[0], w, bothMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP
};
2 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchLast[1, fetchLastWord];
dstLine[1] ¬ MF[dstLine[1], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
3 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchNextOff[1];
dstLine[1] ¬ BITXOR[dstLine[1], BITNOT[ w]];
FetchLast[2, fetchLastWord];
dstLine[2] ¬ MF[dstLine[2], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
4 => {
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchNextOff[1];
dstLine[1] ¬ BITXOR[dstLine[1], BITNOT[ w]];
FetchNextOff[2];
dstLine[2] ¬ BITXOR[dstLine[2], BITNOT[ w]];
FetchLast[3, fetchLastWord];
dstLine[3] ¬ MF[dstLine[3], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
ENDCASE => {
checkUnroll: [2..256] = 4;
DO
nw: CARDINAL ¬ LOOPHOLE[ndw-2, CARDINAL];
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
dstLine ¬ dstLine+SIZE[WORD];
srcLine ¬ srcLine+SIZE[WORD];
WHILE nw >= 4 DO
FetchNextOff[0];
dstLine[0] ¬ BITXOR[dstLine[0], BITNOT[ w]];
FetchNextOff[1];
dstLine[1] ¬ BITXOR[dstLine[1], BITNOT[ w]];
FetchNextOff[2];
dstLine[2] ¬ BITXOR[dstLine[2], BITNOT[ w]];
FetchNextOff[3];
dstLine[3] ¬ BITXOR[dstLine[3], BITNOT[ w]];
dstLine ¬ dstLine+SIZE[WORD]*4;
srcLine ¬ srcLine+SIZE[WORD]*4;
nw ¬ nw - 4;
ENDLOOP;
IF nw >= 2 THEN {
FetchNextOff[0];
dstLine[0] ¬ BITXOR[dstLine[0], BITNOT[ w]];
FetchNextOff[1];
dstLine[1] ¬ BITXOR[dstLine[1], BITNOT[ w]];
dstLine ¬ dstLine+SIZE[WORD]*2;
srcLine ¬ srcLine+SIZE[WORD]*2;
nw ¬ nw - 2;
};
IF nw = 1 THEN {
One trailing word
FetchNextOff[0];
dstLine[0] ¬ BITXOR[dstLine[0], BITNOT[ w]];
dstLine ¬ dstLine+SIZE[WORD];
srcLine ¬ srcLine+SIZE[WORD];
};
FetchLast[0, fetchLastWord];
dstLine[0] ¬ MF[dstLine[0], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
};
-- Inner1: per-line transfer loop for the case lSA # 0 (it is selected by the
-- "IF lSA = 0 THEN ... ELSE { Inner1[] }" dispatch that follows this procedure).
-- Each aligned source word w is assembled from two adjacent source words (hi, lo)
-- by shifting left by lSA and right by rSA.  Edge destination words are written
-- through MF with lMask / rMask; interior words are written as
-- BITXOR[dst, BITNOT[w]].  The names w, lSA, ndw, lMask, rMask, MF, count,
-- dstPtr, srcPtr, dstLine, srcLine, dRast and sRast come from the enclosing scope.
Inner1: PROC = INLINE {
hi: WORD; -- left unshifted source word
lo: WORD ¬ 0; -- right unshifted source word
-- NOTE(review): lo is zeroed only here, not per line; when srcBit < dstBit the
-- first fetch of a later line shifts in the previous line's lo.  Presumably those
-- bits always fall outside lMask -- confirm against the definitions of lSA and lMask.
rSA: BitOff = bpw - lSA;
amount to shift source words right to line them up
nsw: CARDINAL = WordsForBits[srcBit + fSize];
-- nsw: count of words spanned by fSize bits starting at bit srcBit (source side).
fetchLastWord: BOOL = IF srcBit >= dstBit THEN (nsw>ndw) ELSE (nsw>=ndw);
true if last source word needs to be fetched
FetchNext: PROC ~ INLINE {
fetches the next aligned source bits, and bumps source pointer
-- hi takes the previous lo, lo is reloaded from srcLine[0], then w is recomputed.
hi ¬ lo;
lo ¬ srcLine[0];
srcLine ¬ srcLine+SIZE[WORD];
w ¬ BITLSHIFT[hi, lSA]+BITRSHIFT[lo, rSA]
};
FetchNextOff: PROC [wordOffset: CARDINAL] ~ INLINE {
fetches the next word at the given offset, no pointer change
-- Same hi/lo rotation as FetchNext, but reads srcLine[wordOffset] and leaves
-- srcLine untouched; callers advance srcLine themselves where needed.
hi ¬ lo;
lo ¬ srcLine[wordOffset];
w ¬ BITLSHIFT[hi, lSA]+BITRSHIFT[lo, rSA]
};
FetchLast: PROC [wordOffset: CARDINAL, fetch: BOOL] ~ INLINE {
fetches the final source bits on a line, avoiding a spurious fetch
-- w starts as lo shifted left by lSA; srcLine[wordOffset] is read and added in
-- only when fetch is TRUE.  hi and lo are not updated here.
w ¬ BITLSHIFT[lo, lSA];
IF fetch THEN w ¬ w + BITRSHIFT[srcLine[wordOffset], rSA];
};
BBLineSetup: PROC ~ INLINE {
-- Resets the per-line pointers from dstPtr / srcPtr.  When srcBit >= dstBit one
-- source word is fetched up front (which also advances srcLine by one word).
dstLine ¬ LOOPHOLE[dstPtr];
srcLine ¬ LOOPHOLE[srcPtr];
IF srcBit >= dstBit THEN FetchNext[];
};
-- Dispatch on ndw: 1..4 are written out straight-line, anything else takes the
-- general loop in ENDCASE.  Every arm processes one line per DO iteration and
-- tests count only after the line is done, so at least one line is always
-- processed.
SELECT ndw FROM
1 => {
Special encoding of one destination word case for speed
-- Only one destination word per line, so the left and right masks are intersected.
bothMask: WORD ~ BITAND[lMask, rMask];
-- The two loops below differ only in the constant passed as FetchLast's fetch
-- argument (presumably so the inlined test disappears from the loop -- TODO confirm).
IF fetchLastWord
THEN
DO
BBLineSetup[];
FetchLast[0, TRUE];
dstLine[0] ¬ MF[dstLine[0], w, bothMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP
ELSE
DO
BBLineSetup[];
FetchLast[0, FALSE];
dstLine[0] ¬ MF[dstLine[0], w, bothMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP
};
2 => {
-- Two destination words: left edge under lMask, right edge under rMask.
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchLast[1, fetchLastWord];
dstLine[1] ¬ MF[dstLine[1], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
3 => {
-- Three destination words: left edge, one unmasked interior word, right edge.
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchNextOff[1];
dstLine[1] ¬ BITXOR[dstLine[1], BITNOT[ w]];
FetchLast[2, fetchLastWord];
dstLine[2] ¬ MF[dstLine[2], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
4 => {
-- Four destination words: left edge, two unmasked interior words, right edge.
DO
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
FetchNextOff[1];
dstLine[1] ¬ BITXOR[dstLine[1], BITNOT[ w]];
FetchNextOff[2];
dstLine[2] ¬ BITXOR[dstLine[2], BITNOT[ w]];
FetchLast[3, fetchLastWord];
dstLine[3] ¬ MF[dstLine[3], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
ENDCASE => {
-- General case: left edge word, then the interior words in groups of 4, then 2,
-- then 1, and finally the right edge word.
-- checkUnroll is not referenced in this arm; its value equals the 4-word unroll
-- of the WHILE loop below (presumably a compile-time range check on the unroll
-- factor -- TODO confirm).
checkUnroll: [2..256] = 4;
DO
-- nw: interior words still to write on this line (ndw minus the two edge words).
nw: CARDINAL ¬ LOOPHOLE[ndw-2, CARDINAL];
BBLineSetup[];
FetchNextOff[0];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
-- FetchNextOff does not move srcLine, so both line pointers are stepped by hand.
dstLine ¬ dstLine+SIZE[WORD];
srcLine ¬ srcLine+SIZE[WORD];
WHILE nw >= 4 DO
FetchNextOff[0];
dstLine[0] ¬ BITXOR[dstLine[0], BITNOT[ w]];
FetchNextOff[1];
dstLine[1] ¬ BITXOR[dstLine[1], BITNOT[ w]];
FetchNextOff[2];
dstLine[2] ¬ BITXOR[dstLine[2], BITNOT[ w]];
FetchNextOff[3];
dstLine[3] ¬ BITXOR[dstLine[3], BITNOT[ w]];
dstLine ¬ dstLine+SIZE[WORD]*4;
srcLine ¬ srcLine+SIZE[WORD]*4;
nw ¬ nw - 4;
ENDLOOP;
-- At most 3 interior words remain here; handle a pair, then a possible single.
IF nw >= 2 THEN {
FetchNextOff[0];
dstLine[0] ¬ BITXOR[dstLine[0], BITNOT[ w]];
FetchNextOff[1];
dstLine[1] ¬ BITXOR[dstLine[1], BITNOT[ w]];
dstLine ¬ dstLine+SIZE[WORD]*2;
srcLine ¬ srcLine+SIZE[WORD]*2;
nw ¬ nw - 2;
};
IF nw = 1 THEN {
One trailing word
FetchNextOff[0];
dstLine[0] ¬ BITXOR[dstLine[0], BITNOT[ w]];
dstLine ¬ dstLine+SIZE[WORD];
srcLine ¬ srcLine+SIZE[WORD];
};
-- Right edge word: the extra source read happens only when fetchLastWord is TRUE.
FetchLast[0, fetchLastWord];
dstLine[0] ¬ MF[dstLine[0], w, rMask];
IF count <= 1 THEN EXIT;
count ¬ count - 1;
dstPtr ¬ dstPtr + dRast;
srcPtr ¬ srcPtr + sRast;
ENDLOOP;
};
};
IF lSA = 0 THEN { Inner0[] } ELSE { Inner1[] };
};
IF BITOR[sSize, fSize] # 0 THEN {
IF dstBpl MOD bpw = 0 AND srcBpl MOD bpw = 0
THEN {
don't need to do whole setup for each line
DoUniformLines[sSize, dst.word, dst.bit, src.word, src.bit];
}
ELSE {
need setup for every line
DO
DoUniformLines[1, dst.word, dst.bit, src.word, src.bit];
IF (sSize ¬ sSize - 1) = 0 THEN EXIT;
dst.word ¬ dst.word + WordFloorUnitsForBits[(dst.bit+dstBpl)];
dst.bit ¬ CARDINAL[(dst.bit+dstBpl)] MOD bpw;
src.word ¬ src.word + WordFloorUnitsForBits[(src.bit+srcBpl)];
src.bit ¬ CARDINAL[(src.bit+srcBpl)] MOD bpw;
ENDLOOP;
};
};
};
-- forwardOp: public dispatch table, indexed first by DstFunc and then by SrcFunc.
-- Each entry is a procedure taking (dst, src, dstBpl, srcBpl, sSize, fSize); the
-- initializer supplies four DstFunc rows of two SrcFunc entries each, filled with
-- Forward0 .. Forward7 in row-major order.
forwardOp: PUBLIC ARRAY DstFunc OF ARRAY SrcFunc OF PROC [dst: BitAddress, src: BitAddress, dstBpl, srcBpl, sSize, fSize: CARDINAL] ¬ [[Forward0, Forward1], [Forward2, Forward3], [Forward4, Forward5], [Forward6, Forward7]];
END.