-- Tile procedure specialized by the template parameter f.
-- Writes sSize destination lines of fSize bits each, starting at dst. Each line
-- is combined with a single source word, src indexed by srcIndex, and that one
-- word is reused for every destination word of the line.
--   dst        address (word plus bit offset) of the first destination line
--   src        words of the tile, one word used per destination line
--   dstBpl     distance in bits from one destination line to the next
--   src0       index into src used for the first line
--   sSizeTile  srcIndex wraps back to 0 when it reaches this value
--   sSize      number of destination lines to write
--   fSize      number of destination bits to write on each line
-- NOTE(review): the line loops write a line before testing the count, so
-- sSize = 0 on entry is not handled. Confirm callers guarantee sSize >= 1.
Tile&f:
PROC [dst: BitAddress, src:
LONG
POINTER
TO RawWords, dstBpl, src0, sSizeTile, sSize, fSize:
CARDINAL] ~ {
dstLine: LONG POINTER TO ARRAY [0..&unroll) OF WORD; -- destination line word address
ndw: CARDINAL ¬ 0; -- number of destination words per (this) line
lMask: WORD; -- mask for the leftmost dest word (ones where bits are to go)
rMask: WORD; -- mask for the rightmost dest word
-- index into src of the tile word for the current line, starts at src0
srcIndex: CARDINAL ¬ src0;
-- Fetches the tile word for the current line, src at srcIndex.
-- Even values of f yield the word unchanged, odd values yield its bitwise complement.
-- The SELECT tests only f, so it is presumably resolved when the template is
-- expanded rather than at run time (TODO confirm).
SrcFetch:
PROC
RETURNS [
WORD] ~
INLINE {
RETURN[
SELECT f
MOD 2
FROM
0 => {src[srcIndex]};
1 => {BITNOT[src[srcIndex]]};
ENDCASE => ERROR;
]};
-- Combines a destination word d with a source word s as chosen by f:
--   0, 1  the result is s alone
--   2, 3  d AND s
--   4, 5  d OR s
--   6, 7  d XOR s
-- Any other value of f falls into the ENDCASE arm, which is ERROR.
-- The d parameter is emitted only under usesd (presumably a template-time flag
-- set for the functions that read the destination, TODO confirm), and the call
-- sites pass d under the same condition.
F:
PROC [
IF usesd
THEN {
d,
};
s:
WORD]
RETURNS [
WORD] ~
INLINE {
RETURN[
SELECT f
FROM
0, 1 => {s};
2, 3 => {BITAND[d, s]};
4, 5 => {BITOR[d, s]};
6, 7 => {BITXOR[d, s]};
ENDCASE => ERROR;
]};
-- Masked form of F: returns d with only the bits selected by mask updated.
-- When f/2 = 2 (f is 4 or 5, the OR functions) the result is d OR (mask AND s).
-- Otherwise the result is ((F XOR d) AND mask) XOR d, which takes the bits of
-- the F result where mask is one and the bits of d where mask is zero.
MF:
PROC [d, s, mask:
WORD]
RETURNS [
WORD] ~
INLINE {
RETURN [
IF f/2 = 2
THEN {
BITOR[d, BITAND[mask, s]]
}
ELSE
{
BITXOR[BITAND[BITXOR[F[IF usesd THEN {d, };s], d], mask], d]
};
]
};
-- Computes the per-line geometry from dst.bit and fSize and stores it in the
-- enclosing variables ndw, lMask and rMask.
-- WordsForBits, rightJustifiedOnes, RightJustifiedZeros and bpw are defined
-- outside this procedure.
LineSetup:
PROC ~
INLINE {
-- number of destination words covered by fSize bits starting at bit dst.bit
ndw ¬ WordsForBits[dst.bit + fSize];
lMask ¬ rightJustifiedOnes[bpw-dst.bit];
-- NOTE(review): bpw - dst.bit - fSize presumably goes out of CARDINAL range when
-- the line spans more than one word, hence the LOOPHOLE before MOD. Confirm.
rMask ¬ RightJustifiedZeros[(LOOPHOLE[bpw-dst.bit-fSize, CARDINAL]) MOD bpw];
};
-- Writes one line that fits entirely in a single destination word.
-- Both edge masks apply to that word, so their intersection selects the bits to change.
BBLine1:
PROC ~
INLINE {
-- only one destination word
dstLine[0] ¬ MF[dstLine[0], SrcFetch[], BITAND[lMask, rMask]];
};
-- Generator loop: defines one fixed-width line procedure for each u from 2
-- through unroll, presumably expanded by the template processor (TODO confirm).
-- Each generated procedure writes a line of exactly u destination words.
FOR u:
INT
IN [2..unroll]
DO
BBLine&u:
PROC ~
INLINE {
-- only &u destination words
-- the source word is fetched once and reused for every destination word
w: WORD ~ SrcFetch[];
-- first word: only the bits under lMask change
dstLine[0] ¬ MF[dstLine[0], w, lMask];
-- interior words, indices 1 up to but not including u-1, are written whole
FOR j:
INT
IN [1..u-1)
DO
dstLine[&j] ¬ F[IF usesd THEN {dstLine[&j], };w];
ENDLOOP;
-- last word: only the bits under rMask change
dstLine[&u-1] ¬ MF[dstLine[&u-1], w, rMask];
};
ENDLOOP;
-- Writes one line of ndw destination words for the general case.
-- Order of work: first word under lMask, then the ndw-2 interior words written
-- whole (in groups of unroll, then the remainder one at a time), then the last
-- word under rMask. dstLine is advanced as it goes and is left pointing at the
-- last word of the line.
-- NOTE(review): the interior count is ndw-2, so this assumes ndw >= 2. BBLine
-- sends every ndw other than 1 here, which would include ndw = 0. Confirm that
-- cannot occur.
BBLineN:
PROC ~
INLINE {
-- many destination words
-- the source word is fetched once and reused for every destination word
w: WORD ~ SrcFetch[];
dstLine[0] ¬ MF[dstLine[0], w, lMask];
-- step past the first word
dstLine ¬ dstLine+SIZE[WORD];
-- full groups of unroll interior words
THROUGH [0..
CARDINAL[ndw-2] / &unroll)
DO
FOR j:
INT
IN [0..unroll)
DO
dstLine[&j] ¬ F[IF usesd THEN {dstLine[&j], };w];
ENDLOOP;
dstLine ¬ dstLine+SIZE[ARRAY [0..&unroll) OF WORD];
ENDLOOP;
-- interior words left over after the full groups, one per iteration
THROUGH [0..
CARDINAL[ndw-2]
MOD &unroll)
DO
dstLine[0] ¬ F[IF usesd THEN {dstLine[0], };w];
dstLine ¬ dstLine+SIZE[WORD];
ENDLOOP;
-- last word: only the bits under rMask change
dstLine[0] ¬ MF[dstLine[0], w, rMask];
};
-- Writes one line using the current ndw: the single-word routine when ndw = 1,
-- the general multi-word routine for every other value.
BBLine: PROC ~ INLINE { IF ndw = 1 THEN BBLine1[] ELSE BBLineN[] };
-- Main line loops. Two cases, depending on whether dstBpl is a whole number of words.
-- Every loop writes a line first, then exits once sSize has counted down to zero,
-- and only otherwise advances dst and srcIndex (wrapping srcIndex at sSizeTile).
IF
CARDINAL[dstBpl]
MOD bpw = 0
THEN {
-- don't need to do whole setup for each line
-- only dst.word changes between lines in this branch, so dst.bit, ndw and the
-- masks stay the same and LineSetup runs once
-- dRast is the amount added to dst.word per line
dRast: CARD ~ WordFloorUnitsForBits[dstBpl];
LineSetup[];
-- dispatch once on ndw: values from 1 through unroll use the matching
-- fixed-width line procedure, anything else uses the general BBLine
SELECT ndw
FROM
FOR u:
INT
IN [1..unroll]
DO
&u => {
DO
dstLine ¬ LOOPHOLE[dst.word];
BBLine&u[];
IF (sSize ¬ sSize - 1) = 0 THEN EXIT;
dst.word ¬ dst.word + dRast;
srcIndex ¬ srcIndex + 1;
IF srcIndex = sSizeTile THEN srcIndex ¬ 0;
ENDLOOP;
};
ENDLOOP;
ENDCASE => {
DO
dstLine ¬ LOOPHOLE[dst.word];
BBLine[];
IF (sSize ¬ sSize - 1) = 0 THEN EXIT;
dst.word ¬ dst.word + dRast;
srcIndex ¬ srcIndex + 1;
IF srcIndex = sSizeTile THEN srcIndex ¬ 0;
ENDLOOP;
};
}
ELSE {
-- need setup for every line
-- dst.bit changes from line to line here, and ndw and the masks depend on it
DO
LineSetup[];
dstLine ¬ LOOPHOLE[dst.word];
BBLine[];
IF (sSize ¬ sSize - 1) = 0 THEN EXIT;
-- advance dst by dstBpl bits: presumably the whole words of dst.bit + dstBpl
-- go into dst.word and the remainder MOD bpw stays in dst.bit (TODO confirm
-- against WordFloorUnitsForBits)
dst.word ¬ dst.word + WordFloorUnitsForBits[(dst.bit+dstBpl)];
dst.bit ¬ CARDINAL[(dst.bit+dstBpl)] MOD bpw;
srcIndex ¬ srcIndex + 1;
IF srcIndex = sSizeTile THEN srcIndex ¬ 0;
ENDLOOP;
};
};