-- File: LaurelByteBlt.mesa
-- Edited by Brotz, December 9, 1981 10:04 PM

-- Derived from
-- File: AltoMesaByteBlt.mesa, Last Edit: Taft December 2, 1981 4:05 PM

DIRECTORY
ByteBltDefs,
Stream USING [Block],
BitBltDefs USING [BBptr, BBTable, BITBLT],
Inline USING [BITAND, COPY];

LaurelByteBlt: PROGRAM
IMPORTS BitBltDefs, Inline
EXPORTS ByteBltDefs =
BEGIN

StartIndexGreaterThanStopIndexPlusOne: PUBLIC ERROR = CODE;

-- This implementation purportedly handles all cases of overlapping blocks.

ByteBlt: PUBLIC PROCEDURE [to, from: Stream.Block] RETURNS [nBytes: CARDINAL] =
BEGIN
toBase, fromBase: ORDERED POINTER TO PACKED ARRAY [0..0) OF [0..377B);
toByte, fromByte: [0..1];
nWords: CARDINAL;
nBytesLocal: CARDINAL; -- use for local purposes, since nBytes is returned.
-- This check is necessary since subtracting CARDINALs gives big numbers
IF to.startIndex>to.stopIndexPlusOne
OR from.startIndex>from.stopIndexPlusOne THEN
ERROR StartIndexGreaterThanStopIndexPlusOne;
nBytes ← MIN[to.stopIndexPlusOne-to.startIndex, from.stopIndexPlusOne-from.startIndex];
IF nBytes=0 THEN RETURN;
nBytesLocal ← nBytes;

-- Slow byte copy loop --
-- toBase ← LOOPHOLE[ShortenPointer[to.blockPointer]];
-- fromBase ← LOOPHOLE[ShortenPointer[from.blockPointer]];
-- IF toBase > fromBase THEN
-- FOR i: CARDINAL DECREASING IN [0 .. nBytesLocal) DO
-- toBase[to.startIndex + i] ← fromBase[from.startIndex + i]
-- ENDLOOP
-- ELSE
-- FOR i: CARDINAL IN [0 .. nBytesLocal) DO
-- toBase[to.startIndex + i] ← fromBase[from.startIndex + i]
-- ENDLOOP;
-- RETURN;
-- Beginning of old code.

-- adjust bases to reduce byte offsets to [0..1]
toBase ← LOOPHOLE[ShortenPointer[to.blockPointer], ORDERED POINTER] + to.startIndex/2;
fromBase ← LOOPHOLE[ShortenPointer[from.blockPointer], ORDERED POINTER] + from.startIndex/2;
toByte ← to.startIndex MOD 2;
fromByte ← from.startIndex MOD 2;
nWords ← nBytesLocal/2;
IF toByte=fromByte AND toBase NOT IN (fromBase .. fromBase+nWords] THEN
-- fast case: bytes are in phase, and either there is no overlap or
-- the block is being slid "down" in memory.
BEGIN
-- move the first odd byte (if any) to be sure that blocks are word aligned
IF toByte#0 THEN
BEGIN
toBase[toByte] ← fromBase[fromByte];
nBytesLocal ← nBytesLocal-1;
nWords ← nBytesLocal/2;
toBase ← toBase+1; -- know we just moved the odd byte of both words
fromBase ← fromBase+1;
END;
Inline.COPY[to: toBase, from: fromBase, nwords: nWords ];
IF (nBytesLocal MOD 2)#0 THEN
-- move the one and only remaining byte
toBase[nBytesLocal - 1] ← fromBase[nBytesLocal - 1];
END
ELSE
-- slow case: bytes are out of phase, or there is an overlap and the block
-- is being slid "up" in memory. Do it with BITBLT.
BEGIN
lineWidth: CARDINAL = 16; -- words per scan line: controls interrupt latency.
-- BITBLT is not interruptable except at the end of each scan line, so we
-- break things up into chunks in order to maintain reasonable interrupt latency
-- for the IO devices. It takes about 200microsec to move 50 bytes with the
-- display off.
bba: ARRAY [0..SIZE[BitBltDefs.BBTable]] OF WORD; -- NB: [...] gets us one extra
bbt: BitBltDefs.BBptr ← Inline.BITAND[BASE[bba]+1, 177776B]; -- even word
lines, tail: CARDINAL;
direction: {forward, backward} = -- forward means left-to-right, top-to-bottom
IF toBase<fromBase OR (toBase=fromBase AND toByte<fromByte)
THEN forward ELSE backward;
-- The main BITBLT moves a "rectangle" that is lineWidth words wide by as many
-- lines high as will fit. It cheats and actually references data beyond the
-- right-hand edge of the raster (that is, the "rectangle" may extend
-- beyond the right-hand edge of the "bitmap" for the source, destination, or
-- both blocks). This is not really legal, but we force it to work properly by
-- taking advantage of intimate knowledge about how the Alto BITBLT instruction
-- chooses the direction in which to work.
-- Precisely: if we are to move "down" in memory, force BITBLT to work
-- left-to-right and top-to-bottom in the "rectangle" by forcing dlx<slx and
-- dty<sty (and adjusting the base addresses appropriately); if we are to move
-- "up", do the reverse. This is because BITBLT makes decisions based entirely
-- on comparing dlx, slx, dty, and sty, without regard to the base addresses.
lines ← nBytesLocal/(2*lineWidth);
bbt↑ ← [
pad: 0, sourcealt: FALSE, destalt: FALSE,
sourcetype: block, function: replace, unused: ,
dbca: toBase, dbmr: lineWidth, dlx: 8*toByte, dty: 0, dw: , dh: ,
sbca: fromBase, sbmr: lineWidth, slx: 8*fromByte, sty: 0,
gray0: , gray1: , gray2: , gray3: ];
IF direction=forward
THEN { bbt.slx ← bbt.slx+16; bbt.sty ← 1; bbt.sbca ← bbt.sbca-(lineWidth+1) }
ELSE { bbt.dlx ← bbt.dlx+16; bbt.dty ← 1; bbt.dbca ← bbt.dbca-(lineWidth+1) };
-- The tail BITBLT moves a leftover line that is less than lineWidth words wide.
-- It must be done after the main BITBLT if direction=forward, before if backward.
-- Note: BITBLT is happy to be a slow no-op if asked to transfer zero bits.
tail ← nBytesLocal MOD (2*lineWidth); -- bytes left to move with second BITBLT
IF direction=backward THEN
BEGIN
bbt.dty ← bbt.dty+lines;
bbt.sty ← bbt.sty+lines;
bbt.dw ← 8*tail;
bbt.dh ← 1;
BitBltDefs.BITBLT[bbt];
bbt.dty ← bbt.dty-lines;
bbt.sty ← bbt.sty-lines;
END;
bbt.dw ← 16*lineWidth;
bbt.dh ← lines;
BitBltDefs.BITBLT[bbt];
IF direction=forward THEN
BEGIN
bbt.dty ← bbt.dty+lines;
bbt.sty ← bbt.sty+lines;
bbt.dw ← 8*tail;
bbt.dh ← 1;
BitBltDefs.BITBLT[bbt];
END;
END;
END; -- of ByteBlt --

HyperSpaceNotSupported: PUBLIC ERROR = CODE;
NilRejected: PUBLIC ERROR = CODE;

ShortenPointer: PUBLIC PROCEDURE [lp: LONG POINTER] RETURNS [sp: POINTER] =
BEGIN
LongPointer: TYPE = RECORD [p: POINTER, other: WORD];
myCopy: LongPointer ← LOOPHOLE[lp];
IF myCopy.other#0 THEN ERROR HyperSpaceNotSupported;
IF lp=NIL THEN ERROR NilRejected;
sp ← myCopy.p;
END; -- of ShortenPointer --


-- initialization
END. -- of LaurelByteBlt --