*-----------------------------------------------------------
Title[Block.mc...November 8, 1982  9:29 AM...Taft];
* Block Transfers (PrincOps, chapter 8), except BITBLT
*-----------------------------------------------------------
%

	CONTENTS, by order of occurrence

Word boundary block transfers
	BLT		Block Transfer
	BLTL		Block Transfer Long
	BLTLR		Block Transfer Long Reversed
	BLTC		Block Transfer Code
	BLTCL		Block Transfer Code Long
	CKSUM		Checksum

Block comparisons
	BLEL		Block Equal Long
	BLECL		Block Equal Code Long

Byte boundary block transfers
	BYTBLT		Byte Block Transfer (not implemented)
	BYTBLTR		Byte Block Transfer Reversed (not implemented)

Bit boundary block transfers
	BITBLT		Bit Block Transfer (in BitBlt.mc)
	TXTBLT		Text Block Transfer (not implemented)
%

TopLevel;

*-----------------------------------------------------------
IFUR[BLT, 1, MemBase[BR34]];			* Block Transfer
* DO
*   dest: POINTER ← Pop[]; count: CARDINAL ← Pop[]; source: POINTER ← Pop[];
*   IF count=0 THEN EXIT;
*   StoreMds[dest]↑ ← FetchMds[source]↑;
*   Push[source+1]; Push[count-1]; Push[dest+1];
*   IF InterruptPending[] THEN {PC ← savedPC; EXIT};
*   ENDLOOP;
*
* Entry: MemBase = BR34; stack holds source, count, dest (dest on top).
* This entry sequence only prepares the base registers and RTemp0, then
* joins the code shared with BLTC at BLTCommon, leaving StkP two below dest
* (i.e. at source) and MemBase flipped to the other BR of the pair.
*-----------------------------------------------------------

* Set up both BR34 and BR35 as copies of MDS
	T← A0, BRHi← MDSHi;
	T← Stack&-2, BRLo← T;		* T← dest (BRLo presumably gets the zero from the line above)
	RTemp0← T, FlipMemBase;		* RTemp0← dest
	T← A0, BRHi← MDSHi;		* Repeat the setup for the second BR
	BRLo← T, Branch[BLTCommon];


*-----------------------------------------------------------
IFUR[BLTL, 1, MemBase[BR34]];			* Block Transfer Long
* DO
*   dest: LONG POINTER ← PopLong[]; count: CARDINAL ← Pop[];
*   source: LONG POINTER ← PopLong[];
*   IF count=0 THEN EXIT;
*   Store[dest]↑ ← Fetch[source]↑;
*   PushLong[source+1]; Push[count-1]; PushLong[dest+1];
*   IF InterruptPending[] THEN {PC ← savedPC; EXIT};
*   ENDLOOP;
*
* Stack layout on entry (bottom to top): source low, source high, count,
* dest low, dest high.  The long pointers are loaded directly into the
* base registers, so both base-relative offsets (RTemp0, RTemp1) start at 0.
*-----------------------------------------------------------

	BRHi← Stack&-1;			* BR34← dest
	BRLo← Stack&-1;
	T← Stack&-2, FlipMemBase;	* T← count
	BRLo← Stack&+1;			* BR35← source
	BRHi← Stack&+3;			* StkP ends up back at high word of dest
	RTemp0← A0;			* Offsets zero relative to BRs
	RTemp1← A0, Call[BLTSetupTransfer];

* See comments under BLTSetupTransfer for how this loop works.
* At top of loop, StkP addresses high word of dest.
* The pointers on the stack are advanced by the number of words just moved,
* computed as (old count on stack) - (remaining count returned in T).
Subroutine;
BLTLLoop:
	StkP-2, CoReturn;		* StkP at count; transfer one munch
	T← (Stack&-2)-(Q← T),		* T← words done this time, Q← words remaining
		Branch[BLTLDone, ALU=0];
	Stack&+1← (Stack&+1)+T;		* Advance source pointer on stack
	Stack&+1← A← Stack&+1, XorSavedCarry; * Propagate carry into source high word
	Stack&+1← Q;			* Update count on stack
	Stack&+1← (Stack&+1)+T;		* Advance dest pointer on stack
	Stack← A← Stack, XorSavedCarry, Branch[BLTLLoop]; * Carry into dest high word
TopLevel;

* Arrive here with StkP at the low word of source (the instruction carrying
* the branch has already stepped StkP down by 2 from count).
BLTLDone:
	StkP-1, NextOpcode;

*-----------------------------------------------------------
NewOp; ESCEntry[BLTLR];			* Block Transfer Long Reversed
* DO
*   dest: LONG POINTER ← PopLong[]; count: CARDINAL ← Pop[];
*   source: LONG POINTER ← PopLong[];
*   IF count=0 THEN EXIT;
*   Store[dest+count-1]↑ ← Fetch[source+count-1]↑;
*   PushLong[source]; Push[count-1]; PushLong[dest];
*   IF InterruptPending[] THEN {PC ← savedPC; EXIT};
*   ENDLOOP;
*-----------------------------------------------------------

* This is implemented in-line and is not optimized for high transfer rate.
* Making the common code handle BLTs in both directions would make BLTSetupTransfer
* a lot more complicated and slow down the other (presumably more common) cases.
* BRHiGetsStackPop is defined outside this section; presumably it performs
* BRHi← Stack&-1 and returns -- confirm against its definition.
* NOTE(review): no explicit count=0 test is visible in this section, although
* the specification above calls for one; with count=0 T starts at 177777.
* Confirm whether that case is filtered elsewhere.
	MemBase← BR34, Call[BRHiGetsStackPop]; * BR34← dest
	BRLo← Stack&-1;
	T← (Stack&-1)-1, FlipMemBase,	* T← count-1
		Call[BRHiGetsStackPop];	* BR35← source
	BRLo← Stack&+2;			* StkP now addresses count

* T = offset of the word being moved.  Each pass writes T+1 into the count
* word on the stack, so the stack always describes the work still to do.
BLTLRLoop:
	Stack← (Fetch← T)+1, FlipMemBase; * Fetch source word; count on stack← T+1
	PD← T, Branch[BLTLRInterrupt, Reschedule]; * ALU← T for the loop test below
BLTLRNoInterrupt:
	T← (Store← T)-1, DBuf← MD, FlipMemBase, * Store word at dest offset T, then step down
		Branch[BLTLRLoop, ALU#0]; * Continue unless offset 0 was just stored

	T← MD, StkP-3, NextOpcode;	* Done: StkP moves from count down past source

* Here if an interrupt is (possibly) pending.
* The intermediate state (just the count) is on the stack,
* so it is OK to exit to the interrupt handler at this point.
BLTLRInterrupt:
	T← MD, Call[InterruptLongRunningOpcode];
	T← (Stack)-1, Branch[BLTLRNoInterrupt]; * Interrupt not pending after all

*-----------------------------------------------------------
IFUR[BLTC, 1, MemBase[CB]];		* Block Transfer Code
* DO
*   dest: POINTER ← Pop[]; count: CARDINAL ← Pop[]; source: CARDINAL ← Pop[];
*   IF count=0 THEN EXIT;
*   StoreMds[dest]↑ ← ReadCode[source];
*   Push[source+1]; Push[count-1]; Push[dest+1];
*   IF InterruptPending[] THEN {PC ← savedPC; EXIT};
*   ENDLOOP;
*
* Stack layout on entry (bottom to top): source, count, dest.
* The tail of this opcode (from BLTCommon on) is shared with BLT.
*-----------------------------------------------------------

* We will use MDS and CB as dest and source BRs, since they are an even-odd pair.
	RTemp0← Stack&-2;		* dest
BLTCommon:	* BLT enters here with RTemp0 = dest, StkP at source, MemBase = source BR
	RTemp1← Stack&+1;		* source
	T← Stack&+1, Call[BLTSetupTransfer]; * T← count

* See comments under BLTSetupTransfer for how this loop works.
* At top of loop, StkP addresses dest.
* After each munch the updated RTemp0 (dest), T (count) and RTemp1 (source)
* are written back over the three argument words.
Subroutine;
BLTLoop:
	CoReturn;			* Transfer one munch
	Q← RTemp0, Branch[BLTDone, ALU=0]; * Q← updated dest; quit if count now zero
	Stack&-1← Q;			* Put intermediate state on stack
	Stack&-1← T;			* Remaining count
	T← RTemp1;
	Stack&+2← T, Branch[BLTLoop];	* Updated source; StkP back at dest
TopLevel;

BLTDone:
	StkP-3, NextOpcode;		* Discard source, count, dest


*-----------------------------------------------------------
IFUR[BLTCL, 1, MemBase[BR34]];			* Block Transfer Code Long
* DO
*   dest: LONG POINTER ← PopLong[]; count: CARDINAL ← Pop[];
*   source: CARDINAL ← Pop[];
*   IF count=0 THEN EXIT;
*   Store[dest]↑ ← ReadCode[source];
*   Push[source+1]; Push[count-1]; PushLong[dest+1];
*   IF InterruptPending[] THEN {PC ← savedPC; EXIT};
*   ENDLOOP;
*
* Stack layout on entry (bottom to top): source, count, dest low, dest high.
* BR34 is the dest BR and BR35 the source BR, as BLTSetupTransfer requires
* (even-odd pair, called with MemBase = source BR).
*-----------------------------------------------------------

* Load dest into BR34 and set up BR35 as copy of CB
	BRHi← Stack&-1;			* BR34← dest
	BRLo← Stack&-2;
	T← Stack&+1, MemBase← CB;	* T← source
	RTemp1← T, DummyRef← 0S;	* RTemp1← source, read CB
* The copy of CB must go into BR35: BR34 already holds dest and must not be
* overwritten, and MemBase must be the source BR when BLTSetupTransfer is called.
	RTemp0← A0, MemBase← BR35,	* RTemp0← dest offset of zero
		Call[BRGetsVA];		* BR35← CB
	T← Stack&+2, Call[BLTSetupTransfer]; * T← count

* See comments under BLTSetupTransfer for how this loop works.
* At top of loop, StkP addresses high word of dest.
Subroutine;
BLTCLLoop:
	StkP-2, CoReturn;		* StkP at count; transfer one munch
	T← (Stack&-1)-(Q← T),		* T← words done this time, Q← words remaining
		Branch[BLTCLDone, ALU=0];
	Stack&+1← (Stack&+1)+T;		* Advance source pointer on stack
	Stack&+1← Q;			* Update count on stack
	Stack&+1← (Stack&+1)+T;		* Advance dest pointer on stack
	Stack← A← Stack, XorSavedCarry, Branch[BLTCLLoop]; * Carry into dest high word
TopLevel;

* Arrive here with StkP at source (the instruction carrying the branch has
* already stepped StkP down from count), so only one word remains to discard.
BLTCLDone:
	StkP-1, NextOpcode;

*-----------------------------------------------------------
BLTSetupTransfer:
* Subroutine to handle the major work of various flavors of Block Transfer.
* Does PreFetches and deals properly with interrupts and page faults.
* Arranged as a coroutine pair with the caller, and coreturns once per munch
* to permit the caller to copy intermediate state back onto the stack.
* Thus, this routine need not know how the BLT arguments are arranged on the stack.
* The code is careful to touch all required memory data before altering
* the intermediate state so as to work correctly in the face of page faults.
* This routine uses a pair of base registers of the caller's choosing for the
* bases of the source and destination blocks; they must be an even-odd pair.
* Calling sequence:
*	<Pop arguments off stack>;
*	<dest BR← destination base pointer>;
*	<source BR← source base pointer>;
*	RTemp0← destination word address (base-relative);
*	RTemp1← source word address (base-relative);
*	T← word count;
*	MemBase← source BR
*	Call[BLTSetupTransfer];		* Returns with MemBase = source BR
* Subroutine;	* -- This is very important --
* Loop:	CoReturn;			* Returns with MemBase = source BR,
					* RTemp0, RTemp1, and T updated,
*	Branch[Done, ALU=0];		* ALU = T = remaining count
*	<Push intermediate state from RTemp0, RTemp1, and T back onto stack>;
*	Branch[Loop];
* Besides the registers named above, this code also writes RTemp2 (remaining
* word count), RTemp3, Q and Cnt.
*-----------------------------------------------------------
Subroutine;

* Initially call here with word count in T.
	RTemp2← T, FlipMemBase;		* Dest BR

* See whether the destination address = source address +1.
* The regular BLT inner loop does not handle that case correctly.
	DummyRef← RTemp0, FlipMemBase, T← MD, * Compute destination VA
		Branch[BLTCountZero, ALU=0]; * Branch if nothing to do
	RTemp3← VALo;
	T← (RTemp1)+1, Q← VAHi;
	DummyRef← T, T← Q;		* Compute source VA+1
	RTemp3← (RTemp3) XOR (VALo);	* Check for resulting VAs equal
	T← T XOR (VAHi);
	RTemp3← (RTemp3) OR T, CoReturn; * RTemp3← 0 iff dest = source+1

* On first coreturn, transfer ((RTemp2-1) mod 20b)+1 words.
	T← (RTemp2)-1;
	PD← RTemp3;			* Recall which way to do the BLT
	T← T AND (17C), Branch[BlkSMunchEntry, ALU=0];

* BLTSetupTransfer (cont'd)

* T = number of words -1 for this transfer; MemBase = source BR.
* Before starting the transfer, touch the last word of the source
* and destination blocks (and store into the destination), to force
* any faults that would occur in mid-transfer to happen now.
* Need not also touch the first word, since a fault on it will
* abort the loop before it has done anything permanent.
BLTMunchEntry:
	T← (RTemp1)+(Q← T), Branch[BLTInterrupt, Reschedule];
BLTNoInterrupt:
	RTemp3← (Fetch← T)+(20C);	* Fetch last source word
	PreFetch← RTemp3, FlipMemBase;
	T← (RTemp0)+Q;
	RTemp3← (Fetch← T)+(20C);	* Fetch last destination word
	PreFetch← RTemp3, RTemp3← MD;
	RTemp2← (RTemp2)-(Cnt← Q)-1;	* Update word count
	Store← T, DBuf← RTemp3, FlipMemBase; *Dirty last destination word
	RTemp1← (Fetch← RTemp1)+1, Branch[BLTMunchExit, Cnt=0&-1];

* Each pass starts the fetch of the next source word while storing the
* word fetched on the previous pass.
BLTWordLoop: * Inner loop: 2 instructions per word transferred.
	RTemp1← (Fetch← RTemp1)+1, T← MD, FlipMemBase;
	RTemp0← (Store← RTemp0)+1, DBuf← T, FlipMemBase,
		Branch[BLTWordLoop, Cnt#0&-1];

* Store the final word of the munch, which is still waiting in MD.
BLTMunchExit:
	FlipMemBase;			* Dest BR
	RTemp0← (Store← RTemp0)+1, DBuf← MD, FlipMemBase;
	T← RTemp2, CoReturn;		* ALU = T = remaining count for caller's test
* On each subsequent coreturn, transfer 20b words.
	T← 17C, Branch[BLTMunchEntry];

* Here if an interrupt is (possibly) pending.
* The caller has just finished putting the intermediate state on the stack,
* so it is OK to exit to the interrupt handler at this point.
* Link is saved in RTemp3 across the call and restored afterwards.
TopLevel;
BLTInterrupt:
	RTemp3← Link, Call[InterruptLongRunningOpcode];
	Link← RTemp3;
Subroutine;
	T← (RTemp1)+Q, Branch[BLTNoInterrupt]; * Interrupt not pending after all

* BLTSetupTransfer (cont'd)

* Here to handle the "destination = source+1" case.
* This is implemented as a simple replication of the first source word
* throughout the destination block.
* T = number of words -1 for this transfer; MemBase = source BR.
* First, do PreFetches for the next transfer.
* (Need not touch block beforehand, since this case of BLT is idempotent.)
BlkSMunchEntry:
	RTemp1← T← (Fetch← RTemp1)+(Q← T)+1, * Updated source ptr = last dest
		Branch[BlkSInterrupt, Reschedule];
BlkSNoInterrupt:
	RTemp3← T+(20C), T← MD;		* T← source word to be replicated
	PreFetch← RTemp3, FlipMemBase;

	RTemp2← (RTemp2)-(Cnt← Q)-1;	* Update word count

BlkSWordLoop: * Inner loop: 1 instruction per word transferred.
	RTemp0← (Store← RTemp0)+1, DBuf← T, Branch[BlkSWordLoop, Cnt#0&-1];

BlkSMunchExit:
	T← RTemp2, B← MD, FlipMemBase, CoReturn; * Back to source BR; T← remaining count
* On each subsequent coreturn, transfer 20b words.
	T← 17C, Branch[BlkSMunchEntry];

* Here if word count = 0 on entry.  Force caller to quit immediately.
* Keeps coreturning with T = ALU = 0 until the caller's ALU=0 test is reached.
BLTCountZero:
	T← A0, CoReturn;
	Branch[.-1];

* Here if an interrupt is (possibly) pending.
* The caller has just finished putting the intermediate state on the stack,
* so it is OK to exit to the interrupt handler at this point.
* Link is saved in RTemp3 across the call and restored afterwards.
TopLevel;
BlkSInterrupt:
	RTemp3← Link, T← MD, Call[InterruptLongRunningOpcode];
	Link← RTemp3;
Subroutine;
	T← RTemp1, Branch[BlkSNoInterrupt]; * Interrupt not pending after all


*-----------------------------------------------------------
BRGetsVA:
* Subroutine to facilitate copying one base register to another.
* Loads the currently selected base register (BRHi, BRLo) from VAHi and VALo,
* i.e. from the virtual address left by the caller's preceding DummyRef.
* Typical call:
*	MemBase← <BR to be copied from>;
*	DummyRef← 0S;
*	MemBase← <BR to be copied into>, Call[BRGetsVA];
* Clobbers T
*-----------------------------------------------------------
Subroutine;

	T← VAHi;
	BRHi← T;
	T← VALo;
	BRLo← T, Return;

TopLevel;

*-----------------------------------------------------------
NewOp; ESCEntry[CKSUM];				* Checksum
* cksum: CARDINAL;
* DO
*   count: CARDINAL ← Pop[]; source: LONG POINTER ← PopLong[]; cksum ← Pop[];
*   IF count=0 THEN EXIT;
*   Push[Checksum[cksum, Fetch[source]↑]]; PushLong[source+1]; Push[count-1];
*   IF InterruptPending[] THEN {PC ← savedPC; EXIT};
*   ENDLOOP;
* Push[cksum];
*
* Stack layout on entry (bottom to top): cksum, source low, source high, count.
* Per word, the code below adds the word to the running sum with end-around
* carry and then left-cycles the sum by 1.
* BRHiGetsStackPop is defined outside this section; presumably it performs
* BRHi← Stack&-1 and returns -- confirm against its definition.
*-----------------------------------------------------------

	StkP-1;				* StkP at high word of source
	RTemp0← A0, MemBase← LPtr, Call[BRHiGetsStackPop]; * LPtr← source
	BRLo← Stack&+2;			* StkP back at count

* Come here once per munch.  RTemp0 contains LPtr-relative address.
* StkP addresses count (words remaining).
* On the first iteration, checksum ((count-1) mod 20b) +1 words; on subsequent
* iterations, checksum 20b words.  Note that on subsequent iterations,
* c mod 20b = 0, so ((c-1) mod 20b) +1 = 20b.
CSMunch:
	T← (Stack&-3)-1;		* A-1 generates carry iff A#0
	T← T AND (17C), Branch[CSDone, Carry'];

* Touch the first and last words to be checksummed in this block,
* and issue a PreFetch for the next block.  T = word count -1.
	T← (Fetch← RTemp0)+(Q← T), Branch[CSInterrupt, Reschedule];
CSNoInterrupt:
	T← (Fetch← T)+(20C);
	PreFetch← T, T← Stack&+3, Stack&+3← MD; * T← cksum

* All possible faults have happened by this point.
	Stack&-2← (Stack&-2)-(Cnt← Q)-1; * Update word count
	RTemp0← (Fetch← RTemp0)+1;
	Stack&+1← (Stack&+1)+Q+1;	* Update long pointer on stack
	Stack&+1← A← Stack&+1, XorSavedCarry;

* Inner loop: 3 instructions per word.  T = cksum, StkP addresses count.
CSWordLoop:
	T← T+MD, StkP-3, Branch[CSWordExit, Cnt=0&-1];
	RTemp0← (Fetch← RTemp0)+1, Branch[.+2, Carry];
CSAddNoCarry:				* ALU=0 iff came from CSWordExit
	Stack&+3← T← T LCY 1, DblBranch[CSWordLoop, CSMunch, ALU#0];
CSAddCarry:
	Stack&+3← T← (T+1) LCY 1, DblBranch[CSWordLoop, CSMunch, ALU#0];
CSWordExit:
	PD← A0, DblBranch[CSAddCarry, CSAddNoCarry, Carry];

* Here when count=0.  If result is -0, change it to +0.  StkP addresses cksum.
CSDone:
	PD← (Stack)+1;			* Carry iff sum=177777
	Stack← A← Stack, XorSavedCarry, NextOpcode;

* Here if an interrupt is (possibly) pending at the start of a munch.
CSInterrupt:
	T← MD, Call[InterruptLongRunningOpcode];
	T← (RTemp0)+Q, Branch[CSNoInterrupt]; * Interrupt not pending after all

*-----------------------------------------------------------
NewOp; ESCEntry[BLEL];				* Block Equal Long
* DO
*   ptr1: LONG POINTER ← PopLong[]; count: CARDINAL ← Pop[];
*   ptr2: LONG POINTER ← PopLong[];
*   IF count=0 THEN {Push[TRUE]; EXIT};
*   IF Fetch[ptr1]↑ # Fetch[ptr2]↑ THEN {Push[FALSE]; EXIT};
*   PushLong[ptr2+1]; Push[count-1]; PushLong[ptr1+1];
*   IF InterruptPending[] THEN {PC ← savedPC; EXIT};
*   ENDLOOP;
*
* Stack layout on entry (bottom to top): ptr2 low, ptr2 high, count,
* ptr1 low, ptr1 high.  This entry loads BR35 from ptr2, records in RTemp0
* the StkP value at which the result will be stored, and joins BLECommon
* with StkP addressing count.
* BRHiGetsStackPop is defined outside this section; presumably it performs
* BRHi← Stack&-1 and returns -- confirm against its definition.
*-----------------------------------------------------------

	StkP-3, MemBase← BR35, Call[BRHiGetsStackPop]; * BR35← ptr2
	RTemp0← TIOA&StkP;		* StkP for result
	BRLo← Stack&+2, Branch[BLECommon];

*-----------------------------------------------------------
NewOp; ESCEntry[BLECL];				* Block Equal Code Long
* DO
*   ptr: LONG POINTER ← PopLong[]; count: CARDINAL ← Pop[];
*   offset: CARDINAL ← Pop[];
*   IF count=0 THEN {Push[TRUE]; EXIT};
*   IF Fetch[ptr]↑ # ReadCode[offset] THEN {Push[FALSE]; EXIT};
*   Push[offset+1]; Push[count-1]; PushLong[ptr+1];
*   IF InterruptPending[] THEN {PC ← savedPC; EXIT};
*   ENDLOOP;
*
* Stack layout on entry (bottom to top): offset, count, ptr low, ptr high.
* BR35 is set to the virtual address of CB+offset, so that from BLECommon on
* this opcode and BLEL can share the same compare loop.
*-----------------------------------------------------------

	StkP-3, MemBase← CB;		* Set up BR35 as copy of CB
	DummyRef← Stack, T← MD;		* VA← CB + offset
	RTemp0← TIOA&StkP;		* StkP for result
	StkP+1, MemBase← BR35, Call[BRGetsVA]; * StkP at count
* BLEL enters here with BR35 loaded, RTemp0 = StkP for result, StkP at count.
BLECommon:
	T← (Stack&+2)-1, MemBase← BR34,	* T← count-1
		Call[BRHiGetsStackPop];	* BR34← ptr1
	PD← A0, BRLo← Stack&-1,	* ALU← 0 so that BLEDone returns true
		Branch[BLEDone, Carry']; * Branch if count was initially zero

* The block equal loop is not optimized for high speed,
* since in normal use the blocks being compared are typically quite small.
* This implementation does the compare in descending order of addresses
* so as not to need to increment the pointers.
* T = offset of the word pair being compared; the count word on the stack
* is rewritten with T+1 on every pass.
BLELoop:
	Stack← (Fetch← T)+1, FlipMemBase, Branch[BLERetFalse, ALU#0]; * Quit if previous pair differed
	T← (Fetch← T)-1, RTemp1← MD, FlipMemBase,
		Branch[BLEInterrupt, Reschedule];
BLENoInterrupt:
	PD← (RTemp1)#MD, Branch[BLELoop, Carry]; * ALU← 0 iff words equal; loop unless offset was 0

* ALU=0 here means the blocks compared equal (or count was zero).
BLEDone:
	StkP← RTemp0, Branch[.+2, ALU=0]; * Set StkP for result
	Stack← A0, NextOpcode;		* Return false
	Stack← 1C, NextOpcode;		* Return true

BLERetFalse:
	PD← T-T-1, Branch[BLEDone];	* Force ALU#0 so that BLEDone returns false

* Here if an interrupt is (possibly) pending.
* The intermediate state (just the count) is on the stack,
* so it is OK to exit to the interrupt handler at this point.
BLEInterrupt:
	T← MD, Call[InterruptLongRunningOpcode];
	T← (Stack)-1, Branch[BLENoInterrupt]; * Interrupt not pending after all


*-----------------------------------------------------------
* Unimplemented opcodes
* These are the opcodes listed as "not implemented" in the contents at the
* top of this file.  ESCOpcodeUnimpl is defined outside this section.
*-----------------------------------------------------------

	ESCOpcodeUnimpl[BYTBLT];		* Byte Block Transfer
	ESCOpcodeUnimpl[BYTBLTR];		* Byte Block Transfer Reversed
	ESCOpcodeUnimpl[TXTBLT];		* Text Block Transfer