{File name:  Rotate.mc
Description: 90-degree rotate opcode
Author: JPM
Created: October 3, 1986
Last Revised:
JPM	18-Dec-86  9:29:19	move reg defs to Daybreak.dfn
JPM	17-Dec-86 15:50:45	remove @ROTATEGRAY; add comments
JPM	 4-Dec-86 12:33:03	fix bug in dst map page cross/page fault code
JPM	 3-Nov-86 12:32:57	add dummy entry for @ROTATEGRAY
JPM	16-Oct-86 17:02:15	change opcode name and offset for new defs
JPM	 8-Oct-86 10:36:58	cache src words in U-reg block (so don't have to map twice)}

{ 	Copyright (C) 1986 by Xerox Corporation.  All rights reserved.}

{This minimal-stack opcode takes 6 arguments and returns an empty stack. Its arguments are:
	rSrcVirt, rhSrcVirt -- a two-word long pointer to the data source
	wplSrc -- words per source line (may be negative)
	rDstVirt, rhDstVirt -- a two-word long pointer to the data destination
	wplDst -- words per destination line (may be negative)
Each pointer is assumed to point to the first word (if wpl > 0) or last word (if wpl < 0) of a 16-word vertical block of memory. The source bits are rotated by 90 degrees and stored in the destination. There are four possible results, depending on the signs of the wpl terms:
	wplSrc > 0, wplDst > 0 -- clockwise rotation
	wplSrc > 0, wplDst < 0 -- mirrored around diagonal from top right to bottom left
	wplSrc < 0, wplDst > 0 -- mirrored around diagonal from top left to bottom right
	wplSrc < 0, wplDst < 0 -- counterclockwise rotation
Link register use is as follows:
	L0 - return from map fixups (either back to loop, or to end of loop)
	L1 - indicates sign of wplDst (0 if positive, 2 if negative)
	L2 - indicates sign of wplSrc (0 if positive, 2 if negative)
}

@ROTATEBITS:
{load arguments from stack into registers; wplDst = TOS already}
{Opcode entry; the first instruction is placed at[0D,10,ESCAn].
 Six pops in total are executed on either path through this section, one per argument.
 The sign of each words-per-line value is tested with NegBr and recorded in a link register
 (L1 for wplDst, L2 for wplSrc: 0 if positive, 2 if negative) for the later L1Disp/L2Disp dispatches.}
{test sign of wplDst; fetch high half of dst pointer}
	[] ← wplDst, NegBr, rhDstVirt ← STK, pop,			c1, at[0D,10,ESCAn];
{fetch low half of dst pointer; assume wplDst positive (L1 = 0) unless the branch is taken}
	rDstVirt ← STK, pop, L1 ← 0, BRANCH[$,RotWplDstNeg],		c2;
	wplSrc ← STK, pop, GOTO[RotCheckWplSrc],			c3;
{wplDst negative: same load of wplSrc, but mark L1 = 2}
RotWplDstNeg:
	wplSrc ← STK, pop, L1 ← 2,					c3;

{same pattern for the source: test sign of wplSrc, fetch src pointer, set L2}
RotCheckWplSrc:
	[] ← wplSrc, NegBr, rhSrcVirt ← STK, pop,			c1;
	rSrcVirt ← STK, pop, L2 ← 0, BRANCH[$,RotWplSrcNeg],		c2;
{sixth and last pop of the argument load}
	pop, GOTO[RotCheckDstStart],					c3;
{wplSrc negative: mark L2 = 2; falls through into RotCheckDstStart}
RotWplSrcNeg:
	L2 ← 2, pop,							c3;

{check map flags for all 16 destination words; side effect -- move dst pointer to opposite end of block}
{Each pass starts a Map reference for one destination word (c1), steps rDstVirt by wplDst (c2),
 and loads the map entry into rDstReal with XWtOKDisp (c3); the c1 instruction that follows
 branches to RotDstMapFix or RotDstMapOK. wordCount starts at 1 and is incremented once per
 mapped word; its nibble carry (NibCarryBr) ends the loop at RotCheckDstDone.
 The c2 dispatch (CarryBr, L1Disp) selects an entry in the RotCDL table:
	0, 3 -- continue the loop with the high address unchanged
	1, 2 -- adjust rhDstVirt first (see the comments on those entries)
	7    -- RotReadSrcStart (placed in the source-read section)
 The two pushes here follow the six pops of the argument load; each fault path pushes four
 more times -- presumably so the stack depth is back at its entry value when the fault is
 reported (TODO confirm against Bank1Fault).}
RotCheckDstStart:
	Map ← [rhDstVirt, rDstVirt], wordCount ← 1, push,		c1;
	rDstVirt ← rDstVirt + wplDst, CarryBr, L1Disp, push,		c2;
{pCall0 presumably loads L0 with the return point used by the fix-up code -- TODO confirm}
	rDstReal ← MD, XWtOKDisp, pCall0, DISP4[RotCDL],		c3;

{RotCDL entries 0 and 3 are identical: start mapping the next word and count it}
RotCheckDstLoop:
	Map ← [rhDstVirt, rDstVirt], wordCount ← wordCount + 1, NibCarryBr, BRANCH[RotDstMapFix,RotDstMapOK,0D], c1, at[0,10,RotCDL];
	Map ← [rhDstVirt, rDstVirt], wordCount ← wordCount + 1, NibCarryBr, BRANCH[RotDstMapFix,RotDstMapOK,0D], c1, at[3,10,RotCDL];
{previous word needs no fix-up: step to the next word; leave the loop on the nibble carry from wordCount}
RotDstMapOK:
	rDstVirt ← rDstVirt + wplDst, CarryBr, L1Disp, BRANCH[$,RotCheckDstDone], c2;
	rDstReal ← MD, XWtOKDisp, pCall0, DISP4[RotCDL],		c3;

{wplDst positive, carry: add to high addr}
	Q ← rhDstVirt + 1, LOOPHOLE[byteTiming], BRANCH[RotDstUHMapFix,RotDstUpdateHigh,0D], c1, at[1,10,RotCDL];
{wplDst negative, no carry: subtract from high addr}
	Q ← rhDstVirt - 1, LOOPHOLE[byteTiming], BRANCH[RotDstUHMapFix,RotDstUpdateHigh,0D], c1, at[2,10,RotCDL];
{store the adjusted high address, then rejoin the loop; Xbus ← 2 with XDisp presumably forces the
 RotDstMapOK side of the branch in RotCheckDstLoop, the flags having been tested above -- TODO confirm}
RotDstUpdateHigh:
	rhDstVirt ← Q LRot0,						c2;
	Xbus ← 2, XDisp, GOTO[RotCheckDstLoop],				c3;

{Map fix-up entries. All three back rDstVirt up by wplDst, so that it again addresses the word
 whose map entry is held in rDstReal, and then join RotDstMapExamine.}
{entered from the loop; CANCELBR discards the pending NibCarryBr branch}
RotDstMapFix:
	rDstVirt ← rDstVirt - wplDst, CANCELBR[RotDstMapExamine,1],	c2;
{entered from the two high-address instructions above.
 NOTE(review): those instructions do not increment wordCount and have not yet stored Q into
 rhDstVirt, yet the shared tail below still does wordCount - 1 and returns through L0.
 Confirm that the return path re-does the high-address update and keeps the count right.}
RotDstUHMapFix:
	rDstVirt ← rDstVirt - wplDst, GOTO[RotDstMapExamine],		c2;
{entered from RotReadSrcStart for the last destination word; L0 ← 7 makes the fix-up return to
 RotCDL entry 7, which is RotReadSrcStart}
RotDstMapLastFix:
	rDstVirt ← rDstVirt - wplDst, L0 ← 7,				c2;
{dispatch on flag bits of the map entry (XwdDisp) to choose one of the four RotDMW cases}
RotDstMapExamine:
	Xbus ← rDstReal LRot0, XwdDisp,					c3;

{restart the Map reference for the backed-up word and take back one count}
	Map ← [rhDstVirt, rDstVirt], wordCount ← wordCount - 1, DISP2[RotDMW], c1;
{cases 0 and 2: write the entry back with map.rd or'ed in (presumably referenced + dirty -- TODO confirm)
 and return through L0; case 1: write-protect fault; case 3: page fault.
 T carries the fault code; the push is the first of four on the fault path.}
	MDR ← rDstReal or map.rd, L0Disp, GOTO[RotDstMapFixed],		c2, at[0,4,RotDMW];
	T ← qWriteProtect, push, GOTO[RotDstMapFault],			c2, at[1,4,RotDMW];
	MDR ← rDstReal or map.rd, L0Disp, GOTO[RotDstMapFixed],		c2, at[2,4,RotDMW];
	T ← qPageFault, push, GOTO[RotDstMapFault],			c2, at[3,4,RotDMW];
{redo the step undone above and return to the RotCDL entry selected by L0}
RotDstMapFixed:
	rDstVirt ← rDstVirt + wplDst, Xbus ← 2, XDisp, DISP4[RotCDL],	c3;

{all 16 words have been mapped: load the last word's map entry; its flags are tested by the BRANCH
 in RotReadSrcStart. CANCELBR discards the pending CarryBr/L1Disp dispatch.}
RotCheckDstDone:
	rhDstReal ← rDstReal ← MD, XWtOKDisp, CANCELBR[$,7],		c3;

{read all 16 source words into U-register bank E, checking flags}
{NOTE: use temporary U-register bank instead of stack regs so that, if page fault occurs,
  the stack doesn't have to be restored}
{Per word: Map the virtual address, load the real page into rhSrcReal/rSrcReal with XRefBr, start
 the memory read (MAR), step rSrcVirt by wplSrc, and take the data from MD into srcData.
 The data is stored into block E one pass later, at the U-register selected by wordCount
 (Ybus ← wordCount, AltUaddr). wordCount runs down from 0F; when it reaches zero the last word
 is read at RotReadSrcLast and stored in uBlockE.
 L0 selects where a source map fix-up returns: 0 = RotReadSrcCont1, 1 = RotReadSrcLast.}
{This first instruction is also RotCDL entry 7, i.e. the return point after RotDstMapLastFix;
 its BRANCH acts on the XWtOKDisp issued in RotCheckDstDone for the last destination word.}
RotReadSrcStart:
	Map ← [rhSrcVirt, rSrcVirt], BRANCH[RotDstMapLastFix,$,0D],	c1, at[7,10,RotCDL];
	wordCount ← 0F, L0 ← 0,						c2;
	rhSrcReal ← rSrcReal ← MD, XRefBr, GOTO[RotReadSrcCont1],	c3;

{RotRS entries 0 and 3: high address unchanged, map the next source word}
RotReadSrcLoop:
	Map ← [rhSrcVirt, rSrcVirt], GOTO[RotReadSrcCont],		c1, at[0,4,RotRS];
	Map ← [rhSrcVirt, rSrcVirt],					c1, at[3,4,RotRS];
{select the U register for the word just read, count down, and test for the last word}
RotReadSrcCont:
	Ybus ← wordCount, AltUaddr, wordCount ← wordCount - 1, ZeroBr,	c2;
{store the previous word's data; load the new real address; branch out when wordCount reached zero}
	uyBlockE ← srcData, rhSrcReal ← rSrcReal ← MD, XRefBr, BRANCH[$,RotReadSrcLast], c3;

{start the read; go to the fix-up if the XRefBr test on the map entry requires it}
RotReadSrcCont1:
	MAR ← [rhSrcReal, rSrcVirt + 0], BRANCH[RotSrcMapFix,$],	c1;
	rSrcVirt ← rSrcVirt + wplSrc, CarryBr, L2Disp,			c2;
	srcData ← MD, DISP2[RotRS],					c3;

{wplSrc positive, carry: add to high addr}
	Q ← rhSrcVirt + 1, LOOPHOLE[byteTiming], CANCELBR[RotSrcUpdateHigh,1], c1, at[1,4,RotRS];
{wplSrc negative, no carry: subtract from high addr}
	Q ← rhSrcVirt - 1, LOOPHOLE[byteTiming], CANCELBR[RotSrcUpdateHigh,1], c1, at[2,4,RotRS];
{store the adjusted high address and rejoin the loop}
RotSrcUpdateHigh:
	rhSrcVirt ← Q LRot0,						c2;
	GOTO[RotReadSrcLoop],						c3;

{Source map fix-up. Entered from RotReadSrcCont1 (L0 still 0) or from RotReadSrcLast (L0 ← 1).}
RotSrcMapFix:
	GOTO[RotSrcMapExamine],						c2;
RotSrcMapLastFix:
	L0 ← 1,								c2;
{dispatch on flag bits of the map entry to choose one of the four RotSMR cases}
RotSrcMapExamine:
	Xbus ← rSrcReal LRot0, XwdDisp,					c3;

	Map ← [rhSrcVirt, rSrcVirt], DISP2[RotSMR],			c1;
{cases 0-2: write the entry back with map.referenced or'ed in and return through L0;
 case 3: page fault (T carries the fault code; the push is the first of four on the fault path)}
	MDR ← rSrcReal or map.referenced, L0Disp, GOTO[RotSrcMapFixed],	c2, at[0,4,RotSMR];
	MDR ← rSrcReal or map.referenced, L0Disp, GOTO[RotSrcMapFixed],	c2, at[1,4,RotSMR];
	MDR ← rSrcReal or map.referenced, L0Disp, GOTO[RotSrcMapFixed],	c2, at[2,4,RotSMR];
	T ← qPageFault, push, GOTO[RotSrcMapFault],			c2, at[3,4,RotSMR];
{return to the retry point chosen by L0; Xbus ← 1 with XDisp presumably steers the BRANCH in the
 retried MAR instruction away from the fix-up -- TODO confirm}
RotSrcMapFixed:
	Xbus ← 1, XDisp, BRANCH[RotReadSrcCont1,RotReadSrcLast],	c3;

{read the 16th word; no address step is needed after it. stackP ← 0 resets the stack pointer
 before the stack is used as working storage by the code that follows.}
RotReadSrcLast:
	MAR ← [rhSrcReal, rSrcVirt + 0], BRANCH[RotSrcMapLastFix,$],	c1;
	stackP ← 0,							c2;
	srcData ← MD,							c3;

{store the last word, then prime the move loop: bitCount ← 0F, and the first shifted word is
 taken from uBlockEF}
	uBlockE ← srcData,						c1;
	bitCount ← 0F,							c2;
	Temp ← DRShift1 uBlockEF,					c3;

{move U-reg block E into stack, assembling first destination word}
{NOTE: OK to use stack now, because all pages are mapped (and this is minimal-stack opcode)}
{Each pass stores the word already shifted by DRShift1 into the stack and pushes, then shifts the
 next block-E word, selected by the decremented bitCount through Ybus/AltUaddr.
 The loop exits when bitCount reaches zero (ZeroBr).}
RotMoveSrcLoop:
	STK ← Temp, push,						c1;
	Ybus ← bitCount ← bitCount - 1, AltUaddr, ZeroBr,		c2;
	Temp ← DRShift1 uyBlockE, BRANCH[RotMoveSrcLoop,RotMoveSrcDone], c3;

{store the 16th shifted word (no push), and take the assembled destination word from Q}
RotMoveSrcDone:
	STK ← Temp, Temp ← ~Q {note: DRShift1 inverts shifted bit},	c1;
{step rDstVirt back by wplDst; the destination check loop advanced it once per word checked}
	rDstVirt ← rDstVirt - wplDst,					c2;
{rewind the stack pointer so the shift loop starts again at the first stored word}
	stackP ← 0, GOTO[RotWriteDstCont1] {skip map, since first real addr is cached}, c3;

{write destination word}
{Outer loop, once per destination word: map the address (skipped for the first word), write Temp,
 then run the shift loop over the 16 stack words to assemble the next word in Q.
 wordCount is incremented per write; its nibble carry ends the opcode.
 rDstVirt moves by -wplDst here, i.e. in the opposite direction to the destination check loop.}
{RotWDL entries 1 and 2: high address unchanged, map the next destination word}
RotWriteDstLoop:
	Map ← [rhDstVirt, rDstVirt], GOTO[RotWriteDstCont],		c1, at[1,4,RotWDL];
	Map ← [rhDstVirt, rDstVirt],					c1, at[2,4,RotWDL];
{rewind the stack pointer for the coming shift loop; load the real address from the map}
RotWriteDstCont:
	stackP ← 0,							c2;
	rhDstReal ← rDstReal ← MD,					c3;

{store Temp at the destination; count the word and test for the 16th}
RotWriteDstCont1:
	MAR ← [rhDstReal, rDstVirt + 0],				c1;
	MDR ← Temp, wordCount ← wordCount + 1, NibCarryBr,		c2;
	bitCount ← 1, BRANCH[$,Bank1NxtInstc1], {exit here when done}	c3;

{shift bits from stack words to create destination word in Q}
{the following 2-cycle loop is executed 15 1/2 times and thus takes the branch at cycle 1}
{first instruction: shift the current stack word by one bit; second: store it back, push to
 move on to the next stack word, and count. bitCount starts at 1, so its nibble carry occurs after
 15 full passes; the 16th shift then branches to RotShiftBitEnd, where the store-back is done.}
RotShiftBitLoop:
	Temp ← DRShift1 STK, BRANCH[$,RotShiftBitEnd],			c*;
	STK ← Temp, push, bitCount ← bitCount + 1, NibCarryBr, GOTO[RotShiftBitLoop], c*;
{step the destination pointer and dispatch on carry and the sign of wplDst (L1)}
RotShiftBitEnd:
	rDstVirt ← rDstVirt - wplDst, CarryBr, L1Disp,			c2;
	STK ← Temp, Temp ← ~Q {note: DRShift1 inverts shifted bit}, DISP2[RotWDL], c3;

{-wplDst negative, no carry: subtract from high addr}
	Q ← rhDstVirt - 1, LOOPHOLE[byteTiming], GOTO[RotWDUpdateHigh],	c1, at[0,4,RotWDL];
{-wplDst positive, carry: add to high addr}
	Q ← rhDstVirt + 1, LOOPHOLE[byteTiming],			c1, at[3,4,RotWDL];
{store the adjusted high address and rejoin the write loop}
RotWDUpdateHigh:
	rhDstVirt ← Q LRot0,						c2;
	GOTO[RotWriteDstLoop],						c3;

{fault conditions: store virtual address, restore stack depth}
{On entry T already holds the fault code (qWriteProtect or qPageFault) and one push has been
 done by the instruction that detected the fault. Each path below does three more pushes,
 stores the low half of the faulting virtual address in uFaultParm0 and the high half
 (through Q) in uFaultParm1, and exits to Bank1Fault.}

{destination fault: address comes from rhDstVirt/rDstVirt}
RotDstMapFault:
	uFaultParm0 ← rDstVirt, push,					c3;

	Q ← rhDstVirt, push, GOTO[RotMapFault],				c1;

{source fault: address comes from rhSrcVirt/rSrcVirt}
RotSrcMapFault:
	uFaultParm0 ← rSrcVirt, push,					c3;

	Q ← rhSrcVirt, push,						c1;
{common tail: store the high half and hand off to the fault handler}
RotMapFault:
	uFaultParm1 ← Q, push, GOTO[Bank1Fault],			c2;