[Indigo]<Dragon>Lizard>LizardHeartImpl.mesa!56

LizardHeartImpl.mesa

Russ Atkinson (RRA) April 1, 1986 5:13:25 pm PST

McCreight, January 30, 1986 4:35:51 pm PST

DIRECTORY

Basics USING [BITAND, CARD],

DragOpsCross USING [Byte, bytesPerWord, EUStackSize, FieldDescriptor, FourBytes, IFUOverflow, IFUStackSize, IFUStatusRec, Inst, KernalLimit, OnesWord, ProcessorRegister, TrapIndex, Word, ZerosWord],

DragOpsCrossUtils USING [BytePCToWordAddress, ByteToCard, BytesToHalf, BytesToWord, CardToWord, DoubleWordShiftLeft, DragAnd, DragNot, DragOr, DragXor, HalfToCard, IntToWord, SingleWordShiftLeft, SingleWordShiftRight, TrapIndexToBytePC, VanillaAdd, VanillaSub, WordAddressToBytePC, WordToBytes, WordToCard, WordToInt],

LizardHeart USING [ALUHelper, ALUOps, CacheBase, ChangeLogger, Control, InstBuffer, InstBufferRep, LizardIFUStackIndex, LizardIFUStackSize, NoFault, Processor, ProcessorRep, RegChangeProc, RegToWord, StackPlus, TrapPC, WordToReg],

LizardLiver USING [Execute],

PrincOpsUtils USING [LongCopy],

Rope USING [ROPE];

LizardHeartImpl: CEDAR PROGRAM

IMPORTS Basics, DragOpsCrossUtils, LizardHeart, LizardLiver, PrincOpsUtils

EXPORTS LizardHeart

= BEGIN OPEN DragOpsCross, DragOpsCrossUtils, LizardHeart;

CARD: TYPE = Basics.CARD;

CurrentIFUVersion: [0..255] ← 1;

The current version # of the IFU, stored in status.version

InstBufferIndex: TYPE = [0..64);

max # of words allowed

WordsInBuffer: NAT ← 8;

# of Dragon words in the instruction buffer (power of two)

MaxMask: CARDINAL ← WordsInBuffer*bytesPerWord - 1;

Mask for valid bytes in the buffer

MaskInstBufferIndex: PROC [index: CARDINAL] RETURNS [InstBufferIndex] = INLINE {

RETURN [LOOPHOLE[Basics.BITAND[index, MaxMask]]];

};

NewProcessor: PUBLIC PROC [ifuCache,euCache: CacheBase, logger: ChangeLogger] RETURNS [p: Processor] = {

initStatus: IFUStatusRec ← [version: CurrentIFUVersion, rescheduleKeep: TRUE];

p ← NEW[ProcessorRep];

p.ifuCache ← ifuCache;

p.euCache ← euCache;

p.logger ← logger;

p.eldest ← 1;

p.youngest ← 1;

p.stackEntries ← 0;

p.regs[ifuPC] ← TrapPC[ResetTrap];

p.regsGood[euConstant] ← TRUE; -- constant register 0 = 0 (it's a ROM)

p.regsGood[ifuStatus] ← TRUE;

p.regs[ifuStatus] ← LOOPHOLE[initStatus];

p.instBuffer ← NEW[InstBufferRep[WordsInBuffer]];

};

FlushInstBuffer: PUBLIC PROC [processor: Processor] = {

This routine flushes the instruction buffer. This occurs on a bad jump prediction, a trap, or an xop.

instBuffer: InstBuffer = processor.instBuffer;

delta: INT = WordToInt[instBuffer.nextPC] - WordToInt[instBuffer.basePC];

IF delta > 0 THEN {

validBytes: INT = instBuffer.validWords*bytesPerWord;

IF validBytes > delta THEN

instBuffer.bytesDiscarded ← instBuffer.bytesDiscarded + validBytes-delta;

};

instBuffer.validWords ← 0;

instBuffer.forcedEmpty ← instBuffer.forcedEmpty + 1;

};

InstructionExecute: PUBLIC PROC [processor: Processor] = {

thisPC: Word ← processor.regs[ifuPC];

newPC: Word ← thisPC;

rtnPC: Word;

control: Control ← nextInst;

inst: Inst;

rest: Word;

oldStatus: IFUStatusRec =LOOPHOLE[processor.regs[ifuStatus]];

status: IFUStatusRec ← oldStatus;

regL: Word ← processor.regs[ifuL];

cycles: CARDINAL ← 0;

rCycles: CARDINAL ← 0;

initCycles: INT ← processor.stats.cycles;

youngest: CARDINAL ← processor.youngest;

nBytes: CARDINAL ← 0;

word: Word;

wordAddr: Word;

rbi: [0..bytesPerWord);

tx: TrapIndex ← NoFault;

valid: InstBufferIndex ← 0; -- valid bytes in inst buffer AFTER newPC

p: Processor = processor; -- for a short name

instBuffer: InstBuffer ← p.instBuffer;

instBufferPtr: LONG POINTER;

max: CARDINAL = instBuffer.max * bytesPerWord; -- max bytes in buffer

ForceBufferEmpty: PROC = INLINE {

ForceBufferEmpty forces the instruction buffer to be empty. It is a faster local version of FlushInstBuffer.

instBuffer.validWords ← 0;

instBuffer.forcedEmpty ← instBuffer.forcedEmpty + 1;

IF valid # 0 THEN {

instBuffer.bytesDiscarded ← instBuffer.bytesDiscarded + valid;

valid ← 0;

};

IsEUStackOverflow: PROC [regS: ProcessorRegister] RETURNS [BOOL] = INLINE {

RETURN [ LOOPHOLE[LOOPHOLE[regS, CARDINAL] -LOOPHOLE[WordToReg[processor.regs[ifuSLimit]], CARDINAL]+EUStackSize, CARDINAL] MOD EUStackSize IN [0..16) ];

};

CauseTrap: PROC [code: TrapIndex] = {

All traps push the status onto the EU stack, and set the status to have traps disabled and be in kernel mode. If an IFU stack overflow would result, then that has precedence over anything else. If an EU stack overflow would result from the push, then that is ignored.

This routine was cribbed from LizardHeartImpl and simplified.

regS: ProcessorRegister ← StackPlus[WordToReg[p.regs[ifuS]], 1];

oldStatus: Word = LOOPHOLE[status];

rtnPC ← thisPC;

IF WillPushOverflow[p] THEN

IFUStackOverflow has precedence over other traps

code ← IFUStackOverflowTrap;

newPC ← TrapPC[code];

control ← doAbort;

p.regs[ifuS] ← RegToWord[regS];

IF p.logger # NIL THEN {

regChange: RegChangeProc ← p.logger.regChange;

IF regChange # NIL THEN regChange[p.logger.data, p, regS, p.regs[regS], oldStatus];

};

p.regs[regS] ← oldStatus;

status.trapsEnabled ← FALSE;

status.userMode ← FALSE;

p.regs[ifuStatus] ← LOOPHOLE[status];

cycles ← cycles + 4; -- a rough guess

tx ← code;

IF code = IFUStackOverflowTrap THEN p.stats.stackOver ← p.stats.stackOver + 1;

};

FlushInstWord: PROC = INLINE {

Flushes one word from the inst buffer, shifting the words down so that instBuffer[0] corresponds to inst.basePC. We also update instBuffer.basePC forward one word, and instBuffer.validWords backwards one word.

vw: CARDINAL ← instBuffer.validWords - 1;

instBuffer.basePC ← IntToWord[WordToInt[instBuffer.basePC] + bytesPerWord];

instBuffer.validWords ← instBuffer.validWords - 1;

IF vw # 0 THEN TRUSTED {

PrincOpsUtils.LongCopy[from: @instBuffer[1], nwords: instBuffer.validWords*SIZE[Word], to: @instBuffer[0]];

};

instBuffer.validWords ← vw;

};

IF p.resetRequested THEN {

Force the machine to its reset state. The state we force it to here is highly speculative.

status ← [version: CurrentIFUVersion, rescheduleKeep: TRUE];

p.regs[ifuStatus] ← LOOPHOLE[status];

thisPC ← newPC ← TrapPC[ResetTrap];

p.youngest ← youngest ← 0;

p.eldest ← 1;

p.stackEntries ← 0;

p.resetRequested ← FALSE;

p.regs[ifuL] ← CardToWord[1];

p.regs[ifuS] ← ZerosWord;

p.euCarry ← FALSE;

ForceBufferEmpty[];

p.instBuffer ← NEW[InstBufferRep[WordsInBuffer]];

p.instBuffer.forcedEmpty ← instBuffer.forcedEmpty;

instBuffer ← p.instBuffer;

p.stats ← [];

p.ifuCache.stats ← [];

p.euCache.stats ← [];

initCycles ← cycles ← 0;

};

TRUSTED {

instBufferPtr ← @instBuffer[0];

};

IF newPC # instBuffer.nextPC

THEN ForceBufferEmpty[]

If the PC is not the next one in the sequence then flush the instruction buffer

ELSE {

Compute the number of valid bytes in the buffer.

used: INT ← WordToInt[newPC] - WordToInt[instBuffer.basePC];

valid ← instBuffer.validWords*bytesPerWord;

IF used > 0 AND used < valid THEN valid ← valid - used ELSE valid ← 0;

};

{

Fetch the next instruction byte in the instruction buffer, causing a refill if the instruction buffer is empty.

start: InstBufferIndex ← 0; -- byte index in buffer of newPC

Between instructions we have the following priority:

EUStackOverflowTrap > RescheduleTrap > IFUPageFaultTrap

IF status.trapsEnabled THEN

SELECT TRUE FROM

IsEUStackOverflow[WordToReg[p.regs[ifuS]]] => {

There was an EU stack overflow caused by the previous instruction. This may be because a previous SIP or RETK instruction enabled traps. Should this ever really happen?

tx ← EUStackOverflowTrap;

GO TO trapEarly;

};

status.reschedule => {

Take a reschedule trap.

tx ← RescheduleTrap;

GO TO trapEarly;

};

ENDCASE;

IF valid = 0 THEN {

need to fill the inst buffer from scratch

[wordAddr, rbi] ← BytePCToWordAddress[[newPC]];

[word, tx, rCycles] ← p.ifuCache.fetch[p.ifuCache, wordAddr, initCycles+cycles];

cycles ← cycles + 1;

The cache takes a cycle to come back with the answer

IF tx # NoFault THEN {tx ← IFUPageFaultTrap; GO TO trapEarly};

A page fault has occurred during an instruction fetch. Go freak out.

IF rCycles # 0 THEN {

Account for the number of reject cycles. Also keep track of the next time that the ifu cache will be available.

cycles ← cycles + rCycles;

instBuffer.busyUntil ← p.ifuCache.sharedBase.busyUntil;

};

valid ← bytesPerWord - rbi;

instBuffer.validWords ← 1;

instBuffer.basePC ← WordAddressToBytePC[wordAddr];

instBuffer[0] ← word;

};

At this point we have at least one valid byte in the buffer. This is enough to determine the instruction and how many bytes must follow.

start ← WordToInt[newPC] - WordToInt[instBuffer.basePC];

IF start >= max THEN ERROR;

word ← instBuffer[start/bytesPerWord];

inst ← LOOPHOLE[WordToBytes[word][start MOD bytesPerWord]];

nBytes ← LOOPHOLE[inst, CARDINAL] / 64;

IF nBytes = 0 THEN nBytes ← IF LOOPHOLE[inst, CARDINAL] < 40B THEN 1 ELSE 5;

IF valid < nBytes THEN {

We must make sure that we have enough bytes in the instruction buffer to execute the whole instruction. This simplifies things later, to be sure. Things are a little simpler because we know we are fetching a full word that will be appended onto the current valid stuff.

IF instBuffer.validWords = instBuffer.max THEN FlushInstWord[];

[wordAddr, rbi] ← BytePCToWordAddress[[IntToWord[WordToInt[newPC] + valid]]];

[word, tx, rCycles] ← p.ifuCache.fetch[p.ifuCache, wordAddr, initCycles+cycles];

cycles ← cycles + 1;

Account for cache access time

IF rCycles # 0 THEN {

Account for the number of reject cycles. Also keep track of the next time that the ifu cache will be available.

cycles ← cycles + rCycles;

rCycles ← 0;

instBuffer.busyUntil ← p.ifuCache.sharedBase.busyUntil;

};

IF tx # NoFault THEN {tx ← IFUPageFaultTrap; GO TO trapEarly};

A page fault has occurred during an operand byte fetch. Go freak out.

Move the four bytes from the fetched word into the instruction buffer.

start ← WordToInt[newPC] - WordToInt[instBuffer.basePC];

instBuffer[start/bytesPerWord + 1] ← word;

instBuffer.validWords ← instBuffer.validWords + 1;

valid ← valid + bytesPerWord;

};

Move the bytes from the instruction buffer to rest.

IF nBytes > 1 THEN TRUSTED {

RRA: highest byte is first in stream, and so on

instBufferPtr: LONG POINTER TO PACKED ARRAY InstBufferIndex OF Byte ← LOOPHOLE[@instBuffer[0]];

SELECT nBytes FROM

5 => {

rest ← BytesToWord[[

instBufferPtr[MaskInstBufferIndex[start+1]],

instBufferPtr[MaskInstBufferIndex[start+2]],

instBufferPtr[MaskInstBufferIndex[start+3]],

instBufferPtr[MaskInstBufferIndex[start+4]]

]];

};

3 => {

rest ← CardToWord[HalfToCard[BytesToHalf[[

instBufferPtr[MaskInstBufferIndex[start+1]],

instBufferPtr[MaskInstBufferIndex[start+2]]

]]]];

};

2 => {

rest ← CardToWord[ByteToCard[

instBufferPtr[MaskInstBufferIndex[start+1]]

]];

};

ENDCASE => ERROR;

};

newPC ← IntToWord[WordToInt[newPC] + nBytes];

valid ← valid - nBytes;

Try to fetch ahead if the bus is not busy, and we need more bytes in the instruction buffer. Note that this may cause more contention for the bus than we might like, but that the bytes should be ready at the proper time in straight-line code.

IF instBuffer.busyUntil <= initCycles+cycles THEN {

The instruction buffer is not trying to fetch anything from the cache, so we can use the cache to try to get the next instruction.

IF valid <= max-bytesPerWord THEN {

There is room to get more words into the buffer

fetchPC: Word ← IntToWord[WordToInt[newPC] + valid];

IF instBuffer.validWords = instBuffer.max THEN FlushInstWord[];

We have completely filled the inst buffer, but still need more, so shift out the lowest address word, and update the base address and # of bytes valid.

[wordAddr, rbi] ← BytePCToWordAddress[[fetchPC]];

IF rbi # 0 THEN ERROR; -- rats! we blew it!

[word, tx, rCycles] ← p.ifuCache.fetch[p.ifuCache, wordAddr, initCycles+cycles];

IF rCycles # 0 THEN {

The reject cycles occur in the background, and are not counted towards elapsed cycles. But we do keep track of the next time that the ifu cache will be available.

instBuffer.busyUntil ← p.ifuCache.sharedBase.busyUntil;

p.stats.lookaheadRejects ← p.stats.lookaheadRejects + rCycles;

};

IF tx = NoFault THEN {

The fetch was successful, so we can put more bytes into the inst buffer. Note that we restrict the max bytes in the buffer to be a multiple of bytesPerWord. There will be no wraparound in the buffer for this word fetch.

instBuffer[instBuffer.validWords] ← word;

instBuffer.validWords ← instBuffer.validWords + 1;

};

tx ← NoFault;

Make sure that the readahead fault does not show up later.

};

instBuffer.nextPC ← newPC;

Update the cycle count to allow EU cache to delay properly for busy bus

p.stats.cycles ← p.stats.cycles+cycles;

p.stats.instBufferCycles ← p.stats.instBufferCycles+cycles;

cycles ← 0;

Finally, we get to execute the instruction we fetched.

[newPC, rtnPC, control] ← LizardLiver.Execute[p, thisPC, inst, rest];

status ← LOOPHOLE[p.regs[ifuStatus]];

Refetch status in case it changed during the instruction

EXITS

trapEarly => CauseTrap[tx];

};

thisPC ← newPC;

SELECT control FROM

nextInst => {};

Continue with the new PC. This will be the case even if we are branching. A mispredicted jump will have cleared the instruction buffer in LizardLiver.Execute.

doSwitch =>

Continue with the new PC, but also flush the instruction buffer. This is the case for mispredicted jumps, or for stack jumps.

ForceBufferEmpty[];

doCall, doAbort, doReturn => {

ForceBufferEmpty[];

IF control = doReturn

THEN {

currentSize: NAT ← p.stackEntries;

SELECT currentSize FROM

= 0 => {

SIGNAL OutsideEnvelope["IFU control stack empty on return!"];

thisPC ← TrapPC[StackUnderflowTrap];

GO TO badGnus;

};

> IFUStackSize => {

SIGNAL OutsideEnvelope["IFU control stack is too full during return!"];

GO TO badGnus;

};

> IFUOverflow =>

IF status.trapsEnabled THEN {

SIGNAL OutsideEnvelope["IFU control stack is too full during return!"];

GO TO badGnus;

};

ENDCASE;

p.stackEntries ← currentSize - 1;

thisPC ← p.ifuStack[youngest].pc;

p.regs[ifuL] ← regL ← RegToWord[p.ifuStack[youngest].regL];

p.youngest ← youngest ← (youngest + (LizardIFUStackSize - 1)) MOD LizardIFUStackSize;

EXITS badGnus => {};

}

ELSE {

For doCall & doAbort, just push the PC, the overflow checking was done by the instruction execution.

newSize: NAT ← p.stackEntries+1;

IF newSize > IFUStackSize

THEN SIGNAL OutsideEnvelope["IFU control stack too full!"]

ELSE {

p.stackEntries ← newSize;

p.youngest ← youngest ← (youngest + 1) MOD LizardIFUStackSize;

};

p.ifuStack[youngest] ← [rtnPC, WordToReg[regL]];

ForceBufferEmpty[];

IF tx # NoFault THEN

We get here due to an early fault (like an IFUPageFault). The idea is to log the completion of the instruction, although no instruction was really started! This aids "shadow" simulations like McCreight's.

IF p.logger # NIL AND p.logger.instDone # NIL THEN

p.logger.instDone[p.logger.data, p, newPC, rtnPC, doAbort, cycles];

};

IF p.lastCallRtn + 3 >= p.stats.instructions THEN {

We must delay to allow changes in the IFU stack from the last call/return to become stable. This test is somewhat conservative, since multi-cycle instructions will allow the ifuStack to be stable sooner than this. However, this is a cheap way to estimate this extra delay.

cycles ← cycles + 2;

p.stats.returnInterlockCycles ← p.stats.returnInterlockCycles + 2;

};

p.lastCallRtn ← p.stats.instructions;

};

ENDCASE => ERROR;

p.regs[ifuPC] ← thisPC;

p.stats.cycles ← p.stats.cycles+cycles;

p.stats.instBytes ← p.stats.instBytes+nBytes;

};

Utility Routines for manipulating data (incomplete)

DoALUOp: PUBLIC PROC [processor: Processor, wordA,wordB: Word, op: ALUOps, trap: TrapIndex] RETURNS [res: Word, resCode: TrapIndex ← NoFault] = {

carryOut, overflow, boundsCheck, illegalLisp: BOOL ← FALSE;

carryIn: BOOL ← processor.euCarry;

Calculate the result word and carryOut

SELECT op FROM

SAdd => {

[res, ] ← WordCarryAdd[wordA, wordB, carryIn];

SELECT ALUHelper[wordA[0], wordB[0], res[0]] FROM

a0b0c1, a1b1c0 => {overflow ← TRUE};

ENDCASE;

};

UAdd =>

[res, carryOut] ← WordCarryAdd[wordA, wordB, carryIn];

SSub => {

[res, ] ← WordCarryAdd[wordA, DragNot[wordB], NOT carryIn];

SELECT ALUHelper[wordA[0], wordB[0], res[0]] FROM

a0b1c1, a1b0c0 => overflow ← TRUE;

ENDCASE;

};

USub => {

[res, carryOut] ← WordCarryAdd[wordA, DragNot[wordB], NOT carryIn];

carryOut ← NOT carryOut;

};

LAdd, LSub => {

res ← IF op = LAdd THEN VanillaAdd[wordA, wordB] ELSE VanillaSub[wordA, wordB];

IF res[0] # res[1] OR res[1] # res[2] OR wordA[0] # wordA[1] OR wordA[1] # wordA[2] OR wordB[0] # wordB[1] OR wordB[1] # wordB[2] THEN

illegalLisp ← TRUE

};

VAdd => {res ← VanillaAdd[wordA, wordB]; carryOut ← carryIn};

VSub => {res ← VanillaSub[wordA, wordB]; carryOut ← carryIn};

And => {res ← DragAnd[wordA, wordB]; RETURN};

Or => {res ← DragOr[wordA, wordB]; RETURN};

Xor => {res ← DragXor[wordA, wordB]; RETURN};

BndChk => {

boundsCheck ← WordCarryAdd[wordA, DragNot[wordB], TRUE].carryOut;

res ← wordA;

carryOut ← carryIn;

};

ENDCASE => ERROR;

Detarmine the returned condition code

resCode ← (IF (SELECT trap FROM

ALUCondFalse => FALSE,

ALUCondEZ => res = ZerosWord,

ALUCondLZ => WordToInt[res] < 0,

ALUCondLE => WordToInt[res] <= 0,

ModeFault => TRUE,

ALUCondNE => res = ZerosWord,

ALUCondGE => WordToInt[res] >= 0,

ALUCondGZ => WordToInt[res] > 0,

ALUCondOver => overflow,

ALUCondBC => boundsCheck,

ALUCondIL => illegalLisp,

ALUCondDO => FALSE, -- not yet implemented

ALUCondNotOver => NOT overflow,

ALUCondNB => NOT boundsCheck,

ALUCondNI => NOT illegalLisp,

AddressCheckFault => WordToCard[res]<KernalLimit,

ENDCASE => ERROR) THEN trap ELSE NoFault);

IF resCode = NoFault THEN processor.euCarry ← carryOut;

There is the possibility that the carry out should NOT be set if the returned code # ALUCondFalse. One of these days we should find out if this is true.

};

WordCarryAdd: PROC [wordA, wordB: Word, carryIn: BOOL] RETURNS [wordC: Word, carryOut: BOOL ← FALSE] = {

cardA: CARD ← WordToCard[wordA];

cardB: CARD ← WordToCard[wordB];

cardC: CARD ← cardA+cardB;

SELECT cardC FROM

< cardA, < cardB => carryOut ← TRUE;

ENDCASE;

IF carryIn THEN {cardC ← cardC+1; IF cardC = 0 THEN carryOut ← TRUE};

wordC ← CardToWord[cardC];

};

FieldUnit: PUBLIC PROC [Left,Right: Word, fd: FieldDescriptor] RETURNS [out: Word] = {

shifter: Word = DoubleWordShiftLeft[Left, Right, fd.shift];

The shifter output has the input double word shifted left by fd.shift bits

mask: Word ← IF fd.mask >= 32 THEN OnesWord ELSE SingleWordShiftRight[OnesWord, 32-fd.mask];

The default mask has fd.mask 1s right-justified in the word

IF fd.insert

THEN {

mask ← DragAnd[mask, SingleWordShiftLeft[OnesWord, fd.shift]];

fd.insert => clear rightmost fd.shift bits of the mask

out ← DragOr[DragAnd[mask, shifter], DragAnd[DragNot[mask], Right]];

1 bits in the mask select the shifter output

0 bits in the mask select bits from Right

}

ELSE

out ← DragAnd[mask, shifter];

1 bits in the mask select the shifter output

};

WillPushOverflow: PUBLIC PROC [processor: Processor] RETURNS [BOOL] = {

IF processor.stackEntries < IFUOverflow-1 THEN RETURN [FALSE] ELSE {

status: IFUStatusRec ← LOOPHOLE[processor.regs[ifuStatus]];

SELECT processor.stackEntries FROM

= IFUOverflow-1 => {};

< IFUStackSize =>

IF status.trapsEnabled THEN

SIGNAL OutsideEnvelope["IFU control stack too full with traps enabled"];

ENDCASE =>

SIGNAL OutsideEnvelope["IFU control stack has more than IFUStackSize entries"];

RETURN [status.trapsEnabled];

};

OutsideEnvelope: PUBLIC SIGNAL [explanation: Rope.ROPE] = CODE;

END.