LizardHeartImpl.mesa
Copyright © 1984, 1985, 1986 by Xerox Corporation. All rights reserved.
Russ Atkinson (RRA) April 1, 1986 5:13:25 pm PST
McCreight, January 30, 1986 4:35:51 pm PST
DIRECTORY
Basics USING [BITAND, CARD],
DragOpsCross USING [Byte, bytesPerWord, EUStackSize, FieldDescriptor, FourBytes, IFUOverflow, IFUStackSize, IFUStatusRec, Inst, KernalLimit, OnesWord, ProcessorRegister, TrapIndex, Word, ZerosWord],
DragOpsCrossUtils USING [BytePCToWordAddress, ByteToCard, BytesToHalf, BytesToWord, CardToWord, DoubleWordShiftLeft, DragAnd, DragNot, DragOr, DragXor, HalfToCard, IntToWord, SingleWordShiftLeft, SingleWordShiftRight, TrapIndexToBytePC, VanillaAdd, VanillaSub, WordAddressToBytePC, WordToBytes, WordToCard, WordToInt],
LizardHeart USING [ALUHelper, ALUOps, CacheBase, ChangeLogger, Control, InstBuffer, InstBufferRep, LizardIFUStackIndex, LizardIFUStackSize, NoFault, Processor, ProcessorRep, RegChangeProc, RegToWord, StackPlus, TrapPC, WordToReg],
LizardLiver USING [Execute],
PrincOpsUtils USING [LongCopy],
Rope USING [ROPE];
LizardHeartImpl: CEDAR PROGRAM
IMPORTS Basics, DragOpsCrossUtils, LizardHeart, LizardLiver, PrincOpsUtils
EXPORTS LizardHeart
= BEGIN OPEN DragOpsCross, DragOpsCrossUtils, LizardHeart;
CARD: TYPE = Basics.CARD;
CurrentIFUVersion: [0..255] ← 1;
The current version # of the IFU, stored in status.version
InstBufferIndex: TYPE = [0..64);
max # of words allowed
WordsInBuffer: NAT ← 8;
# of Dragon words in the instruction buffer (power of two)
MaxMask: CARDINAL ← WordsInBuffer*bytesPerWord - 1;
Mask for valid bytes in the buffer
MaskInstBufferIndex: PROC [index: CARDINAL] RETURNS [InstBufferIndex] = INLINE {
RETURN [LOOPHOLE[Basics.BITAND[index, MaxMask]]];
};
NewProcessor: PUBLIC PROC [ifuCache,euCache: CacheBase, logger: ChangeLogger] RETURNS [p: Processor] = {
initStatus: IFUStatusRec ← [version: CurrentIFUVersion, rescheduleKeep: TRUE];
p ← NEW[ProcessorRep];
p.ifuCache ← ifuCache;
p.euCache ← euCache;
p.logger ← logger;
p.eldest ← 1;
p.youngest ← 1;
p.stackEntries ← 0;
p.regs[ifuPC] ← TrapPC[ResetTrap];
p.regsGood[euConstant] ← TRUE; -- constant register 0 = 0 (it's a ROM)
p.regsGood[ifuStatus] ← TRUE;
p.regs[ifuStatus] ← LOOPHOLE[initStatus];
p.instBuffer ← NEW[InstBufferRep[WordsInBuffer]];
};
FlushInstBuffer: PUBLIC PROC [processor: Processor] = {
This routine flushes the instruction buffer. This occurs on a bad jump prediction, a trap, or an xop.
instBuffer: InstBuffer = processor.instBuffer;
delta: INT = WordToInt[instBuffer.nextPC] - WordToInt[instBuffer.basePC];
IF delta > 0 THEN {
validBytes: INT = instBuffer.validWords*bytesPerWord;
IF validBytes > delta THEN
instBuffer.bytesDiscarded ← instBuffer.bytesDiscarded + validBytes-delta;
};
instBuffer.validWords ← 0;
instBuffer.forcedEmpty ← instBuffer.forcedEmpty + 1;
};
InstructionExecute: PUBLIC PROC [processor: Processor] = {
thisPC: Word ← processor.regs[ifuPC];
newPC: Word ← thisPC;
rtnPC: Word;
control: Control ← nextInst;
inst: Inst;
rest: Word;
oldStatus: IFUStatusRec =LOOPHOLE[processor.regs[ifuStatus]];
status: IFUStatusRec ← oldStatus;
regL: Word ← processor.regs[ifuL];
cycles: CARDINAL ← 0;
rCycles: CARDINAL ← 0;
initCycles: INT ← processor.stats.cycles;
youngest: CARDINAL ← processor.youngest;
nBytes: CARDINAL ← 0;
word: Word;
wordAddr: Word;
rbi: [0..bytesPerWord);
tx: TrapIndex ← NoFault;
valid: InstBufferIndex ← 0; -- valid bytes in inst buffer AFTER newPC
p: Processor = processor; -- for a short name
instBuffer: InstBuffer ← p.instBuffer;
instBufferPtr: LONG POINTER;
max: CARDINAL = instBuffer.max * bytesPerWord; -- max bytes in buffer
ForceBufferEmpty: PROC = INLINE {
ForceBufferEmpty forces the instruction buffer to be empty. It is a faster local version of FlushInstBuffer.
instBuffer.validWords ← 0;
instBuffer.forcedEmpty ← instBuffer.forcedEmpty + 1;
IF valid # 0 THEN {
instBuffer.bytesDiscarded ← instBuffer.bytesDiscarded + valid;
valid ← 0;
};
};
IsEUStackOverflow: PROC [regS: ProcessorRegister] RETURNS [BOOL] = INLINE {
RETURN [ LOOPHOLE[LOOPHOLE[regS, CARDINAL] -LOOPHOLE[WordToReg[processor.regs[ifuSLimit]], CARDINAL]+EUStackSize, CARDINAL] MOD EUStackSize IN [0..16) ];
};
CauseTrap: PROC [code: TrapIndex] = {
All traps push the status onto the EU stack, and set the status to have traps disabled and be in kernel mode. If an IFU stack overflow would result, then that has precedence over anything else. If an EU stack overflow would result from the push, then that is ignored.
This routine was cribbed from LizardHeartImpl and simplified.
regS: ProcessorRegister ← StackPlus[WordToReg[p.regs[ifuS]], 1];
oldStatus: Word = LOOPHOLE[status];
rtnPC ← thisPC;
IF WillPushOverflow[p] THEN
IFUStackOverflow has precedence over other traps
code ← IFUStackOverflowTrap;
newPC ← TrapPC[code];
control ← doAbort;
p.regs[ifuS] ← RegToWord[regS];
IF p.logger # NIL THEN {
regChange: RegChangeProc ← p.logger.regChange;
IF regChange # NIL THEN regChange[p.logger.data, p, regS, p.regs[regS], oldStatus];
};
p.regs[regS] ← oldStatus;
status.trapsEnabled ← FALSE;
status.userMode ← FALSE;
p.regs[ifuStatus] ← LOOPHOLE[status];
cycles ← cycles + 4; -- a rough guess
tx ← code;
IF code = IFUStackOverflowTrap THEN p.stats.stackOver ← p.stats.stackOver + 1;
};
FlushInstWord: PROC = INLINE {
Flushes one word from the inst buffer, shifting the words down so that instBuffer[0] corresponds to inst.basePC. We also update instBuffer.basePC forward one word, and instBuffer.validWords backwards one word.
vw: CARDINAL ← instBuffer.validWords - 1;
instBuffer.basePC ← IntToWord[WordToInt[instBuffer.basePC] + bytesPerWord];
instBuffer.validWords ← instBuffer.validWords - 1;
IF vw # 0 THEN TRUSTED {
PrincOpsUtils.LongCopy[from: @instBuffer[1], nwords: instBuffer.validWords*SIZE[Word], to: @instBuffer[0]];
};
instBuffer.validWords ← vw;
};
IF p.resetRequested THEN {
Force the machine to its reset state. The state we force it to here is highly speculative.
status ← [version: CurrentIFUVersion, rescheduleKeep: TRUE];
p.regs[ifuStatus] ← LOOPHOLE[status];
thisPC ← newPC ← TrapPC[ResetTrap];
p.youngest ← youngest ← 0;
p.eldest ← 1;
p.stackEntries ← 0;
p.resetRequested ← FALSE;
p.regs[ifuL] ← CardToWord[1];
p.regs[ifuS] ← ZerosWord;
p.euCarry ← FALSE;
ForceBufferEmpty[];
p.instBuffer ← NEW[InstBufferRep[WordsInBuffer]];
p.instBuffer.forcedEmpty ← instBuffer.forcedEmpty;
instBuffer ← p.instBuffer;
p.stats ← [];
p.ifuCache.stats ← [];
p.euCache.stats ← [];
initCycles ← cycles ← 0;
};
TRUSTED {
instBufferPtr ← @instBuffer[0];
};
IF newPC # instBuffer.nextPC
THEN ForceBufferEmpty[]
If the PC is not the next one in the sequence then flush the instruction buffer
ELSE {
Compute the number of valid bytes in the buffer.
used: INT ← WordToInt[newPC] - WordToInt[instBuffer.basePC];
valid ← instBuffer.validWords*bytesPerWord;
IF used > 0 AND used < valid THEN valid ← valid - used ELSE valid ← 0;
};
{
Fetch the next instruction byte in the instruction buffer, causing a refill if the instruction buffer is empty.
start: InstBufferIndex ← 0; -- byte index in buffer of newPC
Between instructions we have the following priority:
EUStackOverflowTrap > RescheduleTrap > IFUPageFaultTrap
IF status.trapsEnabled THEN
SELECT TRUE FROM
IsEUStackOverflow[WordToReg[p.regs[ifuS]]] => {
There was an EU stack overflow caused by the previous instruction. This may be because a previous SIP or RETK instruction enabled traps. Should this ever really happen?
tx ← EUStackOverflowTrap;
GO TO trapEarly;
};
status.reschedule => {
Take a reschedule trap.
tx ← RescheduleTrap;
GO TO trapEarly;
};
ENDCASE;
IF valid = 0 THEN {
need to fill the inst buffer from scratch
[wordAddr, rbi] ← BytePCToWordAddress[[newPC]];
[word, tx, rCycles] ← p.ifuCache.fetch[p.ifuCache, wordAddr, initCycles+cycles];
cycles ← cycles + 1;
The cache takes a cycle to come back with the answer
IF tx # NoFault THEN {tx ← IFUPageFaultTrap; GO TO trapEarly};
A page fault has occurred during an instruction fetch. Go freak out.
IF rCycles # 0 THEN {
Account for the number of reject cycles. Also keep track of the next time that the ifu cache will be available.
cycles ← cycles + rCycles;
instBuffer.busyUntil ← p.ifuCache.sharedBase.busyUntil;
};
valid ← bytesPerWord - rbi;
instBuffer.validWords ← 1;
instBuffer.basePC ← WordAddressToBytePC[wordAddr];
instBuffer[0] ← word;
};
At this point we have at least one valid byte in the buffer. This is enough to determine the instruction and how many bytes must follow.
start ← WordToInt[newPC] - WordToInt[instBuffer.basePC];
IF start >= max THEN ERROR;
word ← instBuffer[start/bytesPerWord];
inst ← LOOPHOLE[WordToBytes[word][start MOD bytesPerWord]];
nBytes ← LOOPHOLE[inst, CARDINAL] / 64;
IF nBytes = 0 THEN nBytes ← IF LOOPHOLE[inst, CARDINAL] < 40B THEN 1 ELSE 5;
IF valid < nBytes THEN {
We must make sure that we have enough bytes in the instruction buffer to execute the whole instruction. This simplifies things later, to be sure. Things are a little simpler because we know we are fetching a full word that will be appended onto the current valid stuff.
IF instBuffer.validWords = instBuffer.max THEN FlushInstWord[];
[wordAddr, rbi] ← BytePCToWordAddress[[IntToWord[WordToInt[newPC] + valid]]];
[word, tx, rCycles] ← p.ifuCache.fetch[p.ifuCache, wordAddr, initCycles+cycles];
cycles ← cycles + 1;
Account for cache access time
IF rCycles # 0 THEN {
Account for the number of reject cycles. Also keep track of the next time that the ifu cache will be available.
cycles ← cycles + rCycles;
rCycles ← 0;
instBuffer.busyUntil ← p.ifuCache.sharedBase.busyUntil;
};
IF tx # NoFault THEN {tx ← IFUPageFaultTrap; GO TO trapEarly};
A page fault has occurred during an operand byte fetch. Go freak out.
Move the four bytes from the fetched word into the instruction buffer.
start ← WordToInt[newPC] - WordToInt[instBuffer.basePC];
instBuffer[start/bytesPerWord + 1] ← word;
instBuffer.validWords ← instBuffer.validWords + 1;
valid ← valid + bytesPerWord;
};
Move the bytes from the instruction buffer to rest.
IF nBytes > 1 THEN TRUSTED {
RRA: highest byte is first in stream, and so on
instBufferPtr: LONG POINTER TO PACKED ARRAY InstBufferIndex OF Byte ← LOOPHOLE[@instBuffer[0]];
SELECT nBytes FROM
5 => {
rest ← BytesToWord[[
instBufferPtr[MaskInstBufferIndex[start+1]],
instBufferPtr[MaskInstBufferIndex[start+2]],
instBufferPtr[MaskInstBufferIndex[start+3]],
instBufferPtr[MaskInstBufferIndex[start+4]]
]];
};
3 => {
rest ← CardToWord[HalfToCard[BytesToHalf[[
instBufferPtr[MaskInstBufferIndex[start+1]],
instBufferPtr[MaskInstBufferIndex[start+2]]
]]]];
};
2 => {
rest ← CardToWord[ByteToCard[
instBufferPtr[MaskInstBufferIndex[start+1]]
]];
};
ENDCASE => ERROR;
};
newPC ← IntToWord[WordToInt[newPC] + nBytes];
valid ← valid - nBytes;
Try to fetch ahead if the bus is not busy, and we need more bytes in the instruction buffer. Note that this may cause more contention for the bus than we might like, but that the bytes should be ready at the proper time in straight-line code.
IF instBuffer.busyUntil <= initCycles+cycles THEN {
The instruction buffer is not trying to fetch anything from the cache, so we can use the cache to try to get the next instruction.
IF valid <= max-bytesPerWord THEN {
There is room to get more words into the buffer
fetchPC: Word ← IntToWord[WordToInt[newPC] + valid];
IF instBuffer.validWords = instBuffer.max THEN FlushInstWord[];
We have completely filled the inst buffer, but still need more, so shift out the lowest address word, and update the base address and # of bytes valid.
[wordAddr, rbi] ← BytePCToWordAddress[[fetchPC]];
IF rbi # 0 THEN ERROR; -- rats! we blew it!
[word, tx, rCycles] ← p.ifuCache.fetch[p.ifuCache, wordAddr, initCycles+cycles];
IF rCycles # 0 THEN {
The reject cycles occur in the background, and are not counted towards elapsed cycles. But we do keep track of the next time that the ifu cache will be available.
instBuffer.busyUntil ← p.ifuCache.sharedBase.busyUntil;
p.stats.lookaheadRejects ← p.stats.lookaheadRejects + rCycles;
};
IF tx = NoFault THEN {
The fetch was successful, so we can put more bytes into the inst buffer. Note that we restrict the max bytes in the buffer to be a multiple of bytesPerWord. There will be no wraparound in the buffer for this word fetch.
instBuffer[instBuffer.validWords] ← word;
instBuffer.validWords ← instBuffer.validWords + 1;
};
tx ← NoFault;
Make sure that the readahead fault does not show up later.
};
};
instBuffer.nextPC ← newPC;
Update the cycle count to allow EU cache to delay properly for busy bus
p.stats.cycles ← p.stats.cycles+cycles;
p.stats.instBufferCycles ← p.stats.instBufferCycles+cycles;
cycles ← 0;
Finally, we get to execute the instruction we fetched.
[newPC, rtnPC, control] ← LizardLiver.Execute[p, thisPC, inst, rest];
status ← LOOPHOLE[p.regs[ifuStatus]];
Refetch status in case it changed during the instruction
EXITS
trapEarly => CauseTrap[tx];
};
thisPC ← newPC;
SELECT control FROM
nextInst => {};
Continue with the new PC. This will be the case even if we are branching. A mispredicted jump will have cleared the instruction buffer in LizardLiver.Execute.
doSwitch =>
Continue with the new PC, but also flush the instruction buffer. This is the case for mispredicted jumps, or for stack jumps.
ForceBufferEmpty[];
doCall, doAbort, doReturn => {
ForceBufferEmpty[];
IF control = doReturn
THEN {
currentSize: NAT ← p.stackEntries;
SELECT currentSize FROM
= 0 => {
SIGNAL OutsideEnvelope["IFU control stack empty on return!"];
thisPC ← TrapPC[StackUnderflowTrap];
GO TO badGnus;
};
> IFUStackSize => {
SIGNAL OutsideEnvelope["IFU control stack is too full during return!"];
GO TO badGnus;
};
> IFUOverflow =>
IF status.trapsEnabled THEN {
SIGNAL OutsideEnvelope["IFU control stack is too full during return!"];
GO TO badGnus;
};
ENDCASE;
p.stackEntries ← currentSize - 1;
thisPC ← p.ifuStack[youngest].pc;
p.regs[ifuL] ← regL ← RegToWord[p.ifuStack[youngest].regL];
p.youngest ← youngest ← (youngest + (LizardIFUStackSize - 1)) MOD LizardIFUStackSize;
EXITS badGnus => {};
}
ELSE {
For doCall & doAbort, just push the PC, the overflow checking was done by the instruction execution.
newSize: NAT ← p.stackEntries+1;
IF newSize > IFUStackSize
THEN SIGNAL OutsideEnvelope["IFU control stack too full!"]
ELSE {
p.stackEntries ← newSize;
p.youngest ← youngest ← (youngest + 1) MOD LizardIFUStackSize;
};
p.ifuStack[youngest] ← [rtnPC, WordToReg[regL]];
ForceBufferEmpty[];
IF tx # NoFault THEN
We get here due to an early fault (like an IFUPageFault). The idea is to log the completion of the instruction, although no instruction was really started! This aids "shadow" simulations like McCreight's.
IF p.logger # NIL AND p.logger.instDone # NIL THEN
p.logger.instDone[p.logger.data, p, newPC, rtnPC, doAbort, cycles];
};
IF p.lastCallRtn + 3 >= p.stats.instructions THEN {
We must delay to allow changes in the IFU stack from the last call/return to become stable. This test is somewhat conservative, since multi-cycle instructions will allow the ifuStack to be stable sooner than this. However, this is a cheap way to estimate this extra delay.
cycles ← cycles + 2;
p.stats.returnInterlockCycles ← p.stats.returnInterlockCycles + 2;
};
p.lastCallRtn ← p.stats.instructions;
};
ENDCASE => ERROR;
p.regs[ifuPC] ← thisPC;
p.stats.cycles ← p.stats.cycles+cycles;
p.stats.instBytes ← p.stats.instBytes+nBytes;
};
Utility Routines for manipulating data (incomplete)
DoALUOp: PUBLIC PROC [processor: Processor, wordA,wordB: Word, op: ALUOps, trap: TrapIndex] RETURNS [res: Word, resCode: TrapIndex ← NoFault] = {
carryOut, overflow, boundsCheck, illegalLisp: BOOLFALSE;
carryIn: BOOL ← processor.euCarry;
Calculate the result word and carryOut
SELECT op FROM
SAdd => {
[res, ] ← WordCarryAdd[wordA, wordB, carryIn];
SELECT ALUHelper[wordA[0], wordB[0], res[0]] FROM
a0b0c1, a1b1c0 => {overflow ← TRUE};
ENDCASE;
};
UAdd =>
[res, carryOut] ← WordCarryAdd[wordA, wordB, carryIn];
SSub => {
[res, ] ← WordCarryAdd[wordA, DragNot[wordB], NOT carryIn];
SELECT ALUHelper[wordA[0], wordB[0], res[0]] FROM
a0b1c1, a1b0c0 => overflow ← TRUE;
ENDCASE;
};
USub => {
[res, carryOut] ← WordCarryAdd[wordA, DragNot[wordB], NOT carryIn];
carryOut ← NOT carryOut;
};
LAdd, LSub => {
res ← IF op = LAdd THEN VanillaAdd[wordA, wordB] ELSE VanillaSub[wordA, wordB];
IF res[0] # res[1] OR res[1] # res[2] OR wordA[0] # wordA[1] OR wordA[1] # wordA[2] OR wordB[0] # wordB[1] OR wordB[1] # wordB[2] THEN
illegalLisp ← TRUE
};
VAdd => {res ← VanillaAdd[wordA, wordB]; carryOut ← carryIn};
VSub => {res ← VanillaSub[wordA, wordB]; carryOut ← carryIn};
And => {res ← DragAnd[wordA, wordB]; RETURN};
Or => {res ← DragOr[wordA, wordB]; RETURN};
Xor => {res ← DragXor[wordA, wordB]; RETURN};
BndChk => {
boundsCheck ← WordCarryAdd[wordA, DragNot[wordB], TRUE].carryOut;
res ← wordA;
carryOut ← carryIn;
};
ENDCASE => ERROR;
Detarmine the returned condition code
resCode ← (IF (SELECT trap FROM
ALUCondFalse => FALSE,
ALUCondEZ => res = ZerosWord,
ALUCondLZ => WordToInt[res] < 0,
ALUCondLE => WordToInt[res] <= 0,
ModeFault => TRUE,
ALUCondNE => res = ZerosWord,
ALUCondGE => WordToInt[res] >= 0,
ALUCondGZ => WordToInt[res] > 0,
ALUCondOver => overflow,
ALUCondBC => boundsCheck,
ALUCondIL => illegalLisp,
ALUCondDO => FALSE, -- not yet implemented
ALUCondNotOver => NOT overflow,
ALUCondNB => NOT boundsCheck,
ALUCondNI => NOT illegalLisp,
AddressCheckFault => WordToCard[res]<KernalLimit,
ENDCASE => ERROR) THEN trap ELSE NoFault);
IF resCode = NoFault THEN processor.euCarry ← carryOut;
There is the possibility that the carry out should NOT be set if the returned code # ALUCondFalse. One of these days we should find out if this is true.
};
WordCarryAdd: PROC [wordA, wordB: Word, carryIn: BOOL] RETURNS [wordC: Word, carryOut: BOOLFALSE] = {
cardA: CARD ← WordToCard[wordA];
cardB: CARD ← WordToCard[wordB];
cardC: CARD ← cardA+cardB;
SELECT cardC FROM
< cardA, < cardB => carryOut ← TRUE;
ENDCASE;
IF carryIn THEN {cardC ← cardC+1; IF cardC = 0 THEN carryOut ← TRUE};
wordC ← CardToWord[cardC];
};
FieldUnit: PUBLIC PROC [Left,Right: Word, fd: FieldDescriptor] RETURNS [out: Word] = {
shifter: Word = DoubleWordShiftLeft[Left, Right, fd.shift];
The shifter output has the input double word shifted left by fd.shift bits
mask: Word ← IF fd.mask >= 32 THEN OnesWord ELSE SingleWordShiftRight[OnesWord, 32-fd.mask];
The default mask has fd.mask 1s right-justified in the word
IF fd.insert
THEN {
mask ← DragAnd[mask, SingleWordShiftLeft[OnesWord, fd.shift]];
fd.insert => clear rightmost fd.shift bits of the mask
out ← DragOr[DragAnd[mask, shifter], DragAnd[DragNot[mask], Right]];
1 bits in the mask select the shifter output
0 bits in the mask select bits from Right
}
ELSE
out ← DragAnd[mask, shifter];
1 bits in the mask select the shifter output
};
WillPushOverflow: PUBLIC PROC [processor: Processor] RETURNS [BOOL] = {
IF processor.stackEntries < IFUOverflow-1 THEN RETURN [FALSE] ELSE {
status: IFUStatusRec ← LOOPHOLE[processor.regs[ifuStatus]];
SELECT processor.stackEntries FROM
= IFUOverflow-1 => {};
< IFUStackSize =>
IF status.trapsEnabled THEN
SIGNAL OutsideEnvelope["IFU control stack too full with traps enabled"];
ENDCASE =>
SIGNAL OutsideEnvelope["IFU control stack has more than IFUStackSize entries"];
RETURN [status.trapsEnabled];
};
};
OutsideEnvelope: PUBLIC SIGNAL [explanation: Rope.ROPE] = CODE;
END.