-- LizardToolOutputImpl.mesa
-- Copyright © 1985, 1986, 1987 by Xerox Corporation. All rights reserved.
-- Russ Atkinson (RRA) May 18, 1987 7:27:41 pm PDT
-- McCreight, January 8, 1986 12:19:22 pm PST
-- Peter Kessler November 11, 1985 3:05:59 pm PST
DIRECTORY
DragOpsCross USING [Byte, EULegalRegs, EUStackSize, FourBytes, Inst, ProcessorRegister, Word, ZerosWord],
DragOpsCrossUtils USING [AddDelta, BytePCToWordAddress, WordToCard, WordToInt],
HandCodingUtil USING [GetInstArray, GetRegArray, NameArray, RegNameArray, ToStream],
IO USING [PutF, PutF1, PutFR, PutFR1, PutRope, STREAM],
LizardCache USING [CacheBase, CacheBaseRep, CacheStats],
LizardHeart USING [IFUStackEntry, LizardIFUStackSize, Processor, ProcessorStats],
LizardToolOutput USING [InstCountTable, InstCountTableRep, LastInstMod, LastInstTable, StatSnapshot, TestAbort],
Real USING [Round],
Rope USING [ROPE, Substr],
SparseMemory USING [Base, Fetch];
LizardToolOutputImpl: CEDAR PROGRAM
IMPORTS DragOpsCrossUtils, HandCodingUtil, IO, Real, Rope, SparseMemory
EXPORTS LizardToolOutput
= BEGIN OPEN LizardToolOutput;
-- Short local names for types and constants defined in other interfaces.
Byte: TYPE = DragOpsCross.Byte;
LizardIFUStackSize: NAT = LizardHeart.LizardIFUStackSize;
Inst: TYPE = DragOpsCross.Inst;
ProcessorRegister: TYPE = DragOpsCross.ProcessorRegister;
ROPE: TYPE = Rope.ROPE;
STREAM: TYPE = IO.STREAM;
Word: TYPE = DragOpsCross.Word;
ZerosWord: Word = DragOpsCross.ZerosWord;
-- Table of (instruction, count) pairs used by ShowInstStats.
-- Although it is indexed by Inst, SortInstStats fills it by position (VAL[0], VAL[1], ...),
-- so the index is a position in the sorted order, not the instruction the entry describes.
LocalInstStats: TYPE = REF LocalInstStatsRep;
LocalInstStatsRep: TYPE = ARRAY Inst OF LocalInstStatsEntry;
LocalInstStatsEntry: TYPE = RECORD [inst: Inst, count: INT];
-- Exported procedures
ShowRegisters: PUBLIC PROC
  [out: STREAM, processor: LizardHeart.Processor, testAbort: TestAbort ← NIL] = {
  -- Prints, in order: the EU stack registers from regL up to regS, every register in
  -- DragOpsCross.EULegalRegs (four per line), and the IFU stack (youngest entry first).
  -- testAbort, when not NIL, is polled before each item is printed; if it returns TRUE
  -- the listing stops and a "Printing stopped by request" notice is written instead.
  regOut: NAT ← 0;
  regNameArray: HandCodingUtil.RegNameArray = HandCodingUtil.GetRegArray[];
  PutWord: PROC [word: Word] = {
    -- Values below 8 print as a bare number; anything else prints with the %w format
    -- followed by the signed interpretation in parentheses.
    IF DragOpsCrossUtils.WordToCard[word] < 8
      THEN PutAsCard[out, "%g", word]
      ELSE PutAsBoth[out, "%w (%d)", word];
    };
  {
    regS: CARDINAL ← DragOpsCrossUtils.WordToCard[processor.regs[ifuS]];
    regL: CARDINAL ← DragOpsCrossUtils.WordToCard[processor.regs[ifuL]];
    -- The EU stack is treated as a ring of EUStackSize registers, so regS may be
    -- numerically below regL. Bias by EUStackSize before subtracting so the
    -- unsigned arithmetic cannot underflow.
    delta: CARDINAL ← (regS + DragOpsCross.EUStackSize - regL + 1) MOD DragOpsCross.EUStackSize;
    limit: CARDINAL ← DragOpsCrossUtils.WordToCard[processor.regs[ifuSLimit]];
    PutF1I[out, "\n EU stack - regL = %g", regL];
    PutF1I[out, ", regS = %g", regS];
    PutF1I[out, ", delta = %g", delta];
    PutF1I[out, ", limit = %g", limit];
    FOR k: NAT IN [0..delta) DO
      -- k is the offset from regL; the register index wraps at EUStackSize.
      reg: ProcessorRegister ← VAL[(ORD[regL]+k) MOD DragOpsCross.EUStackSize];
      word: Word = processor.regs[reg];
      IF testAbort # NIL AND testAbort[] THEN GO TO requestStopPrint;
      IO.PutRope[out, IF (k MOD 4) = 0 THEN "\n " ELSE ", "];
      PutF1I[out, "r%g: ", k];
      PutWord[word];
      ENDLOOP;
    IO.PutRope[out, "\n"];
    };
  FOR reg: ProcessorRegister IN DragOpsCross.EULegalRegs DO
    word: Word = processor.regs[reg];
    IF testAbort # NIL AND testAbort[] THEN GO TO requestStopPrint;
    IF regOut MOD 4 = 0 THEN IO.PutRope[out, "\n"];
    regOut ← regOut + 1;
    IO.PutF1[out, " %5g ", [rope[regNameArray[reg]]]];
    PutWord[word];
    ENDLOOP;
  IO.PutRope[out, "\n"];
  -- Also show the IFU stack
  {
    youngest: NAT ← processor.youngest;
    eldest: NAT = processor.eldest;
    IF (youngest+1) MOD LizardIFUStackSize # eldest
      THEN {
        pc: Word ← processor.regs[ifuPC];
        IO.PutRope[out, "\nIFU stack (youngest first) "];
        PutAsCard[out, "(pc: %w", pc];
        IF processor.trapsEnabled
          THEN IO.PutRope[out, ", traps enabled"]
          ELSE IO.PutRope[out, ", traps disabled"];
        IF processor.userMode
          THEN IO.PutRope[out, ", user"]
          ELSE IO.PutRope[out, ", kernel"];
        IO.PutRope[out, ")\n"];
        DO
          entry: LizardHeart.IFUStackEntry = processor.ifuStack[youngest];
          IF testAbort # NIL AND testAbort[] THEN GO TO requestStopPrint;
          IO.PutRope[out, " regL: "];
          IO.PutRope[out, regNameArray[VAL[entry.status.lBase]]];
          PutAsCard[out, ", rtnPC: %w", entry.pc];
          IF entry.status.userMode
            THEN IO.PutRope[out, " (user)"]
            ELSE IO.PutRope[out, " (kernel)"];
          IF entry.status.trapsEnabled
            THEN IO.PutRope[out, " (traps enabled)"]
            ELSE IO.PutRope[out, " (traps disabled)"];
          IO.PutRope[out, "\n"];
          IF youngest = eldest THEN EXIT;
          -- Step backwards around the ring without going below zero.
          youngest ← (youngest + (LizardIFUStackSize-1)) MOD LizardIFUStackSize;
          ENDLOOP;
        }
      ELSE IO.PutRope[out, "\nIFU stack empty\n"];
    };
  EXITS requestStopPrint => {
    IO.PutRope[out, "\nPrinting stopped by request.\n\n"];
    };
  };
ShowRing: PUBLIC PROC [out: STREAM, ring: LastInstTable, mem: SparseMemory.Base, howMany: [0..LastInstMod] ← LastInstMod, testAbort: TestAbort ← NIL] = {
  -- Dump the last howMany instructions recorded in ring, most recent first.
  -- Each line shows the PC followed by the instruction at that PC, fetched from mem
  -- and printed by HandCodingUtil.ToStream.
  -- The dump stops early at the first ring slot whose PC is ZerosWord, or when
  -- testAbort (if not NIL) returns TRUE.
  which: CARDINAL ← ring.last;
  PutF1I[out, "\nLast %g instructions (most recent, first printed):\n", howMany];
  THROUGH [0..howMany) DO
    pc: Word = ring.pcArray[which];
    inst: DragOpsCross.Inst;
    rest: Word;
    IF pc = ZerosWord THEN EXIT;
    IF testAbort # NIL AND testAbort[] THEN GO TO requestStopPrint;
    PutAsCard[out, " pc: %w => ", pc];
    [inst, rest] ← GetInstAndRest[mem, pc];
    HandCodingUtil.ToStream[out, inst, rest, pc];
    IO.PutRope[out, "\n"];
    -- Step backwards around the ring. Adding LastInstMod-1 instead of subtracting 1
    -- keeps the unsigned index from underflowing when which = 0.
    which ← (which + (LastInstMod-1)) MOD LastInstMod;
    ENDLOOP;
  EXITS requestStopPrint => {
    IO.PutRope[out, "\nPrinting stopped by request.\n\n"];
    };
  };
ShowStats: PUBLIC PROC [out: STREAM, processor: LizardHeart.Processor, instStats: InstCountTable, testAbort: TestAbort ← NIL] = {
  -- Prints the processor statistics, then the IFU and EU cache statistics, then the
  -- per-instruction counts from instStats.
  -- testAbort, when not NIL, is polled once after the processor statistics (and again
  -- inside ShowInstStats); a TRUE result stops the listing.
  pStats: LizardHeart.ProcessorStats ← processor.stats;
  cycles: INT ← pStats.cycles;
  insts: INT ← pStats.instructions;
  bytes: INT ← pStats.instBytes;
  rejects: INT ← processor.ifuCache.stats.rejectCycles+processor.euCache.stats.rejectCycles;
  -- Dump the processor statistics
  PutF1I[out, "\ninstructions: %g", insts];
  PutF1I[out, ", cycles: %g", cycles];
  IF insts > 0 THEN {
    -- Ratios are printed only when their divisor is non-zero.
    PutReal1[out, "\n bytes/inst: %g", (bytes*1.0)/insts, 2];
    PutReal1[out, ", cycles/inst: %g", (cycles*1.0)/insts, 2];
    IF rejects > 0 THEN
      PutReal1[out, ", cycles/reject: %g", (cycles*1.0)/rejects, 2];
    };
  PutF1I[out, "\n instBytesUsed: %g", bytes];
  PutF1I[out, ", instBytesFlushed: %g", processor.instBuffer.bytesDiscarded];
  PutF1I[out, ", forcedEmpty: %g", processor.instBuffer.forcedEmpty];
  PutF1I[out, "\n euFetches: %g", pStats.euFetches];
  PutF1I[out, ", euStores: %g", pStats.euStores];
  {
    -- detail of jumps, calls, and predictions
    goodPredict: INT ← pStats.fallThruGood+pStats.jumpGood;
    badPredict: INT ← pStats.fallThruBad+pStats.jumpBad;
    PutF1I[out, "\n goodPredictions: %g", goodPredict];
    PutF1I[out, ", badPredictions: %g", badPredict];
    PutF1I[out, ", uncond jumps: %g", pStats.jumps];
    PutF1I[out, ", calls: %g", pStats.calls];
    PutF1I[out, "\n (fallThruGood: %g", pStats.fallThruGood];
    PutF1I[out, ", jumpGood: %g", pStats.jumpGood];
    PutF1I[out, ", fallThruBad: %g", pStats.fallThruBad];
    PutF1I[out, ", jumpBad: %g)", pStats.jumpBad];
    PutF1I[out, "\n (jumpBack - uncond: %g", pStats.jumpBackU];
    PutF1I[out, ", good predict: %g", pStats.jumpBackG];
    PutF1I[out, ", bad predict: %g)", pStats.jumpBackB];
    };
  PutF1I[out, "\n stackOver: %g", pStats.stackOver];
  PutF1I[out, ", regBusyCycles: %g", pStats.regBusyCycles];
  PutF1I[out, "\n instBufferCycles: %g", pStats.instBufferCycles];
  PutF1I[out, ", returnInterlockCycles: %g", pStats.returnInterlockCycles];
  PutF1I[out, "\n lookaheadProbes: %g", pStats.lookaheadProbes];
  PutF1I[out, ", lookaheadRejectCycles: %g", pStats.lookaheadRejects];
  IF testAbort # NIL AND testAbort[] THEN GO TO requestStopPrint;
  -- Dump the cache statistics
  IO.PutRope[out, "\n\nIFU cache - "];
  DumpCache[out, processor.ifuCache];
  IO.PutRope[out, "\nEU cache - "];
  DumpCache[out, processor.euCache];
  IO.PutRope[out, "\n"];
  -- Dump the instruction statistics
  ShowInstStats[out, instStats, testAbort];
  IO.PutRope[out, "\n"];
  EXITS requestStopPrint => {
    IO.PutRope[out, "\nPrinting stopped by request.\n\n"];
    };
  };
ShowBiasedStats: PUBLIC PROC [out: STREAM, old, new: StatSnapshot, testAbort:TestAbort] = {
  -- Prints the statistics accumulated between two snapshots: every figure shown is
  -- the value in new minus the corresponding value in old.
  -- The layout follows ShowStats: processor statistics, IFU and EU cache statistics,
  -- then per-instruction counts.
  -- testAbort, when not NIL, is polled once after the processor statistics (and again
  -- inside ShowInstStats); a TRUE result stops the listing.
  cycles: INT ← new.pStats.cycles - old.pStats.cycles;
  insts: INT ← new.pStats.instructions - old.pStats.instructions;
  bytes: INT ← new.pStats.instBytes - old.pStats.instBytes;
  ifuRejects: INT ← new.ifuStats.rejectCycles - old.ifuStats.rejectCycles;
  euRejects: INT ← new.euStats.rejectCycles - old.euStats.rejectCycles;
  rejects: INT ← ifuRejects + euRejects;
  euFetches: INT ← new.pStats.euFetches - old.pStats.euFetches;
  euStores: INT ← new.pStats.euStores - old.pStats.euStores;
  discarded: INT ← new.discarded - old.discarded;
  forcedEmpty: INT ← new.forcedEmpty - old.forcedEmpty;
  -- Dump the processor statistics
  PutF1I[out, "instructions: %g", insts];
  PutF1I[out, ", cycles: %g", cycles];
  IF insts > 0 THEN {
    -- Ratios are printed only when their divisor is positive.
    PutReal1[out, "\n bytes/inst: %g", (bytes*1.0)/insts, 2];
    PutReal1[out, ", cycles/inst: %g", (cycles*1.0)/insts, 2];
    IF rejects > 0 THEN
      PutReal1[out, ", cycles/reject: %g", (cycles*1.0)/rejects, 2];
    };
  PutF1I[out, "\n instBytesUsed: %g", bytes];
  PutF1I[out, ", instBytesFlushed: %g", discarded];
  PutF1I[out, ", forcedEmpty: %g", forcedEmpty];
  PutF1I[out, "\n euFetches: %g", euFetches];
  PutF1I[out, ", euStores: %g", euStores];
  {
    -- detail of jumps, calls, and predictions
    fallThruGood: INT ← new.pStats.fallThruGood - old.pStats.fallThruGood;
    jumpGood: INT ← new.pStats.jumpGood - old.pStats.jumpGood;
    fallThruBad: INT ← new.pStats.fallThruBad - old.pStats.fallThruBad;
    jumpBad: INT ← new.pStats.jumpBad - old.pStats.jumpBad;
    goodPredict: INT ← fallThruGood+jumpGood;
    badPredict: INT ← fallThruBad+jumpBad;
    jumpBackU: INT ← new.pStats.jumpBackU - old.pStats.jumpBackU;
    jumpBackG: INT ← new.pStats.jumpBackG - old.pStats.jumpBackG;
    jumpBackB: INT ← new.pStats.jumpBackB - old.pStats.jumpBackB;
    PutF1I[out, "\n goodPredictions: %g", goodPredict];
    PutF1I[out, ", badPredictions: %g", badPredict];
    PutF1I[out, ", uncond jumps: %g", new.pStats.jumps - old.pStats.jumps];
    PutF1I[out, ", calls: %g", new.pStats.calls - old.pStats.calls];
    PutF1I[out, "\n (fallThruGood: %g", fallThruGood];
    PutF1I[out, ", jumpGood: %g", jumpGood];
    PutF1I[out, ", fallThruBad: %g", fallThruBad];
    PutF1I[out, ", jumpBad: %g)", jumpBad];
    PutF1I[out, "\n (jumpBack - uncond: %g", jumpBackU];
    PutF1I[out, ", good predict: %g", jumpBackG];
    PutF1I[out, ", bad predict: %g)", jumpBackB];
    };
  PutF1I[out, "\n stackOver: %g",
    new.pStats.stackOver - old.pStats.stackOver];
  PutF1I[out, ", regBusyCycles: %g",
    new.pStats.regBusyCycles - old.pStats.regBusyCycles];
  PutF1I[out, "\n instBufferCycles: %g",
    new.pStats.instBufferCycles - old.pStats.instBufferCycles];
  PutF1I[out, ", returnInterlockCycles: %g",
    new.pStats.returnInterlockCycles - old.pStats.returnInterlockCycles];
  PutF1I[out, "\n lookaheadProbes: %g",
    new.pStats.lookaheadProbes - old.pStats.lookaheadProbes];
  PutF1I[out, ", lookaheadRejectCycles: %g",
    new.pStats.lookaheadRejects - old.pStats.lookaheadRejects];
  IF testAbort # NIL AND testAbort[] THEN GO TO requestStopPrint;
  -- Dump the cache statistics
  IO.PutRope[out, "\n\nIFU cache - "];
  ShowBiasedCache[out, old.ifuStats, new.ifuStats];
  IO.PutRope[out, "\nEU cache - "];
  ShowBiasedCache[out, old.euStats, new.euStats];
  IO.PutRope[out, "\n"];
  -- Dump the instruction statistics
  {
    -- Build a table of per-instruction differences so ShowInstStats can be reused.
    biasedInstTable: InstCountTable ← NEW[InstCountTableRep];
    FOR inst: Inst IN Inst DO
      biasedInstTable[inst] ← new.iStats[inst] - old.iStats[inst];
      ENDLOOP;
    ShowInstStats[out, biasedInstTable, testAbort];
    };
  IO.PutRope[out, "\n"];
  EXITS requestStopPrint => {
    IO.PutRope[out, "\nPrinting stopped by request.\n\n"];
    };
  };
-- Internal procedures
PutAsBoth: PROC [out: STREAM, format: ROPE, word: Word] = {
-- Writes word to out through a two-argument format: the first argument is the word
-- converted by WordToCard, the second is the same word converted by WordToInt.
IO.PutF[out, format,
[cardinal[DragOpsCrossUtils.WordToCard[word]]],
[integer[DragOpsCrossUtils.WordToInt[word]]]];
};
PutAsCard: PROC [out: STREAM, format: ROPE, word: Word] = {
-- Writes word to out through a one-argument format, passing the word converted by WordToCard.
IO.PutF1[out, format, [cardinal[DragOpsCrossUtils.WordToCard[word]]]];
};
PutF1I: PROC [out: STREAM, format: ROPE, int: INT] = {
-- Writes a single INT to out through a one-argument format.
IO.PutF1[out, format, [integer[int]]];
};
PutReal1: PROC [out: STREAM, format: ROPE, real: REAL, fractionPlaces: [0..4] ← 1] = {
  -- Writes real to out as a fixed-point decimal with fractionPlaces digits after the point.
  -- The value is scaled by 10^fractionPlaces and rounded, then printed as integer parts.
  -- Assumes that %g is used in the format, since the number is passed as an
  -- integer (fractionPlaces = 0) or as a preformatted rope (otherwise).
  -- Negative values are printed as a minus sign followed by the magnitude, so the
  -- fraction digits are never negative and the sign survives when the whole part is 0.
  fact: NAT = SELECT fractionPlaces FROM
    0 => 1, 1 => 10, 2 => 100, 3 => 1000, 4 => 10000, ENDCASE => ERROR;
  scaled: INT = Real.Round[real*fact];
  magnitude: INT = ABS[scaled];
  whole: INT = magnitude / fact;
  fract: NAT = magnitude MOD fact;
  sign: ROPE = IF scaled < 0 THEN "-" ELSE "";
  form: ROPE ← NIL;
  SELECT fractionPlaces FROM
    0 => {PutF1I[out, format, scaled]; RETURN};
    -- The zero-padded width keeps leading zeros in the fraction (e.g. 1.05, not 1.5).
    1 => form ← "%g%g.%01g";
    2 => form ← "%g%g.%02g";
    3 => form ← "%g%g.%03g";
    4 => form ← "%g%g.%04g";
    ENDCASE => ERROR;
  IO.PutF1[out, format, [rope[IO.PutFR[form, [rope[sign]], [integer[whole]], [integer[fract]] ]]]];
  };
DumpCache: PROC [out: STREAM, cache: LizardCache.CacheBase] = {
  -- Writes the counters held in cache.stats on one line. When at least one probe has
  -- been recorded, a second line follows with the miss rate (as a percentage) and the
  -- average number of reject cycles per probe.
  probeCount: INT = cache.stats.probes;
  PutF1I[out, "probes: %g", probeCount];
  PutF1I[out, ", misses: %g", cache.stats.misses];
  PutF1I[out, ", mapMisses: %g", cache.stats.mapMisses];
  PutF1I[out, ", dirtyWrites: %g", cache.stats.dirtyWrites];
  PutF1I[out, ", rejectCycles: %g", cache.stats.rejectCycles];
  -- No probes: there is nothing to divide by, so the ratio line is omitted.
  IF probeCount <= 0 THEN RETURN;
  PutReal1[out, "\n miss rate: %%%g", (cache.stats.misses*100.0)/probeCount, 2];
  PutReal1[out, ", reject cycles/probe: %g", (cache.stats.rejectCycles*1.0)/probeCount, 2];
  };
GetInstAndRest: PROC [mem: SparseMemory.Base, pc: Word] RETURNS [inst: Inst, rest: Word] = {
  -- A slow but simple routine to fetch instructions and the rest given the PC.
  -- inst is the byte at pc. rest holds the bytes that follow it, right-justified in
  -- a four-byte word with unused leading bytes zero.
  -- The instruction length is derived from the opcode value:
  --   opcode / 64 gives the length when that quotient is non-zero;
  --   otherwise opcodes below 40B are 1 byte long and the remainder are 5 bytes long.
  lastAddr: Word ← ZerosWord;
  -- haveWord records whether word actually holds the contents of lastAddr. Without it,
  -- a first fetch from word address ZerosWord would look like a cache hit and memory
  -- would never be read.
  haveWord: BOOL ← FALSE;
  bytes: DragOpsCross.FourBytes ← LOOPHOLE[ZerosWord];
  instLen: NAT;
  word: Word ← ZerosWord;
  FetchByte: PROC [offset: NAT] RETURNS [Byte] = {
    -- Returns the byte at pc+offset, re-reading memory only when the containing
    -- word differs from the one fetched last.
    wordAddr: Word;
    byteIndex: [0..3];
    [wordAddr, byteIndex] ←
      DragOpsCrossUtils.BytePCToWordAddress[[DragOpsCrossUtils.AddDelta[offset, pc]]];
    IF NOT haveWord OR lastAddr # wordAddr THEN {
      word ← SparseMemory.Fetch[mem, wordAddr];
      lastAddr ← wordAddr;
      haveWord ← TRUE;
      };
    RETURN [LOOPHOLE[word, DragOpsCross.FourBytes][byteIndex]];
    };
  inst ← LOOPHOLE[FetchByte[0]];
  instLen ← LOOPHOLE[inst, CARDINAL] / 64;
  IF instLen = 0 THEN instLen ← IF LOOPHOLE[inst, CARDINAL] < 40B THEN 1 ELSE 5;
  SELECT instLen FROM
    2 => bytes[3] ← FetchByte[1];
    3 => {
      bytes[2] ← FetchByte[1];
      bytes[3] ← FetchByte[2];
      };
    5 => {
      bytes[0] ← FetchByte[1];
      bytes[1] ← FetchByte[2];
      bytes[2] ← FetchByte[3];
      bytes[3] ← FetchByte[4];
      };
    ENDCASE;  -- one-byte instructions have no operand bytes; rest stays zero
  RETURN [inst, LOOPHOLE[bytes]];
  };
ShowInstStats: PROC [out: IO.STREAM, instStats: InstCountTable, testAbort: TestAbort] = {
  -- Prints the instructions with non-zero counts, highest count first, four per line.
  -- Each item shows the count, the cumulative percentage of all counted instructions
  -- up to and including that item, and the instruction name.
  -- testAbort, when not NIL, is polled before each item; a TRUE result stops the listing.
  SortInstStats: PROC [instStats: InstCountTable] RETURNS [LocalInstStats] = {
    -- Insertion sort into descending count order. Non-zero entries occupy positions
    -- VAL[0] onward; the remaining positions keep the all-zero filler entry.
    used: NAT ← 0;
    filler: LocalInstStatsEntry ← [VAL[0], 0];
    sorted: LocalInstStats ← NEW[LocalInstStatsRep ← ALL[filler] ];
    FOR inst: Inst IN Inst DO
      count: INT = instStats[inst];
      slot: NAT ← used;
      IF count = 0 THEN LOOP;
      -- Shift every smaller entry down one place; slot ends at the first vacated position.
      FOR i: NAT DECREASING IN [0..used) DO
        IF sorted[VAL[i]].count < count THEN {
          sorted[VAL[i+1]] ← sorted[VAL[i]];
          slot ← i;
          };
        ENDLOOP;
      sorted[VAL[slot]] ← [inst, count];
      used ← used + 1;
      ENDLOOP;
    RETURN [sorted];
    };
  instNameArray: HandCodingUtil.NameArray = HandCodingUtil.GetInstArray[];
  grandTotal: INT ← 0;
  scale: REAL;
  runningCount: INT ← 0;
  printed: NAT ← 0;
  sorted: LocalInstStats;
  FOR inst: Inst IN Inst DO
    grandTotal ← grandTotal + instStats[inst];
    ENDLOOP;
  -- scale converts a running count into a percentage of the total (0 when nothing was counted).
  scale ← IF grandTotal > 0 THEN 100.0 / grandTotal ELSE 0.0;
  sorted ← SortInstStats[instStats];
  FOR rank: Inst IN Inst DO
    -- rank is a position in the sorted table, not the instruction being reported.
    entry: LocalInstStatsEntry = sorted[rank];
    percentRope: ROPE ← NIL;
    IF testAbort # NIL AND testAbort[] THEN GO TO requestStopPrint;
    -- Sorted descending, so the first zero count marks the end of the useful entries.
    IF entry.count = 0 THEN EXIT;
    runningCount ← runningCount + entry.count;
    percentRope ← Rope.Substr[IO.PutFR1["%f4.1", [real[runningCount * scale]] ], 0, 4];
    IF printed MOD 4 = 0 THEN IO.PutRope[out, "\n"];
    printed ← printed + 1;
    IO.PutF[out, "%7g (%4g%%): %-7g",
      [integer[entry.count]], [rope[percentRope]], [rope[instNameArray[entry.inst]]] ];
    ENDLOOP;
  EXITS
    requestStopPrint => {
      IO.PutRope[out, "\nPrinting stopped by request.\n\n"];
      };
  };
ShowBiasedCache: PROC [out: IO.STREAM, old, new: LizardCache.CacheStats] = {
  -- Prints the change in cache statistics between two snapshots (new minus old).
  -- A scratch cache record is allocated only to carry the differences, so that
  -- DumpCache can do the formatting.
  scratch: LizardCache.CacheBase = NEW[LizardCache.CacheBaseRep];
  scratch.stats.rejectCycles ← new.rejectCycles - old.rejectCycles;
  scratch.stats.dirtyWrites ← new.dirtyWrites - old.dirtyWrites;
  scratch.stats.mapMisses ← new.mapMisses - old.mapMisses;
  scratch.stats.misses ← new.misses - old.misses;
  scratch.stats.probes ← new.probes - old.probes;
  DumpCache[out, scratch];
  };
END.