-- File [Ivy]<Nelson>Lupine>LupineExerciserTimerImpl.mesa.
-- Last edited by BZM on 14-Mar-82 13:24:24.


DIRECTORY
  CWF USING [FWF0, FWF1, FWF2, FWFCR],
  Format USING [LongNumber, StringProc],
  Heap USING [systemZone],
  LupineExerciser USING [Counter, String],
  LupineExerciserPrivate USING [
	CallSpy, CheckAbort, ExerciseHandle, PrecisionTime,
	RingBufferIndex, SortBuffer, SortBufferIndex, SystemTime ],
  Mopcodes USING [zMISC],
  Process USING [Yield],
  System USING [
	GetClockPulses, GreenwichMeanTime,
	Microseconds, Pulses, PulsesToMicroseconds],
  Time USING [Current];


LupineExerciserTimerImpl: PROGRAM
  IMPORTS
	CWF, Format, Heap, Private: LupineExerciserPrivate,
	Process, System, Time
 EXPORTS LupineExerciser, LupineExerciserPrivate
 = BEGIN OPEN LupineExerciser;


-- Private types.

  SystemTime: TYPE = Private.SystemTime;
  PrecisionTime: TYPE = Private.PrecisionTime;
  RingBufferIndex: TYPE = Private.RingBufferIndex;


-- Exerciser instance data.

  Handle: PUBLIC TYPE = Private.ExerciseHandle;


--  Primary time logging routines that use Pilot's timer.

  Start: PUBLIC PROC [self: Handle, name: String] =
    BEGIN OPEN tp: self.tp;
    Private.CheckAbort[self];
    CWF.FWF2[self.logPut,
      "*N*NTiming %D trials of %LS"L, @tp.trialsPerPass, name];
    TimeStamp[self, " at "L];
    PreparePrecisionTimings[self];
    IF tp.spying THEN Private.CallSpy[self, startSpying];
    StartTimer[self: self, reset: TRUE];
    END;
    
  Stamp: PUBLIC PROC [self: Handle] = 
    BEGIN
    StopTimer[self];
    CWF.FWF1[self.logPut, "Doing %D "L, @self.tp.testsPerTrial];
    TimeStamp[self, " calls at "L];
    StartTimer[self];
    END;

  Delay: PUBLIC PROC [self: Handle] =
    BEGIN
    StopTimer[self];
    Private.CheckAbort[self];
    DoDelay[self, 600];
    StartTimer[self];
    END;

  Stop: PUBLIC PROC [self: Handle] =
    BEGIN OPEN tp: self.tp;
    ms, tenths: LONG INTEGER;
    StopTimer[self];
    IF tp.spying THEN Private.CallSpy[self, stopSpying];
    Private.CheckAbort[self];
    IF tp.trialsPerPass = 0 OR tp.testsPerTrial = 0
      THEN ms ← tenths ← 0
      ELSE [ms, tenths] ← ConvertTimer[
	ReadTimer[self].elapsed /
	(tp.trialsPerPass*LONG[tp.testsPerTrial]) ];
    CWF.FWF2[self.logPut,
      "The mean call time (overhead included) was %LD.%LD"L,
      @ms, @tenths ];
    TimeStamp[self, " ms at "L];
    PrintPrecisionTimings[self];
    IF tp.spying AND tp.showSpyData=afterEachTest
      THEN Private.CallSpy[self, displayStats];
    END;

  TimeStamp: PROC [self: Handle, herald: String] =
    BEGIN
    time: System.GreenwichMeanTime ← Time.Current[];
    Private.CheckAbort[self];
    CWF.FWF2[self.logPut, "%LS%LT.*N"L, herald, @time];
    END;


-- Pilot Timing Routines.

  Microseconds: TYPE = LONG CARDINAL;

  ReadPilotClock: PROC RETURNS [SystemTime] =
    INLINE {RETURN[ LOOPHOLE[System.GetClockPulses[]] ]};

  PilotPrecisionRead: PROC RETURNS [--SystemTime-- PrecisionTime] =
    INLINE { RETURN[ ReadPilotClock[] ] };

  PilotPrecisionConvert: PROC [pt: --SystemTime-- PrecisionTime]
      RETURNS [--microseconds:-- Microseconds] =
    INLINE {RETURN[ SystemTimeToUsec[pt] ]};



  StartTimer: PROC [self: Handle, reset: BOOLEAN←FALSE] = 
    INLINE BEGIN OPEN time: self.timer;
    IF reset THEN time.elapsed ← 0;
    time.start ← ReadPilotClock[];
    END;

  StopTimer: PROC [self: Handle] =
    INLINE BEGIN OPEN time: self.timer;
    time.event ← ReadPilotClock[] - time.start;
    time.elapsed ← time.elapsed + time.event;
    END;

  ReadTimer: PROC [self: Handle] RETURNS [event, elapsed: SystemTime] =
    INLINE BEGIN OPEN time: self.timer;
    RETURN[time.event, time.elapsed]
    END;

  ConvertTimer: PROC [time: SystemTime] RETURNS [ms,tenthsOfMs: LONG INTEGER] =
    INLINE BEGIN
    time ← (SystemTimeToUsec[time]+50)/100;
    ms ← time/10;
    tenthsOfMs ← time MOD 10;
    END;

  SystemTimeToUsec: PROC [time: SystemTime] RETURNS [Microseconds] =
    INLINE BEGIN
    RETURN[ System.PulsesToMicroseconds[System.Pulses[time]] ]
    END;


  DoDelay: PROC [self: Handle, interval: --Microseconds-- LONG INTEGER] =
    -- Spin at least "interval" between executions.  There is a startup glitch.
    BEGIN
    LastIntegerTime: SystemTime = LAST[LONG INTEGER];
    DO
      grossWait: SystemTime =
        SystemTimeToUsec[ReadPilotClock[]-self.timer.lastDelayFinished];
      waitTime: LONG INTEGER =
        IF grossWait < LastIntegerTime THEN grossWait ELSE LastIntegerTime;
      SELECT interval-waitTime FROM
	< 0   => EXIT;
	< 50 => Process.Yield;  --NULL;
	ENDCASE => Process.Yield;
      ENDLOOP;
    self.timer.lastDelayFinished ← ReadPilotClock[];
    END;


--  Individual call high precision timing routines.


  StartPrecisionTiming: PUBLIC PROC [self: Handle] =
    --INLINE-- BEGIN OPEN timer: self.precisionTimer;
    timer.start ← PrecisionRead[self];
    END;

  StopPrecisionTiming: PUBLIC PROC [self: Handle] =
    --INLINE-- BEGIN OPEN timer: self.precisionTimer;
    stopTime: PrecisionTime = PrecisionRead[self];
    timer.ringBuffer[
      (timer.ringIndex ← (timer.ringIndex+1) MOD LENGTH[timer.ringBuffer]) ] 
	  ← (stopTime - timer.start);
    END;

  PrecisionRead: PROC [self: Handle] RETURNS [--pt:-- PrecisionTime] =
    INLINE BEGIN
    RETURN[IF self.tp.useDoradoClock
	     THEN DoradoPrecisionRead[self] ELSE PilotPrecisionRead[] ];
    END;

  PrecisionConvert: PROC [self: Handle, pt: PrecisionTime]
      RETURNS [--microseconds:-- Microseconds] =
    INLINE BEGIN
    RETURN[IF self.tp.useDoradoClock
	     THEN DoradoPrecisionConvert[pt] ELSE PilotPrecisionConvert[pt] ];
    END;


--  Precision timing routines.

  InitPrecisionTimings: PUBLIC PROCEDURE [self: Handle] =
    BEGIN OPEN timer: self.precisionTimer;
    timer.sortBuffer ← Heap.systemZone.NEW[
      Private.SortBuffer[self.tp.testsPerTrial+2] ];
    timer.ringBuffer ← DESCRIPTOR [ 
      @timer.sortBuffer[FIRST[Private.SortBufferIndex]+1],
      LENGTH[DESCRIPTOR[timer.sortBuffer↑]]-2 ];
    END;

  FinishPrecisionTimings: PUBLIC PROCEDURE [self: Handle] =
    BEGIN OPEN timer: self.precisionTimer;
    Heap.systemZone.FREE[@timer.sortBuffer];
    timer.sortBuffer ← NIL;
    timer.ringBuffer ← NIL;
    END;

  PreparePrecisionTimings: PROC [self: Handle] =
    INLINE BEGIN OPEN timer: self.precisionTimer;
    IF BASE[timer.ringBuffer] = NIL THEN ERROR;
    timer.ringIndex ← PRED[LENGTH[timer.ringBuffer]];
    --FOR i: RingBufferIndex IN [0..LENGTH[timer.ringBuffer]) DO
    --  timer.ringBuffer[i] ← 0 ENDLOOP;
    --IF LENGTH[timer.ringBuffer] > 0 THEN BEGIN
    --  timer.ringBuffer[0] ← 0;
    --  Inline.LongCopy[
    --    from: @timer.ringBuffer[0],
    --    to: @timer.ringBuffer[1],
    --    nwords: LENGTH[timer.ringBuffer]*SIZE[PrecisionTime];
    --  END;
    IF self.tp.useDoradoClock THEN StartDoradoCounters[EmuCycles];
    END;

  PrintPrecisionTimings: PROC [self: Handle] =
    BEGIN OPEN timer: self.precisionTimer;
    ringBufferLength: CARDINAL ← LENGTH[timer.ringBuffer];
    Range: INTEGER = MIN[10, ringBufferLength];
    PrintRange: PROC [herald: String, startIndex: RingBufferIndex] =
      BEGIN
      CWF.FWF1[self.logPut, "%LS"L, herald];
      FOR i: RingBufferIndex IN [startIndex..startIndex+Range) DO
	  IF timer.ringBuffer[i] > 99999
	    THEN CWF.FWF0[self.logPut, " **********"L]
	    ELSE BEGIN
	      LongStringProc: Format.StringProc =
		{CWF.FWF1[self.logPut, "%LS"L, s]};
	      Format.LongNumber[
		proc: LongStringProc,
		n: timer.ringBuffer[i],
		format: [base: 10, zerofill: FALSE, unsigned: TRUE, columns: 6] ];
	      END;
	  ENDLOOP;
      CWF.FWFCR[self.logPut];
      END;  -- PrintRange.
    FOR i: RingBufferIndex IN [FIRST[RingBufferIndex]..ringBufferLength) DO
      timer.ringBuffer[i] ← PrecisionConvert[self, timer.ringBuffer[i]];
      ENDLOOP;
    QuickSort[DESCRIPTOR[timer.sortBuffer↑]];
    CWF.FWF1[self.logPut, 
      "Some instantaneous call times (microseconds) from a %D element ring buffer:*N"L,
      @ringBufferLength ];
    PrintRange["  Fastest times:"L, FIRST[RingBufferIndex]];
    PrintRange["  Median times: "L,
      FIRST[RingBufferIndex] + (ringBufferLength-Range)/2 ];
    PrintRange["  Slowest times:"L,
      FIRST[RingBufferIndex]+ringBufferLength-Range];
    END;

  
  -- Dorado 64ns precision counter routines.
  -- Modified from [Ivy]<McDaniel>Measurements>Measure.mesa.

  doradoCounters: CounterVector ← ZeroCounterVector;

  DoradoPrecisionRead: PROC [self: Handle] RETURNS [PrecisionTime] =
    INLINE BEGIN
    ReadDoradoCounters[@doradoCounters];
    RETURN[IF self.tp.countOnlyEmulatorCycles
	     THEN doradoCounters.A.low ELSE doradoCounters.B.low ];
    END;

  DoradoPrecisionConvert: PROC [pt: PrecisionTime]
      RETURNS [--microseconds:-- Microseconds] =
    INLINE BEGIN
    -- Measured clock time was 2*32.2 ns => 644/10000 = 161/2500 usec/cycle
    -- Compute slowly to avoid overflow and do rounding.
    RETURN[
	  (LONG[161]*(pt/LONG[2500]))
	+ ( LONG[161]*(pt MOD LONG[2500])+LONG[2500/2] )/LONG[2500] ];
    END;

  StartDoradoCounters: PROCEDURE [CounterDescriptor] =
    MACHINE CODE BEGIN Mopcodes.zMISC, 240B END;

  ReadDoradoCounters: PROCEDURE [CounterVectorPtr] =
    MACHINE CODE BEGIN Mopcodes.zMISC, 241B END;

  StopDoradoCounters: PROCEDURE =
    MACHINE CODE BEGIN Mopcodes.zMISC, 242B END;

  CounterVector: TYPE = MACHINE DEPENDENT RECORD [
    A(0): MACHINE DEPENDENT RECORD [low(0): LONG CARDINAL, high(2): CARDINAL],
    B(3): MACHINE DEPENDENT RECORD [low(0): LONG CARDINAL, high(2): CARDINAL] ];

  ZeroCounterVector: CounterVector = [[0,0],[0,0]];

  CounterVectorPtr: TYPE = LONG POINTER TO CounterVector;

  ASelection: TYPE = MACHINE DEPENDENT {
    true, hold, procRef, ifuJump, miss, bpa, bpc, bpe };

  BSelection: TYPE = MACHINE DEPENDENT {
    true, hold, ifuRef, ifuNrdy, miss, bpb, bpc, bpd };

  allA, allB, doA, doB: BOOLEAN = TRUE;
  emuOnlyA, emuOnlyB, notIS: BOOLEAN = FALSE;

  CounterDescriptor: TYPE = MACHINE DEPENDENT RECORD  [
    instrSet: BOOLEAN,
    ignoreBits123: [0..7],
    enableA, enableB: BOOLEAN,
    ignoreBits67: [0..3],
    aSelects: ASelection,
    allTasksA: BOOLEAN,
    bSelects: BSelection,
    allTasksB: BOOLEAN  ];

  EmuCycles: CounterDescriptor = [notIS,0,doA,doB,0,true,emuOnlyA,true,allB];


-- This Quicksort routine is from Sedgewick's thesis.
-- It is a transliteration of [Maxc2]<Guibas>STSRT.SAI.

  QuickSort: PROC [table: LONG DESCRIPTOR FOR ARRAY OF PrecisionTime] =
    BEGIN
    first: INTEGER = 0;
    last: INTEGER = LENGTH[table]-1;
    M: INTEGER = 9;
    PP, L, R, I, J: INTEGER;
    V, TK: PrecisionTime;
    STACK: ARRAY [0..20) OF INTEGER;
    --Originally: STACK: ARRAY [0:2*(LOG(256/(M+2)) DIV 1)+1] OF INTEGER;

    table[first] ← FIRST[PrecisionTime];  table[last] ← LAST[PrecisionTime];
    PP ← 0; L ← first+1; R ← last-1;

    WHILE PP >=0 DO
      I ← L; J ← R+1; V ← table[L];
      WHILE I < J DO
	 I←I+1; WHILE table[I]<V DO I ← I+1 ENDLOOP;
	 J←J-1; WHILE table[J]>V DO J ← J-1 ENDLOOP;
	 TK←table[J]; table[J]←table[I]; table[I]←TK;
	 ENDLOOP;
      table[I]←table[J]; table[J]←table[L]; table[L]←TK;
      SELECT TRUE FROM
	 R-J > J-L =>
	  SELECT TRUE FROM
	     R-J <= M => NULL;
	     J-L <= M => {L←J+1; LOOP};
	     ENDCASE => BEGIN
	       PP←PP+2; STACK[PP]←J+1; STACK[PP+1]←R; R←J-1;
	       LOOP;
	       END;
	 J-L <= M => NULL;
	 R-J <= M => {R←J-1; LOOP};
	 ENDCASE => BEGIN
	   PP ← PP+2; STACK[PP]←L; STACK[PP+1]←J-1; L←J+1;
	   LOOP;
	   END;
      L←STACK[PP];
      R←STACK[PP+1];
      PP←PP-2;
      REPEAT
	FINISHED =>
	  FOR I IN [2..last) DO
	    V←table[I]; J←I-1;
	    WHILE table[J]>V DO table[J+1]←table[J]; J←J-1 ENDLOOP;
	    table[J+1]←V;
	    ENDLOOP;
      ENDLOOP;
    END;



-- Module Initialization.


  END.  -- LupineExerciserTimerImpl.