-- ModelScannerImpl.Mesa
-- derived from file Scanner.Mesa
-- Pilot 6.0/ Mesa 7.0
-- last modified by Satterthwaite, January 31, 1983 10:33 am
-- last modified by Schmidt,  5-Jan-82 18:16:13

DIRECTORY
  Ascii: TYPE USING [ControlZ, CR, NUL, TAB],
  CharIO: TYPE USING [Handle, PutChar, PutNumber, PutString],
  Environment: TYPE USING [charsPerWord, maxCARDINAL, wordsPerPage],
  FileStream: TYPE USING [EndOf, GetIndex, IndexOutOfRange, SetIndex],
  LongString: TYPE USING [AppendSubString, SubStringDescriptor],
  ModelParseTable: TYPE USING [
    endMarker, HashIndex, HashTableRef, IndexTableRef, ScanTableRef, 
    TableRef, tokenFILENAME, tokenID, tokenNUM, tokenSTR, VocabularyRef],
  P1: FROM "modelparsedefs" USING [
    AcquireStream, nullValue, ReleaseStream, Token, Value],
  Stream: TYPE USING [Delete, GetBlock, GetChar, Handle],
  Subr: TYPE USING [AllocateString, CopyString, FreeString, LongZone, strcpy];

ModelScannerImpl: PROGRAM
    IMPORTS
      CharIO, FileStream, LongString, P1, Stream, Subr 
    EXPORTS P1 = {
  OPEN ModelParseTable;

  TextPages: CARDINAL = 6;
  TextWords: CARDINAL = TextPages * Environment.wordsPerPage;
  TextChars: CARDINAL = TextWords * Environment.charsPerWord;
  
  LV: TYPE = LONG POINTER TO LVRecord;
  LVRecord: TYPE = RECORD[
  	stream: Stream.Handle ← NIL,	-- the input stream
	streamOrigin: LONG CARDINAL ← 0,
    	tB: LONG POINTER TO TBObject ← NIL,
  	tI: [0..TextChars] ← 0,
	tMax: [0..TextChars] ← 0,
  	tOrigin: CARDINAL ← 0, 
	tLimit: CARDINAL ← 0,
  	tEnded: BOOL ← FALSE,
  	char: CHAR ← '\000,	-- current (most recently scanned) character
 	nTokens: CARDINAL ← 0,			-- token count
  	nErrors: CARDINAL ← 0			-- # errors
	];
  TBObject: TYPE = PACKED ARRAY [0..TextChars) OF CHAR;

  NLEVELS: CARDINAL = 5;
  LevSeq: TYPE = LONG POINTER TO LevSeqRecord;
  LevSeqRecord: TYPE = RECORD[
  	size: CARDINAL ← 0,
	body: SEQUENCE maxsize: CARDINAL OF LVRecord
	];
	
-- !! MDS USAGE !!
  hashTab: HashTableRef;
  scanTab: ScanTableRef;
  vocab: VocabularyRef;
  vocabIndex: IndexTableRef;

  buffer: LONG STRING ← NIL;			-- token assembly area
  iMax: CARDINAL;			-- iMax = buffer.maxlength
  desc: LongString.SubStringDescriptor;	-- initial buffer segment

  qDot: BOOL;		-- used to resolved decimal point vs. interval
  levseq: LevSeq ← NIL;
 -- endof MDS usage


  FillBuffer: PROC = {
    lv: LV~@levseq[levseq.size-1];
    lv.tOrigin ← lv.tLimit;
    IF lv.tEnded
      THEN lv.tMax ← 0
      ELSE {
	[bytesTransferred: lv.tMax] ← Stream.GetBlock[lv.stream, [lv.tB, 0, TextChars]];
	IF lv.tMax < TextChars THEN lv.tEnded ← TRUE;
	lv.tLimit ← lv.tOrigin + lv.tMax;
	};
    IF lv.tMax = 0 THEN {lv.tB[0] ← Ascii.NUL; lv.tMax ← 1};
    lv.tI ← 0
    };


  BufferOverflow: ERROR = CODE;

  ExpandBuffer: PROC = {
    oldBuffer: LONG STRING ← buffer;
    IF oldBuffer.length > 2000 THEN ERROR BufferOverflow;
    buffer ← Subr.AllocateString[2*oldBuffer.length];
    Subr.strcpy[buffer, oldBuffer];
    iMax ← buffer.length ← buffer.maxlength;
    Subr.FreeString[oldBuffer];
    desc.base ← buffer
    };


  NextChar: PROC = INLINE {
    lv: LV~@levseq[levseq.size-1];
    IF (lv.tI←lv.tI+1) = lv.tMax THEN FillBuffer[]; lv.char ← lv.tB[lv.tI]
    };


  Atom: PUBLIC PROC RETURNS [token: P1.Token] = {
    OPEN token;
    lv: LV ← NIL;
    IF levseq.size = 0 THEN ERROR;
    lv ← @levseq[levseq.size-1];
    DO
      WHILE lv.char IN [Ascii.NUL..' ] DO
	SELECT lv.char FROM
	  Ascii.NUL => {		-- ↑@↑@ is Tioga escape seq
	    IF (lv.tI←lv.tI+1) = lv.tMax THEN {
			IF lv.tEnded THEN GO TO EndFile;
			FillBuffer[]};
	    lv.char ← lv.tB[lv.tI];
	    IF lv.char = Ascii.NUL THEN GO TO EndFile};
	  Ascii.ControlZ =>
	    UNTIL lv.char = Ascii.CR DO
	      IF (lv.tI←lv.tI+1) = lv.tMax THEN {
			IF lv.tEnded THEN GO TO EndFile; 
			FillBuffer[];
			};
	      lv.char ← lv.tB[lv.tI];
	      ENDLOOP;
	  ENDCASE;
	IF (lv.tI←lv.tI+1) = lv.tMax THEN {
		IF lv.tEnded THEN GO TO EndFile; 
		FillBuffer[];
		};
	lv.char ← lv.tB[lv.tI]
	ENDLOOP;
      index ← lv.tOrigin + lv.tI;  value ← P1.nullValue;
      SELECT lv.char FROM

	'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j, 'k, 'l, 'm,
	'n, 'o, 'p, 'q, 'r, 's, 't, 'u, 'v, 'w, 'x, 'y, 'z  => {
	  i: CARDINAL ← 0;
	  str: STRING ← [40];
	  DO
	    buffer[i] ← lv.char;
	    NextChar[];
	    SELECT lv.char FROM
	      IN ['a..'z], IN ['A..'Z], IN ['0..'9] =>
		IF (i ← i+1) >= iMax THEN ExpandBuffer[];
	      ENDCASE => EXIT;
	    ENDLOOP;
	  desc.length ← i+1;
	  class ← tokenID;  -- value.r ← SymbolOps.EnterString[@desc];
	  str.length ← 0;
	  LongString.AppendSubString[str,@desc];
	  value ← [ref[Subr.CopyString[str]]];
	  GO TO GotNext};

	'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'J, 'K, 'L, 'M,
	'N, 'O, 'P, 'Q, 'R, 'S, 'T, 'U, 'V, 'W, 'X, 'Y, 'Z  => {
	  i: CARDINAL ← 0;
	  str: STRING ← [40];
	  uId: BOOL ← TRUE;
	  first, last: CARDINAL ← lv.char-0C;
	  DO
	    buffer[i] ← lv.char;
	    NextChar[];
	    SELECT lv.char FROM
	      IN ['A..'Z] => {
		last ← lv.char-0C; IF (i ← i+1) >= iMax THEN ExpandBuffer[]};
	      IN ['a..'z], IN ['0..'9] => {
		uId ← FALSE; IF (i ← i+1) >= iMax THEN ExpandBuffer[]};
	      ENDCASE => EXIT;
	    ENDLOOP;
	  i ← i+1;
	  IF uId
	    THEN {
	      h: HashIndex ← ((first*128-first) + last) MOD LAST[HashIndex] + 1;
	      j, s1, s2: CARDINAL;
	      WHILE (j ← hashTab[h].symbol) # 0 DO
		IF vocabIndex[j]-(s2←vocabIndex[j-1]) = i
		  THEN
		    FOR s1 IN [0 .. i) DO
		      IF buffer[s1] # vocab.text[s2] THEN EXIT;
		      s2 ← s2+1;
		      REPEAT
			FINISHED => {class ← j; GO TO GotNext};
		      ENDLOOP;
		IF (h ← hashTab[h].link) = 0 THEN EXIT;
		ENDLOOP};
	  desc.length ← i;
	  class ← tokenID;  -- value.r ← SymbolOps.EnterString[@desc];
	  str.length ← 0;
	  LongString.AppendSubString[str,@desc];
	  value ← [ref[Subr.CopyString[str]]];
	  GO TO GotNext};

	IN ['0..'9]  => {
	  v: LONG CARDINAL;
	  str: STRING ← [20];
	  valid: BOOL;
	  v10, v8: LONG CARDINAL ← 0;
	  valid10, valid8: BOOL ← TRUE;
	  exp: INTEGER ← 0;
	  point: BOOL ← FALSE;
	  MaxWord: CARDINAL = Environment.maxCARDINAL;
	  WHILE lv.char IN ['0..'9] DO
	    IF valid10 THEN [v10, valid10] ← AppendDigit10[v10, lv.char];
	    IF valid8 THEN [v8, valid8] ← AppendDigit8[v8, lv.char];
	    NextChar[];
	    ENDLOOP;
	  IF lv.char = '. THEN {
	    NextChar[];
	    IF lv.char = '.
	      THEN qDot ← TRUE
	      ELSE {point ← TRUE; [v10, exp, valid10] ← ScanFraction[v10, valid10]}};
	  SELECT lv.char FROM
	    'b, 'B => {
	      NextChar[];
	      v ← v8;  [exp, valid] ← ScanScaleFactor[valid8 AND ~point];
	      THROUGH [1 .. exp]  WHILE valid DO
		[v, valid] ← AppendDigit8[v, '0] ENDLOOP;
	      class ← tokenNUM};
	    'd, 'D => {
	      NextChar[];
	      v ← v10;  [exp, valid] ← ScanScaleFactor[valid10 AND ~point];
	      THROUGH [1 .. exp]  WHILE valid DO
		[v, valid] ← AppendDigit10[v, '0] ENDLOOP;
	      class ← tokenNUM};
	    ENDCASE => {
	      v ← v10; valid ← valid10; class ← IF point THEN tokenNUM ELSE tokenNUM};
	    IF v > MaxWord
		THEN value ← EnterLongLit[v]
		ELSE {class ← tokenNUM; value ← EnterLit[v]};
	  IF ~valid THEN ScanError[number, index];
          GO TO GotNext};

	'@ => {
		i: CARDINAL;
		str: STRING ← [100];
		rbseen: BOOL ← FALSE;	-- avoid parsing too far if ]
		i ← 0;
		DO 
			buffer[i] ← lv.char;
			NextChar[];
			-- check for [ after @
			IF i = 0 AND lv.char ~= '[ THEN rbseen ← TRUE;
			SELECT lv.char FROM
			IN ['a..'z], IN ['A..'Z], IN ['0..'9], 
			'>, '<, '., '*, '↑, '~ => {
				IF (i ← i+1) >= iMax THEN ExpandBuffer[];
	      			IF lv.char = '< OR lv.char = '> THEN rbseen ← TRUE;
				};
			'[, '] => {
				IF ~rbseen THEN {
					IF (i ← i+1) >= iMax THEN ExpandBuffer[];
					IF lv.char = '] THEN rbseen ← TRUE
					}
				ELSE EXIT;
				};
			ENDCASE => EXIT;
			ENDLOOP;
		desc.length ← i + 1;
		class ← tokenFILENAME;
		str.length ← 0;
		LongString.AppendSubString[str, @desc];
		value ← [ref[Subr.CopyString[str]]];
		GOTO GotNext;
		};
		
	',, ';, ':, '←, '#, '~, '+, '*, '/, '↑, '!,
	'(, '), '[, '], '{, '}  => {
	  class ← scanTab[lv.char]; GO TO GetNext};

	'" => {
	  i: CARDINAL ← 0;
	  str: STRING ← [40];
	  DO 
	    NextChar[];
	    IF lv.tEnded AND lv.tMax = 0 THEN GO TO EOFEnd;  
	    SELECT lv.char FROM
	      '" => {
		NextChar[];
		IF lv.char # '" THEN GO TO QuoteEnd};
	      ENDCASE;
	    IF i >= iMax
	      THEN ExpandBuffer[
		! BufferOverflow => {ScanError[string, index]; i ← 0; CONTINUE}];
	    buffer[i] ← lv.char;  i ← i+1;
	    REPEAT
	      QuoteEnd => NULL;
	      EOFEnd => {ScanError[string, index]; FillBuffer[];  lv.char ← lv.tB[lv.tI]};
	    ENDLOOP;
	  desc.length ← i;
	  str.length ← 0;
	  LongString.AppendSubString[str,@desc];
	  value ← [ref[Subr.CopyString[str]]];
	  class ← tokenSTR; 
	  GO TO GotNext
	  };

	'- => {
	  NextChar[];
	  IF lv.char # '- THEN {class ← scanTab['-]; GO TO GotNext};
	  lv.char ← Ascii.NUL;
	  DO
	    pChar: CHAR = lv.char;
	    IF (lv.tI←lv.tI+1) = lv.tMax THEN {
		IF lv.tEnded THEN GO TO EndFile; 
		FillBuffer[];
		};
	    lv.char ← lv.tB[lv.tI];
	    SELECT lv.char FROM
	      '- => IF pChar = '- THEN EXIT;
	      Ascii.CR => EXIT;
	      ENDCASE;
	    ENDLOOP;
	  NextChar[]};

	'= => {
	  class ← scanTab[lv.char]; GO TO GetNext};

	ENDCASE => {
	  class ← scanTab[lv.char];
	  IF class # 0 THEN GO TO GetNext;
	  NextChar[];
	  ScanError[char, index]};

      REPEAT
	GetNext => {NextChar[];};
	GotNext => NULL;
	EndFile => {
	  FillBuffer[];  lv.char ← lv.tB[lv.tI];
	  class ← endMarker;  index ← lv.tOrigin;  value ← P1.nullValue;
	  };
      ENDLOOP;	-- the loop that gets chars
    lv.nTokens ← lv.nTokens + 1;
    RETURN};


  PushStream: PROC[sh: Stream.Handle] = {
  longzone: UNCOUNTED ZONE ← Subr.LongZone[];
  lvrec: LVRecord ← [];
  IF levseq.size > levseq.maxsize THEN {
  	ERROR;		-- CWF.WF0["Too many levels of nesting.\n"L];
	-- RETURN;
	}
  ELSE {
	lv: LV ← @levseq[levseq.size];
	lv↑ ← lvrec;
	lv.tB ← longzone.NEW[TBObject];
	lv.streamOrigin ← FileStream.GetIndex[sh];
	lv.stream ← sh;
	levseq.size ← levseq.size + 1;
	FillBuffer[];
	lv.char ← lv.tB[lv.tI];
	};
  };
  
	
 -- numerical conversion

  Digit: ARRAY CHAR['0..'9] OF CARDINAL~[0,1,2,3,4,5,6,7,8,9];

  AppendDigit10: PROC [v: LONG CARDINAL, digit: CHAR ['0..'9]]
      RETURNS [newV: LONG CARDINAL, valid: BOOL] = {
    MaxV: LONG CARDINAL~429496729;	-- (2**32-1)/10
    MaxD: CARDINAL~5;			-- (2**32-1) MOD 10
    d: [0..9]~Digit[digit];
    valid ← v < MaxV OR (v = MaxV AND d <= MaxD);
    newV ← 10*v + d;
    RETURN};

  AppendDigit8: PROC [v: LONG CARDINAL, digit: CHAR ['0..'9]]
      RETURNS [newV: LONG CARDINAL, valid: BOOL] = {
    MaxV: LONG CARDINAL~3777777777B;	-- (2**32-1)/8
    MaxD: CARDINAL~7B;		-- (2**32-1) MOD 8
    d: [0..9]~Digit[digit];
    valid ← (d < 8) AND (v < MaxV OR (v = MaxV AND d <= MaxD));
    newV ← 8*v + d;
    RETURN};

  ScanFraction: PROC [v: LONG CARDINAL, valid: BOOL]
      RETURNS [newV: LONG CARDINAL, exp: INTEGER, newValid: BOOL] = {
    newV ← v;  exp ← 0;  newValid ← valid AND levseq[levseq.size - 1].char IN ['0..'9];
    WHILE levseq[levseq.size - 1].char IN ['0..'9] DO
      IF newValid THEN [newV, newValid] ← AppendDigit10[newV, levseq[levseq.size - 1].char];
      exp ← exp-1;
      NextChar[];
      ENDLOOP;
    RETURN};

  ScanExponent: PROC [exp: INTEGER, valid: BOOL]
      RETURNS [newExp: INTEGER, newValid: BOOL] = {
    op: {plus, minus} ← plus;
    scale: INTEGER;
    SELECT levseq[levseq.size - 1].char FROM
      '+ => NextChar[];
      '- => {op ← minus; NextChar[]};
      ENDCASE;
    [scale, newValid] ← ScanScaleFactor[valid AND (levseq[levseq.size - 1].char IN ['0..'9])];
    newExp ← IF op = plus THEN exp + scale ELSE exp - scale;
    RETURN};

  ScanScaleFactor: PROC [valid: BOOL]
      RETURNS [scale: INTEGER, newValid: BOOL] = {
    Max: INTEGER~(32767-9)/10;	-- (2**15-10)/10
    newValid ← valid;  scale ← 0;
    WHILE levseq[levseq.size - 1].char IN ['0..'9] DO
      newValid ← valid AND scale <= Max;
      IF newValid THEN scale ← 10*scale + Digit[levseq[levseq.size - 1].char];
      NextChar[];
      ENDLOOP;
    RETURN};

  EnterLit: PROC [v: LONG CARDINAL] RETURNS [P1.Value] = {
    RETURN [[scalar[v]]]};

  EnterLongLit: PROC [v: LONG CARDINAL] RETURNS [P1.Value] = {
    RETURN [[scalar[v]]]};

  EnterFloating: PROC [v: LONG CARDINAL, exp: INTEGER, valid: BOOL]
      RETURNS [value: P1.Value, newValid: BOOL] = {
    RETURN};

 -- initialization/finalization

  ScanInit: PUBLIC PROC [table: ModelParseTable.TableRef] = {
    lv: LV;
    longzone: UNCOUNTED ZONE ← Subr.LongZone[];
    hashTab ← @table[table.scanTable.hashTab]; 
    scanTab ← @table[table.scanTable.scanTab]; 
    vocab ← LOOPHOLE[@table[table.scanTable.vocabBody]]; 
    vocabIndex ← @table[table.scanTable.vocabIndex];
    IF buffer = NIL THEN buffer ← Subr.AllocateString[256];
    iMax ← buffer.length ← buffer.maxlength;
    desc.base ← buffer;  desc.offset ← 0;
    IF levseq = NIL THEN levseq ← longzone.NEW[LevSeqRecord[NLEVELS]];
    PushStream[P1.AcquireStream[source]];
    qDot ← FALSE;
    lv ← @levseq[levseq.size-1];
    lv.nTokens ← lv.nErrors ← 0
    };

  -- this closes the input stream and frees all the memory
  -- may be called by a nested parser
  ScanReset: PUBLIC PROC RETURNS [nTokens, nErrors: CARDINAL] = {
    longzone: UNCOUNTED ZONE ← Subr.LongZone[];
    IF levseq.size > 0 THEN {
    	lv: LV;
	levseq.size ← levseq.size-1;
	lv ← @levseq[levseq.size];
	nTokens ← lv.nTokens;
    	nErrors ← lv.nErrors;
    	longzone.FREE[@lv.tB];
    	Stream.Delete[lv.stream];
    	lv.stream ← NIL;
    	};
    IF levseq.size = 0 THEN GuaranteeScannerCleanedUp[];
    };

  GuaranteeScannerCleanedUp: PUBLIC PROC = {
  longzone: UNCOUNTED ZONE ← Subr.LongZone[];
  IF buffer # NIL THEN {
	Subr.FreeString[buffer];	-- may give address faults
	buffer ← NIL
	};
  IF levseq ~= NIL THEN longzone.FREE[@levseq];
  };
  
 -- error handling

  StreamIndex: TYPE = LONG CARDINAL;

  ResetScanIndex: PUBLIC PROC [index: CARDINAL] RETURNS [success: BOOL] = {
    lv: LV~@levseq[levseq.size - 1];
    IF index NOT IN [lv.tOrigin .. lv.tLimit)
      THEN {
	page: CARDINAL = index/(Environment.wordsPerPage*Environment.charsPerWord);
	lv.tOrigin ← lv.tLimit ← page*(Environment.wordsPerPage*Environment.charsPerWord);
	lv.tMax ← 0;  lv.tEnded ← FALSE;
	FileStream.SetIndex[lv.stream, lv.streamOrigin + lv.tOrigin];
	FillBuffer[]};
    lv.tI ← index - lv.tOrigin;
    IF lv.tI >= lv.tMax THEN FillBuffer[];  lv.char ← lv.tB[lv.tI];
    RETURN [TRUE]
    };


  ScanError: PROC [code: {number, string, char, atom}, tokenIndex: CARDINAL] = {
    errorStream: Stream.Handle ← P1.AcquireStream[log];
    lv: LV~@levseq[levseq.size-1];
    lv.nErrors ← lv.nErrors + 1;
    ErrorContext[errorStream,
      SELECT code FROM
	number => "invalid number"L,
	string => "string unterminated or too long"L,
	char => "invalid character"L,
	atom => "invalid atom"L,
	ENDCASE => NIL,
      tokenIndex];
    CharIO.PutChar[errorStream, Ascii.CR];
    P1.ReleaseStream[log]
    };


  ErrorContext: PUBLIC PROC [
      to: Stream.Handle, message: LONG STRING, tokenIndex: CARDINAL] = {
    lv: LV~@levseq[IF levseq.size > 0 THEN levseq.size - 1 ELSE 0];
    saveIndex: StreamIndex = FileStream.GetIndex[lv.stream];
    origin: StreamIndex = lv.streamOrigin + tokenIndex;
    start, lineIndex: StreamIndex ← origin;
    char: CHAR;
    n: [1..100];
    skip: BOOL;
    FOR n IN [1..100] UNTIL lineIndex = 0 DO
      skip ← FALSE;
      lineIndex ← lineIndex - 1;
      FileStream.SetIndex[lv.stream, lineIndex
      	! FileStream.IndexOutOfRange => {
		skip ← TRUE;
		CONTINUE;
		}
	];
      IF ~skip AND Stream.GetChar[lv.stream] = Ascii.CR THEN EXIT;
      start ← lineIndex;
      ENDLOOP;
    FileStream.SetIndex[lv.stream, start];
    FOR n IN [1..100] UNTIL FileStream.EndOf[lv.stream] DO
      char ← Stream.GetChar[lv.stream];
      SELECT char FROM
	Ascii.CR, Ascii.ControlZ => EXIT;
	ENDCASE => CharIO.PutChar[to, char];
      ENDLOOP;
    CharIO.PutChar[to, Ascii.CR];
    FileStream.SetIndex[lv.stream, start];
    UNTIL FileStream.GetIndex[lv.stream] = origin OR FileStream.EndOf[lv.stream] DO
      char ← Stream.GetChar[lv.stream];
      CharIO.PutChar[to, IF char = Ascii.TAB THEN Ascii.TAB ELSE ' ];
      ENDLOOP;
    CharIO.PutString[to, "↑ "L];  CharIO.PutString[to, message];  CharIO.PutString[to, " ["L];
    CharIO.PutNumber[to, tokenIndex, [base:10, zerofill:FALSE, unsigned:TRUE, columns:0]];
    CharIO.PutChar[to, ']];   CharIO.PutChar[to, Ascii.CR];
    FileStream.SetIndex[lv.stream, saveIndex]
    };

  }.