-- File PackScanner.mesa
-- Last modified by Satterthwaite on May 30, 1980  11:32 AM
-- Last modified by Schmidt on September 18, 1980  1:59 PM
-- Last modified by Lewis on  2-Apr-81 19:11:27
-- Last modified by Levin and Satterthwaite on July 6, 1982 4:32 pm
-- Derived from Compiler>Scanner.Mesa

DIRECTORY
  CharIO USING [CR, TAB, PutChar, PutNumber, PutString],
  Inline USING [LowHalf],
  PackEnviron USING [CharsPerWord, PageSize],
  P1: FROM "PackParseDefs" USING [Token],
  ParseTable USING [
    HashIndex, TSymbol, VocabHashEntry, TableRef, tokenID, TrueEndOfFile],
  Streams USING [Ended, Handle, GetIndex, GetBlock, SetIndex, GetByte, End],
  LongStorage USING [FreeString, FreePages, Pages, Prune, String],
  Strings USING [AppendString, SubStringDescriptor, String],
  SymTabOps USING [EnterString];

Scanner: PROGRAM
    IMPORTS CharIO, Inline, Streams, LongStorage, Strings, SymTabOps 
    EXPORTS P1 =
  BEGIN
  OPEN ParseTable;

  hashTab: LONG POINTER TO ARRAY HashIndex OF VocabHashEntry;
  scanTab: LONG POINTER TO ARRAY CHARACTER [40C..177C] OF TSymbol;
  vocab: Strings.String;
  vocabIndex: LONG POINTER TO ARRAY TSymbol OF CARDINAL;

  NUL: CHARACTER = 0C;
  CR: CHARACTER = CharIO.CR;
  ControlZ: CHARACTER = 32C;		-- Bravo escape char

  stream: Streams.Handle;	-- the input stream
  streamOrigin: LONG CARDINAL;

  errorStream: Streams.Handle;	-- the error stream

  TextPages: CARDINAL = 6;
  TextWords: CARDINAL = TextPages * PackEnviron.PageSize;
  TextChars: CARDINAL = TextWords * PackEnviron.CharsPerWord;

  tB: LONG POINTER TO PACKED ARRAY [0..TextChars) OF CHARACTER;
  tI, tMax: [0..TextChars];
  tOrigin, tLimit: CARDINAL;
  tEnded: BOOLEAN;

  FillBuffer: PROC = {
    words: [0..TextWords];
    bytes: [0..PackEnviron.CharsPerWord);
    tOrigin ← tLimit;
    IF tEnded
      THEN tMax ← 0
      ELSE {
	words ← Streams.GetBlock[stream, tB, TextWords];
	bytes ← Inline.LowHalf[Streams.GetIndex[stream]]
          MOD PackEnviron.CharsPerWord;
	IF bytes # 0 THEN words ← words-1;
	tMax ← words*PackEnviron.CharsPerWord + bytes;
	IF tMax < TextChars THEN tEnded ← TRUE;
	tLimit ← tOrigin + tMax};
    IF tMax = 0 THEN {tB[0] ← NUL; tMax ← 1};
    tI ← 0};


  buffer: Strings.String ← NIL;		-- token assembly area
  iMax: CARDINAL;			-- iMax = buffer.maxlength
  desc: Strings.SubStringDescriptor;	-- initial buffer segment

  nTokens: CARDINAL;			-- token count
  nErrors: CARDINAL;			-- lexical errors

  BufferOverflow: ERROR = CODE;

  ExpandBuffer: PROC = {
    oldBuffer: Strings.String ← buffer;
    IF oldBuffer.length > 2000 THEN ERROR BufferOverflow;
    buffer ← LongStorage.String[2*oldBuffer.length];
    Strings.AppendString[buffer, oldBuffer];
    iMax ← buffer.length ← buffer.maxlength;
    LongStorage.FreeString[oldBuffer];
    desc.base ← buffer};


  char: CHARACTER;	-- current (most recently scanned) character

  NextChar: PROC = {	-- also expanded inline within Atom
    IF (tI←tI+1) = tMax THEN FillBuffer[]; char ← tB[tI]};


  Atom: PUBLIC PROC RETURNS [token: P1.Token] = {
    OPEN token;
      DO
      WHILE char IN [NUL..' ] 
	DO
	SELECT char FROM
	  ControlZ =>
	    UNTIL char = CR
	      DO
	      IF (tI←tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile; FillBuffer[]};
	      char ← tB[tI];
	      ENDLOOP;
	  ENDCASE;
	IF (tI←tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile; FillBuffer[]};
	char ← tB[tI];
	ENDLOOP;
      index ← tOrigin + tI;  value ← 0;
      SELECT char FROM

	'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j, 'k, 'l, 'm,
	'n, 'o, 'p, 'q, 'r, 's, 't, 'u, 'v, 'w, 'x, 'y, 'z  => {
	  i: CARDINAL;
	  i ← 0;
	    DO
	    buffer[i] ← char;
	    IF (tI←tI+1) = tMax THEN FillBuffer[];
	    char ← tB[tI];
	    SELECT char FROM
	      IN ['a..'z], IN ['A..'Z], IN ['0..'9] =>
		IF (i ← i+1) >= iMax THEN ExpandBuffer[];
	      ENDCASE =>  EXIT;
	    ENDLOOP;
	  desc.length ← i+1;
	  class ← tokenID;  value ← SymTabOps.EnterString[@desc];
	  GO TO GotNext};

	'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'J, 'K, 'L, 'M,
	'N, 'O, 'P, 'Q, 'R, 'S, 'T, 'U, 'V, 'W, 'X, 'Y, 'Z  => {
	  first, last: CARDINAL;
	  uId: BOOLEAN;
	  i, j: CARDINAL;
	  h: HashIndex;
	  s1, s2: CARDINAL;
	  i ← 0;  uId ← TRUE;  first ← last ← char-0C;
	    DO
	    buffer[i] ← char;
	    IF (tI←tI+1) = tMax THEN FillBuffer[];
	    char ← tB[tI];
	    SELECT char FROM
	      IN ['A..'Z] => {
		last ← char-0C; IF (i ← i+1) >= iMax THEN ExpandBuffer[]};
	      IN ['a..'z], IN ['0..'9] => {
		uId ← FALSE; IF (i ← i+1) >= iMax THEN ExpandBuffer[]};
	      ENDCASE =>  EXIT;
	    ENDLOOP;
	  i ← i+1;
	  IF uId
	    THEN {
	      h ← ((first*128-first) + last) MOD LAST[HashIndex] + 1;
	      WHILE (j ← hashTab[h].symbol) # 0 
		DO
		IF vocabIndex[j]-(s2←vocabIndex[j-1]) = i
		  THEN
		    FOR s1 IN [0 .. i) 
		      DO
		      IF buffer[s1] # vocab[s2] THEN EXIT;
		      s2 ← s2+1;
		      REPEAT
			FINISHED => {class ← j; GO TO GotNext};
		      ENDLOOP;
		IF (h ← hashTab[h].link) = 0 THEN EXIT;
		ENDLOOP};
	  desc.length ← i;
	  class ← tokenID;  value ← SymTabOps.EnterString[@desc];
	  GO TO GotNext};

	',, ';, ':, '=, '., '[, '], '{, '}  => {
	  class ← scanTab[char]; GO TO GetNext};

	'- => {
	  pChar: CHARACTER;
	  NextChar[];
	  IF char # '- THEN {class ← scanTab['-]; GO TO GotNext};
	  char ← NUL;
	    DO
	    pChar ← char;
	    IF (tI←tI+1) = tMax THEN {IF tEnded THEN GO TO EndFile;  FillBuffer[]};
	    char ← tB[tI];
	    SELECT char FROM
	      '- =>  IF pChar = '- THEN EXIT;
	      CR =>  EXIT;
	      ENDCASE;
	    ENDLOOP;
	  NextChar[]};

	ENDCASE => {
	  class ← scanTab[char];
	  IF class # 0 THEN  GO TO GetNext;
	  NextChar[];
	  ScanError[char, index]};

      REPEAT
	GetNext => {IF (tI←tI+1) = tMax THEN FillBuffer[]; char ← tB[tI]};
	GotNext => NULL;
	EndFile => {
	  FillBuffer[];  char ← tB[tI];
	  class ← TrueEndOfFile;  index ← tOrigin;  value ← 0};
      ENDLOOP;
    nTokens ← nTokens + 1;
    RETURN};


 -- initialization/finalization

  ScanInit: PUBLIC PROC [
      sourceStream, messageStream: Streams.Handle,
      table: ParseTable.TableRef] = {
    hashTab ← @table[table.scanTable.hashTab];
    scanTab ← @table[table.scanTable.scanTab];
    vocab ← LOOPHOLE[@table[table.scanTable.vocabBody], Strings.String];
    vocabIndex ← @table[table.scanTable.vocabIndex];
    IF buffer = NIL THEN buffer ← LongStorage.String[256];
    iMax ← buffer.length ← buffer.maxlength;
    desc.base ← buffer;  desc.offset ← 0;
    stream ← sourceStream;  errorStream ← messageStream;
    streamOrigin ← Streams.GetIndex[stream];
    tB ← LongStorage.Pages[TextPages];
    tOrigin ← tLimit ← 0;  tMax ← 0;  tEnded ← FALSE;
    FillBuffer[];  char ← tB[tI];
    nTokens ← nErrors ← 0};

  ScanReset: PUBLIC PROC RETURNS [CARDINAL, CARDINAL] = {
    LongStorage.FreePages[tB];
    IF buffer # NIL THEN {LongStorage.FreeString[buffer]; buffer ← NIL};
    [] ← LongStorage.Prune[];
    RETURN [nTokens, nErrors]};


 -- error handling

  StreamIndex: TYPE = LONG CARDINAL;

  NewLine: PROC = INLINE {CharIO.PutChar[errorStream, CR]};

  PrintTextLine: PROC [origin: LONG CARDINAL] RETURNS [start: LONG CARDINAL] = {
    OPEN CharIO;
    lineIndex: LONG CARDINAL;
    char: CHARACTER;
    n: [1..100];
    start ← lineIndex ← origin;
    FOR n IN [1..100] UNTIL lineIndex = 0
      DO
      lineIndex ← lineIndex - 1;
      Streams.SetIndex[stream, lineIndex];
      IF Streams.GetByte[stream] = CR THEN EXIT;
      start ← lineIndex;
      ENDLOOP;
    Streams.SetIndex[stream, start];
    FOR n IN [1..100] UNTIL Streams.Ended[stream] 
      DO
      char ← Streams.GetByte[stream !  Streams.End[] => GOTO out];
      SELECT char FROM
	CR, ControlZ => EXIT;
	ENDCASE => PutChar[errorStream, char];
      REPEAT
        out => NULL;
      ENDLOOP;
    NewLine[];  RETURN};


  ResetScanIndex: PUBLIC PROC [index: CARDINAL] = {
    page: CARDINAL;
    IF ~(index IN [tOrigin .. tLimit))
      THEN {
	page ← index/(PackEnviron.PageSize * PackEnviron.CharsPerWord);
	tOrigin ← tLimit ← page*(PackEnviron.PageSize * PackEnviron.CharsPerWord);
	tMax ← 0;  tEnded ← FALSE;
	Streams.SetIndex[stream, streamOrigin + index];
	FillBuffer[]};
    tI ← index - tOrigin;
    IF tI >= tMax THEN FillBuffer[];  char ← tB[tI]};


  ScanError: PROC [code: {number, string, char, atom}, tokenIndex: CARDINAL] = {
    nErrors ← nErrors + 1;
    ErrorContext[
      SELECT code FROM
	number => "invalid number"L,
	string => "string too long"L,
	char => "invalid character"L,
	atom => "invalid atom"L,
	ENDCASE => NIL,
      tokenIndex];
    NewLine[]};


  ErrorContext: PUBLIC PROC [message: STRING, tokenIndex: CARDINAL] = {
    OPEN CharIO;
    saveIndex: LONG CARDINAL = Streams.GetIndex[stream];
    origin: LONG CARDINAL = streamOrigin+tokenIndex;
    char: CHARACTER;
    Streams.SetIndex[stream, PrintTextLine[origin]];
    UNTIL Streams.GetIndex[stream] = origin DO
      char ← Streams.GetByte[stream ! Streams.End[] => GOTO out];
      PutChar[errorStream, IF char = TAB THEN TAB ELSE ' ];
      REPEAT
        out => NULL;
      ENDLOOP;
    PutString[errorStream, "↑ "L];  PutString[errorStream, message];
    PutString[errorStream, " ["L];
    PutNumber[errorStream, tokenIndex,
	[base:10, zerofill:FALSE, unsigned:TRUE, columns:0]];
    PutChar[errorStream, ']];   NewLine[];
    Streams.SetIndex[stream, saveIndex]};

  END.