-- file MesaScannerImpl.mesa
-- last edit by Russ Atkinson, February 9, 1982 6:29 pm

DIRECTORY
  Ascii: TYPE USING
    [BS, CR, FF, LF, NUL, TAB],
  Atom: TYPE USING
    [MakeAtom],
  MesaScanner: TYPE USING
    [GetProcType, Token, TokenKind],
  Real: TYPE
    USING [ReadReal],
  Rope: TYPE
    USING [Fetch, FromProc, ROPE, Size, Substr];

MesaScannerImpl: PROGRAM
  IMPORTS Atom, Real, Rope
  EXPORTS MesaScanner
  SHARES Rope
  = BEGIN OPEN Ascii, MesaScanner, Rope;

  ControlZ: CHAR = 32C; -- Bravo escape char
  ControlCR: CHAR = 215C;  -- control CR (Bravo relic)

  LastIntDiv10: INT = LAST[INT]/10;
  LastCardDiv8: INT = LAST[LONG CARDINAL]/8;
  
  GetToken: PUBLIC PROC
       [get: GetProcType,index: INT]
       RETURNS [token: Token] = {

    FirstChar: PROC [kind: TokenKind] = {
      -- accept the current char, set the kind
      -- and grab the next character
      token.kind ← kind;
      token.start ← index - 1;
      token.next ← index;
      char ← get[index];
      index ← index + 1;
      };
      
    AddChar: PROC [] = {
      -- just accept the current character
      token.next ← index}; 
  
    AddCharPlus: PROC [] = {
      -- accept the current character & get the next one
      token.next ← index;
      char ← get[index];
      index ← index + 1}; 
  
    NextChar: PROC = {
      -- grab the next char, but don't add it to the token
      char ← get[index];
      index ← index + 1}; 

    AcceptEscapeCode: PROC RETURNS [BOOL] = {
      -- assume that the last char accepted was '\\
      -- and that char is the 1st char in the escape code
      SELECT char FROM
        'n, 'N, 'r, 'R, 't, 'T, 'b, 'B,
        'f, 'F, 'l, 'L, '', '", '\\ =>
	   {AddCharPlus[]; RETURN [TRUE]};
	IN ['0..'3] =>
	  {AddCharPlus[];
	   IF char IN ['0..'7] THEN
	      {AddCharPlus[];
	       IF char IN ['0..'7] THEN
		  {AddCharPlus[];
		   RETURN [TRUE]}}};
	 ENDCASE;
      token.msg ← "invalid escape code";
      RETURN [FALSE];
      };

    char: CHAR ← get[index];
    index ← index + 1;
    token.msg ← NIL;
  
    DO -- at start of token (we think)
       {lag: CHAR ← char;
        SELECT char FROM
          ControlZ =>
	    {DO -- skip over the Bravo garbage
	        NextChar[];
		SELECT char FROM
		  NUL => IF lag = NUL THEN GO TO EndOfFile;
		  CR, ControlCR => EXIT;
		  ENDCASE;
	        lag ← char;
	        ENDLOOP;
	     };
	  IN [NUL..' ], ControlCR =>
	    {NextChar[];
	     IF char = NUL AND lag = NUL THEN GO TO EndOfFile};
          IN ['a..'z], IN ['A..'Z] =>
	     {FirstChar[tokenID];
	      DO
	         SELECT char FROM
		   IN ['a..'z], IN ['A..'Z], IN ['0..'9] =>
		     {AddCharPlus[]; LOOP};
		   ENDCASE => RETURN;
		 ENDLOOP;
	      };
         '0, '1, '2, '3, '4, '5, '6, '7, '8, '9 => 
             {-- accept the first run of digits 
              card: CARDINAL ← char - '0;
	      octalValid: BOOL ← card < 8;
	      small: BOOL ← octalValid;
	      FirstChar[tokenINT];
	      WHILE char IN ['0..'9] DO
	        octalValid ← octalValid AND char IN ['0..'7];
	        card ← card * 8 + (char-'0);
	        small ← small AND octalValid AND card < 400B;
	        AddCharPlus[];
	        ENDLOOP; 
              IF char = '. THEN 
                 {-- determine whether there is a fraction
		  NextChar[]; 
                  IF char = '. THEN RETURN; -- no fraction 
                  token.kind ← tokenREAL;
		  AddChar[]; 
                  WHILE char IN ['0..'9] DO
	            AddCharPlus[];
	            ENDLOOP}; 
              SELECT char FROM 
                'e, 'E => 
                    {-- accept the exponent
		     AddCharPlus[];
		     IF char = '- OR char = '+ THEN AddCharPlus[]; 
                     WHILE char IN ['0..'9] DO
	               AddCharPlus[];
	               ENDLOOP; 
                     token.kind ← tokenREAL};
                'b, 'B => 
                    {IF token.kind = tokenREAL THEN RETURN;
		     AddCharPlus[];
		     IF NOT octalValid THEN
		        {token.msg ← "invalid octal constant"; GO TO Error}};
                'c, 'C => 
                    {IF token.kind = tokenREAL THEN RETURN;
		     AddCharPlus[];
		     IF NOT small THEN
		        {token.msg ← "invalid character code"; GO TO Error};
                     token.kind ← tokenCHAR};
                ENDCASE; 
              RETURN};
         
         ',, ';, ':, '←, '#, '~, '+, '*, '/, '↑, '@, '!, '(, '), '[, '], '{, '} => 
             {FirstChar[tokenSINGLE]; RETURN};
         
         '' => 
             {FirstChar[tokenCHAR]; 
	      lag ← char;
	      AddCharPlus[];
	      IF lag # '\\ THEN RETURN; 
	      IF AcceptEscapeCode[] THEN RETURN;
	      GO TO Error;
	      };
         
         '" => 
             {FirstChar[tokenROPE];
	      DO -- eat up the string/rope literal
                 SELECT char FROM 
                   '" => 
                      {AddCharPlus[];
		       IF char # '" THEN EXIT};
                   '\\ => 
                      {AddCharPlus[];
		       IF AcceptEscapeCode[] THEN LOOP;
		       GO TO Error};    
		   NUL =>
		      {AddCharPlus[];
		       IF char # NUL THEN LOOP;
		       token.msg ← "end-of-file in string literal";
		       GO TO Error};
                   ENDCASE;
		 AddCharPlus[];
                 ENDLOOP;
	      -- accept trailing L (for local frame designation)
	      IF char = 'L OR char = 'l THEN AddCharPlus[];
	      RETURN; 
              };
         
         '$ => 
             {FirstChar[tokenATOM]; 
              SELECT char FROM 
                IN ['a..'z], IN ['A..'Z] => {};
                ENDCASE => 
		  {token.msg ← "invalid atom"; GO TO Error}; 
              DO -- accumulate rest of atom name
                 SELECT char FROM 
                   IN ['a..'z], IN ['A..'Z], IN ['0..'9] => {};
                   ENDCASE => EXIT; 
                 AddCharPlus[]
                 ENDLOOP; 
              RETURN};
         
         '- => {-- tokenMINUS or comment processing
		FirstChar[tokenSINGLE];
		IF char # '- THEN RETURN;
		token.kind ← tokenCOMMENT;
		lag ← CR;
		-- now we have started a comment
		DO
		   AddCharPlus[];
		   SELECT char FROM
		     CR, ControlCR => GO TO LastOne;
		     '- => IF lag = '- THEN GO TO LastOne;
		     NUL => IF lag = NUL THEN
		       {token.next ← index - 2;
		        RETURN};
		     ENDCASE;
		   lag ← char;
		   ENDLOOP;
	        };
         
         '. => 
             {-- either a REAL or a dot or a dotdot
              FirstChar[tokenSINGLE];
	      IF char = '. THEN GO TO Double;
	      IF char NOT IN ['0..'9] THEN RETURN;
	      token.kind ← tokenREAL; 
              AddCharPlus[]; 
              WHILE char IN ['0..'9] DO
	        AddCharPlus[];
		ENDLOOP;
	      IF char = 'E OR char = 'e THEN
	         {-- accept the exponent
		  AddCharPlus[];
		  IF char = '- OR char = '+ THEN AddCharPlus[];
		  WHILE char IN ['0..'9] DO
	            AddCharPlus[];
		    ENDLOOP}; 
              RETURN;
	      };
         
         '= => 
             {-- either '=' or '=>'
              FirstChar[tokenSINGLE]; 
              IF char = '> 
                 THEN GO TO Double 
                 ELSE RETURN};
         
         '>, '< => 
             {-- either '>' or '>=' (or '<' or '<=')
              FirstChar[tokenSINGLE]; 
              IF char = '= 
                 THEN GO TO Double 
                 ELSE RETURN};
         
         ENDCASE =>
	   {FirstChar[tokenERROR];
            token.msg ← "invalid character";
            RETURN};
	EXITS
	  Error =>
	    {token.kind ← tokenERROR;
	     RETURN};
 	  Double =>
	    {token.kind ← tokenDOUBLE;
	     token.next ← index;
	     RETURN};
	  LastOne =>
	    {token.next ← index;
	     RETURN};
	  EndOfFile =>
	    {token.start ← token.next ← index - 2;
             token.kind ← tokenEOF;
	     RETURN}};
      
       ENDLOOP; 
    }; 

  RealFromToken: PUBLIC PROC
    [get: GetProcType, token: Token] RETURNS [REAL] = {
    -- takes a Token into a REAL (parses the literal)
    -- signals WrongKind if token.kind # tokenREAL
    index: INT ← token.start;
    gp: PROC RETURNS [c: CHAR] = {
      c ← get[index];
      index ← index + 1;
      };
    IF token.kind # tokenREAL THEN ERROR WrongKind;
    RETURN [Real.ReadReal[gp]];
    };

  IntFromToken: PUBLIC PROC
    [get: GetProcType, token: Token] RETURNS [x: INT] = {
    -- takes a Token into a INT (parses the literal)
    -- signals WrongKind if token.kind # tokenINT
    base: NAT ← 10;
    lc: CHAR ← 0C;
    end: INT ← token.next - 1;
    over: INT ← LastIntDiv10;
    IF token.kind # tokenINT THEN ERROR WrongKind;
    lc ← get[end];
    x ← 0;
    IF lc = 'b OR lc = 'B THEN
       {base ← 8; end ← end - 1; over ← LastCardDiv8};
    FOR i: INT IN [token.start..end] DO
      IF x > over THEN ERROR IntegerOverflow;
      x ← x * base + (get[i] - '0); 
      IF x < 0 AND base = 10 THEN ERROR IntegerOverflow; 
      ENDLOOP;
    };

  CharFromToken: PUBLIC PROC
    [get: GetProcType, token: Token] RETURNS [c: CHAR] = {
    -- takes a Token into a CHAR (parses the literal)
    -- signals WrongKind if token.kind # tokenCHAR
    IF token.kind # tokenCHAR THEN ERROR WrongKind;
    IF get[token.start] = '' THEN
        IF (c ← get[token.start+1]) = '\\
            THEN RETURN [ParseEscapeCode[get, token.start+2].c]
            ELSE RETURN;
    RETURN [ParseEscapeCode[get, token.start].c];
    };

  RopeFromToken: PUBLIC PROC
    [get: GetProcType, token: Token] RETURNS [new: ROPE] = {
    -- takes a Token into a ROPE (parses the literal)
    -- signals WrongKind if token.kind # tokenROPE
    index: INT ← token.start + 1;
    end: INT ← token.next - 1;
    escaped: BOOL ← FALSE;
    res: INT ← 0;
    gp: PROC RETURNS [c: CHAR] = {
      IF index >= end THEN RETURN [0C];
      c ← get[index];
      index ← index + 1;
      res ← res + 1;
      IF c = '" THEN {index ← index + 1; escaped ← TRUE};
      IF c # '\\ THEN RETURN;
      [c, index] ← ParseEscapeCode[get, index+1];
      escaped ← TRUE};
    IF token.kind # tokenROPE THEN ERROR WrongKind;
    IF get[end] = '" THEN end ← end - 1;
    new ← Rope.FromProc[end - index, gp];
    IF escaped THEN new ← new.Substr[0, res];
    };

  AtomFromToken: PUBLIC PROC
    [get: GetProcType, token: Token] RETURNS [ATOM] = {
    -- takes a Token into a ATOM (parses the literal)
    -- signals WrongKind if token.kind # tokenATOM
    IF token.kind # tokenATOM THEN ERROR WrongKind;
    RETURN[Atom.MakeAtom[ExtractRope[get, token.start+1, token.next]]];
    };

  ContentsFromToken: PUBLIC PROC
    [get: GetProcType, token: Token] RETURNS [ROPE] = {
    -- gets the contents of the token as a ROPE
    -- can be used on any token
    RETURN [ExtractRope[get, token.start, token.next]];
    };

  WrongKind: PUBLIC ERROR = CODE;

  IntegerOverflow: PUBLIC ERROR = CODE;

  -- utility routines

  ExtractRope: PROC
      [get: GetProcType, start,end: INT] RETURNS [ROPE] = {
    size: INT ← end - start;
    gp: PROC RETURNS [c: CHAR] = {
      c ← get[start];
      start ← start + 1};
    RETURN[Rope.FromProc[size, gp]];
    };

  ParseEscapeCode: PROC
      [get: GetProcType, index: INT]
      RETURNS [c: CHAR, next: INT] = {
    c ← get[index];
    next ← index + 1;
    SELECT c FROM
        'n, 'N => c ← Ascii.CR;
        'r, 'R => c ← Ascii.CR;
        't, 'T => c ← Ascii.TAB;
        'b, 'B => c ← Ascii.BS;
        'f, 'F => c ← Ascii.FF;
        'l, 'L => c ← Ascii.LF;
	IN ['0..'3] =>
	  {cc: CHAR;
	   c ← 0C + (c-'0);
	   cc ← get[next];
	   IF cc IN ['0..'7] THEN
	      {c ← 0C + ((cc-'0) + (c-0C)*10B);
	       cc ← get[next ← next + 1];
 	       IF cc IN ['0..'7] THEN
		  {c ← 0C + ((cc-'0) + (c-0C)*10B);
		   next ← next + 1}}};
	ENDCASE;
    };

  -- test facilities

  TestResults: TYPE = RECORD
    [token: Token,
     contents: ROPE,
     literal: REF];

  Test: PROC [rope: ROPE] RETURNS [list: LIST OF TestResults] = {
    get: GetProcType = {
      IF index < rope.Size[] THEN RETURN [rope.Fetch[index]];
      RETURN [NUL];
      };
    tget: GetProcType = {
      c: CHAR ← get[index];
      IF c = '& THEN c ← 'A;
      RETURN [c];
      };
    index: INT ← 0;
    lag, temp: LIST OF TestResults ← NIL;
    list ← NIL;
    DO
        token: Token ← GetToken[tget, index];
	literal: REF ← NIL;
	IF token.kind = tokenEOF THEN EXIT;
	index ← token.next;
        SELECT token.kind FROM
           tokenINT => literal ← NEW[INT ← IntFromToken[get, token]];
           tokenREAL => literal ← NEW[REAL ← RealFromToken[get, token]];
           tokenCHAR => literal ← NEW[CHAR ← CharFromToken[get, token]];
           tokenROPE => literal ← RopeFromToken[get, token];
           tokenATOM => literal ← AtomFromToken[get, token];
           ENDCASE;
	temp ← LIST[[token, ContentsFromToken[get, token], literal]];
	IF lag = NIL THEN list ← temp ELSE lag.rest ← temp;
	lag ← temp;
	ENDLOOP; 
    };

  END.