-- JaMScannerImpl.mesa
-- Original version by John Warnock, March 7, 1979 
-- Last changed by Paxton, 20-Oct-81 14:19:39

DIRECTORY
  JaMBasic USING [Object],
  JaMInternal USING [Frame, Stack],
  JaMOps USING [Array, Assert, AStore, Bug, CountToMark, Error, GetStream,
    Install, InstallReason, KillStream, MakeName, MakeString, Pop, Push,
    PushBoolean, RegisterExplicit, StringToName, SubString, Text, typechk],
  JaMStorage,
  JaMVM USING [GetChar, GetText],
  Ascii USING [ControlZ, CR, LF, SP, TAB],
  Inline USING [BITSHIFT, LongNumber],
  Real USING [ReadReal, RealException],
  StreamDefs USING [StreamHandle];

-- This program implements a very simple lexical scanner which works in the
-- following way. When the routine "StreamToken" or "StringToken" is called
-- then the first token is returned on the top of stack followed by the
-- remainder of the source at next-on-stack.

JaMScannerImpl: PROGRAM
IMPORTS JaMOps, JaMStorage, JaMVM, Inline, Real
EXPORTS JaMOps = {
OPEN VM:JaMVM, JaMStorage, JaMOps, JaMInternal, JaMBasic;

-- Types

StreamHandle: TYPE = StreamDefs.StreamHandle;

Class: TYPE = {
  eos, -- end of source
  crt, -- CR
  dlm, -- delimiter (SP,TAB,LF,comma)
  com, -- %    comment
  lpr, -- (    start of string token
  rpr, -- )    end of string token
  lbr, -- {    start of array token
  rbr, -- }    end of array token
  num, -- 0..9 numeric
  sgn, -- + -  sign
  dot, -- .    period
  exp, -- E e  exponent
  oct, -- B b  octal suffix
  oth  -- other
  };

ClassIndex: TYPE = CHARACTER[0C..177C];
ClassArray: TYPE = ARRAY ClassIndex OF Class;

State: TYPE = MACHINE DEPENDENT {
  null, -- end of source, no token found
  name, -- text is a name
  strg, -- text is a string literal, without enclosing parens
  inum, -- text is an integer
  onum, -- text is an octal number, with final "B"
  rnum, -- text is a real number
  abeg, -- text is "{"
  aend, -- text is "}"
  oops, -- text is erroneous, like ")"
  part, -- text is a partial string, up to end of source
  start, -- skipping leading delimiters
  comnt, -- skipping over a comment
  ident, -- scanning an identifier
  msign, -- so far: sgn
  ipart, -- so far: num | msign num | ipart num
  octal, -- so far: ipart B
  point, -- so far: dot | msign dot
  fpart, -- so far: point num | ipart dot | fpart num
  expon, -- so far: ipart E | fpart E
  esign, -- so far: expon + | expon -
  epart, -- so far: esign num | epart num
  paren, -- inside one level of parentheses
  -- states >paren indicate levels of nested parens
  (LAST[CARDINAL])
  };

FinalState: TYPE = State[null..start);

Action: TYPE = {
  null, -- do nothing (end of source or trailing char)
  skip, -- skip leading character
  take, -- append character to text
  back  -- put character back into source
  };

-- Globals

zone: UNCOUNTED ZONE = Zone[];

classArray: LONG POINTER TO ClassArray ← NIL;

overflow: name Object;

-- Procedures

CharClass: PROC[char: CHARACTER] RETURNS[Class] = INLINE {
  RETURN[classArray[IF char<200C THEN char ELSE char-200B]] };

InitClassArray: PROC = { Assert[classArray=NIL];
  classArray ← zone.NEW[ClassArray ← ALL[oth]]; -- default is oth
  FOR c: CHARACTER IN ['0..'9] DO classArray[c] ← num ENDLOOP;  -- Numeric (0 to 9)
  classArray[0C] ← dlm;
  classArray[Ascii.SP] ← dlm;
  classArray[Ascii.TAB] ← dlm;
  classArray[Ascii.LF] ← dlm;
  classArray[',] ← dlm;  -- Delimiters
  classArray['.] ← dot;  -- Period, decimal point
  classArray['+] ← classArray['-] ← sgn;  -- Signs
  classArray['B] ← classArray['b] ← oct;  -- Octal suffix
  classArray['E] ← classArray['e] ← exp;  -- Exponent
  classArray['(] ← lpr;  -- Left paren
  classArray[')] ← rpr;  -- Right paren
  classArray['{] ← lbr;  -- Left brace
  classArray['}] ← rbr;  -- Right brace
  classArray['%] ← com;  -- Comment
  classArray[Ascii.CR] ← crt;  -- Return; end of comment
  };

FreeClassArray: PROC = { Assert[classArray#NIL];
  zone.FREE[@classArray];
  };

StreamToken: PUBLIC PROC[frame: Frame, stream: StreamHandle]
  RETURNS[found,error: BOOLEAN, token: Object] = {
  stack: Stack ← frame.opstk;
  nest: CARDINAL ← 0;
  localtext: STRING ← [255];
  text: TextObject ← TextNew[localtext];
  found ← error ← FALSE;
  DO ENABLE UNWIND => TextFree[@text]; -- token loop
    state: State ← start; TextReset[@text];
    IF error THEN state ← aend -- finish all pending arrays
    ELSE UNTIL state IN FinalState DO -- character loop
      char: CHARACTER; class: Class; action: Action;
      IF stream.endof[stream] THEN { char ← 0C; class ← eos } -- end of source
      ELSE { char ← stream.get[stream]; class ← CharClass[char] };
      SELECT char FROM
      	Ascii.ControlZ => class ← com; -- treat Bravo trailer as comment
      	0C => class ← eos; -- treat null as end of source; for Tioga
	ENDCASE;
      [state,action] ← Transition[state,class]; -- do a state transition
      SELECT action FROM -- deal with character
        null,skip => { };
	take => { TextPut[@text,char] };
	back => { stream.putback[stream,char] };
        ENDCASE => ERROR Bug;
      ENDLOOP;
    -- state and text now describe a token
    SELECT state FROM
      null => IF nest>0 THEN { error ← TRUE; LOOP } ELSE EXIT;
      inum => token ← MakeInteger[TextRead[@text]];
      onum => token ← MakeOctal[TextRead[@text]];
      rnum => token ← MakeReal[TextRead[@text]];
      name,oops => token ← MakeName[TextRead[@text]];
      strg => token ← MakeString[TextRead[@text]];
      part => { error ← TRUE; token ← MakeString[TextRead[@text]] };
      abeg => { nest ← nest + 1; token ← [L,mark[]] };
      aend => IF nest>0 THEN {
          size: CARDINAL ← CountToMark[stack]; -- count number of elements
          array: array Object ← Array[size]; -- allocate array
	  AStore[stack,array]; -- package it up
	  Assert[Pop[stack].type=mark]; -- remove mark
          token ← array; nest ← nest - 1 }
	ELSE token ← MakeName[TextRead[@text]];
      ENDCASE;
    IF nest>0 THEN Push[stack,token] ELSE { found ← TRUE; EXIT };
    ENDLOOP; -- token loop
  TextFree[@text];
  RETURN[found,error,token];
  };

StringToken: PUBLIC PROC[frame: Frame, ob: string Object]
  RETURNS[found,error: BOOLEAN, token: Object, rem: string Object] = {
  stack: Stack ← frame.opstk;
  i: CARDINAL ← 0;
  substr: string Object;
  nest: CARDINAL ← 0;
  beg: CARDINAL ← 0;
  len: CARDINAL ← 0;
  found ← error ← FALSE;
  DO -- token loop
    state: State ← start; beg ← i; len ← 0;
    IF error THEN state ← aend -- finish all pending arrays
    ELSE UNTIL state IN FinalState DO -- character loop
      char: CHARACTER; class: Class; action: Action;
      IF i>=ob.length THEN { char ← 0C; class ← eos } -- end of source
      ELSE { char ← VM.GetChar[ob,i]; class ← CharClass[char]; i ← i + 1 };
      [state,action] ← Transition[state,class]; -- do a state transition
      SELECT action FROM -- deal with character
        skip => { beg ← i };
        null => { };
	take => { len ← len + 1 };
	back => { i ← i - 1 };
        ENDCASE => ERROR Bug;
      ENDLOOP;
    substr ← SubString[ob,beg,len];
    -- state and substr now describe a token
    SELECT state FROM
      null => IF nest>0 THEN { error ← TRUE; LOOP } ELSE EXIT;
      inum => token ← StringToInteger[substr];
      onum => token ← StringToOctal[substr];
      rnum => token ← StringToReal[substr];
      name,oops => { token ← StringToName[substr]; token.tag ← X };
      strg => { token ← substr; token.tag ← L };
      part => { error ← TRUE; token ← substr; token.tag ← L };
      abeg => { nest ← nest + 1; token ← [L,mark[]] };
      aend => IF nest>0 THEN {
          len: CARDINAL ← CountToMark[stack]; -- count number of elements
          array: array Object ← Array[len]; -- allocate array
	  AStore[stack,array]; -- package it up
	  Assert[Pop[stack].type=mark]; -- remove mark
          token ← array; nest ← nest - 1 }
	ELSE { token ← StringToName[substr]; token.tag ← X };
      ENDCASE;
    IF nest>0 THEN Push[stack,token] ELSE { found ← TRUE; EXIT };
    ENDLOOP; -- token loop
  RETURN[found,error,token,SubString[ob,i,ob.length-i]];
  };

LineComplete: PUBLIC PROC[text: Text] RETURNS[BOOLEAN] = {
  nest: CARDINAL ← 0;
  i: CARDINAL ← 0;
  DO -- token loop
    state: State ← start;
    UNTIL state IN FinalState DO -- character loop
      char: CHARACTER; class: Class; action: Action;
      IF i>=text.length THEN { char ← 0C; class ← eos } -- end of source
      ELSE { class ← CharClass[char ← text[i]]; i ← i + 1 };
      [state,action] ← Transition[state,class]; -- do a state transition
      SELECT action FROM -- deal with character
        null => { };
        skip => { --beg ← beg + 1-- };
	take => { --len ← len + 1-- };
	back => { i ← i - 1 };
        ENDCASE => ERROR Bug;
      ENDLOOP;
    -- state and text now describe a token
    SELECT state FROM
      null => EXIT;
      part => RETURN[FALSE]; -- incomplete: inside a string
      abeg => { nest ← nest + 1 };
      aend => { IF nest>0 THEN nest ← nest - 1 };
      ENDCASE;
    ENDLOOP; -- token loop
  RETURN[nest=0]; -- complete if not inside an array
  };

Transition: PROC[state: State, class: Class]
  RETURNS[State,Action] = INLINE { SELECT state FROM
  start => SELECT class FROM
    eos => { RETURN[null,null] }; -- no more tokens
    crt,dlm => { RETURN[start,skip] }; -- ignore
    com => { RETURN[comnt,skip] }; -- begin comment
    lpr => { RETURN[paren,skip] }; -- begin string with next char
    rpr => { RETURN[oops,take] }; -- extra right paren
    lbr => { RETURN[abeg,take] }; -- begin array
    rbr => { RETURN[aend,take] }; -- end array
    num => { RETURN[ipart,take] }; -- begin integer
    sgn => { RETURN[msign,take] }; -- mantissa may follow
    dot => { RETURN[point,take] }; -- fraction may follow
    oct,exp,oth => { RETURN[ident,take] }; -- begin a name
    ENDCASE;
  comnt => SELECT class FROM
    eos => { RETURN[null,null] }; -- no more tokens
    crt => { RETURN[start,skip] }; -- end of comment
    dlm,com,lpr,rpr,lbr,rbr,num,sgn,dot,oct,exp,oth => { RETURN[comnt,skip] }; -- ignore
    ENDCASE;
  msign => SELECT class FROM
    -- So far: sign for mantissa
    eos,crt,dlm => { RETURN[name,null] }; -- finish as a name
    com,lpr,rpr,lbr,rbr => { RETURN[name,back] }; -- back up and finish
    num => { RETURN[ipart,take] }; -- begin integer
    dot => { RETURN[point,take] }; -- fraction may follow
    sgn,oct,exp,oth => { RETURN[ident,take] }; -- not a number
    ENDCASE;
  point => SELECT class FROM
    -- So far: (sign and`) decimal point
    eos,crt,dlm => { RETURN[name,null] }; -- finish as a name
    com,lpr,rpr,lbr,rbr => { RETURN[name,back] }; -- back up and finish
    num => { RETURN[fpart,take] }; -- begin fraction
    dot,sgn,oct,exp,oth => { RETURN[ident,take] }; -- not a number
    ENDCASE;
  ipart => SELECT class FROM
    -- So far: valid integer
    eos,crt,dlm => { RETURN[inum,null] }; -- finish integer
    com,lpr,rpr,lbr,rbr => { RETURN[inum,back] }; -- back up and finish integer
    num => { RETURN[ipart,take] }; -- add integer digit
    dot => { RETURN[fpart,take] }; -- fraction follows
    oct => { RETURN[octal,take] }; -- octal number
    exp => { RETURN[expon,take] }; -- exponent follows
    sgn,oth => { RETURN[ident,take] }; -- not a number
    ENDCASE;
  octal => SELECT class FROM
    -- So far: valid integer followed by "B"
    eos,crt,dlm => { RETURN[onum,null] }; -- finish octal
    com,lpr,rpr,lbr,rbr => { RETURN[onum,back] }; -- back up and finish octal
    num,dot,sgn,oct,exp,oth => { RETURN[ident,take] }; -- not a number
    ENDCASE;
  fpart => SELECT class FROM
    -- So far: valid mantissa containing decimal point
    eos,crt,dlm => { RETURN[rnum,null] }; -- finish real
    com,lpr,rpr,lbr,rbr => { RETURN[rnum,back] }; -- back up and finish real
    num => { RETURN[fpart,take] }; -- add fraction digit
    exp => { RETURN[expon,take] }; -- exponent follows
    dot,sgn,oct,oth => { RETURN[ident,take] }; -- not a number
    ENDCASE;
  expon => SELECT class FROM
    -- So far: valid mantissa followed by "E"
    eos,crt,dlm => { RETURN[name,null] }; -- finish as a name
    com,lpr,rpr,lbr,rbr => { RETURN[name,back] }; -- back up and finish
    num => { RETURN[epart,take] }; -- first exponent digit
    sgn => { RETURN[esign,take] }; -- exponent sign; digit should follow
    dot,oct,exp,oth => { RETURN[ident,take] }; -- not a number
    ENDCASE;
  esign => SELECT class FROM
    -- So far: valid mantissa followed by "E" and a sign
    eos,crt,dlm => { RETURN[name,null] }; -- finish as a name
    com,lpr,rpr,lbr,rbr => { RETURN[name,back] }; -- back up and finish
    num => { RETURN[epart,take] }; -- first exponent digit
    sgn,dot,oct,exp,oth => { RETURN[ident,take] }; -- not a number
    ENDCASE;
  epart => SELECT class FROM
    -- So far: valid real with exponent
    eos,crt,dlm => { RETURN[rnum,null] }; -- finish real
    com,lpr,rpr,lbr,rbr => { RETURN[rnum,back] }; -- back up and finish real
    num => { RETURN[epart,take] }; -- add exponent digit
    sgn,dot,oct,exp,oth => { RETURN[ident,take] }; -- not a number
    ENDCASE;
  ident => SELECT class FROM
    -- So far: an identifier
    eos,crt,dlm => { RETURN[name,null] }; -- finish name
    com,lpr,rpr,lbr,rbr => { RETURN[name,back] }; -- back up and finish name
    num,sgn,dot,oct,exp,oth => { RETURN[ident,take] }; -- append
    ENDCASE;
  paren => SELECT class FROM
    eos => { RETURN[part,null] }; -- premature end
    lpr => { RETURN[SUCC[paren],take] }; -- up a level
    rpr => { RETURN[strg,null] }; -- finish string
    crt,dlm,com,lbr,rbr,num,sgn,dot,oct,exp,oth => { RETURN[paren,take] }; -- append
    ENDCASE;
  ENDCASE => SELECT class FROM
    eos => { RETURN[part,null] }; -- premature end
    lpr => { RETURN[SUCC[state],take] }; -- open
    rpr => { RETURN[PRED[state],take] }; -- close
    crt,dlm,com,lbr,rbr,num,sgn,dot,oct,exp,oth => { RETURN[state,take] }; -- append
    ENDCASE;
  ERROR Bug; -- unknown state or class
  };


MakeOctal: PROC[text: LONG STRING] RETURNS[Object] = {
  val: Inline.LongNumber ← [lc[0]];
  neg: BOOLEAN ← FALSE;
  i: CARDINAL ← 0;
  SELECT text[0] FROM '+ => { i ← 1 }; '- => { neg ← TRUE; i ← 1 }; ENDCASE;
  FOR j: CARDINAL IN[i..text.length-1) DO
    c: CHARACTER ← text[j];
    -- use shifting to multiply by 8
    val.high ← Inline.BITSHIFT[val.high,3] + Inline.BITSHIFT[val.low,3-16];
    val.low ← Inline.BITSHIFT[val.low,3];
    IF c>'0 THEN val.lc ← val.lc + (c-'0);
    ENDLOOP;
  RETURN[[L,integer[IF neg THEN -val.li ELSE val.li]]];
  };

MakeInteger: PROC[text: LONG STRING] RETURNS[Object] = {
  val: LONG CARDINAL ← 0;
  max: LONG CARDINAL = LAST[LONG CARDINAL];
  lim: LONG CARDINAL = max/10; -- lim*10 <= max
  int: LONG INTEGER;
  neg: BOOLEAN ← FALSE;
  i: CARDINAL ← 0;
  SELECT text[0] FROM '+ => { i ← 1 }; '- => { neg ← TRUE; i ← 1 }; ENDCASE;
  FOR j: CARDINAL IN[i..text.length) WHILE val<max DO
    c: CHARACTER ← text[j];
    val ← IF val<=lim THEN val*10 ELSE max;
    IF c>'0 THEN { d: CARDINAL ← c-'0; val ← IF d<(max-val) THEN val+d ELSE max };
    ENDLOOP;
  int ← LOOPHOLE[val]; IF neg THEN int ← -int;
  IF (int<0)=neg THEN RETURN[[L,integer[int]]] ELSE RETURN[MakeReal[text]];
  };

MakeReal: PROC[text: LONG STRING] RETURNS[Object] = {
  i: CARDINAL ← 0;
  Get: PROC RETURNS[CHARACTER] = { c: CHARACTER ← Ascii.CR;
    IF i<text.length THEN { c ← text[i]; i ← i + 1 }; RETURN[c] };
  ob: Object;
  { ENABLE Real.RealException => { ob ← overflow; CONTINUE };
    r: REAL ← Real.ReadReal[Get]; ob ← [L,real[r]] };
  RETURN[ob];
  };

StringToOctal: PROC[string: string Object] RETURNS[Object] = {
  text: STRING ← [20];
  IF string.length>text.maxlength THEN {
    len: CARDINAL = text.maxlength;
    VM.GetText[SubString[string,string.length-len,len],text];
    text[0] ← VM.GetChar[string,0] } -- preserve sign
  ELSE VM.GetText[string,text];
  RETURN[MakeOctal[text]];
  };

StringToInteger: PROC[string: string Object] RETURNS[Object] = {
  text: STRING ← [20];
  IF string.length>text.maxlength THEN RETURN[StringToReal[string]]
  ELSE { VM.GetText[string,text]; RETURN[MakeInteger[text]] };
  };

StringToReal: PROC[string: string Object] RETURNS[Object] = {
  i: CARDINAL ← 0;
  Get: PROC RETURNS[CHARACTER] = { c: CHARACTER ← Ascii.CR;
    IF i<string.length THEN { c ← VM.GetChar[string,i]; i ← i + 1 }; RETURN[c] };
  ob: Object;
  { ENABLE Real.RealException => { ob ← overflow; CONTINUE };
    r: REAL ← Real.ReadReal[Get]; ob ← [L,real[r]] };
  RETURN[ob];
  };

-- One intrinsic

JToken: PUBLIC PROC[frame: Frame] = {
  ob: Object ← Pop[frame.opstk];
  found,error: BOOLEAN; tok,rem: Object;
  WITH ob:ob SELECT FROM
    string => [found,error,tok,rem] ← StringToken[frame,ob];
    stream => {
      s: StreamHandle ← GetStream[ob];
      [found,error,tok] ← StreamToken[frame,s];
      IF found THEN rem ← ob ELSE KillStream[ob];
      };
    ENDCASE => ERROR Error[typechk];
  IF found THEN { Push[frame.opstk,rem]; Push[frame.opstk,tok] };
  PushBoolean[frame.opstk, found];
  -- ignores error flag
  };

-- Initialization

InstallScanner: PROC[why: InstallReason, frame: Frame] = { SELECT why FROM
  init => InitClassArray[];
  free => FreeClassArray[];
  register => {
    overflow ← MakeName[".overflow"L];
    RegisterExplicit[frame, ".token"L, JToken];
    };
  ENDCASE;
  };

Install[InstallScanner];

}.

Paxton x
   added test for 0C in StreamToken