-- December 7, 1982 3:26 pm
-- LexerImpl.mesa

--! when this module was first coded, Lex.Handles did not have "buf" fields. Now that
-- they do, the global "buf" can and should be eliminated. CGN, September 9, 1982 4:08 pm


DIRECTORY Atom, Rope, IO, IOClasses, RefText, Lexer;

LexerImpl: MONITOR
IMPORTS Atom, Rope, IO, IOClasses, RefText
EXPORTS Lexer
= BEGIN OPEN Lexer;

-- Size passed to RefText.New when the shared scratch text is first allocated.
bufsize: INT = 200;

-- Module-global scratch text holding the characters of the lexeme being scanned.
-- It is shared by every Handle: Lex resets its length on each call and then stores
-- this same REF into h.buf, so a later Lex call on any handle reuses the storage.
-- The Next* procedures reassign buf from the result of RefText.AppendChar
-- (presumably because AppendChar may return a different, larger text; confirm).
buf: REF TEXT ← RefText.New[bufsize];

AddOpPair: PUBLIC PROC [h: Handle, c1, c2: CHAR] =
  {-- Registers the two-character operator c1 c2 on handle h.
   -- The new entry is pushed on the front of h.opList; its atom is named by the
   -- two characters in order. NextOp consults this list when it sees two
   -- consecutive characters of type op.
   pair: OpRec ←
     [opname: [c1, c2],
      op: Atom.MakeAtom[Rope.Cat[Rope.FromChar[c1], Rope.FromChar[c2]]]];
   h.opList ← CONS[pair, h.opList]};

NewHandle: PUBLIC PROC RETURNS [h: Handle] =
  {-- Allocates a fresh lexer handle: no input stream, no token, no error, not at
   -- end of file, and an empty two-character operator list. The character class
   -- table is then filled in by DefaultCharTypes.
   h ← NEW [HandleRec ←
     [in: NIL,
      a: NIL,
      buf: NIL,
      error: NIL,
      eof: FALSE,
      opList: NIL,
      type: ALL [op]]];
   DefaultCharTypes[h]};

Lex: PUBLIC ENTRY PROC [h: Handle] =
  {-- Scans the next token from h.in. On return h.a holds the token (set by the
   -- Next* procedures) and h.buf refers to the shared scratch text containing the
   -- characters gathered for it.
   --
   -- End of input: IO.EndOfStream raised anywhere during scanning is absorbed here
   -- and recorded as h.eof = TRUE. If characters had already been gathered when
   -- that happened, h.error is set and h.in is replaced by a concatenation of the
   -- old stream and a stream over the gathered text (presumably so the partial
   -- lexeme can be read again; confirm against IOClasses.CreateCatInputStream).
   --
   -- NOTE(review): h.eof and h.error are never cleared here; callers appear to be
   -- responsible for resetting them between calls. Confirm against callers.
   --
   -- The UNWIND catch phrase makes sure the monitor lock is released if any signal
   -- other than IO.EndOfStream unwinds through this ENTRY procedure; without it the
   -- lock would stay held and every later call of Lex would block.
   ENABLE UNWIND => NULL;
   buf.length ← 0;
   Next2[h ! IO.EndOfStream => {h.eof ← TRUE; CONTINUE}];
   h.buf ← buf; --! this patch inserted September 9, 1982 4:09 pm by CGN
   IF h.eof AND buf.length # 0
     THEN {h.error ← "end of file in mid-lexeme";
           h.in ← IOClasses.CreateCatInputStream[
             h.in, IO.RIS[Rope.FromRefText[buf]]]}};

Next2: PROC [h: Handle] =
  {-- Sets h.a to be the next token in the input.
   -- Discards characters classified as blank, then hands off to the scanner chosen
   -- by the class of the first remaining character. IO.EndOfStream from the peeks
   -- is not caught here; Lex handles it.
   c: CHAR;
   DO
     c ← IO.PeekChar[h.in];
     IF h.type[c] # blank THEN EXIT;
     [] ← IO.GetChar[h.in];
   ENDLOOP;
   SELECT h.type[c] FROM
     letter => NextID[h];
     digit => NextNumber[h];
     op => NextOp[h];
     quote => NextString[h]
   ENDCASE};

NextID: PROC [h: Handle] =
  {-- Scans an identifier: the character at the head of h.in followed by every
   -- subsequent character classified as letter or digit. The characters are
   -- gathered in buf and h.a becomes the atom with that name.
   c: CHAR ← IO.PeekChar[h.in];
   DO
     buf ← RefText.AppendChar[buf, c];
     [] ← IO.GetChar[h.in];
     c ← IO.PeekChar[h.in];
     IF h.type[c] # letter AND h.type[c] # digit THEN EXIT;
   ENDLOOP;
   h.a ← Atom.MakeAtom[Rope.FromRefText[buf]]};

NextNumber: PROC [h: Handle] =
  {-- Scans a decimal number starting at the head of h.in, gathering its characters
   -- in buf. A run of digits alone yields h.a = NEW[INT]. If the run is followed
   -- by a period, the period and any following digits are consumed as a fraction
   -- and h.a = NEW[REAL] instead (so "3." yields the REAL 3).
   whole: INT;
   value, scale: REAL;
   c: CHAR ← IO.PeekChar[h.in];

   -- Integer part: the first digit, then any further digits.
   whole ← c - '0;
   buf ← RefText.AppendChar[buf, c];
   [] ← IO.GetChar[h.in];
   DO
     c ← IO.PeekChar[h.in];
     IF h.type[c] # digit THEN EXIT;
     whole ← whole * 10 + c - '0;
     buf ← RefText.AppendChar[buf, c];
     [] ← IO.GetChar[h.in];
   ENDLOOP;

   -- No period next: the token is an integer, and c stays unread.
   IF c # '. THEN {h.a ← NEW[INT ← whole]; RETURN};

   -- Fraction: scale is the weight of the next digit (tenths, hundredths, ...).
   buf ← RefText.AppendChar[buf, c];
   value ← whole;
   scale ← 0.1;
   [] ← IO.GetChar[h.in];
   DO
     c ← IO.PeekChar[h.in];
     IF h.type[c] # digit THEN EXIT;
     value ← value + scale * (c - '0);
     scale ← scale/10;
     buf ← RefText.AppendChar[buf, c];
     [] ← IO.GetChar[h.in];
   ENDLOOP;
   h.a ← NEW[REAL ← value]};

NextOp: PROC [h: Handle] =
  {-- Scans an operator. The leading character is consumed and appended to buf.
   -- If the character after it is also of class op and the pair is registered in
   -- h.opList, the second character is consumed too and h.a is the pair's atom.
   -- Otherwise h.a is the atom for the leading character alone.
   -- Note that the second character of a pair is not appended to buf.
   lead: CHAR ← IO.PeekChar[h.in];
   follow: CHAR;
   buf ← RefText.AppendChar[buf, lead];
   [] ← IO.GetChar[h.in];
   follow ← IO.PeekChar[h.in];
   IF h.type[follow] = op THEN
     {pairs: LIST OF OpRec ← h.opList;
      WHILE pairs # NIL DO
        IF pairs.first.opname = [lead, follow] THEN
          {h.a ← pairs.first.op; [] ← IO.GetChar[h.in]; RETURN};
        pairs ← pairs.rest;
      ENDLOOP};
   h.a ← Atom.MakeAtomFromChar[lead]};

NextString: PROC [h: Handle] =
  {-- Scans a string literal. The character at the head of h.in is the delimiter;
   -- characters are gathered in buf until the same delimiter, a CR or an LF is
   -- seen. On a closing delimiter h.a becomes the rope of the characters between
   -- the delimiters and the closing delimiter is consumed (but not put in buf).
   -- On CR or LF, h.error is set, h.a is left untouched and the line break is
   -- left unread.
   delim: CHAR ← IO.PeekChar[h.in];
   c: CHAR ← delim;
   DO
     buf ← RefText.AppendChar[buf, c];
     [] ← IO.GetChar[h.in];
     c ← IO.PeekChar[h.in];
     IF c = IO.CR OR c = IO.LF OR c = delim THEN EXIT;
   ENDLOOP;
   IF c # delim THEN {h.error ← "no multi-line strings"; RETURN};
   -- Position 0 of buf holds the opening delimiter, so the contents start at 1.
   h.a ← Rope.Substr[Rope.FromRefText[buf], 1];
   [] ← IO.GetChar[h.in]};

DefaultCharTypes: PUBLIC PROC[h: Handle] =
  {-- Resets h's character class table to the default classification:
   --   a..z and A..Z are letter, 0..9 are digit, the double quote is quote,
   --   CR, SP, LF, FF, NUL and TAB are blank, and everything else is op.
   --
   -- The whole table is first set to op with ALL. The earlier loop over
   -- [0C .. 255C] did not reach every character: a digits-C literal is octal, so
   -- 255C is decimal 173 and codes above it kept whatever class they had before.
   h.type ← ALL [op];
   FOR c: CHAR IN ['a .. 'z] DO h.type[c] ← letter ENDLOOP;
   FOR c: CHAR IN ['A .. 'Z] DO h.type[c] ← letter ENDLOOP;
   FOR c: CHAR IN ['0 .. '9] DO h.type[c] ← digit ENDLOOP;
   h.type['"] ← quote;
   h.type[IO.CR] ← blank;
   h.type[IO.SP] ← blank;
   h.type[IO.LF] ← blank;
   h.type[IO.FF] ← blank;
   h.type[IO.NUL] ← blank;
   h.type[IO.TAB] ← blank};

END.