-- December 7, 1982 3:26 pm
-- LexerImpl.mesa
-- Implementation of the Lexer interface: a character-class driven tokenizer that reads
-- from the stream h.in and leaves the next token in h.a.
--! History: when this module was first coded, Lex.Handles did not have "buf" fields, so
-- lexeme text was collected in a module-global buffer and copied into h.buf by a patch
-- (CGN, September 9, 1982 4:08 pm). The global has now been eliminated: every procedure
-- accumulates directly into h.buf, so two handles no longer share one text buffer.

DIRECTORY
  Atom, Rope, IO, IOClasses, RefText, Lexer;

LexerImpl: MONITOR
  IMPORTS Atom, Rope, IO, IOClasses, RefText
  EXPORTS Lexer =
BEGIN OPEN Lexer;

-- Initial capacity of a handle's lexeme buffer. RefText.AppendChar hands back the text to
-- keep using, which is why every append below re-assigns h.buf.
bufsize: INT = 200;

-- Registers the two-character operator c1 c2 on handle h. The pair is pushed on the front
-- of h.opList together with an atom whose name is the two characters concatenated.
AddOpPair: PUBLIC PROC [h: Handle, c1, c2: CHAR] =
  {h.opList _ CONS[
     [opname: [c1, c2],
      op: Atom.MakeAtom[Rope.Cat[Rope.FromChar[c1], Rope.FromChar[c2]]]],
     h.opList]};

-- Creates a handle with no input stream, no token, no operator pairs and the default
-- character classification. The lexeme buffer is left NIL; Lex allocates it on first use.
NewHandle: PUBLIC PROC RETURNS [h: Handle] =
  {h _ NEW [HandleRec _ [error: NIL, eof: FALSE, a: NIL, buf: NIL,
                         type: ALL [op], in: NIL, opList: NIL]];
   DefaultCharTypes[h]};

-- Reads the next token from h.in into h.a, leaving the characters consumed for it in h.buf.
-- If the stream ends, h.eof is set. If it ends after part of a lexeme has been consumed,
-- h.error is set as well and the partial text is re-queued behind h.in so that it can be
-- read again.
Lex: PUBLIC ENTRY PROC [h: Handle] =
  {ENABLE UNWIND => NULL;  -- release the monitor lock if any other signal unwinds through here
   -- Handles built elsewhere (or by NewHandle) may arrive without a buffer.
   IF h.buf = NIL THEN h.buf _ RefText.New[bufsize];
   h.buf.length _ 0;
   Next2[h ! IO.EndOfStream => {h.eof _ TRUE; CONTINUE}];
   IF h.eof AND h.buf.length # 0 THEN
     {h.error _ "end of file in mid-lexeme";
      h.in _ IOClasses.CreateCatInputStream[h.in, IO.RIS[Rope.FromRefText[h.buf]]]}};

-- Appends the next character of h.in to h.buf and consumes it. IO.EndOfStream raised by
-- the stream is not handled here; it propagates to the handler in Lex.
Advance: PROC [h: Handle] =
  {h.buf _ RefText.AppendChar[h.buf, IO.GetChar[h.in]]};

-- Sets h.a to be the next token in the input: skips characters classified as blank, then
-- dispatches on the class of the first remaining character.
Next2: PROC [h: Handle] =
  {WHILE h.type[IO.PeekChar[h.in]] = blank DO [] _ IO.GetChar[h.in] ENDLOOP;
   SELECT h.type[IO.PeekChar[h.in]] FROM
     letter => NextID[h];
     digit => NextNumber[h];
     op => NextOp[h];
     quote => NextString[h]
     ENDCASE};

-- Identifier: one character followed by any run of letters and digits. h.a becomes the
-- atom named by the collected text.
NextID: PROC [h: Handle] =
  {Advance[h];
   WHILE h.type[IO.PeekChar[h.in]] = letter OR h.type[IO.PeekChar[h.in]] = digit DO
     Advance[h]
     ENDLOOP;
   h.a _ Atom.MakeAtom[Rope.FromRefText[h.buf]]};

-- Number: a run of digits, optionally followed by '. and a further run of digits.
-- h.a becomes a REF INT when there is no '. and a REF REAL otherwise.
NextNumber: PROC [h: Handle] =
  {n: INT;
   x, y: REAL;
   n _ IO.PeekChar[h.in] - '0;
   Advance[h];
   WHILE h.type[IO.PeekChar[h.in]] = digit DO
     -- Parenthesized so the digit value is formed before it is added to n * 10.
     n _ n * 10 + (IO.PeekChar[h.in] - '0);
     Advance[h]
     ENDLOOP;
   IF IO.PeekChar[h.in] # '. THEN {h.a _ NEW[INT _ n]; RETURN};
   Advance[h];  -- the decimal point
   x _ n;
   y _ 0.1;  -- weight of the next fractional digit
   WHILE h.type[IO.PeekChar[h.in]] = digit DO
     x _ x + y * (IO.PeekChar[h.in] - '0);
     y _ y/10;
     Advance[h]
     ENDLOOP;
   h.a _ NEW[REAL _ x]};

-- Operator: if the first two characters match a pair registered with AddOpPair, h.a is
-- that pair's atom and both characters are consumed; otherwise h.a is the atom for the
-- single character c. Only the first character is recorded in h.buf.
NextOp: PROC [h: Handle] =
  {c: CHAR _ IO.PeekChar[h.in];
   cc: CHAR;
   Advance[h];
   cc _ IO.PeekChar[h.in];
   IF h.type[cc] = op THEN
     {FOR l: LIST OF OpRec _ h.opList, l.rest UNTIL l = NIL DO
        IF l.first.opname = [c, cc] THEN
          {h.a _ l.first.op; [] _ IO.GetChar[h.in]; RETURN}
        ENDLOOP};
   h.a _ Atom.MakeAtomFromChar[c]};

-- String: the opening quote character also serves as the closing delimiter. h.a becomes
-- the text between the delimiters. A CR or LF before the closing delimiter sets h.error
-- and returns without changing h.a or consuming the line break.
NextString: PROC [h: Handle] =
  {close: CHAR _ IO.PeekChar[h.in];
   Advance[h];
   WHILE IO.PeekChar[h.in] # IO.CR AND IO.PeekChar[h.in] # IO.LF
       AND IO.PeekChar[h.in] # close DO
     Advance[h]
     ENDLOOP;
   IF IO.PeekChar[h.in] # close THEN {h.error _ "no multi-line strings"; RETURN};
   -- Position 0 of the buffer holds the opening delimiter; drop it.
   h.a _ Rope.Substr[Rope.FromRefText[h.buf], 1];
   [] _ IO.GetChar[h.in]};

-- Resets h.type to the default classification: ASCII letters are letter, decimal digits
-- are digit, '" is quote, CR SP LF FF NUL and TAB are blank, and everything else is op.
DefaultCharTypes: PUBLIC PROC [h: Handle] =
  {-- Cover the whole CHAR type. Numeric character literals are octal, so the former upper
   -- bound 255C stopped at decimal 173 and left the remaining codes unreset.
   FOR c: CHAR IN [FIRST[CHAR] .. LAST[CHAR]] DO h.type[c] _ op ENDLOOP;
   FOR c: CHAR IN ['a .. 'z] DO h.type[c] _ letter ENDLOOP;
   FOR c: CHAR IN ['A .. 'Z] DO h.type[c] _ letter ENDLOOP;
   FOR c: CHAR IN ['0 .. '9] DO h.type[c] _ digit ENDLOOP;
   h.type['"] _ quote;
   h.type[IO.CR] _ blank;
   h.type[IO.SP] _ blank;
   h.type[IO.LF] _ blank;
   h.type[IO.FF] _ blank;
   h.type[IO.NUL] _ blank;
   h.type[IO.TAB] _ blank};

END.