-- September 9, 1982 4:13 pm
-- Lexer.mesa
-- Simple lexer for alpha-numeric identifiers, unsigned ints, unsigned reals w/o trailing E's,
-- single and double-character operators, and quoted strings with no funny business
-- embedded in them. No two-line strings, either. Returns lexeme as a REF INT, REF REAL,
-- ATOM, or ROPE.
-- end-of-file in mid-lexeme is an error: the last char in the stream should be of type blank.
-- You can guarantee this with IO.AppendStreams if you can't guarantee it any other way.
DIRECTORY Atom, Rope, IO;
Lexer: DEFINITIONS
= BEGIN
Handle: TYPE = REF HandleRec;
-- A Handle is a reference to the state of one lexer (a HandleRec).
-- Every procedure in this interface either takes or returns a Handle.
HandleRec: TYPE = RECORD
[error: Rope.ROPE, -- initially NIL; set to an error message when a lexical error occurs
eof: BOOL, -- initially FALSE; set to TRUE when the end of the stream is reached
a: REF ANY, -- last lexeme set by Lex (a REF INT, REF REAL, ATOM, or ROPE); NIL on eof or error
buf: REF TEXT, -- the characters that were lexed to produce a
in: IO.STREAM, -- source of characters to be broken into lexemes; the one field NewHandle leaves unset
type: ARRAY CHAR OF CharType,
-- class of each character; initialized by DefaultCharTypes, declared later in this interface
opList: LIST OF OpRec] ; -- list of two-character lexemes (presumably the pairs given to AddOpPair)
-- HandleRec is the complete state of one lexer; obtain one by calling NewHandle.
NewHandle: PROC RETURNS [h: Handle];
-- Returns a new lexer handle h.
-- Every field of h is initialized except h.in, which the caller must supply.
-- h.type is initialized as described for DefaultCharTypes, declared later in this interface.
-- Typical use: h ← NewHandle[]; h.in ← <some input source>; then
-- call Lex[h] repeatedly, inspecting h.a, h.eof and h.error after each call.
CharType: TYPE = {letter, digit, blank, quote, op};
-- The class of a single character; HandleRec.type records one CharType per CHAR.
-- DefaultCharTypes documents the default assignment of characters to classes.
AddOpPair: PUBLIC PROC [h: Handle, c1, c2: CHAR];
-- Makes the character pair c1 c2 a single two-character lexeme for handle h,
-- for example -> or ..
-- Precondition: both c1 and c2 must be of CharType op.
-- (This interface does not say what happens if they are not.)
Lex: PUBLIC PROC[h: Handle];
-- Reads the next lexeme from the stream h.in. Exactly one of the following happens:
-- (1) h.a is set to the next lexeme;
-- (2) h.eof is set to TRUE and h.a to NIL (end of stream);
-- (3) h.error is set to a non-NIL message and h.a to NIL (lexical error).
-- In case (3), i.e. h.error is non-NIL, the characters removed
-- from h.in are put back, so you can reread them.
-- The char types in a handle's type array (h.type) are set by DefaultCharTypes, declared below.
OpRec: TYPE = RECORD [opname: RECORD [CHAR, CHAR], op: ATOM];
-- One two-character lexeme; the element type of HandleRec.opList.
-- opname holds the two characters and op is an ATOM associated with the pair
-- (presumably the value Lex returns for it; confirm against the implementation).
DefaultCharTypes: PUBLIC PROC[h: Handle];
-- Sets every entry of h.type as follows:
-- letters to letter; digits to digit; '" to quote;
-- cr, sp, lf, ff, nul, and tab to blank;
-- and all other characters to op.
END.