FILE: M2SImpl.mesa
Modula-2 Scanner
Last Edited by: Gutknecht, September 18, 1985 0:28:05 am PDT
Satterthwaite March 11, 1986 5:36:16 pm PST

DIRECTORY
IO: TYPE USING [STREAM, EndOf, GetChar, Put, rope, char, card],
M2S: TYPE USING [IdBufLen, Symbol, NumTyp, CharArray];
M2SImpl : CEDAR PROGRAM
IMPORTS IO
EXPORTS M2S =
BEGIN OPEN M2S;
KW: INTEGER = 43; -- number of keywords --
maxCard: CARDINAL = 177777B;
maxDig: CARDINAL = 7;
maxExp: CARDINAL = 38;
maxReal: REAL = 1E38;
IdBufLim: CARDINAL = IdBufLen - 100;
sym: PUBLIC Symbol;
id: PUBLIC CARDINAL;
numtyp: PUBLIC NumTyp;
intval: PUBLIC LONG INTEGER;
dblval: PUBLIC LONG INTEGER;
realval: PUBLIC REAL;
scanerr: PUBLIC BOOL;
source: PUBLIC IO.STREAM;
sourcepos: PUBLIC LONG CARDINAL;
log: PUBLIC IO.STREAM;
IdBuf: PUBLIC REF CharArray ← NEW [CharArray]; -- identifier buffer --
ch: CHAR; --current character
id0, id1: CARDINAL; --indices of id-buffer
keyWds: ARRAY [0..KW] OF
RECORD [sym: Symbol, ind: CARDINAL];
power: ARRAY [0..5] OF REAL ← [1.0E1, 1.0E2, 1.0E4, 1.0E8, 1.0E16, 1.0E32];
InitScanner: PUBLIC PROC =
{ ch ← ' ; sourcepos ← 0; scanerr ← FALSE;
IF id0 = 0 THEN id0 ← id ELSE id ← id0 };
Enter: PUBLIC PROC [name: REF TEXT] RETURNS [CARDINAL] =
{ i: CARDINAL ← id;
id ← id + 1;
FOR j: CARDINAL ← 0, j + 1 WHILE name [j] # ' DO
IdBuf [id] ← name [j]; id ← id + 1
ENDLOOP;
IdBuf [i] ← '\000 + id-i;
RETURN [i] };
Diff: PUBLIC PROC [i, j: CARDINAL] RETURNS [INTEGER] =
{ k: CARDINAL ← IdBuf[i].ORD;
DO
IF k = 0 THEN RETURN [0];
IF IdBuf[i] # IdBuf[j] THEN
RETURN [ IdBuf[i].ORD - IdBuf[j].ORD ]
ELSE { i ← i+1; j ← j+1; k ← k-1}
ENDLOOP };
GetCh: PROC = { ch ← source.GetChar; sourcepos ← sourcepos + 1 };
String: PROC [termCh: CHAR] =
{ id1 ← id + 1;
IF id1 > IdBufLim THEN { Mark [91]; id1 ← 1};
DO GetCh;
IF ch = termCh THEN EXIT;
IF ch < ' THEN { Mark [45]; EXIT };
IdBuf [id1] ← ch; id1 ← id1 + 1
ENDLOOP;
GetCh;
IdBuf[id] ← '\000 + id1-id; -- length
IF IdBuf [id] = '\002 THEN {
sym ← number; numtyp ← char; intval ← IdBuf[id+1].ORD }
ELSE { sym ← string;
IF IdBuf [id] = '\001 THEN
{ IdBuf[id1] ← '\000; id1 ← id1 + 1; IdBuf[id] ← '\002 }}};
Identifier: PROC =
{ k, l, m, d: INTEGER;
id1 ← id + 1;
IF id1 > IdBufLim THEN { Mark [91]; id1 ← 1 };
DO IdBuf [id1] ← ch; id1 ← id1 + 1;
GetCh;
IF (ch<'0) OR (ch>'9) AND (ch<'A) OR (ch>'Z) AND (ch<'a) OR (ch>'z) THEN EXIT
ENDLOOP;
IdBuf [id] ← '\000 + id1-id; -- length
k ← 0; l ← KW-1;
WHILE l >= k DO
m ← (k + l) / 2;
d ← Diff [keyWds[m].ind, id];
IF d < 0 THEN k ← m + 1 ELSE l ← m - 1
ENDLOOP;
IF Diff [keyWds[k].ind, id] = 0 THEN sym ← keyWds[k].sym
ELSE sym ← ident };
Number: PROC =
{ i, j, l, d, e: CARDINAL;
x, f: REAL;
d1: INTEGER;
neg: BOOL;
lastCh: CHAR;
dig: ARRAY [0..31] OF CHAR;
Ten: PROC [e: CARDINAL] RETURNS [REAL] =
{ u: REAL ← 1.0;
FOR k: CARDINAL ← 0, k + 1 WHILE e > 0 DO
IF e MOD 2 = 1 THEN u ← power [k] * u;
e ← e/2
ENDLOOP;
RETURN [u] };
sym ← number; i ← 0;
DO dig [i] ← ch; i ← i + 1;
GetCh;
IF (ch<'0) OR (ch>'9) AND (ch<'A) OR (ch>'Z) AND (ch<'a) OR (ch>'z) THEN EXIT
ENDLOOP;
lastCh ← ch; j ← 0;
WHILE (j < i) AND (dig [j] = '0) DO j ← j + 1 ENDLOOP;
IF lastCh = '. THEN { GetCh;
IF ch = '. THEN { lastCh ← '\000; ch ← '\177 -- ellipsis-- }};
IF lastCh = '. THEN -- decimal point --
{ x ← 0.0; l ← 0;
WHILE j < i DO -- read int part --
IF l < maxDig THEN
{ IF dig [j] > '9 THEN Mark [40];
x ← x * 10.0 + (dig [j].ORD - '0.ORD);
l ← l + 1 }
ELSE Mark [41];
j ← j + 1
ENDLOOP;
l ← 0; f ← 0.0;
WHILE ('0 <= ch) AND (ch <= '9) DO -- read fraction
IF l < maxDig THEN {
f ← f*10.0 + (ch.ORD - '0.ORD);
l ← l + 1 };
GetCh;
ENDLOOP;
x ← f / Ten[l] + x; e ← 0; neg ← FALSE;
IF ch = 'E THEN { GetCh;
IF ch = '- THEN { neg ← TRUE; GetCh }
ELSE IF ch = '+ THEN GetCh;
WHILE ('0 <= ch) AND (ch <= '9) DO -- read exponent
e ← e *10 + (ch.ORD - '0.ORD);
GetCh
ENDLOOP };
IF neg THEN
IF e <= maxExp THEN x ← x / Ten[e] ELSE x ← 0.0
ELSE
IF e <= maxExp THEN { f ← Ten[e];
IF maxReal / f >= x THEN x ← f*x ELSE Mark [41] }
ELSE Mark [41];
numtyp ← real; realval ← x }
ELSE -- integer --
{ lastCh ← dig [i-1];
IF lastCh = 'B THEN { i ← i - 1;
numtyp ← cardint; intval ← 0;
WHILE j < i DO
d ← dig[j].ORD - '0.ORD;
IF (d < 8) AND ((maxCard-d) / 8 >= intval) THEN
intval ← intval * 8 + d
ELSE { Mark [29]; intval ← 0 };
j ← j + 1
ENDLOOP }
ELSE IF lastCh = 'H THEN { i ← i - 1;
IF i <= j+4 THEN
{ numtyp ← cardint; intval ← 0;
WHILE j < i DO
d ← dig[j].ORD - '0.ORD;
IF d > 22 THEN { Mark [29]; d ← 0 }
ELSE IF d > 9 THEN d ← d - 7;
intval ← intval * 16 + d;
j ← j + 1
ENDLOOP }
ELSE IF i <= j+8 THEN
{ numtyp ← longint; dblval ← 0;
DO
d ← dig[j].ORD - '0.ORD;
IF d > 22 THEN { Mark [29]; d ← 0 }
ELSE IF d > 9 THEN d ← d - 7;
dblval ← dblval * 16 + d;
j ← j + 1;
IF j = i THEN EXIT
ENDLOOP }
ELSE { Mark [29]; numtyp ← longint }}
ELSE IF lastCh = 'D THEN
{ i ← i - 1; d1 ← 0; numtyp ← longint;
WHILE j < i DO
d1 ← d1 * 10 + (dig[j].ORD - '0.ORD);
j ← j + 1
ENDLOOP;
dblval ← d1 }
ELSE IF lastCh = 'C THEN
{ i ← i - 1; intval ← 0; numtyp ← char;
WHILE j < i DO
d ← dig[j].ORD - '0.ORD;
intval ← intval * 8 + d;
IF (d >= 8) OR (intval >= 256) THEN { Mark [29]; intval ← 0 };
j ← j + 1
ENDLOOP }
ELSE -- decimal? -- {
numtyp ← cardint; intval ← 0;
WHILE j < i DO
d ← dig[j].ORD - '0.ORD;
IF (d < 10) AND ((maxCard-d) / 10 >= intval) THEN
intval ← intval * 10 + d
ELSE { Mark [29]; intval ← 0 };
j ← j + 1
ENDLOOP }}};
GetSym: PUBLIC PROC =
{ Comment: PROC =
{ GetCh;
DO
WHILE (ch # '*) AND (ch # '\000) DO
IF ch = '( THEN { GetCh;
IF ch ='* THEN Comment }
ELSE GetCh
ENDLOOP;
IF ch = '* THEN GetCh ELSE Mark [42];
IF (ch = ')) OR (ch = '\000) THEN EXIT
ENDLOOP;
IF ch = ') THEN GetCh };
DO
IF source.EndOf THEN { ch ← ' ; EXIT };
IF (ch <= ' ) OR (ch > '\177) THEN GetCh ELSE EXIT
ENDLOOP;
SELECT ch FROM -- ' <= ch <= '\177
'  => { sym ← eof; ch ← '\000 };
'! => { sym ← null; GetCh };
'" => String ['"];
'# => { sym ← neq; GetCh };
' => { sym ← null; GetCh };
'% => { sym ← null; GetCh };
'& => { sym ← and; GetCh };
'' => { String [''] };
'( => { GetCh;
IF ch = '* THEN { Comment; GetSym } ELSE sym ← lparen };
') => { sym ← rparen; GetCh };
'* => { sym ← times; GetCh };
'+ => { sym ← plus; GetCh };
', => { sym ← comma; GetCh };
'- => { sym ← minus; GetCh };
'. => { GetCh;
IF ch = '. THEN { GetCh; sym ← ellipsis } ELSE sym ← period };
'/ => { sym ← slash; GetCh };
IN ['0..'9] => Number;
': => { GetCh;
IF ch = '= THEN { GetCh; sym ← becomes } ELSE sym ← colon };
'; => { sym ← semicolon; GetCh };
'< => { GetCh;
IF ch = '= THEN { sym ← leq; GetCh }
ELSE
IF ch = '> THEN { sym ← neq; GetCh } ELSE sym ← lss };
'= => { sym ← eql; GetCh };
'> => { GetCh;
IF ch = '= THEN { GetCh; sym ← geq } ELSE sym ← gtr };
'? => { sym ← null; GetCh };
'@ => { sym ← null; GetCh };
IN ['A..'Z] => Identifier;
'[ => { sym ← lbrak; GetCh };
'\\ => { sym ← null; GetCh };
'] => { sym ← rbrak; GetCh };
'^ => { sym ← arrow; GetCh };
'← => { sym ← becomes; GetCh };
IN ['a..'z] => Identifier;
'{ => { sym ← lbrace; GetCh };
'| => { sym ← bar; GetCh };
'} => { sym ← rbrace; GetCh };
'~ => { sym ← not; GetCh };
'\177 => { sym ← ellipsis; GetCh }
ENDCASE };
KeepId: PUBLIC PROC = { id ← id1 };
Mark: PUBLIC PROC [n: CARDINAL] =
{ log.Put [IO.card[sourcepos], IO.char[' ]];
log.Put [IO.rope["err"], IO.card[n], IO.char[' ]];
scanerr ← TRUE };
M2Scanner initialization:
{ IdBuf [0] ← '\001; id ← 1; id0 ← 0; ch ← ' ;
keyWds[0].sym ← by; keyWds[0].ind ← Enter ["BY "];
keyWds[1].sym ← do; keyWds[1].ind ← Enter ["DO "];
keyWds[2].sym ← if; keyWds[2].ind ← Enter ["IF "];
keyWds[3].sym ← in; keyWds[3].ind ← Enter ["IN "];
keyWds[4].sym ← of; keyWds[4].ind ← Enter ["OF "];
keyWds[5].sym ← or; keyWds[5].ind ← Enter ["OR "];
keyWds[6].sym ← to; keyWds[6].ind ← Enter ["TO "];
keyWds[7].sym ← and; keyWds[7].ind ← Enter ["AND "];
keyWds[8].sym ← div; keyWds[8].ind ← Enter ["DIV "];
keyWds[9].sym ← end; keyWds[9].ind ← Enter ["END "];
keyWds[10].sym ← for; keyWds[10].ind ← Enter ["FOR "];
keyWds[11].sym ← mod; keyWds[11].ind ← Enter ["MOD "];
keyWds[12].sym ← not; keyWds[12].ind ← Enter ["NOT "];
keyWds[13].sym ← rem; keyWds[13].ind ← Enter ["REM "];
keyWds[14].sym ← set; keyWds[14].ind ← Enter ["SET "];
keyWds[15].sym ← var; keyWds[15].ind ← Enter ["VAR "];
keyWds[16].sym ← case; keyWds[16].ind ← Enter ["CASE "];
keyWds[17].sym ← code; keyWds[17].ind ← Enter ["CODE "];
keyWds[18].sym ← else; keyWds[18].ind ← Enter ["ELSE "];
keyWds[19].sym ← exit; keyWds[19].ind ← Enter ["EXIT "];
keyWds[20].sym ← from; keyWds[20].ind ← Enter ["FROM "];
keyWds[21].sym ← loop; keyWds[21].ind ← Enter ["LOOP "];
keyWds[22].sym ← then; keyWds[22].ind ← Enter ["THEN "];
keyWds[23].sym ← type; keyWds[23].ind ← Enter ["TYPE "];
keyWds[24].sym ← with; keyWds[24].ind ← Enter ["WITH "];
keyWds[25].sym ← array; keyWds[25].ind ← Enter ["ARRAY "];
keyWds[26].sym ← begin; keyWds[26].ind ← Enter ["BEGIN "];
keyWds[27].sym ← const; keyWds[27].ind ← Enter ["CONST "];
keyWds[28].sym ← elsif; keyWds[28].ind ← Enter ["ELSIF "];
keyWds[29].sym ← until; keyWds[29].ind ← Enter ["UNTIL "];
keyWds[30].sym ← while; keyWds[30].ind ← Enter ["WHILE "];
keyWds[31].sym ← export; keyWds[31].ind ← Enter ["EXPORT "];
keyWds[32].sym ← import; keyWds[32].ind ← Enter ["IMPORT "];
keyWds[33].sym ← module; keyWds[33].ind ← Enter ["MODULE "];
keyWds[34].sym ← record; keyWds[34].ind ← Enter ["RECORD "];
keyWds[35].sym ← repeat; keyWds[35].ind ← Enter ["REPEAT "];
keyWds[36].sym ← return; keyWds[36].ind ← Enter ["RETURN "];
keyWds[37].sym ← forward; keyWds[37].ind ← Enter ["FORWARD "];
keyWds[38].sym ← pointer; keyWds[38].ind ← Enter ["POINTER "];
keyWds[39].sym ← procedure; keyWds[39].ind ← Enter ["PROCEDURE "];
keyWds[40].sym ← qualified; keyWds[40].ind ← Enter ["QUALIFIED "];
keyWds[41].sym ← definition; keyWds[41].ind ← Enter ["DEFINITION "];
keyWds[42].sym ← implementation; keyWds[42].ind ← Enter ["IMPLEMENTATION "];
keyWds[43].ind ← Enter [" "] -- sentinel for binary search -- }
END.