-- JaMScannerImpl.mesa
-- Original version by John Warnock, March 7, 1979
-- Last changed by Paxton, 22-Jan-82 10:19:46
DIRECTORY
JaMBasic USING [Object],
JaMInternal USING [Frame, Stack],
JaMOps USING [Array, Assert, AStore, Bug, CountToMark,
Install, InstallReason, MakeName, MakeString, Pop, Push,
RegisterExplicit, StringToName, SubString, Text],
JaMStorage,
JaMScanner,
JaMVM USING [GetChar],
Ascii USING [ControlZ, CR, LF, SP, TAB],
StreamDefs USING [StreamHandle];
-- This program implements a very simple lexical scanner which works in the
-- following way. When the routine "StreamToken" or "StringToken" is called
-- then the first token is returned on the top of stack followed by the
-- remainder of the source at next-on-stack.
JaMScannerImpl: PROGRAM
IMPORTS JaMOps, JaMScanner, JaMStorage, JaMVM
EXPORTS JaMOps, JaMScanner = {
OPEN VM:JaMVM, JaMScanner, JaMStorage, JaMOps, JaMInternal, JaMBasic;
-- Globals
zone: UNCOUNTED ZONE = Zone[];
classArray: LONG POINTER TO ClassArray ← NIL;
overflow: PUBLIC name Object;
-- Procedures
CharClass: PROC[char: CHARACTER] RETURNS[Class] = INLINE {
RETURN[classArray[IF char<200C THEN char ELSE char-200B]] };
InitClassArray: PROC = { Assert[classArray=NIL];
classArray ← zone.NEW[ClassArray ← ALL[oth]]; -- default is oth
FOR c: CHARACTER IN ['0..'9] DO classArray[c] ← num ENDLOOP; -- Numeric (0 to 9)
classArray[0C] ← dlm;
classArray[Ascii.SP] ← dlm;
classArray[Ascii.TAB] ← dlm;
classArray[Ascii.LF] ← dlm;
classArray[',] ← dlm; -- Delimiters
classArray['.] ← dot; -- Period, decimal point
classArray['+] ← classArray['-] ← sgn; -- Signs
classArray['B] ← classArray['b] ← oct; -- Octal suffix
classArray['E] ← classArray['e] ← exp; -- Exponent
classArray['(] ← lpr; -- Left paren
classArray[')] ← rpr; -- Right paren
classArray['{] ← lbr; -- Left brace
classArray['}] ← rbr; -- Right brace
classArray['"] ← quo; -- String quote
classArray['\\] ← bsl; -- Backslash; escape char in quoted strings
classArray['%] ← com; -- Comment
classArray[Ascii.CR] ← crt; -- Return; end of comment
};
FreeClassArray: PROC = { Assert[classArray#NIL];
zone.FREE[@classArray];
};
StreamToken: PUBLIC PROC[frame: Frame, stream: StreamHandle]
RETURNS[found,error: BOOLEAN, token: Object] = {
stack: Stack ← frame.opstk;
nest: CARDINAL ← 0;
localtext: STRING ← [255];
text: TextObject ← TextNew[localtext];
found ← error ← FALSE;
DO ENABLE UNWIND => TextFree[@text]; -- token loop
state: State ← start; TextReset[@text];
IF error THEN state ← aend -- finish all pending arrays
ELSE UNTIL state IN FinalState DO -- character loop
char: CHARACTER; class: Class; action: Action;
IF stream.endof[stream] THEN { char ← 0C; class ← eos } -- end of source
ELSE { char ← stream.get[stream]; class ← CharClass[char] };
SELECT char FROM
Ascii.ControlZ => class ← com; -- treat Bravo trailer as comment
0C => class ← eos; -- treat null as end of source; for Tioga
ENDCASE;
[state,action] ← Transition[state,class]; -- do a state transition
SELECT action FROM -- deal with character
null,skip => { };
take => { TextPut[@text,char] };
back => { stream.putback[stream,char] };
ENDCASE => ERROR Bug;
ENDLOOP;
-- state and text now describe a token
SELECT state FROM
null => IF nest>0 THEN { error ← TRUE; LOOP } ELSE EXIT;
inum => token ← MakeInteger[TextRead[@text]];
onum => token ← MakeOctal[TextRead[@text]];
rnum => token ← MakeReal[TextRead[@text]];
name,oops => token ← MakeName[TextRead[@text]];
strg => token ← MakeString[TextRead[@text]];
strg2 => {
txt: Text;
[txt,error] ← RemoveEscapes[TextRead[@text]];
token ← MakeString[txt] };
part => { error ← TRUE; token ← MakeString[TextRead[@text]] };
abeg => { nest ← nest + 1; token ← [L,mark[]] };
aend => IF nest>0 THEN {
size: CARDINAL ← CountToMark[stack]; -- count number of elements
array: array Object ← Array[size]; -- allocate array
AStore[stack,array]; -- package it up
Assert[Pop[stack].type=mark]; -- remove mark
token ← array; nest ← nest - 1 }
ELSE token ← MakeName[TextRead[@text]];
ENDCASE;
IF nest>0 THEN Push[stack,token] ELSE { found ← TRUE; EXIT };
ENDLOOP; -- token loop
TextFree[@text];
RETURN[found,error,token];
};
StringToken: PUBLIC PROC[frame: Frame, ob: string Object]
RETURNS[found,error: BOOLEAN, token: Object, rem: string Object] = {
stack: Stack ← frame.opstk;
i: CARDINAL ← 0;
substr: string Object;
nest: CARDINAL ← 0;
beg: CARDINAL ← 0;
len: CARDINAL ← 0;
found ← error ← FALSE;
DO -- token loop
state: State ← start; beg ← i; len ← 0;
IF error THEN state ← aend -- finish all pending arrays
ELSE UNTIL state IN FinalState DO -- character loop
char: CHARACTER; class: Class; action: Action;
IF i>=ob.length THEN { char ← 0C; class ← eos } -- end of source
ELSE { char ← VM.GetChar[ob,i]; class ← CharClass[char]; i ← i + 1 };
[state,action] ← Transition[state,class]; -- do a state transition
SELECT action FROM -- deal with character
skip => { beg ← i };
null => { };
take => { len ← len + 1 };
back => { i ← i - 1 };
ENDCASE => ERROR Bug;
ENDLOOP;
substr ← SubString[ob,beg,len];
-- state and substr now describe a token
SELECT state FROM
null => IF nest>0 THEN { error ← TRUE; LOOP } ELSE EXIT;
inum => token ← StringToInteger[substr];
onum => token ← StringToOctal[substr];
rnum => token ← StringToReal[substr];
name,oops => { token ← StringToName[substr]; token.tag ← X };
strg => { token ← substr; token.tag ← L };
part,strg2 => { error ← TRUE; token ← substr; token.tag ← L };
abeg => { nest ← nest + 1; token ← [L,mark[]] };
aend => IF nest>0 THEN {
len: CARDINAL ← CountToMark[stack]; -- count number of elements
array: array Object ← Array[len]; -- allocate array
AStore[stack,array]; -- package it up
Assert[Pop[stack].type=mark]; -- remove mark
token ← array; nest ← nest - 1 }
ELSE { token ← StringToName[substr]; token.tag ← X };
ENDCASE;
IF nest>0 THEN Push[stack,token] ELSE { found ← TRUE; EXIT };
ENDLOOP; -- token loop
RETURN[found,error,token,SubString[ob,i,ob.length-i]];
};
Transition: PROC[state: State, class: Class]
RETURNS[State,Action] = INLINE { SELECT state FROM
start => SELECT class FROM
eos => { RETURN[null,null] }; -- no more tokens
crt,dlm => { RETURN[start,skip] }; -- ignore
com => { RETURN[comnt,skip] }; -- begin comment
quo => { RETURN[qstr,skip] }; -- begin string with next char
lpr => { RETURN[paren,skip] }; -- begin string with next char
rpr => { RETURN[oops,take] }; -- extra right paren
lbr => { RETURN[abeg,take] }; -- begin array
rbr => { RETURN[aend,take] }; -- end array
num => { RETURN[ipart,take] }; -- begin integer
sgn => { RETURN[msign,take] }; -- mantissa may follow
dot => { RETURN[point,take] }; -- fraction may follow
oct,exp,bsl,oth => { RETURN[ident,take] }; -- begin a name
ENDCASE;
comnt => SELECT class FROM
eos => { RETURN[null,null] }; -- no more tokens
crt => { RETURN[start,skip] }; -- end of comment
dlm,com,lpr,rpr,lbr,rbr,num,sgn,dot,oct,exp,bsl,quo,oth => { RETURN[comnt,skip] }; -- ignore
ENDCASE;
msign => SELECT class FROM
-- So far: sign for mantissa
eos,crt,dlm => { RETURN[name,null] }; -- finish as a name
com,lpr,rpr,lbr,rbr,quo => { RETURN[name,back] }; -- back up and finish
num => { RETURN[ipart,take] }; -- begin integer
dot => { RETURN[point,take] }; -- fraction may follow
sgn,oct,exp,bsl,oth => { RETURN[ident,take] }; -- not a number
ENDCASE;
point => SELECT class FROM
-- So far: (sign and`) decimal point
eos,crt,dlm => { RETURN[name,null] }; -- finish as a name
com,lpr,rpr,lbr,rbr,quo => { RETURN[name,back] }; -- back up and finish
num => { RETURN[fpart,take] }; -- begin fraction
dot,sgn,oct,exp,bsl,oth => { RETURN[ident,take] }; -- not a number
ENDCASE;
ipart => SELECT class FROM
-- So far: valid integer
eos,crt,dlm => { RETURN[inum,null] }; -- finish integer
com,lpr,rpr,lbr,rbr,quo => { RETURN[inum,back] }; -- back up and finish integer
num => { RETURN[ipart,take] }; -- add integer digit
dot => { RETURN[fpart,take] }; -- fraction follows
oct => { RETURN[octal,take] }; -- octal number
exp => { RETURN[expon,take] }; -- exponent follows
sgn,bsl,oth => { RETURN[ident,take] }; -- not a number
ENDCASE;
octal => SELECT class FROM
-- So far: valid integer followed by "B"
eos,crt,dlm => { RETURN[onum,null] }; -- finish octal
com,lpr,rpr,lbr,rbr,quo => { RETURN[onum,back] }; -- back up and finish octal
num,dot,sgn,oct,exp,bsl,oth => { RETURN[ident,take] }; -- not a number
ENDCASE;
fpart => SELECT class FROM
-- So far: valid mantissa containing decimal point
eos,crt,dlm => { RETURN[rnum,null] }; -- finish real
com,lpr,rpr,lbr,rbr,quo => { RETURN[rnum,back] }; -- back up and finish real
num => { RETURN[fpart,take] }; -- add fraction digit
exp => { RETURN[expon,take] }; -- exponent follows
dot,sgn,oct,bsl,oth => { RETURN[ident,take] }; -- not a number
ENDCASE;
expon => SELECT class FROM
-- So far: valid mantissa followed by "E"
eos,crt,dlm => { RETURN[name,null] }; -- finish as a name
com,lpr,rpr,lbr,rbr,quo => { RETURN[name,back] }; -- back up and finish
num => { RETURN[epart,take] }; -- first exponent digit
sgn => { RETURN[esign,take] }; -- exponent sign; digit should follow
dot,oct,exp,bsl,oth => { RETURN[ident,take] }; -- not a number
ENDCASE;
esign => SELECT class FROM
-- So far: valid mantissa followed by "E" and a sign
eos,crt,dlm => { RETURN[name,null] }; -- finish as a name
com,lpr,rpr,lbr,rbr,quo => { RETURN[name,back] }; -- back up and finish
num => { RETURN[epart,take] }; -- first exponent digit
sgn,dot,oct,exp,bsl,oth => { RETURN[ident,take] }; -- not a number
ENDCASE;
epart => SELECT class FROM
-- So far: valid real with exponent
eos,crt,dlm => { RETURN[rnum,null] }; -- finish real
com,lpr,rpr,lbr,rbr,quo => { RETURN[rnum,back] }; -- back up and finish real
num => { RETURN[epart,take] }; -- add exponent digit
sgn,dot,oct,exp,bsl,oth => { RETURN[ident,take] }; -- not a number
ENDCASE;
ident => SELECT class FROM
-- So far: an identifier
eos,crt,dlm => { RETURN[name,null] }; -- finish name
com,lpr,rpr,lbr,rbr,quo => { RETURN[name,back] }; -- back up and finish name
num,sgn,dot,oct,exp,bsl,oth => { RETURN[ident,take] }; -- append
ENDCASE;
qstr => SELECT class FROM
eos => { RETURN[part,null] }; -- premature end
bsl => { RETURN[qstr3,take] }; -- read char after \
quo => { RETURN[strg,null] }; -- finish string
crt,dlm,com,lbr,rbr,num,sgn,dot,oct,exp,lpr,rpr,oth => { RETURN[qstr,take] }; -- append
ENDCASE;
qstr2 => SELECT class FROM
eos => { RETURN[part,null] }; -- premature end
bsl => { RETURN[qstr3,take] }; -- read char after \
quo => { RETURN[strg2,null] }; -- finish string with \'s
crt,dlm,com,lbr,rbr,num,sgn,dot,oct,exp,lpr,rpr,oth => { RETURN[qstr2,take] }; -- append
ENDCASE;
qstr3 => SELECT class FROM
eos => { RETURN[part,null] }; -- premature end
crt,dlm,com,lbr,rbr,num,sgn,dot,oct,exp,lpr,rpr,bsl,quo,oth => { RETURN[qstr2,take] }; -- append
ENDCASE;
paren => SELECT class FROM
eos => { RETURN[part,null] }; -- premature end
lpr => { RETURN[SUCC[paren],take] }; -- up a level
rpr => { RETURN[strg,null] }; -- finish string
crt,dlm,com,lbr,rbr,num,sgn,dot,oct,exp,quo,bsl,oth => { RETURN[paren,take] }; -- append
ENDCASE;
ENDCASE => SELECT class FROM
eos => { RETURN[part,null] }; -- premature end
lpr => { RETURN[SUCC[state],take] }; -- open
rpr => { RETURN[PRED[state],take] }; -- close
crt,dlm,com,lbr,rbr,num,sgn,dot,oct,exp,quo,bsl,oth => { RETURN[state,take] }; -- append
ENDCASE;
ERROR Bug; -- unknown state or class
};
LineComplete: PUBLIC PROC[text: Text] RETURNS[BOOLEAN] = {
nest: CARDINAL ← 0;
i: CARDINAL ← 0;
DO -- token loop
state: State ← start;
UNTIL state IN FinalState DO -- character loop
char: CHARACTER; class: Class; action: Action;
IF i>=text.length THEN { char ← 0C; class ← eos } -- end of source
ELSE { class ← CharClass[char ← text[i]]; i ← i + 1 };
[state,action] ← Transition[state,class]; -- do a state transition
SELECT action FROM -- deal with character
null => { };
skip => { --beg ← beg + 1-- };
take => { --len ← len + 1-- };
back => { i ← i - 1 };
ENDCASE => ERROR Bug;
ENDLOOP;
-- state and text now describe a token
SELECT state FROM
null => EXIT;
part => RETURN[FALSE]; -- incomplete: inside a string
abeg => { nest ← nest + 1 };
aend => { IF nest>0 THEN nest ← nest - 1 };
ENDCASE;
ENDLOOP; -- token loop
RETURN[nest=0]; -- complete if not inside an array
};
-- Initialization
InstallScanner: PROC[why: InstallReason, frame: Frame] = { SELECT why FROM
init => InitClassArray[];
free => FreeClassArray[];
register => {
overflow ← MakeName[".overflow"L];
RegisterExplicit[frame, ".token"L, JToken];
};
ENDCASE;
};
Install[InstallScanner];
}.