JaMScannerImpl.mesa
Original version by John Warnock, March 7, 1979
Paxton, 22-Jan-82 10:19:46
Russ Atkinson, July 22, 1983 6:39 pm
DIRECTORY
Ascii USING [ControlZ, CR, LF, SP, TAB],
JaMBasic USING [Object],
JaMInternal USING [Frame, Stack],
JaMOps USING
[Array, Assert, AStore, Bug, CountToMark, Install, InstallReason, MakeName, MakeString, Pop, Push, RegisterExplicit, StringToName, SubString, Text],
JaMStorage,
JaMScanner,
JaMVM USING [GetChar],
StreamDefs USING [StreamHandle];
This program implements a very simple lexical scanner which works in the following way. When the routine "StreamToken" or "StringToken" is called then the first token is returned on the top of stack followed by the remainder of the source at next-on-stack.
JaMScannerImpl: PROGRAM
IMPORTS JaMOps, JaMScanner, JaMStorage, JaMVM
EXPORTS JaMOps, JaMScanner = {
OPEN VM:JaMVM, JaMScanner, JaMStorage, JaMOps, JaMInternal, JaMBasic;
Globals
zone: UNCOUNTED ZONE = Zone[];
classArray: LONG POINTER TO ClassArray ← NIL;
overflow: PUBLIC name Object;
Procedures
CharClass: PROC[char: CHARACTER] RETURNS[Class] = INLINE {
RETURN[classArray[IF char<200C THEN char ELSE char-200B]] };
InitClassArray: PROC = { Assert[classArray=NIL];
classArray ← zone.NEW[ClassArray ← ALL[oth]]; -- default is oth
FOR c: CHARACTER IN ['0..'9] DO classArray[c] ← num ENDLOOP; -- Numeric (0 to 9)
classArray[0C] ← dlm;
classArray[Ascii.SP] ← dlm;
classArray[Ascii.TAB] ← dlm;
classArray[Ascii.LF] ← dlm;
classArray[',] ← dlm; -- Delimiters
classArray['.] ← dot; -- Period, decimal point
classArray['+] ← classArray['-] ← sgn; -- Signs
classArray['B] ← classArray['b] ← oct; -- Octal suffix
classArray['E] ← classArray['e] ← exp; -- Exponent
classArray['(] ← lpr; -- Left paren
classArray[')] ← rpr; -- Right paren
classArray['{] ← lbr; -- Left brace
classArray['}] ← rbr; -- Right brace
classArray['"] ← quo; -- String quote
classArray['\\] ← bsl; -- Backslash; escape char in quoted strings
classArray['%] ← com; -- Comment
classArray[Ascii.CR] ← crt; -- Return; end of comment
};
FreeClassArray: PROC = { Assert[classArray#NIL];
zone.FREE[@classArray];
};
StreamToken: PUBLIC PROC[frame: Frame, stream: StreamHandle]
RETURNS[found,error: BOOLEAN, token: Object] = {
stack: Stack ← frame.opstk;
nest: CARDINAL ← 0;
localtext: STRING ← [255];
text: TextObject ← TextNew[localtext];
found ← error ← FALSE;
DO ENABLE UNWIND => TextFree[@text]; -- token loop
state: State ← start; TextReset[@text];
IF error THEN state ← aend -- finish all pending arrays
ELSE UNTIL state IN FinalState DO -- character loop
char: CHARACTER; class: Class; action: Action;
IF stream.endof[stream] THEN { char ← 0C; class ← eos } -- end of source
ELSE { char ← stream.get[stream]; class ← CharClass[char] };
SELECT char FROM
Ascii.ControlZ => class ← com; -- treat Bravo trailer as comment
0C => class ← eos; -- treat null as end of source; for Tioga
ENDCASE;
[state,action] ← Transition[state,class]; -- do a state transition
SELECT action FROM -- deal with character
null,skip => { };
take => { TextPut[@text,char] };
back => { stream.putback[stream,char] };
ENDCASE => ERROR Bug;
ENDLOOP;
state and text now describe a token
SELECT state FROM
null => IF nest>0 THEN { error ← TRUE; LOOP } ELSE EXIT;
inum => token ← MakeInteger[TextRead[@text]];
onum => token ← MakeOctal[TextRead[@text]];
rnum => token ← MakeReal[TextRead[@text]];
name,oops => token ← MakeName[TextRead[@text]];
strg => token ← MakeString[TextRead[@text]];
strg2 => {
txt: Text;
[txt,error] ← RemoveEscapes[TextRead[@text]];
token ← MakeString[txt] };
part => { error ← TRUE; token ← MakeString[TextRead[@text]] };
abeg => { nest ← nest + 1; token ← [L,mark[]] };
aend => IF nest>0 THEN {
size: CARDINAL ← CountToMark[stack]; -- count number of elements
array: array Object ← Array[size]; -- allocate array
AStore[stack,array]; -- package it up
Assert[Pop[stack].type=mark]; -- remove mark
token ← array; nest ← nest - 1 }
ELSE token ← MakeName[TextRead[@text]];
ENDCASE;
IF nest>0 THEN Push[stack,token] ELSE { found ← TRUE; EXIT };
ENDLOOP; -- token loop
TextFree[@text];
RETURN[found,error,token];
};
StringToken: PUBLIC PROC[frame: Frame, ob: string Object]
RETURNS[found,error: BOOLEAN, token: Object, rem: string Object] = {
stack: Stack ← frame.opstk;
i: CARDINAL ← 0;
substr: string Object;
nest: CARDINAL ← 0;
beg: CARDINAL ← 0;
len: CARDINAL ← 0;
found ← error ← FALSE;
DO -- token loop
state: State ← start; beg ← i; len ← 0;
IF error THEN state ← aend -- finish all pending arrays
ELSE UNTIL state IN FinalState DO -- character loop
char: CHARACTER; class: Class; action: Action;
IF i>=ob.length THEN { char ← 0C; class ← eos } -- end of source
ELSE { char ← VM.GetChar[ob,i]; class ← CharClass[char]; i ← i + 1 };
[state,action] ← Transition[state,class]; -- do a state transition
SELECT action FROM -- deal with character
skip => { beg ← i };
null => { };
take => { len ← len + 1 };
back => { i ← i - 1 };
ENDCASE => ERROR Bug;
ENDLOOP;
substr ← SubString[ob,beg,len];
state and substr now describe a token
SELECT state FROM
null => IF nest>0 THEN { error ← TRUE; LOOP } ELSE EXIT;
inum => token ← StringToInteger[substr];
onum => token ← StringToOctal[substr];
rnum => token ← StringToReal[substr];
name,oops => { token ← StringToName[substr]; token.tag ← X };
strg => { token ← substr; token.tag ← L };
part,strg2 => { error ← TRUE; token ← substr; token.tag ← L };
abeg => { nest ← nest + 1; token ← [L,mark[]] };
aend => IF nest>0 THEN {
len: CARDINAL ← CountToMark[stack]; -- count number of elements
array: array Object ← Array[len]; -- allocate array
AStore[stack,array]; -- package it up
Assert[Pop[stack].type=mark]; -- remove mark
token ← array; nest ← nest - 1 }
ELSE { token ← StringToName[substr]; token.tag ← X };
ENDCASE;
IF nest>0 THEN Push[stack,token] ELSE { found ← TRUE; EXIT };
ENDLOOP; -- token loop
RETURN[found,error,token,SubString[ob,i,ob.length-i]];
};
Transition: PROC[state: State, class: Class]
RETURNS[State,Action] = INLINE { SELECT state FROM
start => SELECT class FROM
eos => { RETURN[null,null] }; -- no more tokens
crt,dlm => { RETURN[start,skip] }; -- ignore
com => { RETURN[comnt,skip] }; -- begin comment
quo => { RETURN[qstr,skip] }; -- begin string with next char
lpr => { RETURN[paren,skip] }; -- begin string with next char
rpr => { RETURN[oops,take] }; -- extra right paren
lbr => { RETURN[abeg,take] }; -- begin array
rbr => { RETURN[aend,take] }; -- end array
num => { RETURN[ipart,take] }; -- begin integer
sgn => { RETURN[msign,take] }; -- mantissa may follow
dot => { RETURN[point,take] }; -- fraction may follow
oct,exp,bsl,oth => { RETURN[ident,take] }; -- begin a name
ENDCASE;
comnt => SELECT class FROM
eos => { RETURN[null,null] }; -- no more tokens
crt => { RETURN[start,skip] }; -- end of comment
dlm,com,lpr,rpr,lbr,rbr,num,sgn,dot,oct,exp,bsl,quo,oth => { RETURN[comnt,skip] }; -- ignore
ENDCASE;
msign => SELECT class FROM
So far: sign for mantissa
eos,crt,dlm => { RETURN[name,null] }; -- finish as a name
com,lpr,rpr,lbr,rbr,quo => { RETURN[name,back] }; -- back up and finish
num => { RETURN[ipart,take] }; -- begin integer
dot => { RETURN[point,take] }; -- fraction may follow
sgn,oct,exp,bsl,oth => { RETURN[ident,take] }; -- not a number
ENDCASE;
point => SELECT class FROM
So far: (sign and`) decimal point
eos,crt,dlm => { RETURN[name,null] }; -- finish as a name
com,lpr,rpr,lbr,rbr,quo => { RETURN[name,back] }; -- back up and finish
num => { RETURN[fpart,take] }; -- begin fraction
dot,sgn,oct,exp,bsl,oth => { RETURN[ident,take] }; -- not a number
ENDCASE;
ipart => SELECT class FROM
So far: valid integer
eos,crt,dlm => { RETURN[inum,null] }; -- finish integer
com,lpr,rpr,lbr,rbr,quo => { RETURN[inum,back] }; -- back up and finish integer
num => { RETURN[ipart,take] }; -- add integer digit
dot => { RETURN[fpart,take] }; -- fraction follows
oct => { RETURN[octal,take] }; -- octal number
exp => { RETURN[expon,take] }; -- exponent follows
sgn,bsl,oth => { RETURN[ident,take] }; -- not a number
ENDCASE;
octal => SELECT class FROM
So far: valid integer followed by "B"
eos,crt,dlm => { RETURN[onum,null] }; -- finish octal
com,lpr,rpr,lbr,rbr,quo => { RETURN[onum,back] }; -- back up and finish octal
num,dot,sgn,oct,exp,bsl,oth => { RETURN[ident,take] }; -- not a number
ENDCASE;
fpart => SELECT class FROM
So far: valid mantissa containing decimal point
eos,crt,dlm => { RETURN[rnum,null] }; -- finish real
com,lpr,rpr,lbr,rbr,quo => { RETURN[rnum,back] }; -- back up and finish real
num => { RETURN[fpart,take] }; -- add fraction digit
exp => { RETURN[expon,take] }; -- exponent follows
dot,sgn,oct,bsl,oth => { RETURN[ident,take] }; -- not a number
ENDCASE;
expon => SELECT class FROM
So far: valid mantissa followed by "E"
eos,crt,dlm => { RETURN[name,null] }; -- finish as a name
com,lpr,rpr,lbr,rbr,quo => { RETURN[name,back] }; -- back up and finish
num => { RETURN[epart,take] }; -- first exponent digit
sgn => { RETURN[esign,take] }; -- exponent sign; digit should follow
dot,oct,exp,bsl,oth => { RETURN[ident,take] }; -- not a number
ENDCASE;
esign => SELECT class FROM
So far: valid mantissa followed by "E" and a sign
eos,crt,dlm => { RETURN[name,null] }; -- finish as a name
com,lpr,rpr,lbr,rbr,quo => { RETURN[name,back] }; -- back up and finish
num => { RETURN[epart,take] }; -- first exponent digit
sgn,dot,oct,exp,bsl,oth => { RETURN[ident,take] }; -- not a number
ENDCASE;
epart => SELECT class FROM
So far: valid real with exponent
eos,crt,dlm => { RETURN[rnum,null] }; -- finish real
com,lpr,rpr,lbr,rbr,quo => { RETURN[rnum,back] }; -- back up and finish real
num => { RETURN[epart,take] }; -- add exponent digit
sgn,dot,oct,exp,bsl,oth => { RETURN[ident,take] }; -- not a number
ENDCASE;
ident => SELECT class FROM
So far: an identifier
eos,crt,dlm => { RETURN[name,null] }; -- finish name
com,lpr,rpr,lbr,rbr,quo => { RETURN[name,back] }; -- back up and finish name
num,sgn,dot,oct,exp,bsl,oth => { RETURN[ident,take] }; -- append
ENDCASE;
qstr => SELECT class FROM
eos => { RETURN[part,null] }; -- premature end
bsl => { RETURN[qstr3,take] }; -- read char after \
quo => { RETURN[strg,null] }; -- finish string
crt,dlm,com,lbr,rbr,num,sgn,dot,oct,exp,lpr,rpr,oth => { RETURN[qstr,take] }; -- append
ENDCASE;
qstr2 => SELECT class FROM
eos => { RETURN[part,null] }; -- premature end
bsl => { RETURN[qstr3,take] }; -- read char after \
quo => { RETURN[strg2,null] }; -- finish string with \'s
crt,dlm,com,lbr,rbr,num,sgn,dot,oct,exp,lpr,rpr,oth => { RETURN[qstr2,take] }; -- append
ENDCASE;
qstr3 => SELECT class FROM
eos => { RETURN[part,null] }; -- premature end
crt,dlm,com,lbr,rbr,num,sgn,dot,oct,exp,lpr,rpr,bsl,quo,oth => { RETURN[qstr2,take] }; -- append
ENDCASE;
paren => SELECT class FROM
eos => { RETURN[part,null] }; -- premature end
lpr => { RETURN[SUCC[paren],take] }; -- up a level
rpr => { RETURN[strg,null] }; -- finish string
crt,dlm,com,lbr,rbr,num,sgn,dot,oct,exp,quo,bsl,oth => { RETURN[paren,take] }; -- append
ENDCASE;
ENDCASE => SELECT class FROM
eos => { RETURN[part,null] }; -- premature end
lpr => { RETURN[SUCC[state],take] }; -- open
rpr => { RETURN[PRED[state],take] }; -- close
crt,dlm,com,lbr,rbr,num,sgn,dot,oct,exp,quo,bsl,oth => { RETURN[state,take] }; -- append
ENDCASE;
ERROR Bug; -- unknown state or class
};
LineComplete: PUBLIC PROC[text: Text] RETURNS[BOOLEAN] = {
nest: CARDINAL ← 0;
i: CARDINAL ← 0;
DO -- token loop
state: State ← start;
UNTIL state IN FinalState DO -- character loop
char: CHARACTER; class: Class; action: Action;
IF i>=text.length THEN { char ← 0C; class ← eos } -- end of source
ELSE { class ← CharClass[char ← text[i]]; i ← i + 1 };
[state,action] ← Transition[state,class]; -- do a state transition
SELECT action FROM -- deal with character
null => { };
skip => { --beg ← beg + 1-- };
take => { --len ← len + 1-- };
back => { i ← i - 1 };
ENDCASE => ERROR Bug;
ENDLOOP;
state and text now describe a token
SELECT state FROM
null => EXIT;
part => RETURN[FALSE]; -- incomplete: inside a string
abeg => { nest ← nest + 1 };
aend => { IF nest>0 THEN nest ← nest - 1 };
ENDCASE;
ENDLOOP; -- token loop
RETURN[nest=0]; -- complete if not inside an array
};
Initialization
InstallScanner: PROC[why: InstallReason, frame: Frame] = { SELECT why FROM
init => InitClassArray[];
free => FreeClassArray[];
register => {
overflow ← MakeName[".overflow"L];
RegisterExplicit[frame, ".token"L, JToken];
};
ENDCASE;
};
Install[InstallScanner];
}.