-- JaMScanner.mesa
-- Written by: John Warnock, March 7, 1979
-- Last changed by Doug Wyatt, February 10, 1981 7:17 PM
DIRECTORY
JaMScannerDefs,
JaMMasterDefs USING [Frame, Object, Stack],
JaMControlDefs USING [GetCurrentFrame, RegisterCommand],
JaMExecDefs USING [JaMError, overflow, syntaxerr, typechk],
JaMLiteralDefs USING [MakeStringObject],
JaMStackDefs USING [Pop, Push, Top],
JaMStartDefs USING [GetJaMStream],
JaMVMDefs USING [AllocateCharsVM, GetCharsVM, PutCharVM],
Ascii USING [CR, SP, TAB, LF, ControlZ, ESC],
Inline USING [BITAND, BITSHIFT, LowHalf];
-- This program implements a very simple lexical scanner which works in the
-- following way. When the routine "StreamToken" or "StringToken" is called
-- then the first token is returned on the top of stack followed by the
-- remainder of the string at next-on-stack.
-- The implementation strategy is based on a state transition table.
-- The indexes into the table are: "charclass" and "state". These two indexes
-- determine a procedure which is called to do the appropriate thing on a
-- given state transition.
JaMScanner: PROGRAM
IMPORTS JaMControlDefs,JaMExecDefs,JaMLiteralDefs,JaMStackDefs,
JaMStartDefs,JaMVMDefs,Inline
EXPORTS JaMScannerDefs =
BEGIN OPEN JaMStackDefs,JaMMasterDefs;
-- StreamToken is the version of the scanner that takes
-- a stream as its input.
StreamToken: PUBLIC PROCEDURE [streamobject: StreamType Object,sStack:Stack]
RETURNS [BOOLEAN] =
BEGIN
-- This is the body of the stream token routine.
charcount ← 0;
ivalue ← 0;
token ← FALSE;
NameFlag ← FALSE;
positive ← TRUE;
streamsource ← streamobject;
state ← NuetSt;
stack ← sStack;
string ← [streamsource.litflag,StringType[0,0,]];
DO OPEN stream:streamsource.SHandle;
IF stream.endof[@stream] THEN
BEGIN
charclass ← Eos;
IF @stream # JaMStartDefs.GetJaMStream[] THEN stream.destroy[@stream]
ELSE stream.reset[@stream];
END
ELSE
BEGIN
char ← stream.get[@stream];
charclass ← CharacterClass[char];
END;
StreamDispatch [state][charclass][];
IF token THEN RETURN [NameFlag];
ENDLOOP;
END;
-- StringToken is the version of the scanner that takes
-- a string as its input.
StringToken: PUBLIC PROCEDURE [stringobject:StringType Object,sStack:Stack]
RETURNS [BOOLEAN] =
BEGIN
-- This is the body of the string token routine.
curcnt: CARDINAL←0;
tsoff: CARDINAL←4;
tempstring: STRING ← [8];
charcount ← 0;
ivalue ← 0;
token ←FALSE;
NameFlag ← FALSE;
positive←TRUE;
state ← NuetSt;
stringsource←stringobject;
stack←sStack;
string←[stringsource.litflag,StringType[0,0,]];
JaMVMDefs.GetCharsVM[stringsource.Address,stringsource.Offset,
@tempstring.text,0,8];
DO
IF charcount >= stringsource.Length THEN charclass ← Eos
ELSE
BEGIN
-- the buffering scheme implemented here is not logically
-- necessary but is here for efficiency.
char ← tempstring[curcnt];
IF (curcnt←curcnt+1) >= 8 THEN
BEGIN
JaMVMDefs.GetCharsVM[stringsource.Address+tsoff,stringsource.Offset,
@tempstring.text,0,8];
tsoff←tsoff+4;
curcnt←0;
END;
charclass ← CharacterClass[char];
END;
charcount ← charcount + 1;
StringDispatch [state][charclass][];
IF token THEN RETURN [NameFlag];
ENDLOOP;
END;
CR: CHARACTER = Ascii.CR;
stringsource: StringType Object;
streamsource: StreamType Object;
stack: Stack;
ClassRange: TYPE = [0..10];
State: TYPE = [0..8];
Dlm: ClassRange = 0; -- delimiter (SP,TAB,LF,comma)
Sgn: ClassRange = 1; -- Sign (+ or -)
Dot: ClassRange = 2; -- Period (.)
Num: ClassRange = 3; -- numeric
Oth: ClassRange = 4; -- other
Lpr: ClassRange = 5; -- (
Rpr: ClassRange = 6; -- )
Trl: ClassRange = 7; -- Bravo trailer (↑Z)
Crt: ClassRange = 8; -- CR
Eos: ClassRange = 9; -- end of source
Esc: ClassRange = 10; -- escape
CharClassIndex: TYPE = CHARACTER[0C..177C];
CharClassArray: TYPE = ARRAY CharClassIndex OF ClassRange;
charclassarray: CharClassArray;
CharacterClass: PROCEDURE[c: CHARACTER] RETURNS[ClassRange] = INLINE
BEGIN RETURN[charclassarray[c]] END;
InitCharClasses: PROCEDURE =
BEGIN OPEN Ascii;
c: CHARACTER;
class: POINTER TO CharClassArray=@charclassarray;
FOR c IN[0C..177C] DO class[c]←Oth ENDLOOP; -- default is Oth
FOR c IN[’0..’9] DO class[c]←Num ENDLOOP; -- Numeric (0 to 9)
class[SP]←Dlm; class[TAB]←Dlm; class[LF]←Dlm; class[’,]←Dlm; -- Delimiters
class[’+]←Sgn; class[’-]←Sgn; -- Signs]← plus and minus
class[’.]←Dot; -- Period
class[’(]←Lpr; class[’)]←Rpr; -- Parentheses
class[ControlZ]←Trl; -- Bravo formatting delimiter
class[CR]←Crt; -- Return and end of Bravo formatting
class[ESC]←Esc; -- Escape
END;
NuetSt:State = 0;
PreNum:State = 1;
PreFrt:State = 2;
ValInt:State = 3;
ValRel:State = 4;
ValFrt:State = 5;
Ident :State = 6;
StrLit:State = 7;
BravoS:State = 8;
StringDispatch: ARRAY State OF ARRAY ClassRange OF PROCEDURE←
-- Dlm Sgn Dot Num Oth Lpr Rpr Trl Crt Eos Esc --
--NuetSt--[[Null,PrNm,Frc1,Vl1I,Nam1,StBl,SErr,BrvF,Null,RetN,Null],
--PreNum-- [NsR0,Name,Frac,VlIn,Name,NsR1,SErr,NsR1,NsR0,NsR1,NsR0],
--PreFrt-- [NsR0,Name,Name,VlF2,Name,NsR1,SErr,NsR1,NsR0,NsR1,NsR0],
--ValInt-- [NIR0,Name,VlF1,VlIn,Name,NIR1,SErr,NIR1,NIR0,NIR1,NIR0],
--ValRel-- [NFR0,Name,VlFR,VlRl,Name,NFR1,SErr,NFR1,NFR0,NFR1,NFR0],
--ValFrt-- [NFR0,Name,Name,VlFr,Name,NFR1,SErr,NFR1,NFR0,NFR1,NFR0],
--Ident -- [NMR0,Name,Name,Name,Name,NMR1,SErr,NMR1,NMR0,NMR1,NMR0],
--StrLit-- [Null,Null,Null,Null,Null,StC1,CDR0,BrvF,Null,SErr,Null],
--BravoS-- [Null,Null,Null,Null,Null,Null,Null,Null,NNR0,NNR0,Null]];
-- The following are the action routines associated with the above state transitions.
Null:PROCEDURE=
BEGIN
END;
PrNm:PROCEDURE=
BEGIN
state←PreNum;
valnum ← FALSE;
Mark[];
IF char= ’- THEN positive ← FALSE;
END;
Frac:PROCEDURE=
BEGIN
state←PreFrt;
dval←rval;
rvalue ← ivalue;
valnum ← FALSE;
END;
Frc1:PROCEDURE=
BEGIN
Mark[];
Frac[];
END;
VlF1:PROCEDURE=
BEGIN
state←ValFrt;
dval←rval;
rvalue←ivalue;
END;
VlF2:PROCEDURE=
BEGIN
state←ValFrt;
rvalue←ivalue;
dval←rval;
valnum ← TRUE;
AppendFractDigit[char];
END;
VlFr:PROCEDURE=
BEGIN
valnum ← TRUE;
AppendFractDigit[char];
END;
VlIn:PROCEDURE=
BEGIN
state←ValInt;
valnum ← TRUE;
IF ~ AppendDigit[char] THEN state←ValRel;
END;
VlRl:PROCEDURE=
BEGIN
AppendRealDigit[char];
END;
VlFR:PROCEDURE=
BEGIN
state←ValFrt;
END;
Vl1I:PROCEDURE=
BEGIN
Mark[];
VlIn[];
END;
StBl:PROCEDURE=
BEGIN
state←StrLit;
nestcount←1;
Mark[];
END;
NsR0:PROCEDURE=
BEGIN
ReturnStringRemainder[0];
ReturnStringToken[];
END;
NsR1:PROCEDURE=
BEGIN
ReturnStringRemainder[1];
ReturnStringToken[];
END;
NMR0:PROCEDURE=
BEGIN
ReturnStringRemainder[0];
ReturnStringToken[];
END;
NMR1:PROCEDURE=
BEGIN
ReturnStringRemainder[1];
ReturnStringToken[];
END;
Name:PROCEDURE=
BEGIN
state←Ident;
END;
Nam1:PROCEDURE=
BEGIN
state←Ident;
Mark[];
END;
BrvF:PROCEDURE=
BEGIN
savestate←state;
state←BravoS;
END;
NIR0:PROCEDURE=
BEGIN
ReturnStringRemainder[0];
ReturnNumber[];
END;
NIR1:PROCEDURE=
BEGIN
ReturnStringRemainder[1];
ReturnNumber[];
END;
NFR0:PROCEDURE=
BEGIN
ReturnStringRemainder[0];
ReturnReal[];
END;
NFR1:PROCEDURE=
BEGIN
ReturnStringRemainder[1];
ReturnReal[];
END;
StC1:PROCEDURE=
BEGIN
nestcount ← nestcount +1;
END;
CDR0:PROCEDURE=
BEGIN
nestcount ← nestcount - 1;
IF nestcount = 0 THEN
BEGIN
ReturnStringRemainder[0];
ReturnStringLitToken[];
END;
END;
NNR0:PROCEDURE=
BEGIN
state←savestate;
END;
RetN:PROCEDURE=
BEGIN
ReturnNull[];
END;
SErr:PROCEDURE=
BEGIN
BadSyntax[];
END;
StreamDispatch: ARRAY State OF ARRAY ClassRange OF PROCEDURE←
-- Dlm Sgn Dot Num Oth Lpr Rpr Trl Crt Eos Esc --
--NuetSt--[[NullS,PrNmS,Frc1S,Vl1IS,Nam1S,StBlS,SErrS,BrvFS,NsPsS,RetNS,NullS],
--PreNum-- [NsR0S,NameS,FracS,VlInS,NameS,NsR1S,SErrS,NsR1S,NsR0S,NsRS ,NsR0S],
--PreFrt-- [NsR0S,NameS,NameS,VlF2S,NameS,NsR1S,SErrS,NsR1S,NsR0S,NsRS ,NsR0S],
--ValInt-- [NIR0S,NameS,VlF1S,VlInS,NameS,NIR1S,SErrS,NIR1S,NIR0S,NIRS ,NIR0S],
--ValRel-- [NFR0S,NameS,VlFRS,VlRlS,NameS,NFR1S,SErrS,NFR1S,NFR0S,NFRS ,NFR0S],
--ValFrt-- [NFR0S,NameS,NameS,VlFrS,NameS,NFR1S,SErrS,NFR1S,NFR0S,NFRS ,NFR0S],
--Ident -- [NMR0S,NameS,NameS,NameS,NameS,NMR1S,SErrS,NMR1S,NMR0S,NMRS ,NMR0S],
--StrLit-- [StLtS,StLtS,StLtS,StLtS,StLtS,StC1S,CDR0S,BrvFS,StLtS,SErr ,NullS],
--BravoS-- [NullS,NullS,NullS,NullS,NullS,NullS,NullS,NullS,NNR0S,NNR0S,NullS]];
-- The following are the action routines associated with the above state transitions.
NullS: PROCEDURE =
BEGIN
END;
NsPsS:PROCEDURE=
BEGIN
ReturnStreamRemainder[0];
token←TRUE;
NameFlag ← TRUE;
END;
PrNmS:PROCEDURE=
BEGIN
state←PreNum;
valnum ← FALSE;
SMark[];
StoreChar[];
IF char= ’- THEN positive ← FALSE;
END;
FracS:PROCEDURE=
BEGIN
state←PreFrt;
rvalue ← ivalue;
dval←rval;
valnum ← FALSE;
StoreChar[];
END;
Frc1S:PROCEDURE=
BEGIN
SMark[];
FracS[];
END;
VlF2S:PROCEDURE=
BEGIN
state←ValFrt;
rvalue←ivalue;
dval←rval;
valnum ← TRUE;
AppendFractDigit[char];
StoreChar[];
END;
VlF1S:PROCEDURE=
BEGIN
state←ValFrt;
rvalue←ivalue;
dval←rval;
StoreChar[];
END;
VlFrS:PROCEDURE=
BEGIN
valnum ← TRUE;
AppendFractDigit[char];
StoreChar[];
END;
VlInS:PROCEDURE=
BEGIN
state←ValInt;
valnum ← TRUE;
StoreChar[];
IF ~ AppendDigit[char] THEN state←ValRel;
END;
VlRlS:PROCEDURE=
BEGIN
AppendRealDigit[char];
END;
VlFRS:PROCEDURE=
BEGIN
state←ValFrt;
StoreChar[];
END;
Vl1IS:PROCEDURE=
BEGIN
SMark[];
VlInS[];
END;
StBlS:PROCEDURE=
BEGIN
state←StrLit;
nestcount←1;
SMark[];
END;
NsRS:PROCEDURE=
BEGIN
ReturnStreamToken[];
END;
NsR0S:PROCEDURE=
BEGIN
ReturnStreamRemainder[0];
ReturnStreamToken[];
END;
NsR1S:PROCEDURE=
BEGIN
ReturnStreamRemainder[1];
ReturnStreamToken[];
END;
NMRS:PROCEDURE=
BEGIN
ReturnStreamToken[];
END;
NMR0S:PROCEDURE=
BEGIN
ReturnStreamRemainder[0];
ReturnStreamToken[];
END;
NMR1S:PROCEDURE=
BEGIN
ReturnStreamRemainder[1];
ReturnStreamToken[];
END;
NameS:PROCEDURE=
BEGIN
state←Ident;
StoreChar[];
END;
Nam1S:PROCEDURE=
BEGIN
state←Ident;
SMark[];
StoreChar[];
END;
BrvFS:PROCEDURE=
BEGIN
savestate←state;
state←BravoS;
END;
NIRS:PROCEDURE=
BEGIN
ReturnNumber[];
END;
NIR0S:PROCEDURE=
BEGIN
ReturnStreamRemainder[0];
ReturnNumber[];
END;
NIR1S:PROCEDURE=
BEGIN
ReturnStreamRemainder[1];
ReturnNumber[];
END;
NFRS:PROCEDURE=
BEGIN
ReturnReal[];
END;
NFR0S:PROCEDURE=
BEGIN
ReturnStreamRemainder[0];
ReturnReal[];
END;
NFR1S:PROCEDURE=
BEGIN
ReturnStreamRemainder[1];
ReturnReal[];
END;
StLtS:PROCEDURE=
BEGIN
StoreChar[];
END;
StC1S:PROCEDURE=
BEGIN
nestcount ← nestcount +1;
StoreChar[];
END;
CDR0S:PROCEDURE=
BEGIN
nestcount ← nestcount - 1;
IF nestcount = 0 THEN
BEGIN
ReturnStreamRemainder[0];
ReturnStreamLitToken[];
END
ELSE StoreChar[];
END;
NNR0S:PROCEDURE=
BEGIN
state←savestate;
END;
RetNS:PROCEDURE=
BEGIN
ReturnNull[];
END;
SErrS:PROCEDURE=
BEGIN
BadSyntax[];
END;
--These support the above dispatch functions:
ReturnNumber: PROCEDURE =
BEGIN
IF ~positive THEN ivalue ← -ivalue;
IF ivalue < 32768 AND ivalue >= -LONG[32768]
THEN BEGIN
Push[[lit,IntegerType[LowHalf[ivalue]]],stack];
END
ELSE BEGIN
Push[[lit,LongIntegerType[ivalue]],stack];
END;
token←TRUE;
NameFlag←FALSE;
END;
ReturnReal: PROCEDURE =
BEGIN
Push[[lit,RealType[IF ~positive THEN -rvalue ELSE rvalue]],stack];
token←TRUE;
END;
LowHalf: PROCEDURE [li: LONG INTEGER] RETURNS [INTEGER] =
INLINE BEGIN RETURN[Inline.LowHalf[li]] END;
ReturnNull: PROCEDURE =
BEGIN
ob:NullType Object;
ob ← [nolit,NullType[]];
Push[ob,stack];
token←TRUE;
END;
Mark: PROCEDURE = INLINE
BEGIN
savecount←charcount;
END;
SMark: PROCEDURE = INLINE
BEGIN
[string.Address,string.Offset]←JaMVMDefs.AllocateCharsVM[0];
string.Length←0;
END;
StoreChar: PROCEDURE = INLINE
BEGIN
JaMVMDefs.PutCharVM[char,string.Address,string.Offset,string.Length];
string.Length ← string.Length+1;
END;
ReturnStreamRemainder:PROCEDURE [lst:[0..1] ] =
BEGIN
IF lst = 1 THEN streamsource.SHandle.putback[streamsource.SHandle,char];
Push[streamsource,stack];
IF char = CR THEN Push[prompt,stack];
END;
ReturnStringRemainder:PROCEDURE [lst:[0..1] ] =
BEGIN
ofst: CARDINAL←stringsource.Offset+charcount-lst;
string.Length ← stringsource.Length-(charcount-lst);
string.Address ← stringsource.Address+Inline.BITSHIFT[ofst,-1];
string.Offset ← Inline.BITAND[ofst,1];
Push[string,stack];
END;
ReturnStreamToken:PROCEDURE=
BEGIN
[,]←JaMVMDefs.AllocateCharsVM[string.Length];
Push[string,stack];
NameFlag ← TRUE;
token←TRUE;
END;
ReturnStreamLitToken:PROCEDURE=
BEGIN
st:StringType Object← [lit,StringType[
Offset: string.Offset,
Length: string.Length,
Address: string.Address]];
[,]←JaMVMDefs.AllocateCharsVM[string.Length];
Push[st,stack];
token←TRUE;
END;
ReturnStringToken:PROCEDURE=
BEGIN
st:StringType Object ← [nolit,StringType[,,]];
ofst:CARDINAL←stringsource.Offset + savecount-1;
st.Length ← charcount - savecount;
st.Address ← stringsource.Address + Inline.BITSHIFT[ofst,-1];
st.Offset ← Inline.BITAND[ofst,1];
Push[st,stack];
NameFlag ← TRUE;
token←TRUE;
END;
ReturnStringLitToken: PROCEDURE =
BEGIN
st:StringType Object ← [lit,StringType[,,]];
ofst:CARDINAL←stringsource.Offset + savecount;
st.Length ← charcount - savecount-1;
st.Address ← stringsource.Address + Inline.BITSHIFT[ofst,-1];
st.Offset ← Inline.BITAND[ofst,1];
Push[st,stack];
token←TRUE;
END;
AppendDigit: PROCEDURE [d:CHARACTER] RETURNS [b:BOOLEAN] = --INLINE?--
BEGIN
MaxVal:LONG INTEGER = 214748364;
MaxD: INTEGER = 7;
v:INTEGER ← LOOPHOLE[d - ’0];
SELECT ivalue FROM
< MaxVal =>BEGIN ivalue ← ivalue*10+v; RETURN[TRUE]; END;
= MaxVal => IF v <= MaxD THEN
BEGIN ivalue ← ivalue*10+v; RETURN[TRUE]; END
ELSE
IF ~positive AND v = 8 THEN
BEGIN
ivalue ← 20000000000B;
positive ← TRUE;
RETURN [TRUE];
END
ELSE
BEGIN
rivalue:REAL←ivalue;
rv:REAL←v;
rvalue←rivalue*10+rv;
RETURN [FALSE];
END;
> MaxVal=>
BEGIN
rivalue:REAL←ivalue;
rv:REAL←v;
rvalue←rivalue*10+rv;
RETURN [FALSE];
END;
ENDCASE;
END;
AppendRealDigit: PROCEDURE [d:CHARACTER] = INLINE
BEGIN
v:REAL ← d - ’0;
rvalue ← rvalue*10 + v;
END;
AppendFractDigit: PROCEDURE [d:CHARACTER] = INLINE
BEGIN
v:REAL ← d - ’0;
rvalue ← rvalue + v*dval;
dval←dval*rval;
END;
ConvertOverflow: PROCEDURE =
{ OPEN JaMExecDefs; ERROR JaMError[overflow,TRUE] };
BadSyntax: PROCEDURE =
{ OPEN JaMExecDefs; ERROR JaMError[syntaxerr,TRUE] };
Token: PUBLIC PROCEDURE =
BEGIN
frame:Frame ←JaMControlDefs.GetCurrentFrame[];
ob: Object ← Pop[frame.opstk];
WITH dob:ob SELECT FROM
StringType=> BEGIN
strng:StringType Object ← dob;
[]←StringToken[strng,frame.opstk];
NullChk[];
END;
StreamType=>BEGIN
strm:StreamType Object ← dob;
[]←StreamToken[strm,frame.opstk];
NullChk[];
END;
ENDCASE=> { OPEN JaMExecDefs; ERROR JaMError[typechk,TRUE] };
END;
NullChk: PUBLIC PROCEDURE =
BEGIN
frame:Frame ←JaMControlDefs.GetCurrentFrame[];
ob: Object ← Top[frame.opstk];
WITH dob:ob SELECT FROM
NullType=> BEGIN
[]←Pop[frame.opstk];
Push[false,frame.opstk];
END;
ENDCASE=>BEGIN
Push[true,frame.opstk];
END;
END;
-- Some global variables known to the routines.
rvalue: REAL;-- initial value for scanned real.
ivalue: LONG INTEGER ← 0;-- initial value for scanned integer.
string: StringType Object;
prompt: StringType Object;
true: BooleanType Object ← [lit,BooleanType[BooleanVal: TRUE]];
false: BooleanType Object ← [lit,BooleanType[BooleanVal: FALSE]];
charclass: ClassRange;--Current Character Class
state: State;--Current State
savestate: State;--Saved state for Bravo format
nestcount: INTEGER ←0;--Balanced parentheses count.
char: CHARACTER ← 40C;--Set Up Initially with SP.
charcount,savecount: CARDINAL ← 0; --Character positions in string.
positive: BOOLEAN← TRUE;--the sign of the current number being scanned.
rval: REAL=.1; --constant used for real conversion.
dval: REAL←rval;
valnum: BOOLEAN ← TRUE;--valid number indicator.
token: BOOLEAN ←TRUE;
NameFlag: BOOLEAN ← TRUE;
-- Initialization
InitCharClasses;
STOP;
prompt ← JaMLiteralDefs.MakeStringObject[".prompt"L];
{ OPEN JaMControlDefs;
RegisterCommand[".token"L,Token];
};
END.
DKW March 28, 1980 4:52 PM
added StartScanner
DKW March 28, 1980 4:52 PM
added names for character classes, InitCharClasses
DKW April 1, 1980 3:50 PM
now uses NotifyCommand, NotifyStringObject
DKW May 31, 1980 1:08 AM
updated for Mesa6
DKW July 14, 1980 11:21 PM
uses InlineDefs.LowHalf instead of MACHINE CODE
DKW December 8, 1980 12:16 PM
InlineDefs => Inline, IODefs => Ascii
several procedures made INLINE
DKW February 10, 1981 6:12 PM
imports errors from JaMExecDefs; initializes after STOP