-- JaMScanner.mesa
-- Written by: John Warnock, March 7, 1979
-- Last changed by Doug Wyatt, December 8, 1980 12:21 PM

DIRECTORY
JaMScannerDefs,
JaMMasterDefs
USING [Frame, Object, Stack],
JaMControlDefs
USING [GetCurrentFrame, NotifyCommand, NotifyStringObject],
JaMExecDefs
USING [JaMError],
JaMStackDefs
USING [Pop, Push, Top],
JaMStartDefs
USING [GetJaMStream],
JaMVMDefs
USING [AllocateCharsVM, GetCharsVM, PutCharVM],
Ascii
USING [CR,SP,TAB,LF,ControlZ,ESC],
Inline
USING [BITAND, BITSHIFT, LowHalf];


-- This program implements a very simple lexical scanner which works in the
-- following way. When the routine "StreamToken" or "StringToken" is called
-- then the first token is returned on the top of stack followed by the
-- remainder of the string at next-on-stack.

-- The implementation strategy is based on a state transition table.
-- The indexes into the table are: "charclass" and "state". These two indexes
-- determine a procedure which is called to do the appropriate thing on a
-- given state transition.


JaMScanner: PROGRAM
IMPORTS JaMControlDefs,JaMExecDefs,JaMStackDefs,JaMStartDefs,JaMVMDefs,
Inline
EXPORTS JaMScannerDefs =
BEGIN OPEN JaMStackDefs,JaMMasterDefs;

--
StreamToken is the version of the scanner that takes
-- a stream as its input.

StreamToken: PUBLIC PROCEDURE [streamobject: StreamType Object,sStack:Stack]
RETURNS [BOOLEAN] =
BEGIN
-- This is the body of the stream token routine.
charcount ← 0;
ivalue ← 0;
token ← FALSE;
NameFlag ← FALSE;
positive ← TRUE;
streamsource ← streamobject;
state ← NuetSt;
stack ← sStack;
string ← [streamsource.litflag,StringType[0,0,]];

DO OPEN stream:streamsource.SHandle;
IF stream.endof[@stream] THEN
BEGIN
charclass ← Eos;
IF @stream # JaMStartDefs.GetJaMStream[] THEN stream.destroy[@stream]
ELSE stream.reset[@stream];
END
ELSE
BEGIN
char ← stream.get[@stream];
charclass ← CharacterClass[char];
END;
StreamDispatch [state][charclass][];
IF token THEN RETURN [NameFlag];
ENDLOOP;
END;



--
StringToken is the version of the scanner that takes
-- a string as its input.

StringToken: PUBLIC PROCEDURE [stringobject:StringType Object,sStack:Stack]
RETURNS [BOOLEAN] =
BEGIN
-- This is the body of the string token routine.
curcnt: CARDINAL←0;
tsoff: CARDINAL←4;
tempstring: STRING ← [8];
charcount ← 0;
ivalue ← 0;
token ←FALSE;
NameFlag ← FALSE;
positive←TRUE;
state ← NuetSt;
stringsource←stringobject;
stack←sStack;
string←[stringsource.litflag,StringType[0,0,]];
JaMVMDefs.GetCharsVM[stringsource.Address,stringsource.Offset,
@tempstring.text,0,8];
DO
IF charcount >= stringsource.Length THEN charclass ← Eos
ELSE
BEGIN
-- the buffering scheme implemented here is not logically
-- necessary but is here for efficiency.
char ← tempstring[curcnt];
IF (curcnt←curcnt+1) >= 8 THEN
BEGIN
JaMVMDefs.GetCharsVM[stringsource.Address+tsoff,stringsource.Offset,
@tempstring.text,0,8];
tsoff←tsoff+4;
curcnt←0;
END;
charclass ← CharacterClass[char];
END;
charcount ← charcount + 1;
StringDispatch [state][charclass][];
IF token THEN RETURN [NameFlag];
ENDLOOP;
END;


CR: CHARACTER = Ascii.CR;
stringsource: StringType Object;
streamsource: StreamType Object;
stack: Stack;

ClassRange: TYPE = [0..10];
State: TYPE = [0..8];

Dlm: ClassRange = 0; -- delimiter (SP,TAB,LF,comma)
Sgn: ClassRange = 1; -- Sign (+ or -)
Dot: ClassRange = 2; -- Period (.)
Num: ClassRange = 3; -- numeric
Oth: ClassRange = 4; -- other
Lpr: ClassRange = 5; -- (
Rpr: ClassRange = 6; -- )
Trl: ClassRange = 7; -- Bravo trailer (↑Z)
Crt: ClassRange = 8; -- CR
Eos: ClassRange = 9; -- end of source
Esc: ClassRange = 10; -- escape

CharClassIndex: TYPE = CHARACTER[0C..177C];
CharClassArray: TYPE = ARRAY CharClassIndex OF ClassRange;
charclassarray: CharClassArray;
CharacterClass: PROCEDURE[c: CHARACTER] RETURNS[ClassRange] = INLINE
BEGIN RETURN[charclassarray[c]] END;

InitCharClasses: PROCEDURE =
BEGIN OPEN Ascii;
c: CHARACTER;
class: POINTER TO CharClassArray=@charclassarray;

FOR c IN[0C..177C] DO class[c]←Oth ENDLOOP; -- default is Oth
FOR c IN[’0..’9] DO class[c]←Num ENDLOOP; -- Numeric (0 to 9)
class[SP]←Dlm; class[TAB]←Dlm; class[LF]←Dlm; class[’,]←Dlm; -- Delimiters
class[’+]←Sgn; class[’-]←Sgn; -- Signs]← plus and minus
class[’.]←Dot; -- Period
class[’(]←Lpr; class[’)]←Rpr; -- Parentheses
class[ControlZ]←Trl; -- Bravo formatting delimiter
class[CR]←Crt; -- Return and end of Bravo formatting
class[ESC]←Esc; -- Escape
END;

NuetSt:State = 0;
PreNum:State = 1;
PreFrt:State = 2;
ValInt:State = 3;
ValRel:State = 4;
ValFrt:State = 5;
Ident :State = 6;
StrLit:State = 7;
BravoS:State = 8;

StringDispatch: ARRAY State OF ARRAY ClassRange OF PROCEDURE←

-- Dlm Sgn Dot Num Oth Lpr Rpr Trl Crt Eos Esc --

--NuetSt--
[[Null,PrNm,Frc1,Vl1I,Nam1,StBl,SErr,BrvF,Null,RetN,Null],
--PreNum--
[NsR0,Name,Frac,VlIn,Name,NsR1,SErr,NsR1,NsR0,NsR1,NsR0],
--PreFrt--
[NsR0,Name,Name,VlF2,Name,NsR1,SErr,NsR1,NsR0,NsR1,NsR0],
--ValInt--
[NIR0,Name,VlF1,VlIn,Name,NIR1,SErr,NIR1,NIR0,NIR1,NIR0],
--ValRel--
[NFR0,Name,VlFR,VlRl,Name,NFR1,SErr,NFR1,NFR0,NFR1,NFR0],
--ValFrt--
[NFR0,Name,Name,VlFr,Name,NFR1,SErr,NFR1,NFR0,NFR1,NFR0],
--Ident --
[NMR0,Name,Name,Name,Name,NMR1,SErr,NMR1,NMR0,NMR1,NMR0],
--StrLit--
[Null,Null,Null,Null,Null,StC1,CDR0,BrvF,Null,SErr,Null],
--BravoS--
[Null,Null,Null,Null,Null,Null,Null,Null,NNR0,NNR0,Null]];

-- The following are the action routines associated with the above state transitions.


Null:PROCEDURE=
BEGIN
END;

PrNm:PROCEDURE=
BEGIN
state←PreNum;
valnum ← FALSE;
Mark[];
IF char= ’- THEN positive ← FALSE;
END;

Frac:PROCEDURE=
BEGIN
state←PreFrt;
dval←rval;
rvalue ← ivalue;
valnum ← FALSE;
END;

Frc1:PROCEDURE=
BEGIN
Mark[];
Frac[];
END;

VlF1:PROCEDURE=
BEGIN
state←ValFrt;
dval←rval;
rvalue←ivalue;
END;

VlF2
:PROCEDURE=
BEGIN
state←ValFrt;
rvalue←ivalue;
dval←rval;
valnum ← TRUE;
AppendFractDigit[char];
END;

VlFr:PROCEDURE=
BEGIN
valnum ← TRUE;
AppendFractDigit[char];
END;

VlIn:PROCEDURE=
BEGIN
state←ValInt;
valnum ← TRUE;
IF ~ AppendDigit[char] THEN state←ValRel;
END;

VlRl
:PROCEDURE=
BEGIN
AppendRealDigit[char];
END;

VlFR
:PROCEDURE=
BEGIN
state←ValFrt;
END;

Vl1I:PROCEDURE=
BEGIN
Mark[];
VlIn[];
END;

StBl:PROCEDURE=
BEGIN
state←StrLit;
nestcount←1;
Mark[];
END;

NsR0:PROCEDURE=
BEGIN
ReturnStringRemainder[0];
ReturnStringToken[];
END;

NsR1:PROCEDURE=
BEGIN
ReturnStringRemainder[1];
ReturnStringToken[];
END;

NMR0:PROCEDURE=
BEGIN
ReturnStringRemainder[0];
ReturnStringToken[];
END;

NMR1:PROCEDURE=
BEGIN
ReturnStringRemainder[1];
ReturnStringToken[];
END;

Name:PROCEDURE=
BEGIN
state←Ident;
END;

Nam1:PROCEDURE=
BEGIN
state←Ident;
Mark[];
END;

BrvF
:PROCEDURE=
BEGIN
savestate←state;
state←BravoS;
END;

NIR0
:PROCEDURE=
BEGIN
ReturnStringRemainder[0];
ReturnNumber[];
END;

NIR1:PROCEDURE=
BEGIN
ReturnStringRemainder[1];
ReturnNumber[];
END;


NFR0:PROCEDURE=
BEGIN
ReturnStringRemainder[0];
ReturnReal[];
END;

NFR1:PROCEDURE=
BEGIN
ReturnStringRemainder[1];
ReturnReal[];
END;


StC1:PROCEDURE=
BEGIN
nestcount ← nestcount +1;
END;


CDR0:PROCEDURE=
BEGIN
nestcount ← nestcount - 1;
IF nestcount = 0 THEN
BEGIN
ReturnStringRemainder[0];
ReturnStringLitToken[];
END;
END;

NNR0:PROCEDURE=
BEGIN
state←savestate;
END;

RetN:PROCEDURE=
BEGIN
ReturnNull[];
END;

SErr:PROCEDURE=
BEGIN
BadSyntax[];
END;


StreamDispatch: ARRAY State OF ARRAY ClassRange OF PROCEDURE←

-- Dlm Sgn Dot Num Oth Lpr Rpr Trl Crt Eos Esc --

--NuetSt--
[[NullS,PrNmS,Frc1S,Vl1IS,Nam1S,StBlS,SErrS,BrvFS,NsPsS,RetNS,NullS],
--PreNum--
[NsR0S,NameS,FracS,VlInS,NameS,NsR1S,SErrS,NsR1S,NsR0S,NsRS ,NsR0S],
--PreFrt--
[NsR0S,NameS,NameS,VlF2S,NameS,NsR1S,SErrS,NsR1S,NsR0S,NsRS ,NsR0S],
--ValInt--
[NIR0S,NameS,VlF1S,VlInS,NameS,NIR1S,SErrS,NIR1S,NIR0S,NIRS ,NIR0S],
--ValRel--
[NFR0S,NameS,VlFRS,VlRlS,NameS,NFR1S,SErrS,NFR1S,NFR0S,NFRS ,NFR0S],
--ValFrt--
[NFR0S,NameS,NameS,VlFrS,NameS,NFR1S,SErrS,NFR1S,NFR0S,NFRS ,NFR0S],
--Ident --
[NMR0S,NameS,NameS,NameS,NameS,NMR1S,SErrS,NMR1S,NMR0S,NMRS ,NMR0S],
--StrLit--
[StLtS,StLtS,StLtS,StLtS,StLtS,StC1S,CDR0S,BrvFS,StLtS,SErr ,NullS],
--BravoS--
[NullS,NullS,NullS,NullS,NullS,NullS,NullS,NullS,NNR0S,NNR0S,NullS]];

-- The following are the action routines associated with the above state transitions.

NullS
: PROCEDURE =
BEGIN
END;


NsPsS:PROCEDURE=
BEGIN
ReturnStreamRemainder[0];
token←TRUE;
NameFlag ← TRUE;
END;

PrNmS:PROCEDURE=
BEGIN
state←PreNum;
valnum ← FALSE;
SMark[];
StoreChar[];
IF char= ’- THEN positive ← FALSE;
END;

FracS:PROCEDURE=
BEGIN
state←PreFrt;
rvalue ← ivalue;
dval←rval;
valnum ← FALSE;
StoreChar[];
END;

Frc1S:PROCEDURE=
BEGIN
SMark[];
FracS[];
END;

VlF2S
:PROCEDURE=
BEGIN
state←ValFrt;
rvalue←ivalue;
dval←rval;
valnum ← TRUE;
AppendFractDigit[char];
StoreChar[];
END;

VlF1S:PROCEDURE=
BEGIN
state←ValFrt;
rvalue←ivalue;
dval←rval;
StoreChar[];
END;

VlFrS:PROCEDURE=
BEGIN
valnum ← TRUE;
AppendFractDigit[char];
StoreChar[];
END;

VlInS:PROCEDURE=
BEGIN
state←ValInt;
valnum ← TRUE;
StoreChar[];
IF ~ AppendDigit[char] THEN state←ValRel;
END;
VlRlS:PROCEDURE=
BEGIN
AppendRealDigit[char];
END;

VlFRS
:PROCEDURE=
BEGIN
state←ValFrt;
StoreChar[];
END;

Vl1IS:PROCEDURE=
BEGIN
SMark[];
VlInS[];
END;

StBlS:PROCEDURE=
BEGIN
state←StrLit;
nestcount←1;
SMark[];
END;

NsRS:PROCEDURE=
BEGIN
ReturnStreamToken[];
END;

NsR0S:PROCEDURE=
BEGIN
ReturnStreamRemainder[0];
ReturnStreamToken[];
END;

NsR1S:PROCEDURE=
BEGIN
ReturnStreamRemainder[1];
ReturnStreamToken[];
END;

NMRS:PROCEDURE=
BEGIN
ReturnStreamToken[];
END;

NMR0S:PROCEDURE=
BEGIN
ReturnStreamRemainder[0];
ReturnStreamToken[];
END;

NMR1S:PROCEDURE=
BEGIN
ReturnStreamRemainder[1];
ReturnStreamToken[];
END;

NameS:PROCEDURE=
BEGIN
state←Ident;
StoreChar[];
END;

Nam1S:PROCEDURE=
BEGIN
state←Ident;
SMark[];
StoreChar[];
END;

BrvFS
:PROCEDURE=
BEGIN
savestate←state;
state←BravoS;
END;

NIRS:PROCEDURE=
BEGIN
ReturnNumber[];
END;

NIR0S:PROCEDURE=
BEGIN
ReturnStreamRemainder[0];
ReturnNumber[];
END;

NIR1S:PROCEDURE=
BEGIN
ReturnStreamRemainder[1];
ReturnNumber[];
END;

NFRS:PROCEDURE=
BEGIN
ReturnReal[];
END;

NFR0S:PROCEDURE=
BEGIN
ReturnStreamRemainder[0];
ReturnReal[];
END;

NFR1S:PROCEDURE=
BEGIN
ReturnStreamRemainder[1];
ReturnReal[];
END;

StLtS:PROCEDURE=
BEGIN
StoreChar[];
END;


StC1S:PROCEDURE=
BEGIN
nestcount ← nestcount +1;
StoreChar[];
END;


CDR0S:PROCEDURE=
BEGIN
nestcount ← nestcount - 1;
IF nestcount = 0 THEN
BEGIN
ReturnStreamRemainder[0];
ReturnStreamLitToken[];
END
ELSE StoreChar[];
END;

NNR0S:PROCEDURE=
BEGIN
state←savestate;
END;

RetNS:PROCEDURE=
BEGIN
ReturnNull[];
END;

SErrS:PROCEDURE=
BEGIN
BadSyntax[];
END;


--These support the above dispatch functions:


ReturnNumber: PROCEDURE =
BEGIN
IF ~positive THEN ivalue ← -ivalue;
IF ivalue < 32768 AND ivalue >= -LONG[32768]
THEN BEGIN
Push[[lit,IntegerType[LowHalf[ivalue]]],stack];
END
ELSE BEGIN
Push[[lit,LongIntegerType[ivalue]],stack];
END;
token←TRUE;
NameFlag←FALSE;
END;

ReturnReal:
PROCEDURE =
BEGIN
Push[[lit,RealType[IF ~positive THEN -rvalue ELSE rvalue]],stack];
token←TRUE;
END;

LowHalf: PROCEDURE [li: LONG INTEGER] RETURNS [INTEGER] =
INLINE BEGIN RETURN[Inline.LowHalf[li]] END;


ReturnNull: PROCEDURE =
BEGIN
ob:NullType Object;
ob ← [nolit,NullType[]];
Push[ob,stack];
token←TRUE;
END;

Mark: PROCEDURE = INLINE
BEGIN
savecount←charcount;
END;

SMark: PROCEDURE = INLINE
BEGIN
[string.Address,string.Offset]←JaMVMDefs.AllocateCharsVM[0];
string.Length←0;
END;

StoreChar: PROCEDURE = INLINE
BEGIN
JaMVMDefs.PutCharVM[char,string.Address,string.Offset,string.Length];
string.Length ← string.Length+1;
END;

ReturnStreamRemainder:PROCEDURE [lst:[0..1] ] =
BEGIN
IF lst = 1 THEN streamsource.SHandle.putback[streamsource.SHandle,char];
Push[streamsource,stack];
IF char = CR THEN Push[prompt,stack];
END;

ReturnStringRemainder
:PROCEDURE [lst:[0..1] ] =
BEGIN
ofst: CARDINAL←stringsource.Offset+charcount-lst;
string.Length ← stringsource.Length-(charcount-lst);
string.Address ← stringsource.Address+Inline.BITSHIFT[ofst,-1];
string.Offset ← Inline.BITAND[ofst,1];
Push[string,stack];
END;

ReturnStreamToken:PROCEDURE=
BEGIN
[,]←JaMVMDefs.AllocateCharsVM[string.Length];
Push[string,stack];
NameFlag ← TRUE;
token←TRUE;
END;

ReturnStreamLitToken:PROCEDURE=
BEGIN
st:StringType Object← [lit,StringType[
Offset: string.Offset,
Length: string.Length,
Address: string.Address]];
[,]←JaMVMDefs.AllocateCharsVM[string.Length];
Push[st,stack];
token←TRUE;
END;

ReturnStringToken:PROCEDURE=
BEGIN
st:StringType Object ← [nolit,StringType[,,]];
ofst:CARDINAL←stringsource.Offset + savecount-1;
st.Length ← charcount - savecount;
st.Address ← stringsource.Address + Inline.BITSHIFT[ofst,-1];
st.Offset ← Inline.BITAND[ofst,1];
Push[st,stack];
NameFlag ← TRUE;
token←TRUE;
END;

ReturnStringLitToken: PROCEDURE =
BEGIN
st:StringType Object ← [lit,StringType[,,]];
ofst:CARDINAL←stringsource.Offset + savecount;
st.Length ← charcount - savecount-1;
st.Address ← stringsource.Address + Inline.BITSHIFT[ofst,-1];
st.Offset ← Inline.BITAND[ofst,1];
Push[st,stack];
token←TRUE;
END;

AppendDigit: PROCEDURE [d:CHARACTER] RETURNS [b:BOOLEAN] = --INLINE?--
BEGIN
MaxVal:LONG INTEGER = 214748364;
MaxD: INTEGER = 7;
v:INTEGER ← LOOPHOLE[d - ’0];
SELECT ivalue FROM
< MaxVal =>BEGIN ivalue ← ivalue*10+v; RETURN[TRUE]; END;
= MaxVal => IF v <= MaxD THEN
BEGIN ivalue ← ivalue*10+v; RETURN[TRUE]; END
ELSE
IF ~positive AND v = 8 THEN
BEGIN
ivalue ← 20000000000B;
positive ← TRUE;
RETURN [TRUE];
END
ELSE
BEGIN
rivalue:REAL←ivalue;
rv:REAL←v;
rvalue←rivalue*10+rv;
RETURN [FALSE];
END;

> MaxVal
=>
BEGIN
rivalue:REAL←ivalue;
rv:REAL←v;
rvalue←rivalue*10+rv;
RETURN [FALSE];
END;
ENDCASE;
END;

AppendRealDigit
: PROCEDURE [d:CHARACTER] = INLINE
BEGIN
v:REAL ← d - ’0;
rvalue ← rvalue*10 + v;
END;


AppendFractDigit: PROCEDURE [d:CHARACTER] = INLINE
BEGIN
v:REAL ← d - ’0;
rvalue ← rvalue + v*dval;
dval←dval*rval;
END;

ConvertOverflow:PROCEDURE =
BEGIN
JaMExecDefs.JaMError[OverFlowErr,TRUE];
END;

BadSyntax:PROCEDURE =
BEGIN
JaMExecDefs.JaMError[SyntaxErr,TRUE];
END;

Token: PUBLIC PROCEDURE =
BEGIN
frame:Frame ←JaMControlDefs.GetCurrentFrame[];
ob: Object ← Pop[frame.opstk];
WITH dob:ob SELECT FROM
StringType=> BEGIN
strng:StringType Object ← dob;
[]←StringToken[strng,frame.opstk];
NullChk[];
END;
StreamType=>BEGIN
strm:StreamType Object ← dob;
[]←StreamToken[strm,frame.opstk];
NullChk[];
END;
ENDCASE=>BEGIN
JaMExecDefs.JaMError[TypeErr,TRUE];
END;
END;

NullChk
: PUBLIC PROCEDURE =
BEGIN
frame:Frame ←JaMControlDefs.GetCurrentFrame[];
ob: Object ← Top[frame.opstk];
WITH dob:ob SELECT FROM
NullType=> BEGIN
[]←Pop[frame.opstk];
Push[false,frame.opstk];
END;
ENDCASE=>BEGIN
Push[true,frame.opstk];
END;
END;
-- Some global variables known to the routines.

rvalue:REAL;
-- initial value for scanned real.
ivalue:LONG INTEGER ← 0;
-- initial value for scanned integer.
string:StringType Object;
TypeErr:StringType Object;
SyntaxErr:StringType Object;
OverFlowErr:StringType Object;
prompt:StringType Object;
true:BooleanType Object ← [lit,BooleanType[BooleanVal:TRUE]];
false:BooleanType Object ← [lit,BooleanType[BooleanVal:FALSE]];
charclass:ClassRange;
--Current Character Class
state:State;
--Current State
savestate:State;
--Saved state for Bravo format
nestcount:INTEGER ←0;
--Balanced parentheses count.
char:CHARACTER ← 40C;
--Set Up Initially with SP.
charcount,savecount:CARDINAL ← 0; --Character positions in string.
positive:BOOLEAN← TRUE;
--the sign of the current number being scanned.
rval:REAL=.1; --constant used for real conversion.
dval:REAL←rval;
valnum:BOOLEAN ← TRUE;
--valid number indicator.
token:BOOLEAN ←TRUE;
NameFlag:BOOLEAN ← TRUE;


StartScanner: PROCEDURE =
BEGIN OPEN JaMControlDefs;
--Set up string objects
NotifyStringObject[@TypeErr, ".typechk"L];
NotifyStringObject[@SyntaxErr, ".syntaxerr"L];
NotifyStringObject[@OverFlowErr, ".overflow"L];
NotifyStringObject[@prompt, ".prompt"L];
-- and one command
NotifyCommand[".token"L,Token];
END;

-- Initialization
InitCharClasses;
StartScanner;

END.

DKW March 28, 1980 4:52 PM
added StartScanner

DKW March 28, 1980 4:52 PM
added names for character classes, InitCharClasses

DKW April 1, 1980 3:50 PM
now uses NotifyCommand, NotifyStringObject

DKW May 31, 1980 1:08 AM
updated for Mesa6

DKW July 14, 1980 11:21 PM
uses InlineDefs.LowHalf instead of MACHINE CODE

DKW December 8, 1980 12:16 PM
InlineDefs => Inline, IODefs => Ascii
several procedures made INLINE