-- JaMScanner.mesa -- Written by: John Warnock, March 7, 1979 -- Last changed by Doug Wyatt, February 10, 1981 7:17 PM DIRECTORY JaMScannerDefs, JaMMasterDefs USING [Frame, Object, Stack], JaMControlDefs USING [GetCurrentFrame, RegisterCommand], JaMExecDefs USING [JaMError, overflow, syntaxerr, typechk], JaMLiteralDefs USING [MakeStringObject], JaMStackDefs USING [Pop, Push, Top], JaMStartDefs USING [GetJaMStream], JaMVMDefs USING [AllocateCharsVM, GetCharsVM, PutCharVM], Ascii USING [CR, SP, TAB, LF, ControlZ, ESC], Inline USING [BITAND, BITSHIFT, LowHalf]; -- This program implements a very simple lexical scanner which works in the -- following way. When the routine "StreamToken" or "StringToken" is called -- then the first token is returned on the top of stack followed by the -- remainder of the string at next-on-stack. -- The implementation strategy is based on a state transition table. -- The indexes into the table are: "charclass" and "state". These two indexes -- determine a procedure which is called to do the appropriate thing on a -- given state transition. JaMScanner: PROGRAM IMPORTS JaMControlDefs,JaMExecDefs,JaMLiteralDefs,JaMStackDefs, JaMStartDefs,JaMVMDefs,Inline EXPORTS JaMScannerDefs = BEGIN OPEN JaMStackDefs,JaMMasterDefs; -- StreamToken is the version of the scanner that takes -- a stream as its input. StreamToken: PUBLIC PROCEDURE [streamobject: StreamType Object,sStack:Stack] RETURNS [BOOLEAN] = BEGIN -- This is the body of the stream token routine. charcount _ 0; ivalue _ 0; token _ FALSE; NameFlag _ FALSE; positive _ TRUE; streamsource _ streamobject; state _ NuetSt; stack _ sStack; string _ [streamsource.litflag,StringType[0,0,]]; DO OPEN stream:streamsource.SHandle; IF stream.endof[@stream] THEN BEGIN charclass _ Eos; IF @stream # JaMStartDefs.GetJaMStream[] THEN stream.destroy[@stream] ELSE stream.reset[@stream]; END ELSE BEGIN char _ stream.get[@stream]; charclass _ CharacterClass[char]; END; StreamDispatch [state][charclass][]; IF token THEN RETURN [NameFlag]; ENDLOOP; END; -- StringToken is the version of the scanner that takes -- a string as its input. StringToken: PUBLIC PROCEDURE [stringobject:StringType Object,sStack:Stack] RETURNS [BOOLEAN] = BEGIN -- This is the body of the string token routine. curcnt: CARDINAL_0; tsoff: CARDINAL_4; tempstring: STRING _ [8]; charcount _ 0; ivalue _ 0; token _FALSE; NameFlag _ FALSE; positive_TRUE; state _ NuetSt; stringsource_stringobject; stack_sStack; string_[stringsource.litflag,StringType[0,0,]]; JaMVMDefs.GetCharsVM[stringsource.Address,stringsource.Offset, @tempstring.text,0,8]; DO IF charcount >= stringsource.Length THEN charclass _ Eos ELSE BEGIN -- the buffering scheme implemented here is not logically -- necessary but is here for efficiency. char _ tempstring[curcnt]; IF (curcnt_curcnt+1) >= 8 THEN BEGIN JaMVMDefs.GetCharsVM[stringsource.Address+tsoff,stringsource.Offset, @tempstring.text,0,8]; tsoff_tsoff+4; curcnt_0; END; charclass _ CharacterClass[char]; END; charcount _ charcount + 1; StringDispatch [state][charclass][]; IF token THEN RETURN [NameFlag]; ENDLOOP; END; CR: CHARACTER = Ascii.CR; stringsource: StringType Object; streamsource: StreamType Object; stack: Stack; ClassRange: TYPE = [0..10]; State: TYPE = [0..8]; Dlm: ClassRange = 0; -- delimiter (SP,TAB,LF,comma) Sgn: ClassRange = 1; -- Sign (+ or -) Dot: ClassRange = 2; -- Period (.) Num: ClassRange = 3; -- numeric Oth: ClassRange = 4; -- other Lpr: ClassRange = 5; -- ( Rpr: ClassRange = 6; -- ) Trl: ClassRange = 7; -- Bravo trailer (^Z) Crt: ClassRange = 8; -- CR Eos: ClassRange = 9; -- end of source Esc: ClassRange = 10; -- escape CharClassIndex: TYPE = CHARACTER[0C..177C]; CharClassArray: TYPE = ARRAY CharClassIndex OF ClassRange; charclassarray: CharClassArray; CharacterClass: PROCEDURE[c: CHARACTER] RETURNS[ClassRange] = INLINE BEGIN RETURN[charclassarray[c]] END; InitCharClasses: PROCEDURE = BEGIN OPEN Ascii; c: CHARACTER; class: POINTER TO CharClassArray=@charclassarray; FOR c IN[0C..177C] DO class[c]_Oth ENDLOOP; -- default is Oth FOR c IN['0..'9] DO class[c]_Num ENDLOOP; -- Numeric (0 to 9) class[SP]_Dlm; class[TAB]_Dlm; class[LF]_Dlm; class[',]_Dlm; -- Delimiters class['+]_Sgn; class['-]_Sgn; -- Signs]_ plus and minus class['.]_Dot; -- Period class['(]_Lpr; class[')]_Rpr; -- Parentheses class[ControlZ]_Trl; -- Bravo formatting delimiter class[CR]_Crt; -- Return and end of Bravo formatting class[ESC]_Esc; -- Escape END; NuetSt:State = 0; PreNum:State = 1; PreFrt:State = 2; ValInt:State = 3; ValRel:State = 4; ValFrt:State = 5; Ident :State = 6; StrLit:State = 7; BravoS:State = 8; StringDispatch: ARRAY State OF ARRAY ClassRange OF PROCEDURE_ -- Dlm Sgn Dot Num Oth Lpr Rpr Trl Crt Eos Esc -- --NuetSt-- [[Null,PrNm,Frc1,Vl1I,Nam1,StBl,SErr,BrvF,Null,RetN,Null], --PreNum-- [NsR0,Name,Frac,VlIn,Name,NsR1,SErr,NsR1,NsR0,NsR1,NsR0], --PreFrt-- [NsR0,Name,Name,VlF2,Name,NsR1,SErr,NsR1,NsR0,NsR1,NsR0], --ValInt-- [NIR0,Name,VlF1,VlIn,Name,NIR1,SErr,NIR1,NIR0,NIR1,NIR0], --ValRel-- [NFR0,Name,VlFR,VlRl,Name,NFR1,SErr,NFR1,NFR0,NFR1,NFR0], --ValFrt-- [NFR0,Name,Name,VlFr,Name,NFR1,SErr,NFR1,NFR0,NFR1,NFR0], --Ident -- [NMR0,Name,Name,Name,Name,NMR1,SErr,NMR1,NMR0,NMR1,NMR0], --StrLit-- [Null,Null,Null,Null,Null,StC1,CDR0,BrvF,Null,SErr,Null], --BravoS-- [Null,Null,Null,Null,Null,Null,Null,Null,NNR0,NNR0,Null]]; -- The following are the action routines associated with the above state transitions. Null:PROCEDURE=(600) BEGIN END;l4268 PrNm:PROCEDURE= BEGIN state_PreNum; valnum _ FALSE; Mark[]; IF char= '- THEN positive _ FALSE; END;l4268 Frac:PROCEDURE= BEGIN state_PreFrt; dval_rval; rvalue _ ivalue; valnum _ FALSE; END;l4268 Frc1:PROCEDURE= BEGIN Mark[]; Frac[]; END;l4268 VlF1:PROCEDURE= BEGIN state_ValFrt; dval_rval; rvalue_ivalue; END;l4268 VlF2:PROCEDURE= BEGIN state_ValFrt; rvalue_ivalue; dval_rval; valnum _ TRUE; AppendFractDigit[char]; END;l4268 VlFr:PROCEDURE= BEGIN valnum _ TRUE; AppendFractDigit[char]; END;l4268 VlIn:PROCEDURE= BEGIN state_ValInt; valnum _ TRUE; IF ~ AppendDigit[char] THEN state_ValRel; END;l4268 VlRl:PROCEDURE= BEGIN AppendRealDigit[char]; END;l4268 VlFR:PROCEDURE= BEGIN state_ValFrt; END;l4268 Vl1I:PROCEDURE= BEGIN Mark[]; VlIn[]; END;l4268 StBl:PROCEDURE= BEGIN state_StrLit; nestcount_1; Mark[]; END;l4268 NsR0:PROCEDURE= BEGIN ReturnStringRemainder[0]; ReturnStringToken[]; END;l4268 NsR1:PROCEDURE= BEGIN ReturnStringRemainder[1]; ReturnStringToken[]; END;l4268 NMR0:PROCEDURE= BEGIN ReturnStringRemainder[0]; ReturnStringToken[]; END;l4268 NMR1:PROCEDURE= BEGIN ReturnStringRemainder[1]; ReturnStringToken[]; END;l4268 Name:PROCEDURE= BEGIN state_Ident; END;l4268 Nam1:PROCEDURE= BEGIN state_Ident; Mark[]; END;l4268 BrvF:PROCEDURE= BEGIN savestate_state; state_BravoS; END;l4268 NIR0:PROCEDURE= BEGIN ReturnStringRemainder[0]; ReturnNumber[]; END;l4268 NIR1:PROCEDURE= BEGIN ReturnStringRemainder[1]; ReturnNumber[]; END;l4268 NFR0:PROCEDURE= BEGIN ReturnStringRemainder[0]; ReturnReal[]; END;l4268 NFR1:PROCEDURE= BEGIN ReturnStringRemainder[1]; ReturnReal[]; END;l4268 StC1:PROCEDURE= BEGIN nestcount _ nestcount +1; END;l4268 CDR0:PROCEDURE= BEGIN nestcount _ nestcount - 1; IF nestcount = 0 THEN BEGIN ReturnStringRemainder[0]; ReturnStringLitToken[]; END; END;l4268 NNR0:PROCEDURE= BEGIN state_savestate; END;l4268 RetN:PROCEDURE= BEGIN ReturnNull[]; END;l4268 SErr:PROCEDURE= BEGIN BadSyntax[]; END;l4268 StreamDispatch: ARRAY State OF ARRAY ClassRange OF PROCEDURE_ -- Dlm Sgn Dot Num Oth Lpr Rpr Trl Crt Eos Esc -- --NuetSt-- [[NullS,PrNmS,Frc1S,Vl1IS,Nam1S,StBlS,SErrS,BrvFS,NsPsS,RetNS,NullS], --PreNum-- [NsR0S,NameS,FracS,VlInS,NameS,NsR1S,SErrS,NsR1S,NsR0S,NsRS ,NsR0S], --PreFrt-- [NsR0S,NameS,NameS,VlF2S,NameS,NsR1S,SErrS,NsR1S,NsR0S,NsRS ,NsR0S], --ValInt-- [NIR0S,NameS,VlF1S,VlInS,NameS,NIR1S,SErrS,NIR1S,NIR0S,NIRS ,NIR0S], --ValRel-- [NFR0S,NameS,VlFRS,VlRlS,NameS,NFR1S,SErrS,NFR1S,NFR0S,NFRS ,NFR0S], --ValFrt-- [NFR0S,NameS,NameS,VlFrS,NameS,NFR1S,SErrS,NFR1S,NFR0S,NFRS ,NFR0S], --Ident -- [NMR0S,NameS,NameS,NameS,NameS,NMR1S,SErrS,NMR1S,NMR0S,NMRS ,NMR0S], --StrLit-- [StLtS,StLtS,StLtS,StLtS,StLtS,StC1S,CDR0S,BrvFS,StLtS,SErr ,NullS], --BravoS-- [NullS,NullS,NullS,NullS,NullS,NullS,NullS,NullS,NNR0S,NNR0S,NullS]]; -- The following are the action routines associated with the above state transitions. NullS: PROCEDURE = BEGIN END;l4268 NsPsS:PROCEDURE= BEGIN ReturnStreamRemainder[0]; token_TRUE; NameFlag _ TRUE; END;l4268 PrNmS:PROCEDURE= BEGIN state_PreNum; valnum _ FALSE; SMark[]; StoreChar[]; IF char= '- THEN positive _ FALSE; END;l4268 FracS:PROCEDURE= BEGIN state_PreFrt; rvalue _ ivalue; dval_rval; valnum _ FALSE; StoreChar[]; END;l4268 Frc1S:PROCEDURE= BEGIN SMark[]; FracS[]; END;l4268 VlF2S:PROCEDURE= BEGIN state_ValFrt; rvalue_ivalue; dval_rval; valnum _ TRUE; AppendFractDigit[char]; StoreChar[]; END;l4268 VlF1S:PROCEDURE= BEGIN state_ValFrt; rvalue_ivalue; dval_rval; StoreChar[]; END;l4268 VlFrS:PROCEDURE= BEGIN valnum _ TRUE; AppendFractDigit[char]; StoreChar[]; END;l4268 VlInS:PROCEDURE= BEGIN state_ValInt; valnum _ TRUE; StoreChar[]; IF ~ AppendDigit[char] THEN state_ValRel; END;l4268 VlRlS:PROCEDURE= BEGIN AppendRealDigit[char]; END;l4268 VlFRS:PROCEDURE= BEGIN state_ValFrt; StoreChar[]; END;l4268 Vl1IS:PROCEDURE= BEGIN SMark[]; VlInS[]; END;l4268 StBlS:PROCEDURE= BEGIN state_StrLit; nestcount_1; SMark[]; END;l4268 NsRS:PROCEDURE= BEGIN ReturnStreamToken[]; END;l4268 NsR0S:PROCEDURE= BEGIN ReturnStreamRemainder[0]; ReturnStreamToken[]; END;l4268 NsR1S:PROCEDURE= BEGIN ReturnStreamRemainder[1]; ReturnStreamToken[]; END;l4268 NMRS:PROCEDURE= BEGIN ReturnStreamToken[]; END;l4268 NMR0S:PROCEDURE= BEGIN ReturnStreamRemainder[0]; ReturnStreamToken[]; END;l4268 NMR1S:PROCEDURE= BEGIN ReturnStreamRemainder[1]; ReturnStreamToken[]; END;l4268 NameS:PROCEDURE= BEGIN state_Ident; StoreChar[]; END;l4268 Nam1S:PROCEDURE= BEGIN state_Ident; SMark[]; StoreChar[]; END;l4268 BrvFS:PROCEDURE= BEGIN savestate_state; state_BravoS; END;l4268 NIRS:PROCEDURE= BEGIN ReturnNumber[]; END;l4268 NIR0S:PROCEDURE= BEGIN ReturnStreamRemainder[0]; ReturnNumber[]; END;l4268 NIR1S:PROCEDURE= BEGIN ReturnStreamRemainder[1]; ReturnNumber[]; END;l4268 NFRS:PROCEDURE= BEGIN ReturnReal[]; END;l4268 NFR0S:PROCEDURE= BEGIN ReturnStreamRemainder[0]; ReturnReal[]; END;l4268 NFR1S:PROCEDURE= BEGIN ReturnStreamRemainder[1]; ReturnReal[]; END;l4268 StLtS:PROCEDURE= BEGIN StoreChar[]; END;l4268 StC1S:PROCEDURE= BEGIN nestcount _ nestcount +1; StoreChar[]; END;l4268 CDR0S:PROCEDURE= BEGIN nestcount _ nestcount - 1; IF nestcount = 0 THEN BEGIN ReturnStreamRemainder[0]; ReturnStreamLitToken[]; END ELSE StoreChar[]; END;l4268 NNR0S:PROCEDURE= BEGIN state_savestate; END;l4268 RetNS:PROCEDURE= BEGIN ReturnNull[]; END;l4268 SErrS:PROCEDURE= BEGIN BadSyntax[]; END;l4268 --These support the above dispatch functions: ReturnNumber: PROCEDURE = BEGIN IF ~positive THEN ivalue _ -ivalue; IF ivalue < 32768 AND ivalue >= -LONG[32768] THEN BEGIN Push[[lit,IntegerType[LowHalf[ivalue]]],stack]; END ELSE BEGIN Push[[lit,LongIntegerType[ivalue]],stack]; END; token_TRUE; NameFlag_FALSE; END;l4268 ReturnReal: PROCEDURE = BEGIN Push[[lit,RealType[IF ~positive THEN -rvalue ELSE rvalue]],stack]; token_TRUE; END;l4268 LowHalf: PROCEDURE [li: LONG INTEGER] RETURNS [INTEGER] = INLINE BEGIN RETURN[Inline.LowHalf[li]] END; ReturnNull: PROCEDURE = BEGIN ob:NullType Object; ob _ [nolit,NullType[]]; Push[ob,stack]; token_TRUE; END;l4268 Mark: PROCEDURE = INLINE BEGIN savecount_charcount; END;l4268 SMark: PROCEDURE = INLINE BEGIN [string.Address,string.Offset]_JaMVMDefs.AllocateCharsVM[0]; string.Length_0; END;l4268 StoreChar: PROCEDURE = INLINE BEGIN JaMVMDefs.PutCharVM[char,string.Address,string.Offset,string.Length]; string.Length _ string.Length+1; END;l4268 ReturnStreamRemainder:PROCEDURE [lst:[0..1] ] = BEGIN IF lst = 1 THEN streamsource.SHandle.putback[streamsource.SHandle,char]; Push[streamsource,stack]; IF char = CR THEN Push[prompt,stack]; END;l4268 ReturnStringRemainder:PROCEDURE [lst:[0..1] ] = BEGIN ofst: CARDINAL_stringsource.Offset+charcount-lst; string.Length _ stringsource.Length-(charcount-lst); string.Address _ stringsource.Address+Inline.BITSHIFT[ofst,-1]; string.Offset _ Inline.BITAND[ofst,1]; Push[string,stack]; END; ReturnStreamToken:PROCEDURE= BEGIN [,]_JaMVMDefs.AllocateCharsVM[string.Length]; Push[string,stack]; NameFlag _ TRUE; token_TRUE; END;l4268 ReturnStreamLitToken:PROCEDURE= BEGIN st:StringType Object_ [lit,StringType[ Offset: string.Offset, Length: string.Length, Address: string.Address]]; [,]_JaMVMDefs.AllocateCharsVM[string.Length]; Push[st,stack]; token_TRUE; END;l4268 ReturnStringToken:PROCEDURE= BEGIN st:StringType Object _ [nolit,StringType[,,]]; ofst:CARDINAL_stringsource.Offset + savecount-1; st.Length _ charcount - savecount; st.Address _ stringsource.Address + Inline.BITSHIFT[ofst,-1]; st.Offset _ Inline.BITAND[ofst,1]; Push[st,stack]; NameFlag _ TRUE; token_TRUE; END; ReturnStringLitToken: PROCEDURE = BEGIN st:StringType Object _ [lit,StringType[,,]]; ofst:CARDINAL_stringsource.Offset + savecount; st.Length _ charcount - savecount-1; st.Address _ stringsource.Address + Inline.BITSHIFT[ofst,-1]; st.Offset _ Inline.BITAND[ofst,1]; Push[st,stack]; token_TRUE; END; AppendDigit: PROCEDURE [d:CHARACTER] RETURNS [b:BOOLEAN] = --INLINE?-- BEGIN MaxVal:LONG INTEGER = 214748364; MaxD: INTEGER = 7; v:INTEGER _ LOOPHOLE[d - '0]; SELECT ivalue FROM < MaxVal =>BEGIN ivalue _ ivalue*10+v; RETURN[TRUE]; END; = MaxVal => IF v <= MaxD THEN BEGIN ivalue _ ivalue*10+v; RETURN[TRUE]; END ELSE IF ~positive AND v = 8 THEN BEGIN ivalue _ 20000000000B; positive _ TRUE; RETURN [TRUE]; END ELSE BEGIN rivalue:REAL_ivalue; rv:REAL_v; rvalue_rivalue*10+rv; RETURN [FALSE]; END;l4268 > MaxVal =>l4268 BEGIN rivalue:REAL_ivalue; rv:REAL_v; rvalue_rivalue*10+rv; RETURN [FALSE]; END;l6808 ENDCASE; END;l4268 AppendRealDigit: PROCEDURE [d:CHARACTER] = INLINE BEGIN v:REAL _ d - '0; rvalue _ rvalue*10 + v; END;l4268 AppendFractDigit: PROCEDURE [d:CHARACTER] = INLINE BEGIN v:REAL _ d - '0; rvalue _ rvalue + v*dval; dval_dval*rval; END;l4268 ConvertOverflow: PROCEDURE = { OPEN JaMExecDefs; ERROR JaMError[overflow,TRUE] }; BadSyntax: PROCEDURE = { OPEN JaMExecDefs; ERROR JaMError[syntaxerr,TRUE] }; Token: PUBLIC PROCEDURE = BEGIN frame:Frame _JaMControlDefs.GetCurrentFrame[]; ob: Object _ Pop[frame.opstk]; WITH dob:ob SELECT FROM StringType => BEGIN strng:StringType Object _ dob; []_StringToken[strng,frame.opstk]; NullChk[]; END; StreamType =>BEGIN strm:StreamType Object _ dob; []_StreamToken[strm,frame.opstk]; NullChk[]; END; ENDCASE => { OPEN JaMExecDefs; ERROR JaMError[typechk,TRUE] }; END; NullChk: PUBLIC PROCEDURE = BEGIN frame:Frame _JaMControlDefs.GetCurrentFrame[]; ob: Object _ Top[frame.opstk]; WITH dob:ob SELECT FROM NullType => BEGIN []_Pop[frame.opstk]; Push[false,frame.opstk]; END; ENDCASE =>BEGIN Push[true,frame.opstk]; END; END; -- Some global variables known to the routines. rvalue: REAL; -- initial value for scanned real. ivalue: LONG INTEGER _ 0; -- initial value for scanned integer. string: StringType Object; prompt: StringType Object; true: BooleanType Object _ [lit,BooleanType[BooleanVal: TRUE]]; false: BooleanType Object _ [lit,BooleanType[BooleanVal: FALSE]]; charclass: ClassRange; --Current Character Class state: State; --Current State savestate: State; --Saved state for Bravo format nestcount: INTEGER _0; --Balanced parentheses count. char: CHARACTER _ 40C; --Set Up Initially with SP. charcount,savecount: CARDINAL _ 0; --Character positions in string. positive: BOOLEAN_ TRUE; --the sign of the current number being scanned. rval: REAL=.1; --constant used for real conversion. dval: REAL_rval; valnum: BOOLEAN _ TRUE; --valid number indicator. token: BOOLEAN _TRUE; NameFlag: BOOLEAN _ TRUE; -- Initialization InitCharClasses; STOP; prompt _ JaMLiteralDefs.MakeStringObject[".prompt"L]; { OPEN JaMControlDefs; RegisterCommand[".token"L,Token]; }; END. DKW March 28, 1980 4:52 PM added StartScanner DKW March 28, 1980 4:52 PM added names for character classes, InitCharClasses DKW April 1, 1980 3:50 PM now uses NotifyCommand, NotifyStringObject DKW May 31, 1980 1:08 AM updated for Mesa6 DKW July 14, 1980 11:21 PM uses InlineDefs.LowHalf instead of MACHINE CODE DKW December 8, 1980 12:16 PM InlineDefs => Inline, IODefs => Ascii several procedures made INLINE DKW February 10, 1981 6:12 PM imports errors from JaMExecDefs; initializes after STOP