-- JaMScanner.mesa -- Written by: John Warnock, March 7, 1979 -- Last changed by Doug Wyatt, December 8, 1980 12:21 PM DIRECTORY JaMScannerDefs, JaMMasterDefs USING [Frame, Object, Stack], JaMControlDefs USING [GetCurrentFrame, NotifyCommand, NotifyStringObject], JaMExecDefs USING [JaMError], JaMStackDefs USING [Pop, Push, Top], JaMStartDefs USING [GetJaMStream], JaMVMDefs USING [AllocateCharsVM, GetCharsVM, PutCharVM], Ascii USING [CR,SP,TAB,LF,ControlZ,ESC], Inline USING [BITAND, BITSHIFT, LowHalf]; -- This program implements a very simple lexical scanner which works in the -- following way. When the routine "StreamToken" or "StringToken" is called -- then the first token is returned on the top of stack followed by the -- remainder of the string at next-on-stack. -- The implementation strategy is based on a state transition table. -- The indexes into the table are: "charclass" and "state". These two indexes -- determine a procedure which is called to do the appropriate thing on a -- given state transition. JaMScanner: PROGRAM IMPORTS JaMControlDefs,JaMExecDefs,JaMStackDefs,JaMStartDefs,JaMVMDefs, Inline EXPORTS JaMScannerDefs = BEGIN OPEN JaMStackDefs,JaMMasterDefs; -- StreamToken is the version of the scanner that takes -- a stream as its input. StreamToken: PUBLIC PROCEDURE [streamobject: StreamType Object,sStack:Stack] RETURNS [BOOLEAN] = BEGIN -- This is the body of the stream token routine. charcount _ 0; ivalue _ 0; token _ FALSE; NameFlag _ FALSE; positive _ TRUE; streamsource _ streamobject; state _ NuetSt; stack _ sStack; string _ [streamsource.litflag,StringType[0,0,]]; DO OPEN stream:streamsource.SHandle; IF stream.endof[@stream] THEN BEGIN charclass _ Eos; IF @stream # JaMStartDefs.GetJaMStream[] THEN stream.destroy[@stream] ELSE stream.reset[@stream]; END ELSE BEGIN char _ stream.get[@stream]; charclass _ CharacterClass[char]; END; StreamDispatch [state][charclass][]; IF token THEN RETURN [NameFlag]; ENDLOOP; END; -- StringToken is the version of the scanner that takes -- a string as its input. StringToken: PUBLIC PROCEDURE [stringobject:StringType Object,sStack:Stack] RETURNS [BOOLEAN] = BEGIN -- This is the body of the string token routine. curcnt: CARDINAL_0; tsoff: CARDINAL_4; tempstring: STRING _ [8]; charcount _ 0; ivalue _ 0; token _FALSE; NameFlag _ FALSE; positive_TRUE; state _ NuetSt; stringsource_stringobject; stack_sStack; string_[stringsource.litflag,StringType[0,0,]]; JaMVMDefs.GetCharsVM[stringsource.Address,stringsource.Offset, @tempstring.text,0,8]; DO IF charcount >= stringsource.Length THEN charclass _ Eos ELSE BEGIN -- the buffering scheme implemented here is not logically -- necessary but is here for efficiency. char _ tempstring[curcnt]; IF (curcnt_curcnt+1) >= 8 THEN BEGIN JaMVMDefs.GetCharsVM[stringsource.Address+tsoff,stringsource.Offset, @tempstring.text,0,8]; tsoff_tsoff+4; curcnt_0; END; charclass _ CharacterClass[char]; END; charcount _ charcount + 1; StringDispatch [state][charclass][]; IF token THEN RETURN [NameFlag]; ENDLOOP; END; CR: CHARACTER = Ascii.CR; stringsource: StringType Object; streamsource: StreamType Object; stack: Stack; ClassRange: TYPE = [0..10]; State: TYPE = [0..8]; Dlm: ClassRange = 0; -- delimiter (SP,TAB,LF,comma) Sgn: ClassRange = 1; -- Sign (+ or -) Dot: ClassRange = 2; -- Period (.) Num: ClassRange = 3; -- numeric Oth: ClassRange = 4; -- other Lpr: ClassRange = 5; -- ( Rpr: ClassRange = 6; -- ) Trl: ClassRange = 7; -- Bravo trailer (^Z) Crt: ClassRange = 8; -- CR Eos: ClassRange = 9; -- end of source Esc: ClassRange = 10; -- escape CharClassIndex: TYPE = CHARACTER[0C..177C]; CharClassArray: TYPE = ARRAY CharClassIndex OF ClassRange; charclassarray: CharClassArray; CharacterClass: PROCEDURE[c: CHARACTER] RETURNS[ClassRange] = INLINE BEGIN RETURN[charclassarray[c]] END; InitCharClasses: PROCEDURE = BEGIN OPEN Ascii; c: CHARACTER; class: POINTER TO CharClassArray=@charclassarray; FOR c IN[0C..177C] DO class[c]_Oth ENDLOOP; -- default is Oth FOR c IN['0..'9] DO class[c]_Num ENDLOOP; -- Numeric (0 to 9) class[SP]_Dlm; class[TAB]_Dlm; class[LF]_Dlm; class[',]_Dlm; -- Delimiters class['+]_Sgn; class['-]_Sgn; -- Signs]_ plus and minus class['.]_Dot; -- Period class['(]_Lpr; class[')]_Rpr; -- Parentheses class[ControlZ]_Trl; -- Bravo formatting delimiter class[CR]_Crt; -- Return and end of Bravo formatting class[ESC]_Esc; -- Escape END; NuetSt:State = 0; PreNum:State = 1; PreFrt:State = 2; ValInt:State = 3; ValRel:State = 4; ValFrt:State = 5; Ident :State = 6; StrLit:State = 7; BravoS:State = 8; StringDispatch: ARRAY State OF ARRAY ClassRange OF PROCEDURE_ -- Dlm Sgn Dot Num Oth Lpr Rpr Trl Crt Eos Esc -- --NuetSt-- [[Null,PrNm,Frc1,Vl1I,Nam1,StBl,SErr,BrvF,Null,RetN,Null], --PreNum-- [NsR0,Name,Frac,VlIn,Name,NsR1,SErr,NsR1,NsR0,NsR1,NsR0], --PreFrt-- [NsR0,Name,Name,VlF2,Name,NsR1,SErr,NsR1,NsR0,NsR1,NsR0], --ValInt-- [NIR0,Name,VlF1,VlIn,Name,NIR1,SErr,NIR1,NIR0,NIR1,NIR0], --ValRel-- [NFR0,Name,VlFR,VlRl,Name,NFR1,SErr,NFR1,NFR0,NFR1,NFR0], --ValFrt-- [NFR0,Name,Name,VlFr,Name,NFR1,SErr,NFR1,NFR0,NFR1,NFR0], --Ident -- [NMR0,Name,Name,Name,Name,NMR1,SErr,NMR1,NMR0,NMR1,NMR0], --StrLit-- [Null,Null,Null,Null,Null,StC1,CDR0,BrvF,Null,SErr,Null], --BravoS-- [Null,Null,Null,Null,Null,Null,Null,Null,NNR0,NNR0,Null]]; -- The following are the action routines associated with the above state transitions. Null:PROCEDURE=(600)\1059b10B158b12B68b11B728b12B68b11B3339b1B86b1B1b4B BEGIN END;l4268 PrNm:PROCEDURE=\1b4B BEGIN state_PreNum; valnum _ FALSE; Mark[]; IF char= '- THEN positive _ FALSE; END;l4268 Frac:PROCEDURE=\1b4B BEGIN state_PreFrt; dval_rval; rvalue _ ivalue; valnum _ FALSE; END;l4268 Frc1:PROCEDURE=\1b4B BEGIN Mark[]; Frac[]; END;l4268 VlF1:PROCEDURE=\1b4B BEGIN state_ValFrt; dval_rval; rvalue_ivalue; END;l4268 VlF2:PROCEDURE=\b5B BEGIN state_ValFrt; rvalue_ivalue; dval_rval; valnum _ TRUE; AppendFractDigit[char]; END;l4268 VlFr:PROCEDURE=\1b4B BEGIN valnum _ TRUE; AppendFractDigit[char]; END;l4268 VlIn:PROCEDURE=\1b4B BEGIN state_ValInt; valnum _ TRUE; IF ~ AppendDigit[char] THEN state_ValRel; END;l4268 VlRl:PROCEDURE=\b5B BEGIN AppendRealDigit[char]; END;l4268 VlFR:PROCEDURE=\b5B BEGIN state_ValFrt; END;l4268 Vl1I:PROCEDURE=\1b4B BEGIN Mark[]; VlIn[]; END;l4268 StBl:PROCEDURE=\1b4B BEGIN state_StrLit; nestcount_1; Mark[]; END;l4268 NsR0:PROCEDURE=\1b4B BEGIN ReturnStringRemainder[0]; ReturnStringToken[]; END;l4268 NsR1:PROCEDURE=\1b4B BEGIN ReturnStringRemainder[1]; ReturnStringToken[]; END;l4268 NMR0:PROCEDURE=\1b4B BEGIN ReturnStringRemainder[0]; ReturnStringToken[]; END;l4268 NMR1:PROCEDURE=\1b4B BEGIN ReturnStringRemainder[1]; ReturnStringToken[]; END;l4268 Name:PROCEDURE=\1b4B BEGIN state_Ident; END;l4268 Nam1:PROCEDURE=\1b4B BEGIN state_Ident; Mark[]; END;l4268 BrvF:PROCEDURE=\b5B BEGIN savestate_state; state_BravoS; END;l4268 NIR0:PROCEDURE=\b5B BEGIN ReturnStringRemainder[0]; ReturnNumber[]; END;l4268 NIR1:PROCEDURE=\1b4B BEGIN ReturnStringRemainder[1]; ReturnNumber[]; END;l4268 NFR0:PROCEDURE=\2b4B BEGIN ReturnStringRemainder[0]; ReturnReal[]; END;l4268 NFR1:PROCEDURE=\1b4B BEGIN ReturnStringRemainder[1]; ReturnReal[]; END;l4268 StC1:PROCEDURE=\2b4B BEGIN nestcount _ nestcount +1; END;l4268 CDR0:PROCEDURE=\2b4B BEGIN nestcount _ nestcount - 1; IF nestcount = 0 THEN BEGIN ReturnStringRemainder[0]; ReturnStringLitToken[]; END; END;l4268 NNR0:PROCEDURE=\1b4B BEGIN state_savestate; END;l4268 RetN:PROCEDURE=\1b4B BEGIN ReturnNull[]; END;l4268 SErr:PROCEDURE=\1b4B BEGIN BadSyntax[]; END;l4268 StreamDispatch: ARRAY State OF ARRAY ClassRange OF PROCEDURE_ -- Dlm Sgn Dot Num Oth Lpr Rpr Trl Crt Eos Esc -- --NuetSt-- [[NullS,PrNmS,Frc1S,Vl1IS,Nam1S,StBlS,SErrS,BrvFS,NsPsS,RetNS,NullS], --PreNum-- [NsR0S,NameS,FracS,VlInS,NameS,NsR1S,SErrS,NsR1S,NsR0S,NsRS ,NsR0S], --PreFrt-- [NsR0S,NameS,NameS,VlF2S,NameS,NsR1S,SErrS,NsR1S,NsR0S,NsRS ,NsR0S], --ValInt-- [NIR0S,NameS,VlF1S,VlInS,NameS,NIR1S,SErrS,NIR1S,NIR0S,NIRS ,NIR0S], --ValRel-- [NFR0S,NameS,VlFRS,VlRlS,NameS,NFR1S,SErrS,NFR1S,NFR0S,NFRS ,NFR0S], --ValFrt-- [NFR0S,NameS,NameS,VlFrS,NameS,NFR1S,SErrS,NFR1S,NFR0S,NFRS ,NFR0S], --Ident -- [NMR0S,NameS,NameS,NameS,NameS,NMR1S,SErrS,NMR1S,NMR0S,NMRS ,NMR0S], --StrLit-- [StLtS,StLtS,StLtS,StLtS,StLtS,StC1S,CDR0S,BrvFS,StLtS,SErr ,NullS], --BravoS-- [NullS,NullS,NullS,NullS,NullS,NullS,NullS,NullS,NNR0S,NNR0S,NullS]]; -- The following are the action routines associated with the above state transitions. NullS: PROCEDURE =\877b1B86b6B BEGIN END;l4268 NsPsS:PROCEDURE=\2b5B BEGIN ReturnStreamRemainder[0]; token_TRUE; NameFlag _ TRUE; END;l4268 PrNmS:PROCEDURE=\1b5B BEGIN state_PreNum; valnum _ FALSE; SMark[]; StoreChar[]; IF char= '- THEN positive _ FALSE; END;l4268 FracS:PROCEDURE=\1b5B BEGIN state_PreFrt; rvalue _ ivalue; dval_rval; valnum _ FALSE; StoreChar[]; END;l4268 Frc1S:PROCEDURE=\1b5B BEGIN SMark[]; FracS[]; END;l4268 VlF2S:PROCEDURE=\b6B BEGIN state_ValFrt; rvalue_ivalue; dval_rval; valnum _ TRUE; AppendFractDigit[char]; StoreChar[]; END;l4268 VlF1S:PROCEDURE=\1b5B BEGIN state_ValFrt; rvalue_ivalue; dval_rval; StoreChar[]; END;l4268 VlFrS:PROCEDURE=\1b5B BEGIN valnum _ TRUE; AppendFractDigit[char]; StoreChar[]; END;l4268 VlInS:PROCEDURE=\1b5B BEGIN state_ValInt; valnum _ TRUE; StoreChar[]; IF ~ AppendDigit[char] THEN state_ValRel; END;l4268 VlRlS:PROCEDURE=\b5B BEGIN AppendRealDigit[char]; END;l4268 VlFRS:PROCEDURE=\b6B BEGIN state_ValFrt; StoreChar[]; END;l4268 Vl1IS:PROCEDURE=\1b5B BEGIN SMark[]; VlInS[]; END;l4268 StBlS:PROCEDURE=\1b5B BEGIN state_StrLit; nestcount_1; SMark[]; END;l4268 NsRS:PROCEDURE=\1b4B BEGIN ReturnStreamToken[]; END;l4268 NsR0S:PROCEDURE=\1b5B BEGIN ReturnStreamRemainder[0]; ReturnStreamToken[]; END;l4268 NsR1S:PROCEDURE=\1b5B BEGIN ReturnStreamRemainder[1]; ReturnStreamToken[]; END;l4268 NMRS:PROCEDURE=\1b4B BEGIN ReturnStreamToken[]; END;l4268 NMR0S:PROCEDURE=\1b5B BEGIN ReturnStreamRemainder[0]; ReturnStreamToken[]; END;l4268 NMR1S:PROCEDURE=\1b5B BEGIN ReturnStreamRemainder[1]; ReturnStreamToken[]; END;l4268 NameS:PROCEDURE=\1b5B BEGIN state_Ident; StoreChar[]; END;l4268 Nam1S:PROCEDURE=\1b5B BEGIN state_Ident; SMark[]; StoreChar[]; END;l4268 BrvFS:PROCEDURE=\b6B BEGIN savestate_state; state_BravoS; END;l4268 NIRS:PROCEDURE=\1b4B BEGIN ReturnNumber[]; END;l4268 NIR0S:PROCEDURE=\1b5B BEGIN ReturnStreamRemainder[0]; ReturnNumber[]; END;l4268 NIR1S:PROCEDURE=\1b5B BEGIN ReturnStreamRemainder[1]; ReturnNumber[]; END;l4268 NFRS:PROCEDURE=\1b4B BEGIN ReturnReal[]; END;l4268 NFR0S:PROCEDURE=\1b5B BEGIN ReturnStreamRemainder[0]; ReturnReal[]; END;l4268 NFR1S:PROCEDURE=\1b5B BEGIN ReturnStreamRemainder[1]; ReturnReal[]; END;l4268 StLtS:PROCEDURE=\1b5B BEGIN StoreChar[]; END;l4268 StC1S:PROCEDURE=\2b5B BEGIN nestcount _ nestcount +1; StoreChar[]; END;l4268 CDR0S:PROCEDURE=\2b5B BEGIN nestcount _ nestcount - 1; IF nestcount = 0 THEN BEGIN ReturnStreamRemainder[0]; ReturnStreamLitToken[]; END ELSE StoreChar[]; END;l4268 NNR0S:PROCEDURE=\1b5B BEGIN state_savestate; END;l4268 RetNS:PROCEDURE=\1b5B BEGIN ReturnNull[]; END;l4268 SErrS:PROCEDURE=\1b5B BEGIN BadSyntax[]; END;l4268 --These support the above dispatch functions: ReturnNumber: PROCEDURE =\50b14B BEGIN IF ~positive THEN ivalue _ -ivalue; IF ivalue < 32768 AND ivalue >= -LONG[32768] THEN BEGIN Push[[lit,IntegerType[LowHalf[ivalue]]],stack]; END ELSE BEGIN Push[[lit,LongIntegerType[ivalue]],stack]; END; token_TRUE; NameFlag_FALSE; END;l4268 ReturnReal: PROCEDURE =\b13B BEGIN Push[[lit,RealType[IF ~positive THEN -rvalue ELSE rvalue]],stack]; token_TRUE; END;l4268 LowHalf: PROCEDURE [li: LONG INTEGER] RETURNS [INTEGER] = INLINE BEGIN RETURN[Inline.LowHalf[li]] END; ReturnNull: PROCEDURE =\1b7B102b10B BEGIN ob:NullType Object; ob _ [nolit,NullType[]]; Push[ob,stack]; token_TRUE; END;l4268 Mark: PROCEDURE = INLINE\1b4B BEGIN savecount_charcount; END;l4268 SMark: PROCEDURE = INLINE\1b5B BEGIN [string.Address,string.Offset]_JaMVMDefs.AllocateCharsVM[0]; string.Length_0; END;l4268 StoreChar: PROCEDURE = INLINE\1b9B BEGIN JaMVMDefs.PutCharVM[char,string.Address,string.Offset,string.Length]; string.Length _ string.Length+1; END;l4268 ReturnStreamRemainder:PROCEDURE [lst:[0..1] ] =\2b21B BEGIN IF lst = 1 THEN streamsource.SHandle.putback[streamsource.SHandle,char]; Push[streamsource,stack]; IF char = CR THEN Push[prompt,stack]; END;l4268 ReturnStringRemainder:PROCEDURE [lst:[0..1] ] = BEGIN ofst: CARDINAL_stringsource.Offset+charcount-lst; string.Length _ stringsource.Length-(charcount-lst); string.Address _ stringsource.Address+Inline.BITSHIFT[ofst,-1]; string.Offset _ Inline.BITAND[ofst,1]; Push[string,stack]; END; ReturnStreamToken:PROCEDURE=\b22B272bv17BV BEGIN [,]_JaMVMDefs.AllocateCharsVM[string.Length]; Push[string,stack]; NameFlag _ TRUE; token_TRUE; END;l4268 ReturnStreamLitToken:PROCEDURE=\1bv20BV BEGIN st:StringType Object_ [lit,StringType[ Offset: string.Offset, Length: string.Length, Address: string.Address]]; [,]_JaMVMDefs.AllocateCharsVM[string.Length]; Push[st,stack]; token_TRUE; END;l4268 ReturnStringToken:PROCEDURE= BEGIN st:StringType Object _ [nolit,StringType[,,]]; ofst:CARDINAL_stringsource.Offset + savecount-1; st.Length _ charcount - savecount; st.Address _ stringsource.Address + Inline.BITSHIFT[ofst,-1]; st.Offset _ Inline.BITAND[ofst,1]; Push[st,stack]; NameFlag _ TRUE; token_TRUE; END; ReturnStringLitToken: PROCEDURE = BEGIN st:StringType Object _ [lit,StringType[,,]]; ofst:CARDINAL_stringsource.Offset + savecount; st.Length _ charcount - savecount-1; st.Address _ stringsource.Address + Inline.BITSHIFT[ofst,-1]; st.Offset _ Inline.BITAND[ofst,1]; Push[st,stack]; token_TRUE; END; AppendDigit: PROCEDURE [d:CHARACTER] RETURNS [b:BOOLEAN] = --INLINE?--\b1v17BV306b1v20BV289b11B BEGIN MaxVal:LONG INTEGER = 214748364; MaxD: INTEGER = 7; v:INTEGER _ LOOPHOLE[d - '0]; SELECT ivalue FROM < MaxVal =>BEGIN ivalue _ ivalue*10+v; RETURN[TRUE]; END; = MaxVal => IF v <= MaxD THEN BEGIN ivalue _ ivalue*10+v; RETURN[TRUE]; END ELSE IF ~positive AND v = 8 THEN BEGIN ivalue _ 20000000000B; positive _ TRUE; RETURN [TRUE]; END ELSE BEGIN rivalue:REAL_ivalue; rv:REAL_v; rvalue_rivalue*10+rv; RETURN [FALSE]; END;l4268 > MaxVal =>l4268 BEGIN rivalue:REAL_ivalue; rv:REAL_v; rvalue_rivalue*10+rv; RETURN [FALSE]; END;l6808 ENDCASE; END;l4268 AppendRealDigit: PROCEDURE [d:CHARACTER] = INLINE\b16B BEGIN v:REAL _ d - '0; rvalue _ rvalue*10 + v; END;l4268 AppendFractDigit: PROCEDURE [d:CHARACTER] = INLINE\2b16B BEGIN v:REAL _ d - '0; rvalue _ rvalue + v*dval; dval_dval*rval; END;l4268 ConvertOverflow:PROCEDURE =\1b15B BEGIN JaMExecDefs.JaMError[OverFlowErr,TRUE]; END;l4268 BadSyntax:PROCEDURE =\1b9B BEGIN JaMExecDefs.JaMError[SyntaxErr,TRUE]; END;l4268 Token: PUBLIC PROCEDURE = BEGIN frame:Frame _JaMControlDefs.GetCurrentFrame[]; ob: Object _ Pop[frame.opstk]; WITH dob:ob SELECT FROM StringType => BEGIN strng:StringType Object _ dob; []_StringToken[strng,frame.opstk]; NullChk[]; END; StreamType =>BEGIN strm:StreamType Object _ dob; []_StreamToken[strm,frame.opstk]; NullChk[]; END; ENDCASE =>BEGIN JaMExecDefs.JaMError[TypeErr,TRUE]; END; END; NullChk: PUBLIC PROCEDURE = BEGIN frame:Frame _JaMControlDefs.GetCurrentFrame[]; ob: Object _ Top[frame.opstk]; WITH dob:ob SELECT FROM NullType => BEGIN []_Pop[frame.opstk]; Push[false,frame.opstk]; END; ENDCASE =>BEGIN Push[true,frame.opstk]; END; END; -- Some global variables known to the routines. rvalue:REAL; -- initial value for scanned real. ivalue:LONG INTEGER _ 0; -- initial value for scanned integer. string:StringType Object; TypeErr:StringType Object; SyntaxErr:StringType Object; OverFlowErr:StringType Object; prompt:StringType Object; true:BooleanType Object _ [lit,BooleanType[BooleanVal:TRUE]]; false:BooleanType Object _ [lit,BooleanType[BooleanVal:FALSE]]; charclass:ClassRange; --Current Character Class state:State; --Current State savestate:State; --Saved state for Bravo format nestcount:INTEGER _0; --Balanced parentheses count. char:CHARACTER _ 40C; --Set Up Initially with SP. charcount,savecount:CARDINAL _ 0; --Character positions in string. positive:BOOLEAN_ TRUE; --the sign of the current number being scanned. rval:REAL=.1; --constant used for real conversion. dval:REAL_rval; valnum:BOOLEAN _ TRUE; --valid number indicator. token:BOOLEAN _TRUE; NameFlag:BOOLEAN _ TRUE; StartScanner: PROCEDURE = BEGIN OPEN JaMControlDefs; --Set up string objects NotifyStringObject[@TypeErr, ".typechk"L]; NotifyStringObject[@SyntaxErr, ".syntaxerr"L]; NotifyStringObject[@OverFlowErr, ".overflow"L]; NotifyStringObject[@prompt, ".prompt"L]; -- and one command NotifyCommand[".token"L,Token]; END; -- Initialization InitCharClasses; StartScanner; END. DKW March 28, 1980 4:52 PM added StartScanner\1b5B420b8B1223b12B DKW March 28, 1980 4:52 PM added names for character classes, InitCharClasses DKW April 1, 1980 3:50 PM now uses NotifyCommand, NotifyStringObject DKW May 31, 1980 1:08 AM updated for Mesa6 DKW July 14, 1980 11:21 PM uses InlineDefs.LowHalf instead of MACHINE CODE DKW December 8, 1980 12:16 PM InlineDefs => Inline, IODefs => Ascii several procedures made INLINE