DIRECTORY Ascii USING [ControlZ, CR, LF, SP, TAB], JaMBasic USING [Object], JaMInternal USING [Frame, Stack], JaMOps USING [Array, Assert, AStore, Bug, CountToMark, Install, InstallReason, MakeName, MakeString, Pop, Push, RegisterExplicit, StringToName, SubString, Text], JaMStorage, JaMScanner, JaMVM USING [GetChar], StreamDefs USING [StreamHandle]; JaMScannerImpl: PROGRAM IMPORTS JaMOps, JaMScanner, JaMStorage, JaMVM EXPORTS JaMOps, JaMScanner = { OPEN VM:JaMVM, JaMScanner, JaMStorage, JaMOps, JaMInternal, JaMBasic; zone: UNCOUNTED ZONE = Zone[]; classArray: LONG POINTER TO ClassArray _ NIL; overflow: PUBLIC name Object; CharClass: PROC[char: CHARACTER] RETURNS[Class] = INLINE { RETURN[classArray[IF char<200C THEN char ELSE char-200B]] }; InitClassArray: PROC = { Assert[classArray=NIL]; classArray _ zone.NEW[ClassArray _ ALL[oth]]; -- default is oth FOR c: CHARACTER IN ['0..'9] DO classArray[c] _ num ENDLOOP; -- Numeric (0 to 9) classArray[0C] _ dlm; classArray[Ascii.SP] _ dlm; classArray[Ascii.TAB] _ dlm; classArray[Ascii.LF] _ dlm; classArray[',] _ dlm; -- Delimiters classArray['.] _ dot; -- Period, decimal point classArray['+] _ classArray['-] _ sgn; -- Signs classArray['B] _ classArray['b] _ oct; -- Octal suffix classArray['E] _ classArray['e] _ exp; -- Exponent classArray['(] _ lpr; -- Left paren classArray[')] _ rpr; -- Right paren classArray['{] _ lbr; -- Left brace classArray['}] _ rbr; -- Right brace classArray['"] _ quo; -- String quote classArray['\\] _ bsl; -- Backslash; escape char in quoted strings classArray['%] _ com; -- Comment classArray[Ascii.CR] _ crt; -- Return; end of comment }; FreeClassArray: PROC = { Assert[classArray#NIL]; zone.FREE[@classArray]; }; StreamToken: PUBLIC PROC[frame: Frame, stream: StreamHandle] RETURNS[found,error: BOOLEAN, token: Object] = { stack: Stack _ frame.opstk; nest: CARDINAL _ 0; localtext: STRING _ [255]; text: TextObject _ TextNew[localtext]; found _ error _ FALSE; DO ENABLE UNWIND => TextFree[@text]; -- token loop state: State _ start; TextReset[@text]; IF error THEN state _ aend -- finish all pending arrays ELSE UNTIL state IN FinalState DO -- character loop char: CHARACTER; class: Class; action: Action; IF stream.endof[stream] THEN { char _ 0C; class _ eos } -- end of source ELSE { char _ stream.get[stream]; class _ CharClass[char] }; SELECT char FROM Ascii.ControlZ => class _ com; -- treat Bravo trailer as comment 0C => class _ eos; -- treat null as end of source; for Tioga ENDCASE; [state,action] _ Transition[state,class]; -- do a state transition SELECT action FROM -- deal with character null,skip => { }; take => { TextPut[@text,char] }; back => { stream.putback[stream,char] }; ENDCASE => ERROR Bug; ENDLOOP; SELECT state FROM null => IF nest>0 THEN { error _ TRUE; LOOP } ELSE EXIT; inum => token _ MakeInteger[TextRead[@text]]; onum => token _ MakeOctal[TextRead[@text]]; rnum => token _ MakeReal[TextRead[@text]]; name,oops => token _ MakeName[TextRead[@text]]; strg => token _ MakeString[TextRead[@text]]; strg2 => { txt: Text; [txt,error] _ RemoveEscapes[TextRead[@text]]; token _ MakeString[txt] }; part => { error _ TRUE; token _ MakeString[TextRead[@text]] }; abeg => { nest _ nest + 1; token _ [L,mark[]] }; aend => IF nest>0 THEN { size: CARDINAL _ CountToMark[stack]; -- count number of elements array: array Object _ Array[size]; -- allocate array AStore[stack,array]; -- package it up Assert[Pop[stack].type=mark]; -- remove mark token _ array; nest _ nest - 1 } ELSE token _ MakeName[TextRead[@text]]; ENDCASE; IF nest>0 THEN Push[stack,token] ELSE { found _ TRUE; EXIT }; ENDLOOP; -- token loop TextFree[@text]; RETURN[found,error,token]; }; StringToken: PUBLIC PROC[frame: Frame, ob: string Object] RETURNS[found,error: BOOLEAN, token: Object, rem: string Object] = { stack: Stack _ frame.opstk; i: CARDINAL _ 0; substr: string Object; nest: CARDINAL _ 0; beg: CARDINAL _ 0; len: CARDINAL _ 0; found _ error _ FALSE; DO -- token loop state: State _ start; beg _ i; len _ 0; IF error THEN state _ aend -- finish all pending arrays ELSE UNTIL state IN FinalState DO -- character loop char: CHARACTER; class: Class; action: Action; IF i>=ob.length THEN { char _ 0C; class _ eos } -- end of source ELSE { char _ VM.GetChar[ob,i]; class _ CharClass[char]; i _ i + 1 }; [state,action] _ Transition[state,class]; -- do a state transition SELECT action FROM -- deal with character skip => { beg _ i }; null => { }; take => { len _ len + 1 }; back => { i _ i - 1 }; ENDCASE => ERROR Bug; ENDLOOP; substr _ SubString[ob,beg,len]; SELECT state FROM null => IF nest>0 THEN { error _ TRUE; LOOP } ELSE EXIT; inum => token _ StringToInteger[substr]; onum => token _ StringToOctal[substr]; rnum => token _ StringToReal[substr]; name,oops => { token _ StringToName[substr]; token.tag _ X }; strg => { token _ substr; token.tag _ L }; part,strg2 => { error _ TRUE; token _ substr; token.tag _ L }; abeg => { nest _ nest + 1; token _ [L,mark[]] }; aend => IF nest>0 THEN { len: CARDINAL _ CountToMark[stack]; -- count number of elements array: array Object _ Array[len]; -- allocate array AStore[stack,array]; -- package it up Assert[Pop[stack].type=mark]; -- remove mark token _ array; nest _ nest - 1 } ELSE { token _ StringToName[substr]; token.tag _ X }; ENDCASE; IF nest>0 THEN Push[stack,token] ELSE { found _ TRUE; EXIT }; ENDLOOP; -- token loop RETURN[found,error,token,SubString[ob,i,ob.length-i]]; }; Transition: PROC[state: State, class: Class] RETURNS[State,Action] = INLINE { SELECT state FROM start => SELECT class FROM eos => { RETURN[null,null] }; -- no more tokens crt,dlm => { RETURN[start,skip] }; -- ignore com => { RETURN[comnt,skip] }; -- begin comment quo => { RETURN[qstr,skip] }; -- begin string with next char lpr => { RETURN[paren,skip] }; -- begin string with next char rpr => { RETURN[oops,take] }; -- extra right paren lbr => { RETURN[abeg,take] }; -- begin array rbr => { RETURN[aend,take] }; -- end array num => { RETURN[ipart,take] }; -- begin integer sgn => { RETURN[msign,take] }; -- mantissa may follow dot => { RETURN[point,take] }; -- fraction may follow oct,exp,bsl,oth => { RETURN[ident,take] }; -- begin a name ENDCASE; comnt => SELECT class FROM eos => { RETURN[null,null] }; -- no more tokens crt => { RETURN[start,skip] }; -- end of comment dlm,com,lpr,rpr,lbr,rbr,num,sgn,dot,oct,exp,bsl,quo,oth => { RETURN[comnt,skip] }; -- ignore ENDCASE; msign => SELECT class FROM eos,crt,dlm => { RETURN[name,null] }; -- finish as a name com,lpr,rpr,lbr,rbr,quo => { RETURN[name,back] }; -- back up and finish num => { RETURN[ipart,take] }; -- begin integer dot => { RETURN[point,take] }; -- fraction may follow sgn,oct,exp,bsl,oth => { RETURN[ident,take] }; -- not a number ENDCASE; point => SELECT class FROM eos,crt,dlm => { RETURN[name,null] }; -- finish as a name com,lpr,rpr,lbr,rbr,quo => { RETURN[name,back] }; -- back up and finish num => { RETURN[fpart,take] }; -- begin fraction dot,sgn,oct,exp,bsl,oth => { RETURN[ident,take] }; -- not a number ENDCASE; ipart => SELECT class FROM eos,crt,dlm => { RETURN[inum,null] }; -- finish integer com,lpr,rpr,lbr,rbr,quo => { RETURN[inum,back] }; -- back up and finish integer num => { RETURN[ipart,take] }; -- add integer digit dot => { RETURN[fpart,take] }; -- fraction follows oct => { RETURN[octal,take] }; -- octal number exp => { RETURN[expon,take] }; -- exponent follows sgn,bsl,oth => { RETURN[ident,take] }; -- not a number ENDCASE; octal => SELECT class FROM eos,crt,dlm => { RETURN[onum,null] }; -- finish octal com,lpr,rpr,lbr,rbr,quo => { RETURN[onum,back] }; -- back up and finish octal num,dot,sgn,oct,exp,bsl,oth => { RETURN[ident,take] }; -- not a number ENDCASE; fpart => SELECT class FROM eos,crt,dlm => { RETURN[rnum,null] }; -- finish real com,lpr,rpr,lbr,rbr,quo => { RETURN[rnum,back] }; -- back up and finish real num => { RETURN[fpart,take] }; -- add fraction digit exp => { RETURN[expon,take] }; -- exponent follows dot,sgn,oct,bsl,oth => { RETURN[ident,take] }; -- not a number ENDCASE; expon => SELECT class FROM eos,crt,dlm => { RETURN[name,null] }; -- finish as a name com,lpr,rpr,lbr,rbr,quo => { RETURN[name,back] }; -- back up and finish num => { RETURN[epart,take] }; -- first exponent digit sgn => { RETURN[esign,take] }; -- exponent sign; digit should follow dot,oct,exp,bsl,oth => { RETURN[ident,take] }; -- not a number ENDCASE; esign => SELECT class FROM eos,crt,dlm => { RETURN[name,null] }; -- finish as a name com,lpr,rpr,lbr,rbr,quo => { RETURN[name,back] }; -- back up and finish num => { RETURN[epart,take] }; -- first exponent digit sgn,dot,oct,exp,bsl,oth => { RETURN[ident,take] }; -- not a number ENDCASE; epart => SELECT class FROM eos,crt,dlm => { RETURN[rnum,null] }; -- finish real com,lpr,rpr,lbr,rbr,quo => { RETURN[rnum,back] }; -- back up and finish real num => { RETURN[epart,take] }; -- add exponent digit sgn,dot,oct,exp,bsl,oth => { RETURN[ident,take] }; -- not a number ENDCASE; ident => SELECT class FROM eos,crt,dlm => { RETURN[name,null] }; -- finish name com,lpr,rpr,lbr,rbr,quo => { RETURN[name,back] }; -- back up and finish name num,sgn,dot,oct,exp,bsl,oth => { RETURN[ident,take] }; -- append ENDCASE; qstr => SELECT class FROM eos => { RETURN[part,null] }; -- premature end bsl => { RETURN[qstr3,take] }; -- read char after \ quo => { RETURN[strg,null] }; -- finish string crt,dlm,com,lbr,rbr,num,sgn,dot,oct,exp,lpr,rpr,oth => { RETURN[qstr,take] }; -- append ENDCASE; qstr2 => SELECT class FROM eos => { RETURN[part,null] }; -- premature end bsl => { RETURN[qstr3,take] }; -- read char after \ quo => { RETURN[strg2,null] }; -- finish string with \'s crt,dlm,com,lbr,rbr,num,sgn,dot,oct,exp,lpr,rpr,oth => { RETURN[qstr2,take] }; -- append ENDCASE; qstr3 => SELECT class FROM eos => { RETURN[part,null] }; -- premature end crt,dlm,com,lbr,rbr,num,sgn,dot,oct,exp,lpr,rpr,bsl,quo,oth => { RETURN[qstr2,take] }; -- append ENDCASE; paren => SELECT class FROM eos => { RETURN[part,null] }; -- premature end lpr => { RETURN[SUCC[paren],take] }; -- up a level rpr => { RETURN[strg,null] }; -- finish string crt,dlm,com,lbr,rbr,num,sgn,dot,oct,exp,quo,bsl,oth => { RETURN[paren,take] }; -- append ENDCASE; ENDCASE => SELECT class FROM eos => { RETURN[part,null] }; -- premature end lpr => { RETURN[SUCC[state],take] }; -- open rpr => { RETURN[PRED[state],take] }; -- close crt,dlm,com,lbr,rbr,num,sgn,dot,oct,exp,quo,bsl,oth => { RETURN[state,take] }; -- append ENDCASE; ERROR Bug; -- unknown state or class }; LineComplete: PUBLIC PROC[text: Text] RETURNS[BOOLEAN] = { nest: CARDINAL _ 0; i: CARDINAL _ 0; DO -- token loop state: State _ start; UNTIL state IN FinalState DO -- character loop char: CHARACTER; class: Class; action: Action; IF i>=text.length THEN { char _ 0C; class _ eos } -- end of source ELSE { class _ CharClass[char _ text[i]]; i _ i + 1 }; [state,action] _ Transition[state,class]; -- do a state transition SELECT action FROM -- deal with character null => { }; skip => { --beg _ beg + 1-- }; take => { --len _ len + 1-- }; back => { i _ i - 1 }; ENDCASE => ERROR Bug; ENDLOOP; SELECT state FROM null => EXIT; part => RETURN[FALSE]; -- incomplete: inside a string abeg => { nest _ nest + 1 }; aend => { IF nest>0 THEN nest _ nest - 1 }; ENDCASE; ENDLOOP; -- token loop RETURN[nest=0]; -- complete if not inside an array }; InstallScanner: PROC[why: InstallReason, frame: Frame] = { SELECT why FROM init => InitClassArray[]; free => FreeClassArray[]; register => { overflow _ MakeName[".overflow"L]; RegisterExplicit[frame, ".token"L, JToken]; }; ENDCASE; }; Install[InstallScanner]; }. TJaMScannerImpl.mesa Original version by John Warnock, March 7, 1979 Paxton, 22-Jan-82 10:19:46 Russ Atkinson, July 22, 1983 6:39 pm This program implements a very simple lexical scanner which works in the following way. When the routine "StreamToken" or "StringToken" is called then the first token is returned on the top of stack followed by the remainder of the source at next-on-stack. Globals Procedures state and text now describe a token state and substr now describe a token So far: sign for mantissa So far: (sign and`) decimal point So far: valid integer So far: valid integer followed by "B" So far: valid mantissa containing decimal point So far: valid mantissa followed by "E" So far: valid mantissa followed by "E" and a sign So far: valid real with exponent So far: an identifier state and text now describe a token Initialization Ê J˜šœ™Jšœ0™0Jšœ™J™$—J˜šÏk ˜ Jš œœ œœœœ˜(Jšœ œ ˜Jšœ œ˜!šœ˜ Jšœ”˜”—J˜ J˜ Jšœœ ˜Jšœ œ˜ J˜—Jšœ€™€J˜šœ˜Jšœ&˜-Jšœ˜Jšœœ>˜EJ˜—šœ™J˜Jšœ œœ ˜Jš œ œœœœ˜-Jšœ œ ˜J˜—Jšœ ™ J˜š Ïn œœ œœ œ˜:Jšœ œ œœ˜Jšœ$œ ˜0šœœœ˜JšœœŸ˜@Jšœ#Ÿ˜4JšœŸ˜%JšœŸ˜,J˜ Jšœ#˜'—Jšœ˜—Jš œœœ œœ˜=JšœŸ ˜—J˜Jšœ˜J˜J˜—šž œœœ!˜9Jšœœ(˜DJ˜Jšœœ˜J˜Jšœœ˜Jšœœ˜Jšœœ˜Jšœœ˜šœŸ ˜J˜'JšœœŸ˜7š œœœ œŸ˜3Jšœ œ˜.JšœœŸ˜@Jšœ œ5˜EJšœ*Ÿ˜BšœœŸ˜)J˜J˜ J˜J˜Jšœœ˜—Jšœ˜—J˜Jšœ%™%šœ˜Jš œœœ œœœœ˜8J˜(J˜&J˜%Jšœ9œ˜=Jšœ&œ˜*Jšœœœ˜>Jšœ$œ ˜0šœœœ˜JšœœŸ˜?Jšœ"Ÿ˜3JšœŸ˜%JšœŸ˜,J˜ Jšœ-œ˜5—Jšœ˜—Jš œœœ œœ˜=JšœŸ ˜—Jšœ0˜6J˜J˜—šž œœ˜,Jšœœœ˜2šœ œ˜Jšœ œŸ˜/Jšœ œŸ ˜,Jšœ œŸ˜/Jšœ œŸ˜Jšœ˜—šœ œ˜Jšœ!™!JšœœŸ˜9JšœœŸ˜GJšœ œŸ˜0JšœœŸ˜BJšœ˜—šœ œ˜Jšœ™JšœœŸ˜7JšœœŸ˜OJšœ œŸ˜3Jšœ œŸ˜2Jšœ œŸ˜.Jšœ œŸ˜2JšœœŸ˜6Jšœ˜—šœ œ˜Jšœ%™%JšœœŸ˜5JšœœŸ˜MJšœ!œŸ˜FJšœ˜—šœ œ˜Jšœ/™/JšœœŸ˜4JšœœŸ˜LJšœ œŸ˜4Jšœ œŸ˜2JšœœŸ˜>Jšœ˜—šœ œ˜Jšœ&™&JšœœŸ˜9JšœœŸ˜GJšœ œŸ˜6Jšœ œŸ%˜DJšœœŸ˜>Jšœ˜—šœ œ˜Jšœ1™1JšœœŸ˜9JšœœŸ˜GJšœ œŸ˜6JšœœŸ˜BJšœ˜—šœ œ˜Jšœ ™ JšœœŸ˜4JšœœŸ˜LJšœ œŸ˜4JšœœŸ˜BJšœ˜—šœ œ˜Jšœ™JšœœŸ˜4JšœœŸ˜LJšœ!œŸ ˜@Jšœ˜—šœœ˜Jšœ œŸ˜.Jšœ œŸ˜3Jšœ œŸ˜.Jšœ9œŸ ˜WJšœ˜—šœ œ˜Jšœ œŸ˜.Jšœ œŸ˜3Jšœ œŸ˜8Jšœ9œŸ ˜XJšœ˜—šœ œ˜Jšœ œŸ˜.JšœAœŸ ˜`Jšœ˜—šœ œ˜Jšœ œŸ˜.Jšœ œœŸ ˜2Jšœ œŸ˜.Jšœ9œŸ ˜XJšœ˜—šœœ˜Jšœ œŸ˜.Jšœ œœŸ˜,Jšœ œœŸ˜-Jšœ9œŸ ˜XJšœ˜—JšœŸ˜$J˜J˜—š ž œœœ œœ˜:Jšœœ˜Jšœœ˜šœŸ ˜J˜šœœ œŸ˜.Jšœ œ˜.JšœœŸ˜BJšœ2˜6Jšœ*Ÿ˜BšœœŸ˜)J˜ Jšœ Ÿœ˜Jšœ Ÿœ˜J˜Jšœœ˜—Jšœ˜—Jšœ#™#šœ˜Jšœœ˜ JšœœœŸ˜5J˜Jšœ œœ˜+Jšœ˜—JšœŸ ˜—Jšœ Ÿ"˜2J˜J˜J˜—Jšœ™J˜šžœœ'œ˜JJ˜J˜˜ J˜"J˜+J˜—Jšœ˜J˜J˜—J˜J˜J˜J˜J˜—…—-P=î