-- JaMScannerImpl.mesa -- Original version by John Warnock, March 7, 1979 -- Last changed by Paxton, 20-Oct-81 14:19:39 DIRECTORY JaMBasic USING [Object], JaMInternal USING [Frame, Stack], JaMOps USING [Array, Assert, AStore, Bug, CountToMark, Error, GetStream, Install, InstallReason, KillStream, MakeName, MakeString, Pop, Push, PushBoolean, RegisterExplicit, StringToName, SubString, Text, typechk], JaMStorage, JaMVM USING [GetChar, GetText], Ascii USING [ControlZ, CR, LF, SP, TAB], Inline USING [BITSHIFT, LongNumber], Real USING [ReadReal, RealException], StreamDefs USING [StreamHandle]; -- This program implements a very simple lexical scanner which works in the -- following way. When the routine "StreamToken" or "StringToken" is called -- then the first token is returned on the top of stack followed by the -- remainder of the source at next-on-stack. JaMScannerImpl: PROGRAM IMPORTS JaMOps, JaMStorage, JaMVM, Inline, Real EXPORTS JaMOps = { OPEN VM:JaMVM, JaMStorage, JaMOps, JaMInternal, JaMBasic; -- Types StreamHandle: TYPE = StreamDefs.StreamHandle; Class: TYPE = { eos, -- end of source crt, -- CR dlm, -- delimiter (SP,TAB,LF,comma) com, -- % comment lpr, -- ( start of string token rpr, -- ) end of string token lbr, -- { start of array token rbr, -- } end of array token num, -- 0..9 numeric sgn, -- + - sign dot, -- . period exp, -- E e exponent oct, -- B b octal suffix oth -- other }; ClassIndex: TYPE = CHARACTER[0C..177C]; ClassArray: TYPE = ARRAY ClassIndex OF Class; State: TYPE = MACHINE DEPENDENT { null, -- end of source, no token found name, -- text is a name strg, -- text is a string literal, without enclosing parens inum, -- text is an integer onum, -- text is an octal number, with final "B" rnum, -- text is a real number abeg, -- text is "{" aend, -- text is "}" oops, -- text is erroneous, like ")" part, -- text is a partial string, up to end of source start, -- skipping leading delimiters comnt, -- skipping over a comment ident, -- scanning an identifier msign, -- so far: sgn ipart, -- so far: num | msign num | ipart num octal, -- so far: ipart B point, -- so far: dot | msign dot fpart, -- so far: point num | ipart dot | fpart num expon, -- so far: ipart E | fpart E esign, -- so far: expon + | expon - epart, -- so far: esign num | epart num paren, -- inside one level of parentheses -- states >paren indicate levels of nested parens (LAST[CARDINAL]) }; FinalState: TYPE = State[null..start); Action: TYPE = { null, -- do nothing (end of source or trailing char) skip, -- skip leading character take, -- append character to text back -- put character back into source }; -- Globals zone: UNCOUNTED ZONE = Zone[]; classArray: LONG POINTER TO ClassArray ← NIL; overflow: name Object; -- Procedures CharClass: PROC[char: CHARACTER] RETURNS[Class] = INLINE { RETURN[classArray[IF char<200C THEN char ELSE char-200B]] }; InitClassArray: PROC = { Assert[classArray=NIL]; classArray ← zone.NEW[ClassArray ← ALL[oth]]; -- default is oth FOR c: CHARACTER IN ['0..'9] DO classArray[c] ← num ENDLOOP; -- Numeric (0 to 9) classArray[0C] ← dlm; classArray[Ascii.SP] ← dlm; classArray[Ascii.TAB] ← dlm; classArray[Ascii.LF] ← dlm; classArray[',] ← dlm; -- Delimiters classArray['.] ← dot; -- Period, decimal point classArray['+] ← classArray['-] ← sgn; -- Signs classArray['B] ← classArray['b] ← oct; -- Octal suffix classArray['E] ← classArray['e] ← exp; -- Exponent classArray['(] ← lpr; -- Left paren classArray[')] ← rpr; -- Right paren classArray['{] ← lbr; -- Left brace classArray['}] ← rbr; -- Right brace classArray['%] ← com; -- Comment classArray[Ascii.CR] ← crt; -- Return; end of comment }; FreeClassArray: PROC = { Assert[classArray#NIL]; zone.FREE[@classArray]; }; StreamToken: PUBLIC PROC[frame: Frame, stream: StreamHandle] RETURNS[found,error: BOOLEAN, token: Object] = { stack: Stack ← frame.opstk; nest: CARDINAL ← 0; localtext: STRING ← [255]; text: TextObject ← TextNew[localtext]; found ← error ← FALSE; DO ENABLE UNWIND => TextFree[@text]; -- token loop state: State ← start; TextReset[@text]; IF error THEN state ← aend -- finish all pending arrays ELSE UNTIL state IN FinalState DO -- character loop char: CHARACTER; class: Class; action: Action; IF stream.endof[stream] THEN { char ← 0C; class ← eos } -- end of source ELSE { char ← stream.get[stream]; class ← CharClass[char] }; SELECT char FROM Ascii.ControlZ => class ← com; -- treat Bravo trailer as comment 0C => class ← eos; -- treat null as end of source; for Tioga ENDCASE; [state,action] ← Transition[state,class]; -- do a state transition SELECT action FROM -- deal with character null,skip => { }; take => { TextPut[@text,char] }; back => { stream.putback[stream,char] }; ENDCASE => ERROR Bug; ENDLOOP; -- state and text now describe a token SELECT state FROM null => IF nest>0 THEN { error ← TRUE; LOOP } ELSE EXIT; inum => token ← MakeInteger[TextRead[@text]]; onum => token ← MakeOctal[TextRead[@text]]; rnum => token ← MakeReal[TextRead[@text]]; name,oops => token ← MakeName[TextRead[@text]]; strg => token ← MakeString[TextRead[@text]]; part => { error ← TRUE; token ← MakeString[TextRead[@text]] }; abeg => { nest ← nest + 1; token ← [L,mark[]] }; aend => IF nest>0 THEN { size: CARDINAL ← CountToMark[stack]; -- count number of elements array: array Object ← Array[size]; -- allocate array AStore[stack,array]; -- package it up Assert[Pop[stack].type=mark]; -- remove mark token ← array; nest ← nest - 1 } ELSE token ← MakeName[TextRead[@text]]; ENDCASE; IF nest>0 THEN Push[stack,token] ELSE { found ← TRUE; EXIT }; ENDLOOP; -- token loop TextFree[@text]; RETURN[found,error,token]; }; StringToken: PUBLIC PROC[frame: Frame, ob: string Object] RETURNS[found,error: BOOLEAN, token: Object, rem: string Object] = { stack: Stack ← frame.opstk; i: CARDINAL ← 0; substr: string Object; nest: CARDINAL ← 0; beg: CARDINAL ← 0; len: CARDINAL ← 0; found ← error ← FALSE; DO -- token loop state: State ← start; beg ← i; len ← 0; IF error THEN state ← aend -- finish all pending arrays ELSE UNTIL state IN FinalState DO -- character loop char: CHARACTER; class: Class; action: Action; IF i>=ob.length THEN { char ← 0C; class ← eos } -- end of source ELSE { char ← VM.GetChar[ob,i]; class ← CharClass[char]; i ← i + 1 }; [state,action] ← Transition[state,class]; -- do a state transition SELECT action FROM -- deal with character skip => { beg ← i }; null => { }; take => { len ← len + 1 }; back => { i ← i - 1 }; ENDCASE => ERROR Bug; ENDLOOP; substr ← SubString[ob,beg,len]; -- state and substr now describe a token SELECT state FROM null => IF nest>0 THEN { error ← TRUE; LOOP } ELSE EXIT; inum => token ← StringToInteger[substr]; onum => token ← StringToOctal[substr]; rnum => token ← StringToReal[substr]; name,oops => { token ← StringToName[substr]; token.tag ← X }; strg => { token ← substr; token.tag ← L }; part => { error ← TRUE; token ← substr; token.tag ← L }; abeg => { nest ← nest + 1; token ← [L,mark[]] }; aend => IF nest>0 THEN { len: CARDINAL ← CountToMark[stack]; -- count number of elements array: array Object ← Array[len]; -- allocate array AStore[stack,array]; -- package it up Assert[Pop[stack].type=mark]; -- remove mark token ← array; nest ← nest - 1 } ELSE { token ← StringToName[substr]; token.tag ← X }; ENDCASE; IF nest>0 THEN Push[stack,token] ELSE { found ← TRUE; EXIT }; ENDLOOP; -- token loop RETURN[found,error,token,SubString[ob,i,ob.length-i]]; }; LineComplete: PUBLIC PROC[text: Text] RETURNS[BOOLEAN] = { nest: CARDINAL ← 0; i: CARDINAL ← 0; DO -- token loop state: State ← start; UNTIL state IN FinalState DO -- character loop char: CHARACTER; class: Class; action: Action; IF i>=text.length THEN { char ← 0C; class ← eos } -- end of source ELSE { class ← CharClass[char ← text[i]]; i ← i + 1 }; [state,action] ← Transition[state,class]; -- do a state transition SELECT action FROM -- deal with character null => { }; skip => { --beg ← beg + 1-- }; take => { --len ← len + 1-- }; back => { i ← i - 1 }; ENDCASE => ERROR Bug; ENDLOOP; -- state and text now describe a token SELECT state FROM null => EXIT; part => RETURN[FALSE]; -- incomplete: inside a string abeg => { nest ← nest + 1 }; aend => { IF nest>0 THEN nest ← nest - 1 }; ENDCASE; ENDLOOP; -- token loop RETURN[nest=0]; -- complete if not inside an array }; Transition: PROC[state: State, class: Class] RETURNS[State,Action] = INLINE { SELECT state FROM start => SELECT class FROM eos => { RETURN[null,null] }; -- no more tokens crt,dlm => { RETURN[start,skip] }; -- ignore com => { RETURN[comnt,skip] }; -- begin comment lpr => { RETURN[paren,skip] }; -- begin string with next char rpr => { RETURN[oops,take] }; -- extra right paren lbr => { RETURN[abeg,take] }; -- begin array rbr => { RETURN[aend,take] }; -- end array num => { RETURN[ipart,take] }; -- begin integer sgn => { RETURN[msign,take] }; -- mantissa may follow dot => { RETURN[point,take] }; -- fraction may follow oct,exp,oth => { RETURN[ident,take] }; -- begin a name ENDCASE; comnt => SELECT class FROM eos => { RETURN[null,null] }; -- no more tokens crt => { RETURN[start,skip] }; -- end of comment dlm,com,lpr,rpr,lbr,rbr,num,sgn,dot,oct,exp,oth => { RETURN[comnt,skip] }; -- ignore ENDCASE; msign => SELECT class FROM -- So far: sign for mantissa eos,crt,dlm => { RETURN[name,null] }; -- finish as a name com,lpr,rpr,lbr,rbr => { RETURN[name,back] }; -- back up and finish num => { RETURN[ipart,take] }; -- begin integer dot => { RETURN[point,take] }; -- fraction may follow sgn,oct,exp,oth => { RETURN[ident,take] }; -- not a number ENDCASE; point => SELECT class FROM -- So far: (sign and`) decimal point eos,crt,dlm => { RETURN[name,null] }; -- finish as a name com,lpr,rpr,lbr,rbr => { RETURN[name,back] }; -- back up and finish num => { RETURN[fpart,take] }; -- begin fraction dot,sgn,oct,exp,oth => { RETURN[ident,take] }; -- not a number ENDCASE; ipart => SELECT class FROM -- So far: valid integer eos,crt,dlm => { RETURN[inum,null] }; -- finish integer com,lpr,rpr,lbr,rbr => { RETURN[inum,back] }; -- back up and finish integer num => { RETURN[ipart,take] }; -- add integer digit dot => { RETURN[fpart,take] }; -- fraction follows oct => { RETURN[octal,take] }; -- octal number exp => { RETURN[expon,take] }; -- exponent follows sgn,oth => { RETURN[ident,take] }; -- not a number ENDCASE; octal => SELECT class FROM -- So far: valid integer followed by "B" eos,crt,dlm => { RETURN[onum,null] }; -- finish octal com,lpr,rpr,lbr,rbr => { RETURN[onum,back] }; -- back up and finish octal num,dot,sgn,oct,exp,oth => { RETURN[ident,take] }; -- not a number ENDCASE; fpart => SELECT class FROM -- So far: valid mantissa containing decimal point eos,crt,dlm => { RETURN[rnum,null] }; -- finish real com,lpr,rpr,lbr,rbr => { RETURN[rnum,back] }; -- back up and finish real num => { RETURN[fpart,take] }; -- add fraction digit exp => { RETURN[expon,take] }; -- exponent follows dot,sgn,oct,oth => { RETURN[ident,take] }; -- not a number ENDCASE; expon => SELECT class FROM -- So far: valid mantissa followed by "E" eos,crt,dlm => { RETURN[name,null] }; -- finish as a name com,lpr,rpr,lbr,rbr => { RETURN[name,back] }; -- back up and finish num => { RETURN[epart,take] }; -- first exponent digit sgn => { RETURN[esign,take] }; -- exponent sign; digit should follow dot,oct,exp,oth => { RETURN[ident,take] }; -- not a number ENDCASE; esign => SELECT class FROM -- So far: valid mantissa followed by "E" and a sign eos,crt,dlm => { RETURN[name,null] }; -- finish as a name com,lpr,rpr,lbr,rbr => { RETURN[name,back] }; -- back up and finish num => { RETURN[epart,take] }; -- first exponent digit sgn,dot,oct,exp,oth => { RETURN[ident,take] }; -- not a number ENDCASE; epart => SELECT class FROM -- So far: valid real with exponent eos,crt,dlm => { RETURN[rnum,null] }; -- finish real com,lpr,rpr,lbr,rbr => { RETURN[rnum,back] }; -- back up and finish real num => { RETURN[epart,take] }; -- add exponent digit sgn,dot,oct,exp,oth => { RETURN[ident,take] }; -- not a number ENDCASE; ident => SELECT class FROM -- So far: an identifier eos,crt,dlm => { RETURN[name,null] }; -- finish name com,lpr,rpr,lbr,rbr => { RETURN[name,back] }; -- back up and finish name num,sgn,dot,oct,exp,oth => { RETURN[ident,take] }; -- append ENDCASE; paren => SELECT class FROM eos => { RETURN[part,null] }; -- premature end lpr => { RETURN[SUCC[paren],take] }; -- up a level rpr => { RETURN[strg,null] }; -- finish string crt,dlm,com,lbr,rbr,num,sgn,dot,oct,exp,oth => { RETURN[paren,take] }; -- append ENDCASE; ENDCASE => SELECT class FROM eos => { RETURN[part,null] }; -- premature end lpr => { RETURN[SUCC[state],take] }; -- open rpr => { RETURN[PRED[state],take] }; -- close crt,dlm,com,lbr,rbr,num,sgn,dot,oct,exp,oth => { RETURN[state,take] }; -- append ENDCASE; ERROR Bug; -- unknown state or class }; MakeOctal: PROC[text: LONG STRING] RETURNS[Object] = { val: Inline.LongNumber ← [lc[0]]; neg: BOOLEAN ← FALSE; i: CARDINAL ← 0; SELECT text[0] FROM '+ => { i ← 1 }; '- => { neg ← TRUE; i ← 1 }; ENDCASE; FOR j: CARDINAL IN[i..text.length-1) DO c: CHARACTER ← text[j]; -- use shifting to multiply by 8 val.high ← Inline.BITSHIFT[val.high,3] + Inline.BITSHIFT[val.low,3-16]; val.low ← Inline.BITSHIFT[val.low,3]; IF c>'0 THEN val.lc ← val.lc + (c-'0); ENDLOOP; RETURN[[L,integer[IF neg THEN -val.li ELSE val.li]]]; }; MakeInteger: PROC[text: LONG STRING] RETURNS[Object] = { val: LONG CARDINAL ← 0; max: LONG CARDINAL = LAST[LONG CARDINAL]; lim: LONG CARDINAL = max/10; -- lim*10 <= max int: LONG INTEGER; neg: BOOLEAN ← FALSE; i: CARDINAL ← 0; SELECT text[0] FROM '+ => { i ← 1 }; '- => { neg ← TRUE; i ← 1 }; ENDCASE; FOR j: CARDINAL IN[i..text.length) WHILE val<max DO c: CHARACTER ← text[j]; val ← IF val<=lim THEN val*10 ELSE max; IF c>'0 THEN { d: CARDINAL ← c-'0; val ← IF d<(max-val) THEN val+d ELSE max }; ENDLOOP; int ← LOOPHOLE[val]; IF neg THEN int ← -int; IF (int<0)=neg THEN RETURN[[L,integer[int]]] ELSE RETURN[MakeReal[text]]; }; MakeReal: PROC[text: LONG STRING] RETURNS[Object] = { i: CARDINAL ← 0; Get: PROC RETURNS[CHARACTER] = { c: CHARACTER ← Ascii.CR; IF i<text.length THEN { c ← text[i]; i ← i + 1 }; RETURN[c] }; ob: Object; { ENABLE Real.RealException => { ob ← overflow; CONTINUE }; r: REAL ← Real.ReadReal[Get]; ob ← [L,real[r]] }; RETURN[ob]; }; StringToOctal: PROC[string: string Object] RETURNS[Object] = { text: STRING ← [20]; IF string.length>text.maxlength THEN { len: CARDINAL = text.maxlength; VM.GetText[SubString[string,string.length-len,len],text]; text[0] ← VM.GetChar[string,0] } -- preserve sign ELSE VM.GetText[string,text]; RETURN[MakeOctal[text]]; }; StringToInteger: PROC[string: string Object] RETURNS[Object] = { text: STRING ← [20]; IF string.length>text.maxlength THEN RETURN[StringToReal[string]] ELSE { VM.GetText[string,text]; RETURN[MakeInteger[text]] }; }; StringToReal: PROC[string: string Object] RETURNS[Object] = { i: CARDINAL ← 0; Get: PROC RETURNS[CHARACTER] = { c: CHARACTER ← Ascii.CR; IF i<string.length THEN { c ← VM.GetChar[string,i]; i ← i + 1 }; RETURN[c] }; ob: Object; { ENABLE Real.RealException => { ob ← overflow; CONTINUE }; r: REAL ← Real.ReadReal[Get]; ob ← [L,real[r]] }; RETURN[ob]; }; -- One intrinsic JToken: PUBLIC PROC[frame: Frame] = { ob: Object ← Pop[frame.opstk]; found,error: BOOLEAN; tok,rem: Object; WITH ob:ob SELECT FROM string => [found,error,tok,rem] ← StringToken[frame,ob]; stream => { s: StreamHandle ← GetStream[ob]; [found,error,tok] ← StreamToken[frame,s]; IF found THEN rem ← ob ELSE KillStream[ob]; }; ENDCASE => ERROR Error[typechk]; IF found THEN { Push[frame.opstk,rem]; Push[frame.opstk,tok] }; PushBoolean[frame.opstk, found]; -- ignores error flag }; -- Initialization InstallScanner: PROC[why: InstallReason, frame: Frame] = { SELECT why FROM init => InitClassArray[]; free => FreeClassArray[]; register => { overflow ← MakeName[".overflow"L]; RegisterExplicit[frame, ".token"L, JToken]; }; ENDCASE; }; Install[InstallScanner]; }. Paxton x added test for 0C in StreamToken