DIRECTORY Ascii, IO, M3Scan, RefText, Rope; M3ScanImpl: CEDAR PROGRAM IMPORTS Ascii, IO, RefText, Rope EXPORTS M3Scan = BEGIN OPEN M3Scan; GetM3Token: PUBLIC PROC [stream: IO.STREAM, buffer: REF TEXT, flushComments: BOOL ¬ FALSE] RETURNS [tokenKind: TokenKind, token: REF TEXT, charsSkipped: INT, error: TokenError] = { char: CHAR; NextChar: PROC RETURNS [CHAR] = INLINE { char ¬ stream.GetChar[]; token ¬ RefText.InlineAppendChar[token, char]; RETURN [Ascii.Lower[char]] }; NextCharSkip: PROC RETURNS [CHAR] = INLINE { char ¬ stream.GetChar[]; charsSkipped ¬ charsSkipped + 1; RETURN [char]; }; PutbackChar: PROC = { stream.Backup[token[token.length ¬ token.length - 1]]; }; AcceptExtendedChar: PROC RETURNS [success: BOOL] = { ENABLE IO.EndOfStream => GO TO Failure; SELECT NextChar[] FROM 'n, 'r, 't, 'f, '\', '\", '\\ => RETURN[TRUE]; IN ['0..'7] => { THROUGH [1..2] DO IF NextChar[] NOT IN ['0..'7] THEN GO TO Failure ENDLOOP; RETURN[TRUE]; }; ENDCASE => GO TO Failure; EXITS Failure => { error ¬ $extendedChar; RETURN[FALSE] } }; AcceptRealTail: PROC [] = { DO SELECT NextChar[ ! IO.EndOfStream => GO TO Accept] FROM IN ['0..'9] => NULL; 'e, 'd, 'x => { [] ¬ AcceptExponent[]; RETURN }; ENDCASE => { PutbackChar[]; RETURN }; ENDLOOP; EXITS Accept => RETURN }; AcceptExponent: PROC RETURNS [success: BOOL] = { SELECT NextChar[ ! IO.EndOfStream => GO TO Failure] FROM '-, '+ => [] ¬ NextChar[ ! IO.EndOfStream => GO TO Failure]; ENDCASE => NULL; IF char NOT IN ['0..'9] THEN GO TO Failure; DO IF NextChar[ ! IO.EndOfStream => GO TO Success] NOT IN ['0..'9] THEN { PutbackChar[]; GO TO Success } ENDLOOP; EXITS Success => RETURN [TRUE]; Failure => { error ¬ $numericLiteral; RETURN [FALSE] }; }; error ¬ $none; token ¬ buffer; token.length ¬ 0; charsSkipped ¬ 0; { DO token.length ¬ 0; SELECT NextChar[ ! IO.EndOfStream => GO TO EOFReturn] FROM IN [IO.NUL .. IO.SP] => { charsSkipped ¬ charsSkipped + 1; }; '( => { prev: CHAR ¬ IO.CR; depth: INT ¬ 1; tokenKind ¬ $tokenSINGLE; IF NextChar[! IO.EndOfStream => GO TO Return] # '* THEN GO TO PutbackReturn; tokenKind ¬ $tokenCOMMENT; IF flushComments THEN { charsSkipped ¬ charsSkipped + 2; -- for the leading two dashes DO SELECT NextCharSkip[ ! IO.EndOfStream => GO TO eof] FROM ') => IF prev = '* THEN {IF (depth ¬ depth-1) = 0 THEN EXIT}; '* => IF prev = '( THEN {depth ¬ depth+1; char ¬ 'x}; ENDCASE; prev ¬ char; ENDLOOP; EXITS eof => {}; } ELSE DO SELECT NextChar[ ! IO.EndOfStream => GO TO Return] FROM ') => IF prev = '* THEN {IF (depth ¬ depth-1) = 0 THEN GO TO Return}; '* => IF prev = '( THEN {depth ¬ depth+1; char ¬ 'x}; ENDCASE; prev ¬ char; ENDLOOP; }; '< => { prev: CHAR ¬ IO.CR; depth: INT ¬ 0; tokenKind ¬ $tokenSINGLE; SELECT NextChar[! IO.EndOfStream => GO TO Return] FROM '* => depth ¬ 1; ': => GO TO DoubleReturn; '= => GO TO DoubleReturn; ENDCASE => GO TO PutbackReturn; tokenKind ¬ $tokenPRAGMA; DO SELECT NextChar[ ! IO.EndOfStream => GO TO Return] FROM '> => IF prev = '* THEN {IF (depth ¬ depth-1) = 0 THEN GO TO Return}; '* => IF prev = '< THEN {depth ¬ depth+1; char ¬ 'x}; ENDCASE; prev ¬ char; ENDLOOP; }; IN ['a .. 'z] => { tokenKind ¬ $tokenID; DO SELECT NextChar[ ! IO.EndOfStream => GO TO Return] FROM IN ['a..'z], IN ['0..'9], '_ => NULL; ENDCASE => GO TO PutbackReturn; ENDLOOP; }; IN ['0 .. '9] => { tokenKind ¬ $tokenDECIMAL; WHILE NextChar[ ! IO.EndOfStream => GO TO Return] IN ['0..'9] DO ENDLOOP; SELECT Ascii.Lower[char] FROM '. => { IF NextChar[ ! IO.EndOfStream => GO TO Invalid] IN ['0 .. '9] THEN { -- REAL tokenKind ¬ $tokenREAL; AcceptRealTail[]; GO TO Return } ELSE { -- DECIMAL followed by dotdot PutbackChar[]; GO TO PutbackReturn } }; '_ => { tokenKind ¬ $tokenBASED; DO SELECT NextChar[ ! IO.EndOfStream => GO TO Return] FROM IN ['0 .. '9] => NULL; IN ['a .. 'f] => NULL; ENDCASE => GO TO PutbackReturn ENDLOOP; }; ENDCASE => GO TO PutbackReturn; EXITS Invalid => { error ¬ $numericLiteral; GO TO ErrorReturn } }; '. => { tokenKind ¬ $tokenSINGLE; SELECT NextChar[ ! IO.EndOfStream => GO TO Return] FROM '. => GO TO DoubleReturn; -- dotdot ENDCASE => GO TO PutbackReturn; -- dot }; ',, ';, '#, '+, '-, '*, '/, '^, '&, '), '[, '], '{, '}, '| => GO TO SingleReturn; '\' => { tokenKind ¬ $tokenCHAR; SELECT NextChar[ ! IO.EndOfStream => GO TO Illegal] FROM '\\ => IF NOT AcceptExtendedChar[] THEN GO TO ErrorReturn; '\' => GO TO Illegal; ENDCASE => NULL; IF NextChar[ ! IO.EndOfStream => GO TO Illegal] # '\' THEN GO TO Illegal; EXITS Illegal => { error ¬ $charLiteral; GO TO ErrorReturn }; }; '\" => { tokenKind ¬ $tokenTEXT; DO c: CHAR ¬ NextChar[ ! IO.EndOfStream => GO TO Illegal]; SELECT c FROM '\" => GO TO Return; '\\ => IF NOT AcceptExtendedChar[] THEN GO TO ErrorReturn; ENDCASE => NULL; ENDLOOP; EXITS Illegal => { error ¬ $stringLiteral; GO TO ErrorReturn }; }; '= => { tokenKind ¬ $tokenSINGLE; SELECT NextChar[ ! IO.EndOfStream => GO TO Return] FROM '> => GO TO DoubleReturn; ENDCASE => GO TO PutbackReturn; }; '>, ': => { tokenKind ¬ $tokenSINGLE; SELECT NextChar[ ! IO.EndOfStream => GO TO Return] FROM '= => GO TO DoubleReturn; ENDCASE => GO TO PutbackReturn; }; ENDCASE => { error ¬ $singleChar; GO TO ErrorReturn }; ENDLOOP; EXITS Return => { RETURN }; PutbackReturn => { PutbackChar[]; RETURN }; SingleReturn => { tokenKind ¬ $tokenSINGLE; RETURN }; DoubleReturn => { tokenKind ¬ $tokenDOUBLE; RETURN }; EOFReturn => { tokenKind ¬ $tokenEOF; RETURN }; ErrorReturn => { tokenKind ¬ $tokenERROR; RETURN }; } }; GetM3TokenRope: PUBLIC PROC [stream: IO.STREAM, flushComments: BOOL] RETURNS [tokenKind: TokenKind, token: Rope.ROPE, charsSkipped: INT] = { buffer: REF TEXT = RefText.ObtainScratch[100]; { ENABLE UNWIND => RefText.ReleaseScratch[buffer]; tokenText: REF TEXT; [tokenKind: tokenKind, token: tokenText, charsSkipped: charsSkipped] ¬ GetM3Token[stream, buffer, flushComments]; SELECT tokenKind FROM tokenEOF => ERROR IO.EndOfStream[stream]; tokenERROR => ERROR IO.Error[$SyntaxError, stream]; ENDCASE; token ¬ Rope.FromRefText[tokenText]; }; RefText.ReleaseScratch[buffer]; RETURN [tokenKind, token, charsSkipped]; }; ScanBack: PUBLIC PROC [base: Rope.ROPE, from: INT] RETURNS [start: INT] = { len: INT ~ base.Length[]; lm1: INT ~ len-1; char: CHAR ¬ ' ; NextChar: PROC RETURNS [CHAR] = INLINE { IF (from ¬ from-1) < 0 THEN ERROR IO.EndOfStream[NIL]; char ¬ base.Fetch[from]; RETURN [Ascii.Lower[char]]}; PeekNext: PROC RETURNS [CHAR] = INLINE { IF from>0 THEN RETURN base.Fetch[from-1] ELSE RETURN [' ]}; PeekPrev: PROC RETURNS [CHAR] = INLINE { IF from0 DO SELECT NextChar[] FROM '* => IF NextChar[]=open THEN depth ¬ depth-1; close => IF NextChar[]='* THEN depth ¬ depth+1; ENDCASE => NULL; ENDLOOP; RETURN [from]}; { ENABLE IO.EndOfStream => GOTO PutbackReturn; maybeInside: BOOL ¬ TRUE; WHILE NextChar[] IN [IO.NUL .. IO.SP] DO SELECT char FROM '\n, '\r, '\l => maybeInside ¬ FALSE; ENDCASE => NULL; ENDLOOP; SELECT char FROM '\', '\" => { sc: CHAR ~ char; p1: INT ~ from; DO pos, n: INT ¬ 0; WHILE NextChar[] # sc DO IF maybeInside THEN SELECT char FROM '\n, '\r, '\l => RETURN [p1]; ENDCASE => NULL; ENDLOOP; pos ¬ from; WHILE NextChar[] = '\\ DO n ¬ n+1 ENDLOOP; IF (n MOD 2) = 0 THEN RETURN [pos] ELSE IF n>0 THEN maybeInside ¬ FALSE; from ¬ from+1; ENDLOOP}; IN ['A..'Z], IN ['a..'z], IN ['0..'9], '_, '+, '-, '. => { last: CHAR ¬ char; DO SELECT NextChar[] FROM IN ['a..'z], IN ['0..'9], '_, '+, '- => NULL; '. => IF last='. THEN RETURN [from]; ENDCASE => EXIT; last ¬ char ENDLOOP; RETURN [from+1]}; ') => IF NextChar[] = '* THEN RETURN SkipNester['(, ')] ELSE RETURN [from+1]; '> => SELECT NextChar[] FROM '* => RETURN SkipNester['<, '>]; '= => RETURN [from]; ENDCASE => RETURN [from+1]; '* => SELECT PeekNext[] FROM '(, '< => RETURN [from-1]; ENDCASE => SELECT PeekPrev[] FROM ') => RETURN SkipNester['(, ')]; '> => RETURN SkipNester['<, '>]; ENDCASE => RETURN [from]; '= => SELECT NextChar[] FROM ':, '<, '> => RETURN [from]; ENDCASE => RETURN [from+1]; ': => SELECT NextChar[] FROM '< => RETURN [from]; ENDCASE => RETURN [from+1]; ENDCASE => RETURN [from]; EXITS PutbackReturn => {from ¬ from+1; RETURN [from]} }}; END. R M3ScanImpl.mesa Copyright Σ 1991, 1992 by Xerox Corporation. All rights reserved. Spreitze, May 8, 1992 3:21 pm PDT No accumulation or lower casing necessary here NOTE: the acceptance procs below return with no extra chars read in case of acceptance, and with the first erroneous char read in case of non-acceptance. have seen '\\; looking for rest of extendedChar have seen ?num.digit; looking for ?num?exponent have seen (E|e); looking for ?(+|-)num white space left-paren or comment "Sys. Prog. with M3" gives no consideration to string literals within comments. Must include the trailing *) in the comment less-than, less-or-equal, subtype, or pragma Must include the trailing *> in the pragma identifier numeric literal, this gets hairy either a dotdot or a REAL or a dot CHAR literal TEXT literal either '=' or '=>' either '>' or '>=' ΚP•NewlineDelimiter –(cedarcode) style™codešœ™Kšœ Οeœ7™BK™!—K˜KšΟk œžœ˜+K˜šΟn œžœž˜Kšžœžœ˜ Kšžœ˜—K˜Kšœžœžœ˜K˜šŸ œžœžœ žœžœ žœžœžœžœžœžœžœžœ˜΄Kšœžœ˜ š Ÿœžœžœžœžœ˜(K˜K˜.Kšžœ˜Kšœ˜—š Ÿ œžœžœžœžœ˜,K™.K˜K˜ Kšžœ˜Kšœ˜—šŸ œžœ˜K˜6K˜K˜—Kšœ™™™šŸœžœžœ žœ˜4Kšœ/™/Kšžœžœžœžœ ˜'šžœ ž˜Kšœ!žœžœ˜.šžœ˜Kšžœžœžœ žœžœ žœžœžœ žœ˜KKšžœžœ˜ K˜—Kšžœžœžœ ˜—Kšžœ%žœžœ˜9K˜—šŸœžœ˜Kšœ ŸœŸœŸœ™/š žœžœ žœžœžœ ž˜:Kšžœ žœ˜Kšœ'žœ˜0Kšžœžœ˜%Kšžœ˜—Kšžœ ž˜Kšœ˜—šŸœžœžœ žœ˜0Kš œ ŸœŸœŸœŸœŸœŸœ™&š žœ žœžœžœ ž˜8Kšœžœžœžœ ˜K˜—šœ˜Kšžœ™ K˜šž˜šœžœ ˜Kšœžœžœžœ ˜#—šžœž˜ Kšœžœžœ˜Kš œžœžœžœžœžœ ˜:Kšžœžœ˜—Kšžœ˜—Kšžœ'žœžœ˜@K˜—šœ˜Kšœ™K˜š žœ žœžœžœ ž˜7Kšœžœžœ˜Kšžœžœžœ˜—K˜—šœ ˜ Kšœ™K˜š žœ žœžœžœ ž˜7Kšœžœžœ˜Kšžœžœžœ˜—K˜—šžœ˜ K˜Kšžœžœ ˜K˜——Kšžœ˜ šž˜Kšœ žœ˜Kšœ"žœ˜+Kšœ,žœ˜5Kšœ,žœ˜5Kšœ'žœ˜0Kšœ+žœ˜4—Kšœ˜——K˜—K˜šŸœžœžœ žœžœžœžœ$žœžœ˜Kšœžœžœ˜.šœžœžœ#˜2Kšœ žœžœ˜˜FKšœ*˜*—šžœ ž˜Kšœ žœžœ˜)Kšœžœžœ˜3Kšžœ˜—K˜$K˜—Kšœ˜Kšžœ"˜(K˜—K˜šŸœžœžœ žœžœžœ žœ˜KKšœžœ˜Kšœžœ ˜Kšœžœ˜š Ÿœžœžœžœžœ˜(Kš žœžœžœžœ žœ˜6K˜Kšžœ˜—š Ÿœžœžœžœžœ˜(Kš žœžœžœžœžœ˜;—š Ÿœžœžœžœžœ˜(Kš žœ žœžœžœžœ˜=—š Ÿ œžœžœžœžœ˜6Kšœžœ˜šžœ ž˜šžœ ž˜Kšœžœžœ˜.Kšœ žœžœ˜/Kšžœžœ˜—Kšžœ˜—Kšžœ ˜—Kšœžœžœžœ˜.Kšœ žœžœ˜K˜š žœ žœžœžœžœžœž˜(šžœž˜Kšœžœ˜%Kšžœžœ˜—Kšžœ˜—šžœž˜šœ ˜ Kšœžœ˜Kšœžœ˜šž˜Kšœžœ˜šžœž˜šžœ žœžœž˜$Kšœžœ˜Kšžœžœ˜—Kšžœ˜—K˜ Kšžœžœ žœ˜*Kšžœžœžœžœ˜"Kšžœžœžœžœ˜%K˜Kšžœ˜ ——šžœ žœ žœ˜:Kšœžœ˜šžœžœ ž˜Kšžœ žœžœ˜-Kšœžœ žœžœ˜$Kšžœžœžœ˜%—Kšžœ ˜—Kš œžœžœžœžœžœ ˜Mšœžœ ž˜Kšœžœ˜ Kšœžœ˜Kšžœžœ ˜—šœžœ ž˜Kšœ žœ ˜šžœžœ ž˜!Kšœžœ˜ Kšœžœ˜ Kšžœžœ˜——šœžœ ž˜Kšœžœ˜Kšžœžœ ˜—šœžœ ž˜Kšœžœ˜Kšžœžœ ˜—Kšžœžœ˜—Kšžœ"žœ˜5Kšœ˜—K˜Kšžœ˜—…— "2Δ