M3ScanImpl.mesa
Copyright Ó 1991, 1992 by Xerox Corporation. All rights reserved.
Spreitze, May 8, 1992 3:21 pm PDT
DIRECTORY Ascii, IO, M3Scan, RefText, Rope;
M3ScanImpl: CEDAR PROGRAM
IMPORTS Ascii, IO, RefText, Rope
EXPORTS M3Scan
= BEGIN OPEN M3Scan;
GetM3Token: PUBLIC PROC [stream: IO.STREAM, buffer: REF TEXT, flushComments: BOOL ¬ FALSE] RETURNS [tokenKind: TokenKind, token: REF TEXT, charsSkipped: INT, error: TokenError] = {
char: CHAR;
NextChar: PROC RETURNS [CHAR] = INLINE {
char ¬ stream.GetChar[];
token ¬ RefText.InlineAppendChar[token, char];
RETURN [Ascii.Lower[char]]
};
NextCharSkip: PROC RETURNS [CHAR] = INLINE {
No accumulation or lower casing necessary here
char ¬ stream.GetChar[];
charsSkipped ¬ charsSkipped + 1;
RETURN [char];
};
PutbackChar: PROC = {
stream.Backup[token[token.length ¬ token.length - 1]];
};
NOTE: the acceptance procs below return with no extra chars read in case of acceptance, and with the first erroneous char read in case of non-acceptance.
AcceptExtendedChar: PROC RETURNS [success: BOOL] = {
have seen '\\; looking for rest of extendedChar
ENABLE IO.EndOfStream => GO TO Failure;
SELECT NextChar[] FROM
'n, 'r, 't, 'f, '\', '\", '\\ => RETURN[TRUE];
IN ['0..'7] => {
THROUGH [1..2] DO IF NextChar[] NOT IN ['0..'7] THEN GO TO Failure ENDLOOP;
RETURN[TRUE];
};
ENDCASE => GO TO Failure;
EXITS Failure => { error ¬ $extendedChar; RETURN[FALSE] }
};
AcceptRealTail: PROC [] = {
have seen ?num.digit; looking for ?num?exponent
DO SELECT NextChar[ ! IO.EndOfStream => GO TO Accept] FROM
IN ['0..'9] => NULL;
'e, 'd, 'x => { [] ¬ AcceptExponent[]; RETURN };
ENDCASE => { PutbackChar[]; RETURN };
ENDLOOP;
EXITS Accept => RETURN
};
AcceptExponent: PROC RETURNS [success: BOOL] = {
have seen (E|e); looking for ?(+|-)num
SELECT NextChar[ ! IO.EndOfStream => GO TO Failure] FROM
'-, '+ => [] ¬ NextChar[ ! IO.EndOfStream => GO TO Failure];
ENDCASE => NULL;
IF char NOT IN ['0..'9] THEN GO TO Failure;
DO
IF NextChar[ ! IO.EndOfStream => GO TO Success] NOT IN ['0..'9] THEN {
PutbackChar[]; GO TO Success
}
ENDLOOP;
EXITS
Success => RETURN [TRUE];
Failure => { error ¬ $numericLiteral; RETURN [FALSE] };
};
error ¬ $none;
token ¬ buffer;
token.length ¬ 0;
charsSkipped ¬ 0;
{
DO
token.length ¬ 0;
SELECT NextChar[ ! IO.EndOfStream => GO TO EOFReturn] FROM
IN [IO.NUL .. IO.SP] => {
white space
charsSkipped ¬ charsSkipped + 1;
};
'( => {
left-paren or comment
prev: CHAR ¬ IO.CR;
depth: INT ¬ 1;
tokenKind ¬ $tokenSINGLE;
IF NextChar[! IO.EndOfStream => GO TO Return] # '* THEN GO TO PutbackReturn;
tokenKind ¬ $tokenCOMMENT;
IF flushComments
THEN {
charsSkipped ¬ charsSkipped + 2; -- for the leading two dashes
DO
SELECT NextCharSkip[ ! IO.EndOfStream => GO TO eof] FROM
') => IF prev = '* THEN {IF (depth ¬ depth-1) = 0 THEN EXIT};
'* => IF prev = '( THEN {depth ¬ depth+1; char ¬ 'x};
"Sys. Prog. with M3" gives no consideration to string literals within comments.
ENDCASE;
prev ¬ char;
ENDLOOP;
EXITS eof => {};
}
ELSE
DO
SELECT NextChar[ ! IO.EndOfStream => GO TO Return] FROM
') => IF prev = '* THEN {IF (depth ¬ depth-1) = 0 THEN GO TO Return};
Must include the trailing *) in the comment
'* => IF prev = '( THEN {depth ¬ depth+1; char ¬ 'x};
ENDCASE;
prev ¬ char;
ENDLOOP;
};
'< => {
less-than, less-or-equal, subtype, or pragma
prev: CHAR ¬ IO.CR;
depth: INT ¬ 0;
tokenKind ¬ $tokenSINGLE;
SELECT NextChar[! IO.EndOfStream => GO TO Return] FROM
'* => depth ¬ 1;
': => GO TO DoubleReturn;
'= => GO TO DoubleReturn;
ENDCASE => GO TO PutbackReturn;
tokenKind ¬ $tokenPRAGMA;
DO
SELECT NextChar[ ! IO.EndOfStream => GO TO Return] FROM
'> => IF prev = '* THEN {IF (depth ¬ depth-1) = 0 THEN GO TO Return};
Must include the trailing *> in the pragma
'* => IF prev = '< THEN {depth ¬ depth+1; char ¬ 'x};
ENDCASE;
prev ¬ char;
ENDLOOP;
};
IN ['a .. 'z] => {
identifier
tokenKind ¬ $tokenID;
DO
SELECT NextChar[ ! IO.EndOfStream => GO TO Return] FROM
IN ['a..'z], IN ['0..'9], '← => NULL;
ENDCASE => GO TO PutbackReturn;
ENDLOOP;
};
IN ['0 .. '9] => {
numeric literal, this gets hairy
tokenKind ¬ $tokenDECIMAL;
WHILE NextChar[ ! IO.EndOfStream => GO TO Return] IN ['0..'9] DO ENDLOOP;
SELECT Ascii.Lower[char] FROM
'. => {
IF NextChar[ ! IO.EndOfStream => GO TO Invalid] IN ['0 .. '9] THEN { -- REAL
tokenKind ¬ $tokenREAL;
AcceptRealTail[]; GO TO Return }
ELSE { -- DECIMAL followed by dotdot
PutbackChar[]; GO TO PutbackReturn }
};
'← => {
tokenKind ¬ $tokenBASED;
DO SELECT NextChar[ ! IO.EndOfStream => GO TO Return] FROM
IN ['0 .. '9] => NULL;
IN ['a .. 'f] => NULL;
ENDCASE => GO TO PutbackReturn ENDLOOP;
};
ENDCASE => GO TO PutbackReturn;
EXITS Invalid => { error ¬ $numericLiteral; GO TO ErrorReturn }
};
'. => {
either a dotdot or a REAL or a dot
tokenKind ¬ $tokenSINGLE;
SELECT NextChar[ ! IO.EndOfStream => GO TO Return] FROM
'. => GO TO DoubleReturn; -- dotdot
ENDCASE => GO TO PutbackReturn; -- dot
};
',, ';, '#, '+, '-, '*, '/, '^, '&, '), '[, '], '{, '}, '| =>
GO TO SingleReturn;
'\' => {
CHAR literal
tokenKind ¬ $tokenCHAR;
SELECT NextChar[ ! IO.EndOfStream => GO TO Illegal] FROM
'\\ => IF NOT AcceptExtendedChar[] THEN GO TO ErrorReturn;
'\' => GO TO Illegal;
ENDCASE => NULL;
IF NextChar[ ! IO.EndOfStream => GO TO Illegal] # '\' THEN GO TO Illegal;
EXITS Illegal => { error ¬ $charLiteral; GO TO ErrorReturn };
};
'\" => {
TEXT literal
tokenKind ¬ $tokenTEXT;
DO
c: CHAR ¬ NextChar[
! IO.EndOfStream => GO TO Illegal];
SELECT c FROM
'\" => GO TO Return;
'\\ => IF NOT AcceptExtendedChar[] THEN GO TO ErrorReturn;
ENDCASE => NULL;
ENDLOOP;
EXITS Illegal => { error ¬ $stringLiteral; GO TO ErrorReturn };
};
'= => {
either '=' or '=>'
tokenKind ¬ $tokenSINGLE;
SELECT NextChar[ ! IO.EndOfStream => GO TO Return] FROM
'> => GO TO DoubleReturn;
ENDCASE => GO TO PutbackReturn;
};
'>, ': => {
either '>' or '>='
tokenKind ¬ $tokenSINGLE;
SELECT NextChar[ ! IO.EndOfStream => GO TO Return] FROM
'= => GO TO DoubleReturn;
ENDCASE => GO TO PutbackReturn;
};
ENDCASE => {
error ¬ $singleChar;
GO TO ErrorReturn
};
ENDLOOP;
EXITS
Return => { RETURN };
PutbackReturn => { PutbackChar[]; RETURN };
SingleReturn => { tokenKind ¬ $tokenSINGLE; RETURN };
DoubleReturn => { tokenKind ¬ $tokenDOUBLE; RETURN };
EOFReturn => { tokenKind ¬ $tokenEOF; RETURN };
ErrorReturn => { tokenKind ¬ $tokenERROR; RETURN };
}
};
GetM3TokenRope: PUBLIC PROC [stream: IO.STREAM, flushComments: BOOL] RETURNS [tokenKind: TokenKind, token: Rope.ROPE, charsSkipped: INT] = {
buffer: REF TEXT = RefText.ObtainScratch[100];
{ ENABLE UNWIND => RefText.ReleaseScratch[buffer];
tokenText: REF TEXT;
[tokenKind: tokenKind, token: tokenText, charsSkipped: charsSkipped] ¬
GetM3Token[stream, buffer, flushComments];
SELECT tokenKind FROM
tokenEOF => ERROR IO.EndOfStream[stream];
tokenERROR => ERROR IO.Error[$SyntaxError, stream];
ENDCASE;
token ¬ Rope.FromRefText[tokenText];
};
RefText.ReleaseScratch[buffer];
RETURN [tokenKind, token, charsSkipped];
};
ScanBack: PUBLIC PROC [base: Rope.ROPE, from: INT] RETURNS [start: INT] = {
len: INT ~ base.Length[];
lm1: INT ~ len-1;
char: CHAR ¬ ' ;
NextChar: PROC RETURNS [CHAR] = INLINE {
IF (from ¬ from-1) < 0 THEN ERROR IO.EndOfStream[NIL];
char ¬ base.Fetch[from];
RETURN [Ascii.Lower[char]]};
PeekNext: PROC RETURNS [CHAR] = INLINE {
IF from>0 THEN RETURN base.Fetch[from-1] ELSE RETURN [' ]};
PeekPrev: PROC RETURNS [CHAR] = INLINE {
IF from<lm1 THEN RETURN base.Fetch[from+1] ELSE RETURN [' ]};
SkipNester: PROC [open, close: CHAR] RETURNS [INT] ~ {
depth: INT ¬ 1;
WHILE depth>0 DO
SELECT NextChar[] FROM
'* => IF NextChar[]=open THEN depth ¬ depth-1;
close => IF NextChar[]='* THEN depth ¬ depth+1;
ENDCASE => NULL;
ENDLOOP;
RETURN [from]};
{ ENABLE IO.EndOfStream => GOTO PutbackReturn;
maybeInside: BOOL ¬ TRUE;
WHILE NextChar[] IN [IO.NUL .. IO.SP] DO
SELECT char FROM
'\n, '\r, '\l => maybeInside ¬ FALSE;
ENDCASE => NULL;
ENDLOOP;
SELECT char FROM
'\', '\" => {
sc: CHAR ~ char;
p1: INT ~ from;
DO
pos, n: INT ¬ 0;
WHILE NextChar[] # sc DO
IF maybeInside THEN SELECT char FROM
'\n, '\r, '\l => RETURN [p1];
ENDCASE => NULL;
ENDLOOP;
pos ¬ from;
WHILE NextChar[] = '\\ DO n ¬ n+1 ENDLOOP;
IF (n MOD 2) = 0 THEN RETURN [pos]
ELSE IF n>0 THEN maybeInside ¬ FALSE;
from ¬ from+1;
ENDLOOP};
IN ['A..'Z], IN ['a..'z], IN ['0..'9], '←, '+, '-, '. => {
last: CHAR ¬ char;
DO SELECT NextChar[] FROM
IN ['a..'z], IN ['0..'9], '←, '+, '- => NULL;
'. => IF last='. THEN RETURN [from];
ENDCASE => EXIT; last ¬ char ENDLOOP;
RETURN [from+1]};
') => IF NextChar[] = '* THEN RETURN SkipNester['(, ')] ELSE RETURN [from+1];
'> => SELECT NextChar[] FROM
'* => RETURN SkipNester['<, '>];
'= => RETURN [from];
ENDCASE => RETURN [from+1];
'* => SELECT PeekNext[] FROM
'(, '< => RETURN [from-1];
ENDCASE => SELECT PeekPrev[] FROM
') => RETURN SkipNester['(, ')];
'> => RETURN SkipNester['<, '>];
ENDCASE => RETURN [from];
'= => SELECT NextChar[] FROM
':, '<, '> => RETURN [from];
ENDCASE => RETURN [from+1];
': => SELECT NextChar[] FROM
'< => RETURN [from];
ENDCASE => RETURN [from+1];
ENDCASE => RETURN [from];
EXITS PutbackReturn => {from ¬ from+1; RETURN [from]}
}};
END.