LexerImpl.mesa
Copyright © 1992, 1993 by Xerox Corporation. All rights reserved.
Hopcroft, July 27, 1989 0:18:47 am PDT
Coolidge, July 12, 1990 3:50 pm PDT
Laurie Horton, November 8, 1991 4:03 pm PST
Willie-s, March 29, 1993 2:53 pm PST
DIRECTORY
Ascii USING [SP, CR, LF, FF, TAB, BS, BEL],
Commander USING[CommandProc, Register],
Convert USING [CardFromRope, IntFromRope, RealFromRope],
PFS USING [PathFromRope, StreamOpen],
PFSNames USING [PATH],
IO USING[Close, EndOfStream, GetChar, GetTokenRope, int, IDProc, PeekChar, PutF1, PutRope, RIS, rope, STREAM, TokenKind],
Lexer USING [],
Rope USING[Cat, Concat, Fetch, FromChar, Length, ROPE, Substr];
LexerImpl: CEDAR PROGRAM
IMPORTS Commander, Convert, IO, PFS, Rope EXPORTS Lexer ~ {
-- Local token-kind enumeration.
-- NOTE(review): the procedures in this module declare their results as IO.TokenKind, not this type,
-- and nothing in this file produces tokenATOM, tokenTRIPLE or tokenNUMBER; confirm whether this
-- declaration is still needed.
TokenKind: TYPE = { tokenERROR, tokenID, tokenDECIMAL, tokenOCTAL, tokenHEX, tokenREAL, tokenROPE, tokenCHAR, tokenATOM, tokenSINGLE, tokenDOUBLE, tokenTRIPLE, tokenCOMMENT, tokenEOF, tokenNUMBER };
-- States of the scanner loop in GetCTokenRopeAndValue. LeadingSpace is the initial state and
-- Done ends the loop; Error marks the token as tokenERROR.
State: TYPE = { LeadingSpace, StartComment, Comment, EndComment, Char, String, CharStringIdentifier, FloatIntHexOct, Identifier, Punctuation, FloatHexOctal, FloatOctal, Hex, FloatInt, IntModUORL, IntModU, IntModL, Float, Fraction, FractionOrDot, ExponentSign, Exponent, FloatModFOrL, Done, Error };
-- Stream currently being scanned; assigned from the inStream argument on every call of
-- GetCTokenRopeAndValue. Module-level, so it is shared by all calls.
inputStream: IO.STREAM;
-- Reset to FALSE by GetCTokenInit. Otherwise referenced only by the one-character lookahead
-- code that follows the unconditional RETURNs in GetNextChar and PeekNextChar.
haveAChar: BOOL;
-- Scratch variable: the character most recently peeked or read by the scanner states.
thisChar: CHAR;
-- Referenced only by the lookahead code that follows the unconditional RETURN in PeekNextChar.
currentChar: CHAR;
GetNextChar: PROC [] RETURNS [CHAR] ~ {
  -- Consumes and returns the next character of inputStream.
  -- Nothing is caught here: whatever the stream raises (callers enable IO.EndOfStream)
  -- propagates to the caller.
  RETURN[inputStream.GetChar[]]
  -- Superseded implementation that used the module-level one-character lookahead buffer.
  -- It followed the RETURN above and was never executed; retained for reference only.
  --   c:CHAR ← PeekNextChar[];
  --   haveAChar ← FALSE;
  --   RETURN [c]
  };
PeekNextChar: PROC [] RETURNS [CHAR] ~ {
  -- Returns the next character of inputStream without consuming it.
  -- Nothing is caught here: whatever the stream raises (callers enable IO.EndOfStream)
  -- propagates to the caller.
  RETURN[inputStream.PeekChar[]]
  -- Superseded implementation that buffered one character in currentChar / haveAChar.
  -- It followed the RETURN above and was never executed; retained for reference only.
  --   IF haveAChar THEN
  --     RETURN [currentChar]
  --   ELSE {
  --     currentChar ← inputStream.GetChar[];
  --     haveAChar ← TRUE;
  --     RETURN [currentChar]
  --     }
  };
IsPunctuation: PROC [c:CHAR] RETURNS [BOOL] ~ {
  -- TRUE iff c is one of the punctuation, operator or quote characters listed below.
  SELECT c FROM
    '!, '*, '%, '/, '^, '&,
    '(, '), ',, '., ':, '?,
    '[, '], '{, '}, '~,
    '+, '-, '<, '>, ';, '=,
    '', '", '| => RETURN [TRUE];
    ENDCASE => RETURN [FALSE];
  };
IsDecimalDigit: PROC [c:CHAR] RETURNS [BOOL]
  -- TRUE iff c lies in the range '0 through '9.
  ~ INLINE { RETURN ['0 <= c AND c <= '9] };
Is1To9: PROC [c:CHAR] RETURNS [BOOL]
  -- TRUE iff c is a decimal digit other than '0.
  ~ INLINE { RETURN ['1 <= c AND c <= '9] };
IsHexDigit: PROC [c:CHAR] RETURNS [BOOL]
  -- TRUE iff c is a decimal digit or a letter A through F in either case.
  ~ INLINE { RETURN [
    ('0 <= c AND c <= '9) OR
    ('A <= c AND c <= 'F) OR
    ('a <= c AND c <= 'f)] };
IsOctalDigit: PROC [c:CHAR] RETURNS [BOOL]
  -- TRUE iff c lies in the range '0 through '7.
  ~ INLINE { RETURN ['0 <= c AND c <= '7] };
IsAlphaNum: PROC [c:CHAR] RETURNS [BOOL]
  -- TRUE iff c may continue an identifier: a digit, a letter, or the character written '← below.
  -- NOTE(review): '← is presumably the code point that ASCII shows as underscore; confirm
  -- against the character encoding of this file.
  ~ INLINE { RETURN [
    ('0 <= c AND c <= '9) OR
    ('a <= c AND c <= 'z) OR
    ('A <= c AND c <= 'Z) OR
    c = '←] };
IsNonDigit: PROC [c:CHAR] RETURNS [BOOL]
  -- TRUE iff c may start an identifier: a letter or the character written '← below.
  -- NOTE(review): '← is presumably the code point that ASCII shows as underscore; confirm
  -- against the character encoding of this file.
  ~ INLINE { RETURN [
    ('a <= c AND c <= 'z) OR
    ('A <= c AND c <= 'Z) OR
    c = '←] };
IsWhite: PROC [c:CHAR] RETURNS [BOOL]
  -- TRUE iff c is white space that may separate tokens: space, carriage return, line feed,
  -- tab or form feed. Form feed is included because C treats it as white space; without it
  -- a form feed in the input falls through to the Error state of the scanner.
  = INLINE { RETURN [c = Ascii.SP OR c = Ascii.CR OR c = Ascii.LF OR c = Ascii.TAB OR c = Ascii.FF] };
GetCTokenInit: PUBLIC PROC [] RETURNS [] ~ {
-- Resets the module-level lookahead flag haveAChar. Takes no arguments and returns nothing.
-- NOTE(review): the statements that read haveAChar sit after unconditional RETURNs in
-- GetNextChar and PeekNextChar, so this reset currently appears to have no observable effect.
haveAChar ¬ FALSE;
};
GetCTokenRope: PUBLIC PROC [inStream:IO.STREAM, junk:BOOL] RETURNS [tokenKind:IO.TokenKind,token:Rope.ROPE,charsSkipped:INT] ~ {
  -- Convenience form of GetCTokenRopeAndValue: scans one token from inStream and returns
  -- its kind, its text and the skipped-character count, discarding the converted value.
  -- Both arguments are passed through unchanged.
  ignoredValue: REF ANY;
  [tokenKind, token, charsSkipped, ignoredValue] ¬
    GetCTokenRopeAndValue[inStream: inStream, junk: junk];
  -- The named results assigned above are returned on falling off the end.
  };
GetCTokenRopeAndValue: PUBLIC PROC [inStream:IO.STREAM, junk:BOOL]
  RETURNS [tokenKind:IO.TokenKind,token:Rope.ROPE,charsSkipped:INT,
    tokenValue:REF ANY] ~ {
  -- Scans one C token from inStream using a character-driven state machine.
  --
  -- Arguments:
  --   inStream  the stream to read; it is stored in the module-level inputStream.
  --   junk      not used by this procedure.
  -- Results:
  --   tokenKind     tokenEOF when the stream ends while skipping leading white space;
  --                 tokenERROR for a malformed token or when the stream ends inside an
  --                 incomplete token; tokenID, tokenCHAR, tokenROPE, tokenREAL, tokenSINGLE,
  --                 tokenDOUBLE or tokenCOMMENT as scanned. Octal and hexadecimal constants
  --                 are reported as tokenDECIMAL after conversion.
  --   token         the token text. Escape sequences in character and string constants are
  --                 replaced by the character they denote. A leading L on a character or
  --                 string constant is dropped. A comment is returned as a slash followed by
  --                 the text between the comment delimiters.
  --   charsSkipped  number of white space characters skipped before the token.
  --   tokenValue    REF INT for integer constants, REF REAL for real constants, REF CHAR for
  --                 character constants, REF Rope.ROPE (quotes removed) for string constants;
  --                 not assigned for other kinds.
  -- Three-character operators (shift-assign) are reported as tokenDOUBLE.
  -- IO.EndOfStream is handled inside the scanning loop; errors raised by the Convert
  -- routines are not caught here.
  state: State ¬ LeadingSpace;
  tokenValid: BOOL ¬ FALSE;  -- TRUE when the text scanned so far is a complete token
  suffixLen: INT ¬ 0;  -- count of trailing u, l or f suffix characters appended to token
  digits: Rope.ROPE ¬ NIL;  -- token without its suffix, for numeric conversion
  inputStream ¬ inStream;
  token ¬ NIL;
  charsSkipped ¬ 0;
  WHILE state # Done DO
    BEGIN
    -- End of input: accept what has been scanned if it is a complete token, else report an error.
    ENABLE IO.EndOfStream => {
      IF tokenValid THEN
        GOTO GoodExit
      ELSE
        GOTO BadExit
      };
    SELECT state FROM
      LeadingSpace => {
        -- While skipping white space, end of input means a clean tokenEOF.
        tokenKind ¬ tokenEOF;
        tokenValid ¬ TRUE;
        WHILE IsWhite[PeekNextChar[]] DO
          [] ¬ GetNextChar[];
          charsSkipped ¬ charsSkipped + 1
          ENDLOOP;
        tokenValid ¬ FALSE;
        -- Dispatch on the first character of the token without consuming it.
        state ¬ SELECT thisChar ¬ PeekNextChar[] FROM
          '' => Char,
          '" => String,
          'L => CharStringIdentifier,
          '/ => StartComment,
          '. => FractionOrDot,
          ENDCASE =>
            SELECT TRUE FROM
              IsDecimalDigit[thisChar] => FloatIntHexOct,
              IsNonDigit[thisChar] => Identifier,
              IsPunctuation[thisChar] => Punctuation,
              ENDCASE => Error;
        };
      Char => {
        -- Character constant: opening quote, one character or escape, closing quote.
        [] ¬ GetNextChar[];
        token ¬ "'";
        IF (thisChar ¬ GetNextChar[]) = '\\ THEN
          token ¬ token.Concat[EscapeChar[]]
        ELSE
          token ¬ token.Concat[Rope.FromChar[thisChar]];
        tokenKind ¬ tokenCHAR;
        IF GetNextChar[] # '' THEN
          state ¬ Error
        ELSE {
          token ¬ token.Concat["'"];
          tokenValid ¬ TRUE;
          state ¬ Done
          }
        };
      String => {
        -- String constant: everything up to the next unescaped double quote.
        [] ¬ GetNextChar[];
        token ¬ "\"";
        WHILE PeekNextChar[] # '" DO
          thisChar ¬ GetNextChar[];
          IF thisChar = '\\ THEN
            token ¬ token.Concat[EscapeChar[]]
          ELSE
            token ¬ token.Concat[Rope.FromChar[thisChar]]
          ENDLOOP;
        tokenKind ¬ tokenROPE;
        IF GetNextChar[] # '" THEN
          state ¬ Error
        ELSE {
          token ¬ token.Concat["\""];
          tokenValid ¬ TRUE;
          state ¬ Done
          }
        };
      CharStringIdentifier => {
        -- An L may start a character constant, a string constant or an identifier.
        [] ¬ GetNextChar[];
        token ¬ "L";
        tokenKind ¬ tokenID;
        tokenValid ¬ TRUE;
        thisChar ¬ PeekNextChar[];
        SELECT thisChar FROM
          '' => {
            tokenValid ¬ FALSE;
            state ¬ Char
            };
          '" => {
            tokenValid ¬ FALSE;
            state ¬ String
            };
          ENDCASE => {
            tokenKind ¬ tokenID;
            token ¬ "L";
            WHILE IsAlphaNum[PeekNextChar[]] DO
              token ¬ token.Concat[Rope.FromChar[GetNextChar[]]]
              ENDLOOP;
            state ¬ Done;
            };
        };
      Identifier => {
        -- thisChar still holds the first character peeked in LeadingSpace.
        tokenKind ¬ tokenID;
        tokenValid ¬ TRUE;
        [] ¬ GetNextChar[];
        token ¬ Rope.FromChar[thisChar];
        WHILE IsAlphaNum[PeekNextChar[]] DO
          token ¬ token.Concat[Rope.FromChar[GetNextChar[]]]
          ENDLOOP;
        state ¬ Done
        };
      StartComment => {
        -- A slash is a comment opener, a divide-assign operator or a lone slash.
        tokenKind ¬ tokenSINGLE;
        token ¬ "/";
        tokenValid ¬ TRUE;
        [] ¬ GetNextChar[];
        IF (thisChar ¬ PeekNextChar[]) = '* THEN {
          tokenValid ¬ FALSE;
          [] ¬ GetNextChar[];
          state ¬ Comment;
          }
        ELSE IF thisChar = '= THEN {
          [] ¬ GetNextChar[];
          token ¬ "/=";
          tokenKind ¬ tokenDOUBLE;
          state ¬ Done
          }
        ELSE
          state ¬ Done
        };
      Comment => {
        -- Accumulate comment text up to the next asterisk.
        WHILE PeekNextChar[] # '* DO
          token ¬ token.Concat[Rope.FromChar[GetNextChar[]]]
          ENDLOOP;
        state ¬ EndComment;
        };
      EndComment => {
        -- The asterisk closes the comment only when a slash follows it.
        [] ¬ GetNextChar[];
        IF PeekNextChar[] = '/ THEN {
          [] ¬ GetNextChar[];
          tokenKind ¬ tokenCOMMENT;
          tokenValid ¬ TRUE;
          state ¬ Done;
          }
        ELSE {
          token ¬ token.Concat["*"];
          state ¬ Comment;
          };
        };
      Punctuation => {
        -- Single character operators, extended to two or three characters where C allows.
        thisChar ¬ GetNextChar[];
        token ¬ Rope.FromChar[thisChar];
        tokenKind ¬ tokenSINGLE;
        tokenValid ¬ TRUE;
        state ¬ Done;
        SELECT thisChar FROM
          '!, '*, '%, '/, '=, '^ => {
            IF PeekNextChar[] = '= THEN {
              token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ¬ tokenDOUBLE;
              };
            };
          '| => {
            IF PeekNextChar[] = '= OR PeekNextChar[] = '| THEN {
              token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ¬ tokenDOUBLE;
              }
            };
          '& => {
            IF PeekNextChar[] = '= OR PeekNextChar[] = '& THEN {
              token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ¬ tokenDOUBLE;
              };
            };
          '(, '), ',, '., ':, '; , '?, '[, '], '{, '}, '~ => tokenKind ¬ tokenSINGLE;
          '+ => {
            IF PeekNextChar[] = '= OR PeekNextChar[] = '+ THEN {
              token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ¬ tokenDOUBLE;
              };
            };
          '- => {
            IF PeekNextChar[] = '= OR PeekNextChar[] = '- OR PeekNextChar[] = '> THEN {
              token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ¬ tokenDOUBLE;
              };
            };
          '< =>
            IF PeekNextChar[] = '= THEN {
              token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ¬ tokenDOUBLE;
              }
            ELSE IF PeekNextChar[]= '< THEN {
              token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ¬ tokenDOUBLE;
              -- Shift-assign keeps the kind tokenDOUBLE although it has three characters.
              IF PeekNextChar[] = '= THEN {
                token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
                }
              };
          '> =>
            IF PeekNextChar[]='= THEN {
              token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ¬ tokenDOUBLE;
              }
            ELSE IF PeekNextChar[]= '> THEN {
              token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ¬ tokenDOUBLE;
              IF PeekNextChar[] = '= THEN {
                token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
                }
              };
          ENDCASE => {
            tokenValid ¬ FALSE;
            state ¬ Error
            }
        };
      FloatIntHexOct => {
        -- A leading zero may start an octal, hexadecimal or real constant.
        thisChar ¬ PeekNextChar[];
        state ¬
          IF thisChar = '0 THEN
            FloatHexOctal
          ELSE IF thisChar = '. THEN
            Fraction
          ELSE IF Is1To9[thisChar] THEN
            FloatInt
          ELSE
            Error;
        };
      FloatHexOctal => {
        token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
        tokenKind ¬ tokenOCTAL;
        tokenValid ¬ TRUE;
        thisChar ¬ PeekNextChar[];
        -- C accepts the hexadecimal prefix in either case.
        IF thisChar = 'x OR thisChar = 'X THEN {
          tokenValid ¬ FALSE;
          state ¬ Hex
          }
        ELSE IF thisChar = '8 OR thisChar = '9 THEN {
          tokenKind ¬ tokenREAL;
          state ¬ Float
          }
        ELSE IF IsOctalDigit[thisChar] THEN
          state ¬ FloatOctal
        ELSE IF thisChar = '. THEN {
          tokenKind ¬ tokenREAL;
          state ¬ Fraction
          }
        ELSE IF thisChar = 'E OR thisChar = 'e THEN {
          tokenValid ¬ FALSE;
          state ¬ ExponentSign
          }
        ELSE {
          tokenKind ¬ tokenOCTAL;
          state ¬ IntModUORL;
          }
        };
      FloatOctal => {
        token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
        thisChar ¬ PeekNextChar[];
        IF thisChar = '8 OR thisChar = '9 THEN {
          tokenKind ¬ tokenREAL;
          state ¬ Float
          }
        ELSE IF IsOctalDigit[thisChar] THEN
          state ¬ FloatOctal
        ELSE IF thisChar = '. THEN {
          tokenKind ¬ tokenREAL;
          state ¬ Fraction
          }
        ELSE IF thisChar = 'E OR thisChar = 'e THEN {
          tokenValid ¬ FALSE;
          state ¬ ExponentSign
          }
        ELSE {
          state ¬ IntModUORL;
          tokenKind ¬ tokenOCTAL
          };
        };
      Hex => {
        -- Consume the x of the prefix, then the hexadecimal digits.
        token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
        tokenKind ¬ tokenHEX;
        WHILE IsHexDigit[PeekNextChar[]] DO
          tokenValid ¬ TRUE;
          token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
          ENDLOOP;
        IF tokenValid THEN
          state ¬ IntModUORL
        ELSE {
          -- A prefix with no digits is malformed; report it here instead of handing an
          -- empty rope to the conversion below. The offending character is left unread.
          tokenKind ¬ tokenERROR;
          state ¬ Done
          };
        };
      FloatInt => {
        -- Stays in this state while decimal digits follow.
        tokenKind ¬ tokenDECIMAL;
        tokenValid ¬ TRUE;
        token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
        thisChar ¬ PeekNextChar[];
        IF thisChar = '. THEN
          state ¬ Fraction
        ELSE IF thisChar = 'e OR thisChar = 'E THEN {
          tokenValid ¬ FALSE;
          state ¬ ExponentSign
          }
        ELSE IF NOT IsDecimalDigit[thisChar] THEN {
          tokenKind ¬ tokenDECIMAL;
          state ¬ IntModUORL;
          };
        };
      IntModUORL => {
        -- Optional integer suffix: u then optional l, or l then optional u.
        thisChar ¬ PeekNextChar[];
        IF thisChar = 'u OR thisChar = 'U THEN {
          token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
          suffixLen ¬ suffixLen + 1;
          state ¬ IntModL;
          }
        ELSE IF thisChar = 'l OR thisChar = 'L THEN {
          token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
          suffixLen ¬ suffixLen + 1;
          state ¬ IntModU;
          }
        ELSE
          state ¬ Done;
        };
      IntModU => {
        thisChar ¬ PeekNextChar[];
        IF thisChar = 'u OR thisChar = 'U THEN {
          token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
          suffixLen ¬ suffixLen + 1;
          };
        state ¬ Done
        };
      IntModL => {
        thisChar ¬ PeekNextChar[];
        IF thisChar = 'l OR thisChar = 'L THEN {
          token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
          suffixLen ¬ suffixLen + 1;
          };
        state ¬ Done
        };
      Float => {
        -- Digits of a constant already known to be real; stays here while digits follow.
        token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
        thisChar ¬ PeekNextChar[];
        IF thisChar = '. THEN
          state ¬ Fraction
        ELSE IF thisChar = 'e OR thisChar = 'E THEN {
          tokenValid ¬ FALSE;
          state ¬ ExponentSign
          }
        ELSE IF NOT IsDecimalDigit[thisChar] THEN {
          tokenKind ¬ tokenREAL;
          state ¬ FloatModFOrL;
          };
        };
      FractionOrDot => {
        -- A dot is either the start of a fraction or a lone punctuation mark.
        token ¬ ".";
        [] ¬ GetNextChar[];
        tokenKind ¬ tokenSINGLE;
        tokenValid ¬ TRUE;
        thisChar ¬ PeekNextChar[];
        IF IsDecimalDigit[thisChar] THEN {
          tokenKind ¬ tokenREAL;
          state ¬ Fraction
          }
        ELSE {
          tokenKind ¬ tokenSINGLE;
          state ¬ Done
          }
        };
      Fraction => {
        -- Consumes the dot or a fraction digit; stays here while digits follow.
        token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
        tokenKind ¬ tokenREAL;
        thisChar ¬ PeekNextChar[];
        IF thisChar = 'e OR thisChar = 'E THEN {
          tokenValid ¬ FALSE;
          state ¬ ExponentSign
          }
        ELSE IF NOT IsDecimalDigit[thisChar] THEN {
          state ¬ FloatModFOrL;
          };
        };
      ExponentSign => {
        -- Consumes the exponent letter and an optional sign; a digit must follow.
        tokenKind ¬ tokenREAL;
        token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
        thisChar ¬ PeekNextChar[];
        IF thisChar = '+ OR thisChar = '- THEN
          token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
        IF IsDecimalDigit[PeekNextChar[]] THEN {
          tokenValid ¬ TRUE;
          state ¬ Exponent
          }
        ELSE
          state ¬ Error;
        };
      Exponent => {
        WHILE IsDecimalDigit[PeekNextChar[]] DO
          token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
          ENDLOOP;
        tokenKind ¬ tokenREAL;
        state ¬ FloatModFOrL;
        };
      FloatModFOrL => {
        -- Optional real suffix f or l in either case.
        thisChar ¬ PeekNextChar[];
        IF thisChar = 'f OR thisChar = 'F OR thisChar = 'l OR thisChar = 'L THEN {
          token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
          suffixLen ¬ suffixLen + 1;
          };
        state ¬ Done;
        };
      Error => {
        -- Consume one character so that the next call makes progress.
        [] ¬ GetNextChar[];
        tokenKind ¬ tokenERROR;
        state ¬ Done;
        };
      ENDCASE => state ¬ Error;
    END
    REPEAT
      GoodExit => {};
      BadExit => tokenKind ¬ tokenERROR
    ENDLOOP;
  -- Convert the numeric part only: the u, l and f suffixes are C notation and are
  -- presumably not accepted by the Convert routines (confirm against the Convert interface).
  digits ¬ IF suffixLen > 0 THEN token.Substr[0, token.Length[]-suffixLen] ELSE token;
  SELECT tokenKind FROM
    tokenDECIMAL =>
      tokenValue ¬ NEW[INT ¬ Convert.IntFromRope[digits,10]];
    tokenHEX => {
      -- Skip the two-character prefix before converting.
      tokenValue ¬ NEW[INT ¬ Convert.IntFromRope[digits.Substr[2,digits.Length[]-2],16]];
      tokenKind ¬ tokenDECIMAL
      };
    tokenOCTAL => {
      tokenValue ¬ NEW[INT ¬ Convert.IntFromRope[digits,8]];
      tokenKind ¬ tokenDECIMAL
      };
    tokenREAL =>
      tokenValue ¬ NEW[REAL ¬ Convert.RealFromRope[digits]];
    tokenCHAR =>
      -- Position 1 is the character between the quotes.
      tokenValue ¬ NEW[CHAR ¬ Rope.Fetch[token,1]];
    tokenROPE =>
      -- Drop the surrounding quotes.
      tokenValue ¬ NEW[Rope.ROPE ¬ token.Substr[1,token.Length[]-2]];
    ENDCASE => {};
  RETURN [tokenKind, token, charsSkipped, tokenValue]
  };
EscapeChar: PROC [] RETURNS [Rope.ROPE] ~ {
  -- Reads the remainder of a C escape sequence; the callers have already consumed the
  -- backslash. Returns a one-character rope holding the character the escape denotes, or
  -- NIL when the escape is a backslash followed by a line feed or is not recognised.
  -- Numeric escapes: x followed by one or two hexadecimal digits, or one to three octal
  -- digits. The first character after x is not checked before conversion.
  first,second,third:Rope.ROPE;
  number:Rope.ROPE;
  newChar:CHAR;
  newCharValid:BOOL ¬ TRUE;
  c:CHAR ¬ GetNextChar[];
  SELECT c FROM
    'n => newChar ¬ Ascii.LF;
    't => newChar ¬ Ascii.TAB;
    -- Vertical tab is octal 013; the Ascii names imported by this module do not include it.
    'v => newChar ¬ '\013;
    'b => newChar ¬ Ascii.BS;
    'r => newChar ¬ Ascii.CR;
    'f => newChar ¬ Ascii.FF;
    'a => newChar ¬ Ascii.BEL;
    '\\ => newChar ¬ '\\;
    '' => newChar ¬ '';
    '" => newChar ¬ '";
    '? => newChar ¬ '?;
    'x => {
      first ¬ Rope.FromChar[GetNextChar[]];
      IF IsHexDigit[PeekNextChar[]] THEN
        second ¬ Rope.FromChar[GetNextChar[]]
      ELSE
        second ¬ NIL;
      number ¬ Rope.Concat[first, second];
      newChar ¬ VAL[CARDINAL[Convert.CardFromRope[number,16]]]
      };
    -- Backslash followed by a line feed contributes no character.
    Ascii.LF => newCharValid ¬ FALSE;
    ENDCASE => {
      IF IsOctalDigit[c] THEN {
        first ¬ Rope.FromChar[c];
        IF IsOctalDigit[PeekNextChar[]] THEN {
          second ¬ Rope.FromChar[GetNextChar[]];
          IF IsOctalDigit[PeekNextChar[]] THEN
            third ¬ Rope.FromChar[GetNextChar[]]
          ELSE
            third ¬ NIL
          }
        ELSE
          second ¬ third ¬ NIL;
        number ¬ Rope.Cat[first,second,third];
        newChar ¬ VAL[CARDINAL[Convert.CardFromRope[number,8]]]
        }
      ELSE
        newCharValid ¬ FALSE;
      };
  IF newCharValid THEN
    RETURN [Rope.FromChar[newChar]]
  ELSE
    RETURN [NIL];
  };
Test: Commander.CommandProc ~ {
  -- Commander command for exercising the lexer. Takes the first token of the command line
  -- as a file name, opens that file, and prints one line per token to cmd.out until the
  -- lexer reports tokenEOF or IO.EndOfStream escapes from it. The source stream is closed
  -- on both of those paths.
  charsSkipped:INT;
  sourceStream:IO.STREAM;
  token:Rope.ROPE;
  tokenKind:IO.TokenKind ¬ tokenID;
  commandLineStream:IO.STREAM ¬ IO.RIS[cmd.commandLine];
  filename:PFSNames.PATH ¬ PFS.PathFromRope[IO.GetTokenRope[commandLineStream,IO.IDProc].token];
  IO.Close[commandLineStream];
  sourceStream ¬ PFS.StreamOpen[filename];
  GetCTokenInit[];
  WHILE tokenKind # tokenEOF DO
    [tokenKind,token,charsSkipped] ¬ GetCTokenRope[sourceStream, TRUE !
      IO.EndOfStream => GOTO EOS];
    SELECT tokenKind FROM
      tokenID =>
        IO.PutF1[cmd.out,"Identifier = /%g/\n",IO.rope[token]];
      tokenROPE =>
        IO.PutF1[cmd.out,"String constant= /%g/\n",IO.rope[token]];
      tokenCHAR =>
        IO.PutF1[cmd.out,"Character constant = /%g/\n",IO.rope[token]];
      tokenSINGLE =>
        IO.PutF1[cmd.out,"Single punctuation = /%g/\n",IO.rope[token]];
      tokenDOUBLE =>
        IO.PutF1[cmd.out,"Double punctuation = /%g/\n",IO.rope[token]];
      -- Disabled arm: the lexer never produces tokenTRIPLE, and IO.PutF is not in the
      -- USING list of this module. Retained for reference only.
      --   tokenTRIPLE =>
      --     IO.PutF[cmd.out,"Triple punctuation = /%g/\n",IO.rope[token]];
      tokenCOMMENT =>
        IO.PutF1[cmd.out,"Comment = /%g/\n",IO.rope[token]];
      tokenHEX =>
        IO.PutF1[cmd.out,"Hexidecimal constant = /%g/\n",IO.rope[token]];
      tokenDECIMAL =>
        IO.PutF1[cmd.out,"Integer constant = /%g/\n",IO.rope[token]];
      tokenREAL =>
        IO.PutF1[cmd.out,"Real constant = /%g/\n",IO.rope[token]];
      tokenOCTAL =>
        IO.PutF1[cmd.out,"Octal constant = /%g/\n",IO.rope[token]];
      tokenEOF =>
        IO.PutRope[cmd.out,"End of file\n"];
      tokenERROR =>
        IO.PutRope[cmd.out,"Misformed token\n"];
      ENDCASE =>
        IO.PutF1[cmd.out,"Bad token kind %g\n",IO.int[ORD[tokenKind]]];
    REPEAT
      -- Reached when IO.EndOfStream escapes from the lexer; control continues after the loop.
      EOS => IO.PutRope[cmd.out,"End of Stream\n"]
    ENDLOOP;
  -- Release the source file on both the normal and the end-of-stream path.
  IO.Close[sourceStream];
  };
-- Module start code: registers the Test procedure with Commander under the name "Test".
Commander.Register["Test",Test];
}...