-- LexerImpl.mesa
-- Hopcroft, July 27, 1989 0:18:47 am PDT
-- Coolidge, July 12, 1990 3:50 pm PDT
-- Laurie Horton, November 8, 1991 4:03 pm PST
DIRECTORY
Ascii USING [SP, CR, LF, FF, TAB, BS, BEL],
Commander USING[CommandProc, Register],
Convert USING [CardFromRope, IntFromRope, RealFromRope],
PFS USING [PathFromRope, StreamOpen],
PFSNames USING [PATH],
IO USING[Close, EndOfStream, GetChar, GetTokenRope, int, IDProc, PeekChar, PutF, RIS, rope, STREAM, TokenKind],
Lexer USING [],
Rope USING[Cat, Concat, Fetch, FromChar, Length, ROPE, Substr];
LexerImpl: CEDAR PROGRAM
IMPORTS Commander, Convert, IO, PFS, Rope EXPORTS Lexer ~ {
-- Token classifications named inside this module.
-- NOTE(review): the public procedures below are declared to return IO.TokenKind; confirm how this local type is meant to relate to it.
TokenKind: TYPE = { tokenERROR, tokenID, tokenDECIMAL, tokenOCTAL, tokenHEX, tokenREAL, tokenROPE, tokenCHAR, tokenATOM, tokenSINGLE, tokenDOUBLE, tokenTRIPLE, tokenCOMMENT, tokenEOF, tokenNUMBER };
-- States of the scanning automaton stepped by GetCTokenRopeAndValue; Done ends the scan loop.
State: TYPE = { LeadingSpace, StartComment, Comment, EndComment, Char, String, CharStringIdentifier, FloatIntHexOct, Identifier, Punctuation, FloatHexOctal, FloatOctal, Hex, FloatInt, IntModUORL, IntModU, IntModL, Float, Fraction, FractionOrDot, ExponentSign, Exponent, FloatModFOrL, Done, Error };
-- Stream being scanned; assigned from the inStream argument on every GetCTokenRopeAndValue call.
inputStream: IO.STREAM;
-- TRUE while currentChar holds a character that PeekNextChar fetched and GetNextChar has not yet consumed.
haveAChar: BOOL;
-- Scratch variable holding the character most recently examined by the scanner states.
thisChar: CHAR;
-- One-character lookahead buffer; meaningful only while haveAChar is TRUE.
currentChar: CHAR;
GetNextChar: PROC [] RETURNS [CHAR] ~ {
  -- Consumes and returns the next input character.
  -- The character is obtained through PeekNextChar, so a character already held in the
  -- one-character lookahead buffer is delivered first; clearing haveAChar marks it consumed.
  -- Any signal raised by the underlying stream read (for example IO.EndOfStream) propagates.
  -- Earlier unbuffered version, kept for reference only:
  --   RETURN[inputStream.GetChar[]]
  c:CHAR ← PeekNextChar[];
  haveAChar ← FALSE;
  RETURN [c]
  };
PeekNextChar: PROC [] RETURNS [CHAR] ~ {
  -- Returns the next input character without consuming it.
  -- If the lookahead buffer is empty, one character is read from inputStream into
  -- currentChar and haveAChar is set; repeated peeks then return the same character
  -- until GetNextChar clears haveAChar.
  -- Any signal raised by inputStream.GetChar (for example IO.EndOfStream) propagates.
  -- Earlier unbuffered version, kept for reference only:
  --   RETURN[inputStream.PeekChar[]]
  IF haveAChar THEN
    RETURN [currentChar]
  ELSE {
    currentChar ← inputStream.GetChar[];
    haveAChar ← TRUE;
    RETURN [currentChar]
    }
  };
IsPunctuation: PROC [c:CHAR] RETURNS [BOOL] ~ {
  -- TRUE exactly when c is one of the punctuation characters listed in the first arm.
  SELECT c FROM
    '!, '*, '%, '/, '^, '&, '(, '), ',, '., ':, '?, '[, '], '{, '}, '~, '+, '-, '<, '>, ';, '=, '', '", '| =>
      RETURN [TRUE];
    ENDCASE =>
      RETURN [FALSE];
  };
IsDecimalDigit: PROC [c:CHAR] RETURNS [BOOL]
= INLINE {
  -- TRUE when c lies in the closed range '0 through '9.
  RETURN [c >= '0 AND c <= '9]
  };
Is1To9: PROC [c:CHAR] RETURNS [BOOL]
= INLINE {
  -- TRUE when c lies in the closed range '1 through '9 (a decimal digit other than zero).
  RETURN [c >= '1 AND c <= '9]
  };
IsHexDigit: PROC [c:CHAR] RETURNS [BOOL]
= INLINE {
  -- TRUE when c is a decimal digit or a letter A through F in either case.
  SELECT c FROM
    IN ['0 .. '9], IN ['A .. 'F], IN ['a .. 'f] => RETURN [TRUE];
    ENDCASE => RETURN [FALSE];
  };
IsOctalDigit: PROC [c:CHAR] RETURNS [BOOL]
= INLINE {
  -- TRUE when c lies in the closed range '0 through '7.
  RETURN [c >= '0 AND c <= '7]
  };
IsAlphaNum: PROC [c:CHAR] RETURNS [BOOL]
= INLINE {
  -- TRUE when c is a decimal digit, a letter of either case, or the character '←.
  SELECT c FROM
    IN ['0 .. '9], IN ['a .. 'z], IN ['A .. 'Z], '← => RETURN [TRUE];
    ENDCASE => RETURN [FALSE];
  };
IsNonDigit: PROC [c:CHAR] RETURNS [BOOL]
= INLINE {
  -- TRUE when c is a letter of either case or the character '←; digits are excluded.
  RETURN [(c >= 'a AND c <= 'z) OR (c >= 'A AND c <= 'Z) OR c = '←]
  };
IsWhite: PROC [c:CHAR] RETURNS [BOOL]
= INLINE {
  -- TRUE when c is a space, carriage return, line feed or tab.
  SELECT c FROM
    Ascii.SP, Ascii.CR, Ascii.LF, Ascii.TAB => RETURN [TRUE];
    ENDCASE => RETURN [FALSE];
  };
GetCTokenInit: PUBLIC PROC [] RETURNS [] ~ {
  -- Resets the lexer's lookahead state: marks the one-character buffer as empty so that
  -- the next PeekNextChar reads a fresh character from the stream.
  haveAChar ← FALSE
  };
GetCTokenRope: PUBLIC PROC [inStream:IO.STREAM, junk:BOOL] RETURNS [tokenKind:IO.TokenKind,token:Rope.ROPE,charsSkipped:INT] ~ {
  -- Scans one token from inStream by delegating to GetCTokenRopeAndValue and
  -- passes back its first three results; the converted value is discarded.
  -- junk is forwarded unchanged.
  discardedValue:REF ANY;
  [tokenKind, token, charsSkipped, discardedValue] ← GetCTokenRopeAndValue[inStream, junk];
  RETURN
  };
GetCTokenRopeAndValue: PUBLIC PROC [inStream:IO.STREAM, junk:BOOL]
  RETURNS
    [tokenKind:IO.TokenKind,token:Rope.ROPE,charsSkipped:INT,
    tokenValue:REF ANY] ~ {
  -- Scans one C token from inStream using a state machine over State.
  -- Results:
  --   tokenKind     classification of the token; hex and octal literals are reported as
  --                 tokenDECIMAL after conversion, tokenEOF when only white space remained,
  --                 tokenERROR for a malformed token or a premature end of stream.
  --   token         text of the token (comments exclude the delimiters; character and string
  --                 literals keep their quotes, with escapes already decoded by EscapeChar).
  --   charsSkipped  number of white space characters consumed before the token.
  --   tokenValue    REF INT, REF REAL, REF CHAR or REF ROPE for literals, otherwise NIL.
  -- junk is not used by this procedure.
  -- IO.EndOfStream raised while scanning is handled here: the token gathered so far is
  -- returned if tokenValid is TRUE at that moment, otherwise tokenERROR is returned.
  -- Signals raised by the Convert routines are not handled here and propagate to the caller.
  state: State ← LeadingSpace;
  tokenValid: BOOL ← FALSE;
  suffixLen: INT ← 0;  -- count of trailing u/U/l/L/f/F suffix characters appended to token
  numeral: Rope.ROPE ← NIL;  -- token without its suffix, used only for value conversion
  inputStream ← inStream;
  token ← NIL;
  charsSkipped ← 0;
  WHILE state # Done DO
    BEGIN
    ENABLE IO.EndOfStream => {
      IF tokenValid THEN
        GOTO GoodExit
      ELSE
        GOTO BadExit
      };
    SELECT state FROM
      LeadingSpace => {
        -- End of stream while skipping white space is a clean tokenEOF.
        tokenKind ← tokenEOF;
        tokenValid ← TRUE;
        WHILE IsWhite[PeekNextChar[]] DO
          [] ← GetNextChar[];
          charsSkipped ← charsSkipped + 1
          ENDLOOP;
        tokenValid ← FALSE;
        -- Dispatch on the first significant character, which stays unconsumed.
        state ← SELECT thisChar ← PeekNextChar[] FROM
          '' => Char,
          '" => String,
          'L => CharStringIdentifier,
          '/ => StartComment,
          '. => FractionOrDot,
          ENDCASE =>
            SELECT TRUE FROM
              IsDecimalDigit[thisChar] => FloatIntHexOct,
              IsNonDigit[thisChar] => Identifier,
              IsPunctuation[thisChar] => Punctuation,
              ENDCASE => Error;
        };
      Char => {
        [] ← GetNextChar[];  -- opening quote
        token ← "'";
        IF (thisChar ← GetNextChar[]) = '\\ THEN
          token ← token.Concat[EscapeChar[]]
        ELSE
          token ← token.Concat[Rope.FromChar[thisChar]];
        tokenKind ← tokenCHAR;
        IF GetNextChar[] # '' THEN
          state ← Error
        ELSE {
          token ← token.Concat["'"];
          tokenValid ← TRUE;
          state ← Done
          }
        };
      String => {
        [] ← GetNextChar[];  -- opening quote
        token ← "\"";
        WHILE PeekNextChar[] # '" DO
          thisChar ← GetNextChar[];
          IF thisChar = '\\ THEN
            token ← token.Concat[EscapeChar[]]
          ELSE
            token ← token.Concat[Rope.FromChar[thisChar]]
          ENDLOOP;
        tokenKind ← tokenROPE;
        IF GetNextChar[] # '" THEN
          state ← Error
        ELSE {
          token ← token.Concat["\""];
          tokenValid ← TRUE;
          state ← Done
          }
        };
      CharStringIdentifier => {
        -- A leading L is either the prefix of a character or string literal or the
        -- first letter of an identifier.
        [] ← GetNextChar[];
        token ← "L";
        tokenKind ← tokenID;
        tokenValid ← TRUE;
        thisChar ← PeekNextChar[];
        SELECT thisChar FROM
          '' => {
            tokenValid ← FALSE;
            state ← Char
            };
          '" => {
            tokenValid ← FALSE;
            state ← String
            };
          ENDCASE => {
            tokenKind ← tokenID;
            token ← "L";
            WHILE IsAlphaNum[PeekNextChar[]] DO
              token ← token.Concat[Rope.FromChar[GetNextChar[]]]
              ENDLOOP;
            state ← Done;
            };
        };
      Identifier => {
        tokenKind ← tokenID;
        tokenValid ← TRUE;
        [] ← GetNextChar[];  -- thisChar already holds this character
        token ← Rope.FromChar[thisChar];
        WHILE IsAlphaNum[PeekNextChar[]] DO
          token ← token.Concat[Rope.FromChar[GetNextChar[]]]
          ENDLOOP;
        state ← Done
        };
      StartComment => {
        -- A slash alone is a single punctuation token until proven otherwise.
        tokenKind ← tokenSINGLE;
        token ← "/";
        tokenValid ← TRUE;
        [] ← GetNextChar[];
        IF (thisChar ← PeekNextChar[]) = '* THEN {
          tokenValid ← FALSE;
          [] ← GetNextChar[];
          state ← Comment;
          }
        ELSE IF thisChar = '= THEN {
          [] ← GetNextChar[];
          token ← "/=";
          tokenKind ← tokenDOUBLE;
          state ← Done
          }
        ELSE
          state ← Done
        };
      Comment => {
        WHILE PeekNextChar[] # '* DO
          token ← token.Concat[Rope.FromChar[GetNextChar[]]]
          ENDLOOP;
        state ← EndComment;
        };
      EndComment => {
        [] ← GetNextChar[];  -- the star that ended the Comment loop
        IF PeekNextChar[] = '/ THEN {
          [] ← GetNextChar[];
          tokenKind ← tokenCOMMENT;
          tokenValid ← TRUE;
          state ← Done;
          }
        ELSE {
          -- The star was ordinary comment text.
          token ← token.Concat["*"];
          state ← Comment;
          };
        };
      Punctuation => {
        -- NOTE(review): the three-character operators <<= and >>= are reported as
        -- tokenDOUBLE; tokenTRIPLE is never produced by this procedure.
        thisChar ← GetNextChar[];
        token ← Rope.FromChar[thisChar];
        tokenKind ← tokenSINGLE;
        tokenValid ← TRUE;
        state ← Done;
        SELECT thisChar FROM
          '!, '*, '%, '/, '=, '^ => {
            IF PeekNextChar[] = '= THEN {
              token ← token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ← tokenDOUBLE;
              };
            };
          '| => {
            IF PeekNextChar[] = '= OR PeekNextChar[] = '| THEN {
              token ← token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ← tokenDOUBLE;
              }
            };
          '& => {
            IF PeekNextChar[] = '= OR PeekNextChar[] = '& THEN {
              token ← token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ← tokenDOUBLE;
              };
            };
          '(, '), ',, '., ':, '; , '?, '[, '], '{, '}, '~ => tokenKind ← tokenSINGLE;
          '+ => {
            IF PeekNextChar[] = '= OR PeekNextChar[] = '+ THEN {
              token ← token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ← tokenDOUBLE;
              };
            };
          '- => {
            IF PeekNextChar[] = '= OR PeekNextChar[] = '- OR PeekNextChar[] = '> THEN {
              token ← token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ← tokenDOUBLE;
              };
            };
          '< =>
            IF PeekNextChar[] = '= THEN {
              token ← token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ← tokenDOUBLE;
              }
            ELSE IF PeekNextChar[]= '< THEN {
              token ← token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ← tokenDOUBLE;
              IF PeekNextChar[] = '= THEN {
                token ← token.Concat[Rope.FromChar[GetNextChar[]]];
                }
              };
          '> =>
            IF PeekNextChar[]='= THEN {
              token ← token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ← tokenDOUBLE;
              }
            ELSE IF PeekNextChar[]= '> THEN {
              token ← token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ← tokenDOUBLE;
              IF PeekNextChar[] = '= THEN {
                token ← token.Concat[Rope.FromChar[GetNextChar[]]];
                }
              };
          ENDCASE => {
            tokenValid ← FALSE;
            state ← Error
            }
        };
      FloatIntHexOct => {
        thisChar ← PeekNextChar[];
        state ←
          IF thisChar = '0 THEN
            FloatHexOctal
          ELSE IF thisChar = '. THEN
            Fraction
          ELSE IF Is1To9[thisChar] THEN
            FloatInt
          ELSE
            Error;
        };
      FloatHexOctal => {
        token ← token.Concat[Rope.FromChar[GetNextChar[]]];  -- the leading zero
        tokenKind ← tokenOCTAL;
        tokenValid ← TRUE;
        thisChar ← PeekNextChar[];
        IF thisChar = 'x THEN {
          tokenValid ← FALSE;
          state ← Hex
          }
        ELSE IF thisChar = '8 OR thisChar = '9 THEN {
          tokenKind ← tokenREAL;
          state ← Float
          }
        ELSE IF IsOctalDigit[thisChar] THEN
          state ← FloatOctal
        ELSE IF thisChar = '. THEN {
          tokenKind ← tokenREAL;
          state ← Fraction
          }
        ELSE IF thisChar = 'E OR thisChar = 'e THEN {
          tokenValid ← FALSE;
          state ← ExponentSign
          }
        ELSE {
          tokenKind ← tokenOCTAL;
          state ← IntModUORL;
          }
        };
      FloatOctal => {
        token ← token.Concat[Rope.FromChar[GetNextChar[]]];
        thisChar ← PeekNextChar[];
        IF thisChar = '8 OR thisChar = '9 THEN {
          tokenKind ← tokenREAL;
          state ← Float
          }
        ELSE IF IsOctalDigit[thisChar] THEN
          state ← FloatOctal
        ELSE IF thisChar = '. THEN {
          tokenKind ← tokenREAL;
          state ← Fraction
          }
        ELSE IF thisChar = 'E OR thisChar = 'e THEN {
          tokenValid ← FALSE;
          state ← ExponentSign
          }
        ELSE {
          state ← IntModUORL;
          tokenKind ← tokenOCTAL
          };
        };
      Hex => {
        token ← token.Concat[Rope.FromChar[GetNextChar[]]];  -- the x
        tokenKind ← tokenHEX;
        WHILE IsHexDigit[PeekNextChar[]] DO
          tokenValid ← TRUE;
          token ← token.Concat[Rope.FromChar[GetNextChar[]]];
          ENDLOOP;
        -- tokenValid was cleared on entry, so it is TRUE here only if at least one hex
        -- digit followed the prefix; a bare prefix is a malformed token.
        state ← IF tokenValid THEN IntModUORL ELSE Error;
        };
      FloatInt => {
        tokenKind ← tokenDECIMAL;
        tokenValid ← TRUE;
        token ← token.Concat[Rope.FromChar[GetNextChar[]]];
        thisChar ← PeekNextChar[];
        IF thisChar = '. THEN
          state ← Fraction
        ELSE IF thisChar = 'e OR thisChar = 'E THEN {
          tokenValid ← FALSE;
          state ← ExponentSign
          }
        ELSE IF NOT IsDecimalDigit[thisChar] THEN {
          tokenKind ← tokenDECIMAL;
          state ← IntModUORL;
          };
        };
      IntModUORL => {
        thisChar ← PeekNextChar[];
        IF thisChar = 'u OR thisChar = 'U THEN {
          token ← token.Concat[Rope.FromChar[GetNextChar[]]];
          suffixLen ← suffixLen + 1;
          state ← IntModL;
          }
        ELSE IF thisChar = 'l OR thisChar = 'L THEN {
          token ← token.Concat[Rope.FromChar[GetNextChar[]]];
          suffixLen ← suffixLen + 1;
          state ← IntModU;
          }
        ELSE
          state ← Done;
        };
      IntModU => {
        thisChar ← PeekNextChar[];
        IF thisChar = 'u OR thisChar = 'U THEN {
          token ← token.Concat[Rope.FromChar[GetNextChar[]]];
          suffixLen ← suffixLen + 1
          };
        state ← Done
        };
      IntModL => {
        thisChar ← PeekNextChar[];
        IF thisChar = 'l OR thisChar = 'L THEN {
          token ← token.Concat[Rope.FromChar[GetNextChar[]]];
          suffixLen ← suffixLen + 1
          };
        state ← Done
        };
      Float => {
        token ← token.Concat[Rope.FromChar[GetNextChar[]]];
        thisChar ← PeekNextChar[];
        IF thisChar = '. THEN
          state ← Fraction
        ELSE IF thisChar = 'e OR thisChar = 'E THEN {
          tokenValid ← FALSE;
          state ← ExponentSign
          }
        ELSE IF NOT IsDecimalDigit[thisChar] THEN {
          tokenKind ← tokenREAL;
          state ← FloatModFOrL;
          };
        };
      FractionOrDot => {
        token ← ".";
        [] ← GetNextChar[];
        tokenKind ← tokenSINGLE;
        tokenValid ← TRUE;
        thisChar ← PeekNextChar[];
        IF IsDecimalDigit[thisChar] THEN {
          tokenKind ← tokenREAL;
          state ← Fraction
          }
        ELSE {
          tokenKind ← tokenSINGLE;
          state ← Done
          }
        };
      Fraction => {
        token ← token.Concat[Rope.FromChar[GetNextChar[]]];
        tokenKind ← tokenREAL;
        thisChar ← PeekNextChar[];
        IF thisChar = 'e OR thisChar = 'E THEN {
          tokenValid ← FALSE;
          state ← ExponentSign
          }
        ELSE IF NOT IsDecimalDigit[thisChar] THEN {
          state ← FloatModFOrL;
          };
        };
      ExponentSign => {
        tokenKind ← tokenREAL;
        token ← token.Concat[Rope.FromChar[GetNextChar[]]];  -- the e or E
        thisChar ← PeekNextChar[];
        IF thisChar = '+ OR thisChar = '- THEN
          token ← token.Concat[Rope.FromChar[GetNextChar[]]];
        IF IsDecimalDigit[PeekNextChar[]] THEN {
          tokenValid ← TRUE;
          state ← Exponent
          }
        ELSE
          state ← Error;
        };
      Exponent => {
        WHILE IsDecimalDigit[PeekNextChar[]] DO
          token ← token.Concat[Rope.FromChar[GetNextChar[]]];
          ENDLOOP;
        tokenKind ← tokenREAL;
        state ← FloatModFOrL;
        };
      FloatModFOrL => {
        thisChar ← PeekNextChar[];
        IF thisChar = 'f OR thisChar = 'F OR thisChar = 'l OR thisChar = 'L THEN {
          token ← token.Concat[Rope.FromChar[GetNextChar[]]];
          suffixLen ← suffixLen + 1
          };
        state ← Done;
        };
      Error => {
        [] ← GetNextChar[];  -- discard the offending character
        tokenKind ← tokenERROR;
        state ← Done;
        };
      ENDCASE => state ← Error;
    END
    REPEAT
      GoodExit => {};
      BadExit => tokenKind ← tokenERROR
    ENDLOOP;
  -- Convert literals to values.  The type suffix is part of the token text but must not
  -- be handed to the Convert routines, so it is stripped first.
  numeral ← token.Substr[0, token.Length[]-suffixLen];
  SELECT tokenKind FROM
    tokenDECIMAL =>
      tokenValue ← NEW[INT ← Convert.IntFromRope[numeral,10]];
    tokenHEX => {
      -- Skip the two-character 0x prefix.
      tokenValue ← NEW[INT ← Convert.IntFromRope[numeral.Substr[2,numeral.Length[]-2],16]];
      tokenKind ← tokenDECIMAL
      };
    tokenOCTAL => {
      tokenValue ← NEW[INT ← Convert.IntFromRope[numeral,8]];
      tokenKind ← tokenDECIMAL
      };
    tokenREAL =>
      tokenValue ← NEW[REAL ← Convert.RealFromRope[numeral]];
    tokenCHAR =>
      tokenValue ← NEW[CHAR ← Rope.Fetch[token,1]];
    tokenROPE =>
      tokenValue ← NEW[Rope.ROPE ← token.Substr[1,token.Length[]-2]];
    ENDCASE => {};
  RETURN [tokenKind, token, charsSkipped, tokenValue]
  };
EscapeChar: PROC [] RETURNS [Rope.ROPE] ~ {
  -- Decodes the rest of a C escape sequence; the callers in this module have already
  -- consumed the backslash.  Returns a one-character rope holding the denoted character,
  -- or NIL when the sequence is not recognized (this includes backslash followed by LF
  -- and \x not followed by a hex digit).
  -- Numeric forms: \x takes one or two hex digits, an octal escape takes one to three
  -- octal digits.
  first,second,third:Rope.ROPE;
  number:Rope.ROPE;
  newChar:CHAR;
  newCharValid:BOOL ← TRUE;
  c:CHAR ← GetNextChar[];
  SELECT c FROM
    'n => newChar ← Ascii.LF;
    't => newChar ← Ascii.TAB;
    'v => newChar ← VAL[11];  -- vertical tab, character code 11
    'b => newChar ← Ascii.BS;
    'r => newChar ← Ascii.CR;
    'f => newChar ← Ascii.FF;
    'a => newChar ← Ascii.BEL;
    '\\ => newChar ← '\\;
    '' => newChar ← '';
    '" => newChar ← '";
    '? => newChar ← '?;
    'x => {
      -- Only consume characters that are really hex digits, so a malformed escape
      -- yields NIL instead of a conversion failure on a non-numeric rope.
      IF IsHexDigit[PeekNextChar[]] THEN {
        first ← Rope.FromChar[GetNextChar[]];
        IF IsHexDigit[PeekNextChar[]] THEN
          second ← Rope.FromChar[GetNextChar[]]
        ELSE
          second ← NIL;
        number ← Rope.Cat[first,second];
        newChar ← VAL[CARDINAL[Convert.CardFromRope[number,16]]]
        }
      ELSE
        newCharValid ← FALSE
      };
    Ascii.LF => newCharValid ← FALSE;
    ENDCASE => {
      IF IsOctalDigit[c] THEN {
        first ← Rope.FromChar[c];
        IF IsOctalDigit[PeekNextChar[]] THEN {
          second ← Rope.FromChar[GetNextChar[]];
          IF IsOctalDigit[PeekNextChar[]] THEN
            third ← Rope.FromChar[GetNextChar[]]
          ELSE
            third ← NIL
          }
        ELSE
          second ← third ← NIL;
        number ← Rope.Cat[first,second,third];
        newChar ← VAL[CARDINAL[Convert.CardFromRope[number,8]]]
        }
      ELSE
        newCharValid ← FALSE;
      };
  IF newCharValid THEN
    RETURN [Rope.FromChar[newChar]]
  ELSE
    RETURN [NIL];
  };
Test: Commander.CommandProc ~ {
  -- Commander test driver: takes the first token of the command line as a file name,
  -- opens that file, and prints one line per token scanned by GetCTokenRope to cmd.out
  -- until tokenEOF is returned or IO.EndOfStream escapes from the scanner.
  -- The source stream is closed before returning on both of those paths.
  charsSkipped:INT;
  sourceStream:IO.STREAM;
  token:Rope.ROPE;
  tokenKind:IO.TokenKind ← tokenID;
  commandLineStream:IO.STREAM ← IO.RIS[cmd.commandLine];
  filename:PFSNames.PATH ← PFS.PathFromRope[IO.GetTokenRope[commandLineStream,IO.IDProc].token];
  IO.Close[commandLineStream];
  sourceStream ← PFS.StreamOpen[filename];
  GetCTokenInit[];
  {
    -- Inner block so that the EOS exit rejoins the common path that closes the stream.
    WHILE tokenKind # tokenEOF DO
      [tokenKind,token,charsSkipped] ← GetCTokenRope[sourceStream, TRUE !
        IO.EndOfStream => GOTO EOS];
      SELECT tokenKind FROM
        tokenID =>
          IO.PutF[cmd.out,"Identifier = /%g/\n",IO.rope[token]];
        tokenROPE =>
          IO.PutF[cmd.out,"String constant= /%g/\n",IO.rope[token]];
        tokenCHAR =>
          IO.PutF[cmd.out,"Character constant = /%g/\n",IO.rope[token]];
        tokenSINGLE =>
          IO.PutF[cmd.out,"Single punctuation = /%g/\n",IO.rope[token]];
        tokenDOUBLE =>
          IO.PutF[cmd.out,"Double punctuation = /%g/\n",IO.rope[token]];
        tokenTRIPLE =>
          IO.PutF[cmd.out,"Triple punctuation = /%g/\n",IO.rope[token]];
        tokenCOMMENT =>
          IO.PutF[cmd.out,"Comment = /%g/\n",IO.rope[token]];
        tokenHEX =>
          IO.PutF[cmd.out,"Hexidecimal constant = /%g/\n",IO.rope[token]];
        tokenDECIMAL =>
          IO.PutF[cmd.out,"Integer constant = /%g/\n",IO.rope[token]];
        tokenREAL =>
          IO.PutF[cmd.out,"Real constant = /%g/\n",IO.rope[token]];
        tokenOCTAL =>
          IO.PutF[cmd.out,"Octal constant = /%g/\n",IO.rope[token]];
        tokenEOF =>
          IO.PutF[cmd.out,"End of file\n"];
        tokenERROR =>
          IO.PutF[cmd.out,"Misformed token\n"];
        ENDCASE =>
          IO.PutF[cmd.out,"Bad token kind %g\n",IO.int[ORD[tokenKind]]];
      ENDLOOP;
    EXITS
      EOS => IO.PutF[cmd.out,"End of Stream\n"];
    };
  IO.Close[sourceStream];
  };
-- Module start code: passes the Test procedure to Commander.Register under the name "Test".
Commander.Register["Test",Test];
}...