-- LexerImpl.mesa
-- Copyright © 1992, 1993 by Xerox Corporation. All rights reserved.
-- Hopcroft, July 27, 1989 0:18:47 am PDT
-- Coolidge, July 12, 1990 3:50 pm PDT
-- Laurie Horton, November 8, 1991 4:03 pm PST
-- Willie-s, March 29, 1993 2:53 pm PST
DIRECTORY
Ascii USING [SP, CR, LF, FF, TAB, BS, BEL],
Commander USING[CommandProc, Register],
Convert USING [CardFromRope, IntFromRope, RealFromRope],
PFS USING [PathFromRope, StreamOpen],
PFSNames USING [PATH],
IO USING[Close, EndOfStream, GetChar, GetTokenRope, int, IDProc, PeekChar, PutF1, PutRope, RIS, rope, STREAM, TokenKind],
Lexer USING [],
Rope USING[Cat, Concat, Fetch, FromChar, Length, ROPE, Substr];
LexerImpl: CEDAR PROGRAM
IMPORTS Commander, Convert, IO, PFS, Rope EXPORTS Lexer ~ {
-- Local enumeration of token classifications.
-- NOTE(review): the procedures in this module declare their results as IO.TokenKind, not this type; confirm whether this local declaration is still needed.
TokenKind: TYPE = { tokenERROR, tokenID, tokenDECIMAL, tokenOCTAL, tokenHEX, tokenREAL, tokenROPE, tokenCHAR, tokenATOM, tokenSINGLE, tokenDOUBLE, tokenTRIPLE, tokenCOMMENT, tokenEOF, tokenNUMBER };
-- States of the scanner driven by GetCTokenRopeAndValue; scanning of one token starts in LeadingSpace and stops when the state becomes Done.
State: TYPE = { LeadingSpace, StartComment, Comment, EndComment, Char, String, CharStringIdentifier, FloatIntHexOct, Identifier, Punctuation, FloatHexOctal, FloatOctal, Hex, FloatInt, IntModUORL, IntModU, IntModL, Float, Fraction, FractionOrDot, ExponentSign, Exponent, FloatModFOrL, Done, Error };
-- Stream currently being scanned; assigned from the inStream argument on every call of GetCTokenRopeAndValue.
inputStream: IO.STREAM;
-- Flag for a one-character lookahead buffer; cleared by GetCTokenInit.
haveAChar: BOOL;
-- Scratch variable holding the character most recently examined by the scanner.
thisChar: CHAR;
-- Buffered lookahead character; meaningful only while haveAChar is TRUE.
currentChar: CHAR;
GetNextChar: PROC [] RETURNS [CHAR] ~ {
  -- Consumes and returns the next character of inputStream.
  -- Any signal raised by IO.GetChar (IO.EndOfStream at end of input) is left for the caller;
  -- GetCTokenRopeAndValue relies on that to detect the end of the stream.
  --
  -- The body previously contained a second, unreachable implementation after the RETURN
  -- (and no statement separator, so the procedure did not parse). It is preserved here for reference only:
  --   c:CHAR ← PeekNextChar[];
  --   haveAChar ← FALSE;
  --   RETURN [c]
  -- NOTE(review): the stream-based form is kept as the live code because the old lines
  -- still use the pre-port assignment glyph; confirm against the original Tioga source.
  RETURN [inputStream.GetChar[]]
  };
PeekNextChar: PROC [] RETURNS [CHAR] ~ {
  -- Returns the next character of inputStream without consuming it.
  -- Any signal raised by IO.PeekChar (IO.EndOfStream at end of input) is left for the caller.
  --
  -- The body previously contained a second, unreachable implementation after the RETURN
  -- (and no statement separator, so the procedure did not parse). It is preserved here for reference only:
  --   IF haveAChar THEN RETURN [currentChar]
  --   ELSE {
  --     currentChar ← inputStream.GetChar[];
  --     haveAChar ← TRUE;
  --     RETURN [currentChar]
  --     }
  -- NOTE(review): the stream-based form is kept as the live code because the old lines
  -- still use the pre-port assignment glyph; confirm against the original Tioga source.
  RETURN [inputStream.PeekChar[]]
  };
IsPunctuation: PROC [c: CHAR] RETURNS [BOOL] ~ {
  -- TRUE iff c is one of the 26 characters this lexer treats as punctuation.
  SELECT c FROM
    '(, '), '[, '], '{, '} => RETURN [TRUE];	-- bracketing characters
    '+, '-, '*, '/, '%, '^, '&, '|, '~, '! => RETURN [TRUE];	-- arithmetic, bitwise and logical operators
    '<, '>, '= => RETURN [TRUE];	-- relational and assignment characters
    ',, '., ':, ';, '? => RETURN [TRUE];	-- separators
    '', '" => RETURN [TRUE];	-- quote characters
    ENDCASE => RETURN [FALSE];
  };
IsDecimalDigit: PROC [c: CHAR] RETURNS [BOOL] = INLINE {
  -- TRUE iff c lies between '0 and '9 inclusive.
  RETURN [c >= '0 AND c <= '9]
  };
Is1To9: PROC [c: CHAR] RETURNS [BOOL] = INLINE {
  -- TRUE iff c is a non-zero decimal digit ('1 through '9).
  RETURN [NOT (c < '1 OR c > '9)]
  };
IsHexDigit: PROC [c: CHAR] RETURNS [BOOL] = INLINE {
  -- TRUE iff c is a hexadecimal digit: 0 through 9, or A through F in either case.
  RETURN [
    SELECT c FROM
      IN ['0 .. '9], IN ['A .. 'F], IN ['a .. 'f] => TRUE,
      ENDCASE => FALSE]
  };
IsOctalDigit: PROC [c: CHAR] RETURNS [BOOL] = INLINE {
  -- TRUE iff c is an octal digit ('0 through '7).
  RETURN ['0 <= c AND c <= '7]
  };
IsAlphaNum: PROC [c: CHAR] RETURNS [BOOL] = INLINE {
  -- TRUE iff c may appear after the first character of an identifier:
  -- a letter of either case, a decimal digit, or the '← character.
  -- NOTE(review): '← is presumably the underscore (older Cedar fonts draw it as a left arrow); confirm the encoding.
  RETURN [
    SELECT c FROM
      IN ['a .. 'z], IN ['A .. 'Z], IN ['0 .. '9], '← => TRUE,
      ENDCASE => FALSE]
  };
IsNonDigit: PROC [c: CHAR] RETURNS [BOOL] = INLINE {
  -- TRUE iff c may start an identifier: a letter of either case or the '← character.
  -- NOTE(review): '← is presumably the underscore (older Cedar fonts draw it as a left arrow); confirm the encoding.
  isLower: BOOL ~ c IN ['a .. 'z];
  isUpper: BOOL ~ c IN ['A .. 'Z];
  RETURN [isLower OR isUpper OR c = '←]
  };
IsWhite: PROC [c: CHAR] RETURNS [BOOL] = INLINE {
  -- TRUE iff c is one of the four characters skipped between tokens: space, tab, carriage return, line feed.
  SELECT c FROM
    Ascii.SP, Ascii.TAB, Ascii.CR, Ascii.LF => RETURN [TRUE];
    ENDCASE => RETURN [FALSE];
  };
GetCTokenInit: PUBLIC PROC [] ~ {
  -- Resets the lexer's lookahead flag so that no character is considered buffered.
  -- Takes no arguments and returns nothing; the only effect is on the module variable haveAChar.
  haveAChar ¬ FALSE
  };
GetCTokenRope: PUBLIC PROC [inStream: IO.STREAM, junk: BOOL]
  RETURNS [tokenKind: IO.TokenKind, token: Rope.ROPE, charsSkipped: INT] ~ {
  -- Scans one token from inStream by delegating to GetCTokenRopeAndValue and
  -- dropping the converted value, so callers get only the kind, the text and the skip count.
  -- Both arguments are forwarded unchanged.
  discardedValue: REF ANY;	-- receives the fourth result, which this entry point does not expose
  [tokenKind, token, charsSkipped, discardedValue] ¬ GetCTokenRopeAndValue[inStream, junk];
  RETURN	-- the named results assigned above are returned
  };
GetCTokenRopeAndValue: PUBLIC PROC [inStream: IO.STREAM, junk: BOOL]
  RETURNS [tokenKind: IO.TokenKind, token: Rope.ROPE, charsSkipped: INT, tokenValue: REF ANY] ~ {
  -- Scans one C-style token from inStream with a hand-written state machine.
  --
  -- Arguments:
  --   inStream	stream to read; stored in the module variable inputStream for GetNextChar and PeekNextChar.
  --   junk	not referenced by this procedure.
  -- Results:
  --   tokenKind	classification of the token. Hex and octal constants are reported as tokenDECIMAL once converted.
  --   token	the token text (for comments, the text starting at the leading slash).
  --   charsSkipped	always 0.
  --   tokenValue	REF to the converted value for numeric, character and string tokens; otherwise left unassigned.
  -- End of input:
  --   IO.EndOfStream raised while a usable token has been accumulated (tokenValid) ends the scan with that token;
  --   in particular, end of input while skipping leading white space yields tokenEOF.
  --   Otherwise the handler takes no action and the signal is left for the caller.
  --
  -- Fixes relative to the previous revision (each state below had no way to leave itself):
  --   StartComment	a slash followed by neither '* nor '= now finishes as the single-character token "/".
  --   IntModUORL	an integer with no u or l suffix now finishes instead of looping without consuming input.
  --   FloatIntHexOct	the state-selecting IF expression now has a final ELSE arm.
  --   ExponentSign	an exponent marker not followed by a digit now goes to Error.
  -- NOTE(review): integer and real suffix letters are left in token, which is then handed to Convert; confirm that Convert accepts them.
  state: State ¬ LeadingSpace;
  tokenValid: BOOL ¬ FALSE;	-- TRUE while the text scanned so far is acceptable as a complete token
  inputStream ¬ inStream;
  token ¬ NIL;
  WHILE state # Done DO
    BEGIN
    ENABLE
      IO.EndOfStream => {
        IF tokenValid THEN GOTO GoodExit
        };
    SELECT state FROM
      LeadingSpace => {
        -- While white space is skipped, end of input is a legitimate outcome (tokenEOF).
        tokenKind ¬ tokenEOF;
        tokenValid ¬ TRUE;
        WHILE IsWhite[PeekNextChar[]] DO
          [] ¬ GetNextChar[]
          ENDLOOP;
        tokenValid ¬ FALSE;
        -- Choose the next state from the first significant character, which is peeked, not consumed.
        state ¬
          SELECT thisChar ¬ PeekNextChar[] FROM
            '' => Char,
            '" => String,
            'L => CharStringIdentifier,
            '/ => StartComment,
            '. => FractionOrDot,
            ENDCASE =>
              SELECT TRUE FROM
                IsDecimalDigit[thisChar] => FloatIntHexOct,
                IsNonDigit[thisChar] => Identifier,
                IsPunctuation[thisChar] => Punctuation,
                ENDCASE => Error;
        };
      Char => {
        -- Character constant: opening quote, one character or escape, closing quote.
        [] ¬ GetNextChar[];
        token ¬ "'";
        IF (thisChar ¬ GetNextChar[]) = '\\
          THEN token ¬ token.Concat[EscapeChar[]]
          ELSE token ¬ token.Concat[Rope.FromChar[thisChar]];
        tokenKind ¬ tokenCHAR;
        IF GetNextChar[] # ''
          THEN state ¬ Error
          ELSE {
            token ¬ token.Concat["'"];
            tokenValid ¬ TRUE;
            state ¬ Done
            }
        };
      String => {
        -- String constant: accumulate up to the closing double quote, expanding escapes.
        [] ¬ GetNextChar[];
        token ¬ "\"";
        WHILE PeekNextChar[] # '" DO
          thisChar ¬ GetNextChar[];
          IF thisChar = '\\
            THEN token ¬ token.Concat[EscapeChar[]]
            ELSE token ¬ token.Concat[Rope.FromChar[thisChar]]
          ENDLOOP;
        tokenKind ¬ tokenROPE;
        IF GetNextChar[] # '"
          THEN state ¬ Error
          ELSE {
            token ¬ token.Concat["\""];
            tokenValid ¬ TRUE;
            state ¬ Done
            }
        };
      CharStringIdentifier => {
        -- A leading L is either the prefix of a character or string constant or the start of an identifier.
        [] ¬ GetNextChar[];
        token ¬ "L";
        tokenKind ¬ tokenID;
        tokenValid ¬ TRUE;
        thisChar ¬ PeekNextChar[];
        SELECT thisChar FROM
          '' => {
            tokenValid ¬ FALSE;
            state ¬ Char
            };
          '" => {
            tokenValid ¬ FALSE;
            state ¬ String
            };
          ENDCASE => {
            tokenKind ¬ tokenID;
            token ¬ "L";
            WHILE IsAlphaNum[PeekNextChar[]] DO
              token ¬ token.Concat[Rope.FromChar[GetNextChar[]]]
              ENDLOOP;
            state ¬ Done;
            };
        };
      Identifier => {
        tokenKind ¬ tokenID;
        tokenValid ¬ TRUE;
        [] ¬ GetNextChar[];	-- consume the character peeked in LeadingSpace (still held in thisChar)
        token ¬ Rope.FromChar[thisChar];
        WHILE IsAlphaNum[PeekNextChar[]] DO
          token ¬ token.Concat[Rope.FromChar[GetNextChar[]]]
          ENDLOOP;
        state ¬ Done
        };
      StartComment => {
        -- A slash starts a comment, the "/=" operator, or stands alone.
        tokenKind ¬ tokenSINGLE;
        token ¬ "/";
        tokenValid ¬ TRUE;
        [] ¬ GetNextChar[];
        IF (thisChar ¬ PeekNextChar[]) = '* THEN {
          tokenValid ¬ FALSE;
          [] ¬ GetNextChar[];
          state ¬ Comment;
          }
        ELSE IF thisChar = '= THEN {
          [] ¬ GetNextChar[];
          token ¬ "/=";
          tokenKind ¬ tokenDOUBLE;
          state ¬ Done
          }
        ELSE state ¬ Done	-- fix: a lone slash is complete; previously the state never changed
        };
      Comment => {
        -- Accumulate comment text up to the next asterisk.
        WHILE PeekNextChar[] # '* DO
          token ¬ token.Concat[Rope.FromChar[GetNextChar[]]]
          ENDLOOP;
        state ¬ EndComment;
        };
      EndComment => {
        -- The asterisk ends the comment only when a slash follows it.
        [] ¬ GetNextChar[];
        IF PeekNextChar[] = '/ THEN {
          [] ¬ GetNextChar[];
          tokenKind ¬ tokenCOMMENT;
          tokenValid ¬ TRUE;
          state ¬ Done;
          }
        ELSE {
          token ¬ token.Concat["*"];
          state ¬ Comment;
          };
        };
      Punctuation => {
        -- One punctuation character, extended to a longer operator when the following character fits.
        thisChar ¬ GetNextChar[];
        token ¬ Rope.FromChar[thisChar];
        tokenKind ¬ tokenSINGLE;
        tokenValid ¬ TRUE;
        state ¬ Done;
        SELECT thisChar FROM
          '!, '*, '%, '/, '=, '^ => {
            IF PeekNextChar[] = '= THEN {
              token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ¬ tokenDOUBLE;
              };
            };
          '| => {
            IF PeekNextChar[] = '= OR PeekNextChar[] = '| THEN {
              token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ¬ tokenDOUBLE;
              }
            };
          '& => {
            IF PeekNextChar[] = '= OR PeekNextChar[] = '& THEN {
              token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ¬ tokenDOUBLE;
              };
            };
          '(, '), ',, '., ':, '; , '?, '[, '], '{, '}, '~ => tokenKind ¬ tokenSINGLE;
          '+ => {
            IF PeekNextChar[] = '= OR PeekNextChar[] = '+ THEN {
              token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ¬ tokenDOUBLE;
              };
            };
          '- => {
            IF PeekNextChar[] = '= OR PeekNextChar[] = '- OR PeekNextChar[] = '> THEN {
              token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ¬ tokenDOUBLE;
              };
            };
          '< =>
            IF PeekNextChar[] = '= THEN {
              token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ¬ tokenDOUBLE;
              }
            ELSE IF PeekNextChar[]= '< THEN {
              token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ¬ tokenDOUBLE;
              -- a three-character shift-assign operator is still reported as tokenDOUBLE
              IF PeekNextChar[] = '= THEN {
                token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
                }
              };
          '> =>
            IF PeekNextChar[]='= THEN {
              token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ¬ tokenDOUBLE;
              }
            ELSE IF PeekNextChar[]= '> THEN {
              token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
              tokenKind ¬ tokenDOUBLE;
              -- a three-character shift-assign operator is still reported as tokenDOUBLE
              IF PeekNextChar[] = '= THEN {
                token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
                }
              };
          ENDCASE => {
            tokenValid ¬ FALSE;
            state ¬ Error
            }
        };
      FloatIntHexOct => {
        -- First character of a number (not yet consumed): a leading 0 may start an octal, hex or real constant.
        thisChar ¬ PeekNextChar[];
        state ¬
          IF thisChar = '0 THEN FloatHexOctal
          ELSE IF thisChar = '. THEN Fraction
          ELSE IF Is1To9[thisChar] THEN FloatInt
          ELSE Error	-- fix: the IF expression previously had no final ELSE arm
        };
      FloatHexOctal => {
        -- Consume the leading 0 and classify by the character that follows it.
        token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
        tokenKind ¬ tokenOCTAL;
        tokenValid ¬ TRUE;
        thisChar ¬ PeekNextChar[];
        IF thisChar = 'x THEN {
          tokenValid ¬ FALSE;
          state ¬ Hex
          }
        ELSE IF thisChar = '8 OR thisChar = '9 THEN {
          tokenKind ¬ tokenREAL;
          state ¬ Float
          }
        ELSE IF IsOctalDigit[thisChar] THEN state ¬ FloatOctal
        ELSE IF thisChar = '. THEN {
          tokenKind ¬ tokenREAL;
          state ¬ Fraction
          }
        ELSE IF thisChar = 'E OR thisChar = 'e THEN {
          tokenValid ¬ FALSE;
          state ¬ ExponentSign
          }
        ELSE {
          tokenKind ¬ tokenOCTAL;
          state ¬ IntModUORL;
          }
        };
      FloatOctal => {
        -- Consume one octal digit; remain here while further octal digits follow.
        token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
        thisChar ¬ PeekNextChar[];
        IF thisChar = '8 OR thisChar = '9 THEN {
          tokenKind ¬ tokenREAL;
          state ¬ Float
          }
        ELSE IF IsOctalDigit[thisChar] THEN state ¬ FloatOctal
        ELSE IF thisChar = '. THEN {
          tokenKind ¬ tokenREAL;
          state ¬ Fraction
          }
        ELSE IF thisChar = 'E OR thisChar = 'e THEN {
          tokenValid ¬ FALSE;
          state ¬ ExponentSign
          }
        ELSE {
          state ¬ IntModUORL;
          tokenKind ¬ tokenOCTAL
          };
        };
      Hex => {
        -- Consume the x and every hex digit after it; the token becomes valid once a digit has been seen.
        token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
        tokenKind ¬ tokenHEX;
        WHILE IsHexDigit[PeekNextChar[]] DO
          tokenValid ¬ TRUE;
          token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
          ENDLOOP;
        state ¬ IntModUORL;
        };
      FloatInt => {
        -- Consume one decimal digit; the state is left unchanged while further digits follow.
        tokenKind ¬ tokenDECIMAL;
        tokenValid ¬ TRUE;
        token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
        thisChar ¬ PeekNextChar[];
        IF thisChar = '. THEN state ¬ Fraction
        ELSE IF thisChar = 'e OR thisChar = 'E THEN {
          tokenValid ¬ FALSE;
          state ¬ ExponentSign
          }
        ELSE IF NOT IsDecimalDigit[thisChar] THEN {
          tokenKind ¬ tokenDECIMAL;
          state ¬ IntModUORL;
          };
        };
      IntModUORL => {
        -- Optional integer suffix: u then optionally l, or l then optionally u.
        thisChar ¬ PeekNextChar[];
        IF thisChar = 'u OR thisChar = 'U THEN {
          token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
          state ¬ IntModL;
          }
        ELSE IF thisChar = 'l OR thisChar = 'L THEN {
          token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
          state ¬ IntModU;
          }
        ELSE state ¬ Done	-- fix: no suffix present; previously this state repeated forever without consuming input
        };
      IntModU => {
        thisChar ¬ PeekNextChar[];
        IF thisChar = 'u OR thisChar = 'U THEN
          token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
        state ¬ Done
        };
      IntModL => {
        thisChar ¬ PeekNextChar[];
        IF thisChar = 'l OR thisChar = 'L THEN
          token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
        state ¬ Done
        };
      Float => {
        -- Digits of a constant that began with 0 and contains an 8 or 9, so it can only be a real.
        token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
        thisChar ¬ PeekNextChar[];
        IF thisChar = '. THEN state ¬ Fraction
        ELSE IF thisChar = 'e OR thisChar = 'E THEN {
          tokenValid ¬ FALSE;
          state ¬ ExponentSign
          }
        ELSE IF NOT IsDecimalDigit[thisChar] THEN {
          tokenKind ¬ tokenREAL;
          state ¬ FloatModFOrL;
          };
        };
      FractionOrDot => {
        -- A dot is the start of a real constant when a digit follows, otherwise punctuation.
        token ¬ ".";
        [] ¬ GetNextChar[];
        tokenKind ¬ tokenSINGLE;
        tokenValid ¬ TRUE;
        thisChar ¬ PeekNextChar[];
        IF IsDecimalDigit[thisChar] THEN {
          tokenKind ¬ tokenREAL;
          state ¬ Fraction
          }
        ELSE {
          tokenKind ¬ tokenSINGLE;
          state ¬ Done
          }
        };
      Fraction => {
        -- Consume the decimal point or one fraction digit; the state is left unchanged while digits follow.
        token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
        tokenKind ¬ tokenREAL;
        thisChar ¬ PeekNextChar[];
        IF thisChar = 'e OR thisChar = 'E THEN {
          tokenValid ¬ FALSE;
          state ¬ ExponentSign
          }
        ELSE IF NOT IsDecimalDigit[thisChar] THEN {
          state ¬ FloatModFOrL;
          };
        };
      ExponentSign => {
        -- Consume the exponent marker and an optional sign; at least one digit must follow.
        tokenKind ¬ tokenREAL;
        token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
        thisChar ¬ PeekNextChar[];
        IF thisChar = '+ OR thisChar = '- THEN
          token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
        IF IsDecimalDigit[PeekNextChar[]] THEN {
          tokenValid ¬ TRUE;
          state ¬ Exponent
          }
        ELSE state ¬ Error	-- fix: previously the state never changed and kept consuming characters
        };
      Exponent => {
        WHILE IsDecimalDigit[PeekNextChar[]] DO
          token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
          ENDLOOP;
        tokenKind ¬ tokenREAL;
        state ¬ FloatModFOrL;
        };
      FloatModFOrL => {
        -- Optional real-constant suffix f or l, in either case.
        thisChar ¬ PeekNextChar[];
        IF thisChar = 'f OR thisChar = 'F OR thisChar = 'l OR thisChar = 'L THEN
          token ¬ token.Concat[Rope.FromChar[GetNextChar[]]];
        state ¬ Done;
        };
      Error => {
        -- Consume one character and report a malformed token.
        [] ¬ GetNextChar[];
        tokenKind ¬ tokenERROR;
        state ¬ Done;
        };
      ENDCASE => state ¬ Error;
    END
    REPEAT
      GoodExit => {};
      BadExit => tokenKind ¬ tokenERROR
    ENDLOOP;
  -- Convert the token text to a value. Hex and octal constants are converted in their own radix and then reported as tokenDECIMAL.
  SELECT tokenKind FROM
    tokenDECIMAL =>
      tokenValue ¬ NEW[INT ¬ Convert.IntFromRope[token,10]];
    tokenHEX => {
      -- skip the two-character "0x" prefix
      tokenValue ¬ NEW[INT ¬ Convert.IntFromRope[token.Substr[2,token.Length[]-2],16]];
      tokenKind ¬ tokenDECIMAL
      };
    tokenOCTAL => {
      tokenValue ¬ NEW[INT ¬ Convert.IntFromRope[token,8]];
      tokenKind ¬ tokenDECIMAL
      };
    tokenREAL =>
      tokenValue ¬ NEW[REAL ¬ Convert.RealFromRope[token]];
    tokenCHAR =>
      -- position 1 is the character just after the opening quote
      tokenValue ¬ NEW[CHAR ¬ Rope.Fetch[token,1]];
    tokenROPE =>
      -- strip the surrounding double quotes
      tokenValue ¬ NEW[Rope.ROPE ¬ token.Substr[1,token.Length[]-2]];
    ENDCASE => {};
  RETURN [tokenKind, token, 0, tokenValue]
  };
EscapeChar: PROC [] RETURNS [Rope.ROPE] ~ {
  -- Translates one C escape sequence whose backslash has already been consumed by the caller.
  -- Reads the escape letter (and any following hex or octal digits) through GetNextChar and PeekNextChar.
  -- Returns a one-character rope holding the escaped character, or NIL when the sequence
  -- contributes no character (backslash followed by a line feed, or an unrecognized escape letter).
  --
  -- Fixes relative to the previous revision:
  --   the vertical-tab note was bare text inside the SELECT and \v produced '^ as a placeholder; \v now yields octal 013;
  --   the optional second hex digit was not separated from the following statement;
  --   the invalid-escape path now returns NIL explicitly instead of running off the end of the procedure.
  first, second, third: Rope.ROPE;
  number: Rope.ROPE;
  newChar: CHAR;
  newCharValid: BOOL ¬ TRUE;
  c: CHAR ¬ GetNextChar[];
  SELECT c FROM
    'n => newChar ¬ Ascii.LF;
    't => newChar ¬ Ascii.TAB;
    'v => newChar ¬ '\013;	-- vertical tab, written as an octal character literal
    'b => newChar ¬ Ascii.BS;
    'r => newChar ¬ Ascii.CR;
    'f => newChar ¬ Ascii.FF;
    'a => newChar ¬ Ascii.BEL;
    '\\ => newChar ¬ '\\;
    '' => newChar ¬ '';
    '" => newChar ¬ '";
    '? => newChar ¬ '?;
    'x => {
      -- one or two hex digits; the first is taken without validation, as before
      first ¬ Rope.FromChar[GetNextChar[]];
      IF IsHexDigit[PeekNextChar[]] THEN
        second ¬ Rope.FromChar[GetNextChar[]];
      number ¬ Rope.Concat[first, second];
      newChar ¬ VAL[CARDINAL[Convert.CardFromRope[number,16]]]
      };
    Ascii.LF => newCharValid ¬ FALSE;	-- backslash followed by a line feed yields no character
    ENDCASE => {
      IF IsOctalDigit[c] THEN {
        -- one to three octal digits, the first of which is c
        first ¬ Rope.FromChar[c];
        IF IsOctalDigit[PeekNextChar[]] THEN {
          second ¬ Rope.FromChar[GetNextChar[]];
          IF IsOctalDigit[PeekNextChar[]] THEN
            third ¬ Rope.FromChar[GetNextChar[]]
          }
        ELSE second ¬ third ¬ NIL;
        number ¬ Rope.Cat[first,second,third];
        newChar ¬ VAL[CARDINAL[Convert.CardFromRope[number,8]]]
        }
      ELSE newCharValid ¬ FALSE;
      };
  RETURN [IF newCharValid THEN Rope.FromChar[newChar] ELSE NIL]
  };
Test: Commander.CommandProc ~ {
  -- Commander command that tokenizes a file and prints one line per token to cmd.out.
  -- The first token of cmd.commandLine is taken as the file name. Tokens are read with
  -- GetCTokenRope until tokenEOF is returned or IO.EndOfStream is raised.
  --
  -- Fixes relative to the previous revision:
  --   the source stream is now closed on both ways out of the loop (it was never closed);
  --   the tokenTRIPLE arm called IO.PutF, which is not in this module's IO USING list; it now uses IO.PutF1 like its siblings.
  -- NOTE(review): nothing in this module assigns tokenTRIPLE; confirm that IO.TokenKind declares it, otherwise drop that arm.
  charsSkipped: INT;
  sourceStream: IO.STREAM;
  token: Rope.ROPE;
  tokenKind: IO.TokenKind ¬ tokenID;
  commandLineStream: IO.STREAM ¬ IO.RIS[cmd.commandLine];
  filename: PFSNames.PATH ¬ PFS.PathFromRope[IO.GetTokenRope[commandLineStream,IO.IDProc].token];
  IO.Close[commandLineStream];
  sourceStream ¬ PFS.StreamOpen[filename];
  GetCTokenInit[];
  WHILE tokenKind # tokenEOF DO
    [tokenKind,token,charsSkipped] ¬ GetCTokenRope[sourceStream, TRUE !
      IO.EndOfStream => GOTO EOS];
    SELECT tokenKind FROM
      tokenID =>
        IO.PutF1[cmd.out,"Identifier = /%g/\n",IO.rope[token]];
      tokenROPE =>
        IO.PutF1[cmd.out,"String constant= /%g/\n",IO.rope[token]];
      tokenCHAR =>
        IO.PutF1[cmd.out,"Character constant = /%g/\n",IO.rope[token]];
      tokenSINGLE =>
        IO.PutF1[cmd.out,"Single punctuation = /%g/\n",IO.rope[token]];
      tokenDOUBLE =>
        IO.PutF1[cmd.out,"Double punctuation = /%g/\n",IO.rope[token]];
      tokenTRIPLE =>
        IO.PutF1[cmd.out,"Triple punctuation = /%g/\n",IO.rope[token]];
      tokenCOMMENT =>
        IO.PutF1[cmd.out,"Comment = /%g/\n",IO.rope[token]];
      tokenHEX =>
        IO.PutF1[cmd.out,"Hexidecimal constant = /%g/\n",IO.rope[token]];
      tokenDECIMAL =>
        IO.PutF1[cmd.out,"Integer constant = /%g/\n",IO.rope[token]];
      tokenREAL =>
        IO.PutF1[cmd.out,"Real constant = /%g/\n",IO.rope[token]];
      tokenOCTAL =>
        IO.PutF1[cmd.out,"Octal constant = /%g/\n",IO.rope[token]];
      tokenEOF =>
        IO.PutRope[cmd.out,"End of file\n"];
      tokenERROR =>
        IO.PutRope[cmd.out,"Misformed token\n"];
      ENDCASE =>
        IO.PutF1[cmd.out,"Bad token kind %g\n",IO.int[ORD[tokenKind]]];
    REPEAT
      -- reached when the lexer lets IO.EndOfStream escape in the middle of a token
      EOS => IO.PutRope[cmd.out,"End of Stream\n"]
    ENDLOOP;
  -- Both the normal loop exit and the EOS exit continue here, so the stream is always released.
  IO.Close[sourceStream];
  };
-- Module start code: registers the Test procedure with the Commander under the command name "Test".
Commander.Register["Test",Test];
}...