-- LexerImpl: a hand-written state-machine tokenizer for C-style source text.
-- It reads characters from an IO.STREAM and returns one token per call, together
-- with an IO.TokenKind classification and (for literals) a converted value.
-- Exports the Lexer interface; also registers a "Test" command that dumps the
-- tokens of a file.

DIRECTORY
  Ascii USING [SP, CR, LF, FF, TAB, BS, BEL],
  Commander USING [CommandProc, Register],
  Convert USING [CardFromRope, IntFromRope, RealFromRope],
  PFS USING [PathFromRope, StreamOpen],
  PFSNames USING [PATH],
  IO USING [Close, EndOfStream, GetChar, GetTokenRope, int, IDProc, PeekChar, PutF1, PutRope, RIS, rope, STREAM, TokenKind],
  Lexer USING [],
  Rope USING [Cat, Concat, Fetch, FromChar, Length, ROPE, Substr];

LexerImpl: CEDAR PROGRAM
  IMPORTS Commander, Convert, IO, PFS, Rope
  EXPORTS Lexer
~ {

  << TokenKind: TYPE = { tokenERROR, tokenID, tokenDECIMAL, tokenOCTAL, tokenHEX, tokenREAL, tokenROPE, tokenCHAR, tokenATOM, tokenSINGLE, tokenDOUBLE, tokenTRIPLE, tokenCOMMENT, tokenEOF, tokenNUMBER };>>

  -- States of the scanner automaton driven by GetCTokenRopeAndValue.
  State: TYPE = {
    LeadingSpace, StartComment, Comment, EndComment, Char, String,
    CharStringIdentifier, FloatIntHexOct, Identifier, Punctuation,
    FloatHexOctal, FloatOctal, Hex, FloatInt, IntModUORL, IntModU, IntModL,
    Float, Fraction, FractionOrDot, ExponentSign, Exponent, FloatModFOrL,
    Done, Error
    };

  -- Module-level scanner state. inputStream is (re)assigned on every call of
  -- GetCTokenRopeAndValue; thisChar holds the most recently examined character.
  -- NOTE(review): haveAChar is only ever reset by GetCTokenInit and never read;
  -- it looks like a leftover of an earlier one-character pushback scheme.
  inputStream: IO.STREAM;
  haveAChar: BOOL;
  thisChar: CHAR;

  -- Consumes and returns the next character of inputStream.
  -- Lets IO.EndOfStream propagate to the caller.
  GetNextChar: PROC [] RETURNS [CHAR] ~ {
    RETURN[inputStream.GetChar[]]
    };

  -- Returns the next character of inputStream without consuming it.
  -- Lets IO.EndOfStream propagate to the caller.
  PeekNextChar: PROC [] RETURNS [CHAR] ~ {
    RETURN[inputStream.PeekChar[]]
    };

  -- TRUE for every character that can start a punctuation token.
  IsPunctuation: PROC [c:CHAR] RETURNS [BOOL] ~ {
    RETURN [SELECT c FROM
      '!, '*, '%, '/, '^, '&, '(, '), ',, '., ':, '?, '[, '], '{, '}, '~,
      '+, '-, '<, '>, ';, '=, '', '", '| => TRUE
      ENDCASE => FALSE]
    };

  IsDecimalDigit: PROC [c:CHAR] RETURNS [BOOL] = INLINE {
    RETURN [c IN ['0 .. '9]]
    };

  Is1To9: PROC [c:CHAR] RETURNS [BOOL] = INLINE {
    RETURN [c IN ['1 .. '9]]
    };

  IsHexDigit: PROC [c:CHAR] RETURNS [BOOL] = INLINE {
    RETURN [c IN ['0 .. '9] OR c IN ['A .. 'F] OR c IN ['a .. 'f]]
    };

  IsOctalDigit: PROC [c:CHAR] RETURNS [BOOL] = INLINE {
    RETURN [c IN ['0 .. '7]]
    };

  -- Letters, digits and underscore: the characters that may continue an identifier.
  IsAlphaNum: PROC [c:CHAR] RETURNS [BOOL] = INLINE {
    RETURN [c IN ['0 .. '9] OR c IN ['a .. 'z] OR c IN ['A .. 'Z] OR c='_]
    };

  -- Letters and underscore: the characters that may start an identifier.
  IsNonDigit: PROC [c:CHAR] RETURNS [BOOL] = INLINE {
    RETURN [c IN ['a .. 'z] OR c IN ['A .. 'Z] OR c='_]
    };

  -- White space between tokens. Form feed is included so that a page break in
  -- the source is skipped instead of being reported as an error token.
  IsWhite: PROC [c:CHAR] RETURNS [BOOL] = INLINE {
    RETURN [c = Ascii.SP OR c = Ascii.CR OR c = Ascii.LF OR c = Ascii.TAB OR c = Ascii.FF]
    };

  -- Resets the module-level scanner flag; call before scanning a new stream.
  GetCTokenInit: PUBLIC PROC [] RETURNS [] ~ {
    haveAChar ¬ FALSE;
    };

  -- Same as GetCTokenRopeAndValue, but discards the converted token value.
  GetCTokenRope: PUBLIC PROC [inStream:IO.STREAM, junk:BOOL]
    RETURNS [tokenKind:IO.TokenKind, token:Rope.ROPE, charsSkipped:INT] ~ {
    tokenValue:REF ANY;
    [tokenKind, token, charsSkipped, tokenValue] ¬ GetCTokenRopeAndValue[inStream, junk];
    RETURN [tokenKind, token, charsSkipped]
    };

  -- Scans one token from inStream.
  --   inStream: the stream to read; it becomes the module's current inputStream.
  --   junk: not used by this procedure.
  -- Returns:
  --   tokenKind: classification of the token. Hex and octal literals are reported
  --     as tokenDECIMAL once their value has been converted; tokenEOF is returned
  --     when only white space remains; tokenERROR marks a malformed token.
  --   token: the token text. For comments this is the text accumulated between
  --     the comment delimiters, prefixed by the slash that opened it.
  --   charsSkipped: number of white-space characters skipped before the token.
  --   tokenValue: REF INT for integer literals, REF REAL for real literals,
  --     REF CHAR for character literals, REF ROPE (without the quotes) for string
  --     literals, NIL otherwise.
  -- IO.EndOfStream is handled here: if the input ends at a point where the text
  -- read so far forms a complete token that token is returned, otherwise tokenERROR.
  GetCTokenRopeAndValue: PUBLIC PROC [inStream:IO.STREAM, junk:BOOL]
    RETURNS [tokenKind:IO.TokenKind, token:Rope.ROPE, charsSkipped:INT, tokenValue:REF ANY] ~ {
    state: State ¬ LeadingSpace;
    tokenValid: BOOL ¬ FALSE;	-- TRUE while the text read so far is a complete token
    suffixLen: INT ¬ 0;	-- number of trailing u/l/f suffix characters in token
    digits: Rope.ROPE;	-- token with the numeric suffix removed

    -- Consumes the next input character and appends it to token.
    AppendNext: PROC ~ {
      token ¬ token.Concat[Rope.FromChar[GetNextChar[]]]
      };

    inputStream ¬ inStream;
    token ¬ NIL;
    charsSkipped ¬ 0;

    WHILE state # Done DO
      BEGIN
      ENABLE IO.EndOfStream => {
        IF tokenValid THEN GOTO GoodExit ELSE GOTO BadExit
        };
      SELECT state FROM

        LeadingSpace => {
          tokenKind ¬ tokenEOF;
          tokenValid ¬ TRUE;	-- running out of input while skipping blanks is a clean EOF
          WHILE IsWhite[PeekNextChar[]] DO
            [] ¬ GetNextChar[];
            charsSkipped ¬ charsSkipped + 1;
            ENDLOOP;
          tokenValid ¬ FALSE;
          -- Dispatch on the first significant character (peeked, not consumed).
          state ¬ SELECT thisChar ¬ PeekNextChar[] FROM
            '' => Char,
            '" => String,
            'L => CharStringIdentifier,
            '/ => StartComment,
            '. => FractionOrDot,
            ENDCASE => SELECT TRUE FROM
              IsDecimalDigit[thisChar] => FloatIntHexOct,
              IsNonDigit[thisChar] => Identifier,
              IsPunctuation[thisChar] => Punctuation,
              ENDCASE => Error;
          };

        Char => {
          [] ¬ GetNextChar[];	-- opening quote
          token ¬ "'";
          IF (thisChar ¬ GetNextChar[]) = '\\
            THEN token ¬ token.Concat[EscapeChar[]]
            ELSE token ¬ token.Concat[Rope.FromChar[thisChar]];
          tokenKind ¬ tokenCHAR;
          IF GetNextChar[] # ''
            THEN state ¬ Error
            ELSE { token ¬ token.Concat["'"]; tokenValid ¬ TRUE; state ¬ Done }
          };

        String => {
          [] ¬ GetNextChar[];	-- opening quote
          token ¬ "\"";
          WHILE PeekNextChar[] # '" DO
            thisChar ¬ GetNextChar[];
            IF thisChar = '\\
              THEN token ¬ token.Concat[EscapeChar[]]
              ELSE token ¬ token.Concat[Rope.FromChar[thisChar]]
            ENDLOOP;
          tokenKind ¬ tokenROPE;
          IF GetNextChar[] # '"
            THEN state ¬ Error
            ELSE { token ¬ token.Concat["\""]; tokenValid ¬ TRUE; state ¬ Done }
          };

        CharStringIdentifier => {
          -- A leading L is either the prefix of a character or string literal
          -- or the first letter of an ordinary identifier.
          [] ¬ GetNextChar[];
          token ¬ "L";
          tokenKind ¬ tokenID;
          tokenValid ¬ TRUE;
          thisChar ¬ PeekNextChar[];
          SELECT thisChar FROM
            '' => { tokenValid ¬ FALSE; state ¬ Char };
            '" => { tokenValid ¬ FALSE; state ¬ String };
            ENDCASE => {
              WHILE IsAlphaNum[PeekNextChar[]] DO AppendNext[] ENDLOOP;
              state ¬ Done;
              };
          };

        Identifier => {
          tokenKind ¬ tokenID;
          tokenValid ¬ TRUE;
          [] ¬ GetNextChar[];
          token ¬ Rope.FromChar[thisChar];	-- thisChar was peeked in LeadingSpace
          WHILE IsAlphaNum[PeekNextChar[]] DO AppendNext[] ENDLOOP;
          state ¬ Done
          };

        StartComment => {
          -- A slash starts a comment, a divide-assign operator, or stands alone.
          tokenKind ¬ tokenSINGLE;
          token ¬ "/";
          tokenValid ¬ TRUE;
          [] ¬ GetNextChar[];
          IF (thisChar ¬ PeekNextChar[]) = '* THEN {
            tokenValid ¬ FALSE;
            [] ¬ GetNextChar[];
            state ¬ Comment;
            }
          ELSE IF thisChar = '= THEN {
            [] ¬ GetNextChar[];
            token ¬ "/=";
            tokenKind ¬ tokenDOUBLE;
            state ¬ Done
            }
          ELSE state ¬ Done
          };

        Comment => {
          -- NOTE(review): token still holds the opening slash here, so the comment
          -- text is returned with a leading slash; confirm this is intended.
          WHILE PeekNextChar[] # '* DO AppendNext[] ENDLOOP;
          state ¬ EndComment;
          };

        EndComment => {
          [] ¬ GetNextChar[];	-- the star that ended the Comment loop
          IF PeekNextChar[] = '/ THEN {
            [] ¬ GetNextChar[];
            tokenKind ¬ tokenCOMMENT;
            tokenValid ¬ TRUE;
            state ¬ Done;
            }
          ELSE {
            -- The star was part of the comment body; keep it and go on.
            token ¬ token.Concat["*"];
            state ¬ Comment;
            };
          };

        Punctuation => {
          thisChar ¬ GetNextChar[];
          token ¬ Rope.FromChar[thisChar];
          tokenKind ¬ tokenSINGLE;
          tokenValid ¬ TRUE;
          state ¬ Done;
          -- Extend the token when the following character forms a longer operator.
          SELECT thisChar FROM
            '!, '*, '%, '/, '=, '^ => {
              IF PeekNextChar[] = '= THEN {
                AppendNext[];
                tokenKind ¬ tokenDOUBLE;
                };
              };
            '| => {
              IF PeekNextChar[] = '= OR PeekNextChar[] = '| THEN {
                AppendNext[];
                tokenKind ¬ tokenDOUBLE;
                }
              };
            '& => {
              IF PeekNextChar[] = '= OR PeekNextChar[] = '& THEN {
                AppendNext[];
                tokenKind ¬ tokenDOUBLE;
                };
              };
            '(, '), ',, '., ':, '; , '?, '[, '], '{, '}, '~ => tokenKind ¬ tokenSINGLE;
            '+ => {
              IF PeekNextChar[] = '= OR PeekNextChar[] = '+ THEN {
                AppendNext[];
                tokenKind ¬ tokenDOUBLE;
                };
              };
            '- => {
              IF PeekNextChar[] = '= OR PeekNextChar[] = '- OR PeekNextChar[] = '> THEN {
                AppendNext[];
                tokenKind ¬ tokenDOUBLE;
                };
              };
            '< =>
              IF PeekNextChar[] = '= THEN {
                AppendNext[];
                tokenKind ¬ tokenDOUBLE;
                }
              ELSE IF PeekNextChar[] = '< THEN {
                AppendNext[];
                tokenKind ¬ tokenDOUBLE;
                -- shift-assign: three characters, still classified tokenDOUBLE
                IF PeekNextChar[] = '= THEN { AppendNext[] }
                };
            '> =>
              IF PeekNextChar[] = '= THEN {
                AppendNext[];
                tokenKind ¬ tokenDOUBLE;
                }
              ELSE IF PeekNextChar[] = '> THEN {
                AppendNext[];
                tokenKind ¬ tokenDOUBLE;
                -- shift-assign: three characters, still classified tokenDOUBLE
                IF PeekNextChar[] = '= THEN { AppendNext[] }
                };
            ENDCASE => { tokenValid ¬ FALSE; state ¬ Error }
          };

        FloatIntHexOct => {
          -- First character of a numeric literal (peeked only).
          thisChar ¬ PeekNextChar[];
          state ¬ IF thisChar = '0 THEN FloatHexOctal
            ELSE IF thisChar = '. THEN Fraction
            ELSE IF Is1To9[thisChar] THEN FloatInt
            ELSE Error;
          };

        FloatHexOctal => {
          -- A leading zero: hex prefix, octal number, or the start of a real.
          AppendNext[];
          tokenKind ¬ tokenOCTAL;
          tokenValid ¬ TRUE;
          thisChar ¬ PeekNextChar[];
          IF thisChar = 'x OR thisChar = 'X THEN { tokenValid ¬ FALSE; state ¬ Hex }
          ELSE IF thisChar = '8 OR thisChar = '9 THEN { tokenKind ¬ tokenREAL; state ¬ Float }
          ELSE IF IsOctalDigit[thisChar] THEN state ¬ FloatOctal
          ELSE IF thisChar = '. THEN { tokenKind ¬ tokenREAL; state ¬ Fraction }
          ELSE IF thisChar = 'E OR thisChar = 'e THEN { tokenValid ¬ FALSE; state ¬ ExponentSign }
          ELSE { tokenKind ¬ tokenOCTAL; state ¬ IntModUORL; }
          };

        FloatOctal => {
          AppendNext[];
          thisChar ¬ PeekNextChar[];
          IF thisChar = '8 OR thisChar = '9 THEN { tokenKind ¬ tokenREAL; state ¬ Float }
          ELSE IF IsOctalDigit[thisChar] THEN state ¬ FloatOctal
          ELSE IF thisChar = '. THEN { tokenKind ¬ tokenREAL; state ¬ Fraction }
          ELSE IF thisChar = 'E OR thisChar = 'e THEN { tokenValid ¬ FALSE; state ¬ ExponentSign }
          ELSE { state ¬ IntModUORL; tokenKind ¬ tokenOCTAL };
          };

        Hex => {
          AppendNext[];	-- the x or X of the prefix
          tokenKind ¬ tokenHEX;
          WHILE IsHexDigit[PeekNextChar[]] DO
            tokenValid ¬ TRUE;
            AppendNext[];
            ENDLOOP;
          -- A bare prefix with no digits has no value to convert: report an error
          -- instead of handing an empty rope to Convert.
          IF tokenValid
            THEN state ¬ IntModUORL
            ELSE { tokenKind ¬ tokenERROR; state ¬ Done };
          };

        FloatInt => {
          tokenKind ¬ tokenDECIMAL;
          tokenValid ¬ TRUE;
          AppendNext[];
          thisChar ¬ PeekNextChar[];
          IF thisChar = '. THEN state ¬ Fraction
          ELSE IF thisChar = 'e OR thisChar = 'E THEN { tokenValid ¬ FALSE; state ¬ ExponentSign }
          ELSE IF NOT IsDecimalDigit[thisChar] THEN {
            tokenKind ¬ tokenDECIMAL;
            state ¬ IntModUORL;
            };
          };

        IntModUORL => {
          -- Optional integer suffix: u then optionally l, or l then optionally u.
          thisChar ¬ PeekNextChar[];
          IF thisChar = 'u OR thisChar = 'U THEN {
            AppendNext[];
            suffixLen ¬ suffixLen + 1;
            state ¬ IntModL;
            }
          ELSE IF thisChar = 'l OR thisChar = 'L THEN {
            AppendNext[];
            suffixLen ¬ suffixLen + 1;
            state ¬ IntModU;
            }
          ELSE state ¬ Done;
          };

        IntModU => {
          thisChar ¬ PeekNextChar[];
          IF thisChar = 'u OR thisChar = 'U THEN {
            AppendNext[];
            suffixLen ¬ suffixLen + 1;
            };
          state ¬ Done
          };

        IntModL => {
          thisChar ¬ PeekNextChar[];
          IF thisChar = 'l OR thisChar = 'L THEN {
            AppendNext[];
            suffixLen ¬ suffixLen + 1;
            };
          state ¬ Done
          };

        Float => {
          AppendNext[];
          thisChar ¬ PeekNextChar[];
          IF thisChar = '. THEN state ¬ Fraction
          ELSE IF thisChar = 'e OR thisChar = 'E THEN { tokenValid ¬ FALSE; state ¬ ExponentSign }
          ELSE IF NOT IsDecimalDigit[thisChar] THEN {
            tokenKind ¬ tokenREAL;
            state ¬ FloatModFOrL;
            };
          };

        FractionOrDot => {
          -- A dot is either the start of a real literal or the member operator.
          token ¬ ".";
          [] ¬ GetNextChar[];
          tokenKind ¬ tokenSINGLE;
          tokenValid ¬ TRUE;
          thisChar ¬ PeekNextChar[];
          IF IsDecimalDigit[thisChar]
            THEN { tokenKind ¬ tokenREAL; state ¬ Fraction }
            ELSE { tokenKind ¬ tokenSINGLE; state ¬ Done }
          };

        Fraction => {
          AppendNext[];
          tokenKind ¬ tokenREAL;
          thisChar ¬ PeekNextChar[];
          IF thisChar = 'e OR thisChar = 'E THEN { tokenValid ¬ FALSE; state ¬ ExponentSign }
          ELSE IF NOT IsDecimalDigit[thisChar] THEN { state ¬ FloatModFOrL; };
          };

        ExponentSign => {
          tokenKind ¬ tokenREAL;
          AppendNext[];	-- the e or E
          thisChar ¬ PeekNextChar[];
          IF thisChar = '+ OR thisChar = '- THEN AppendNext[];
          IF IsDecimalDigit[PeekNextChar[]]
            THEN { tokenValid ¬ TRUE; state ¬ Exponent }
            ELSE state ¬ Error;
          };

        Exponent => {
          WHILE IsDecimalDigit[PeekNextChar[]] DO
            AppendNext[];
            ENDLOOP;
          tokenKind ¬ tokenREAL;
          state ¬ FloatModFOrL;
          };

        FloatModFOrL => {
          -- Optional floating suffix.
          thisChar ¬ PeekNextChar[];
          IF thisChar = 'f OR thisChar = 'F OR thisChar = 'l OR thisChar = 'L THEN {
            AppendNext[];
            suffixLen ¬ suffixLen + 1;
            };
          state ¬ Done;
          };

        Error => {
          [] ¬ GetNextChar[];	-- discard the offending character so scanning can go on
          tokenKind ¬ tokenERROR;
          state ¬ Done;
          };

        ENDCASE => state ¬ Error;
      END
      REPEAT
        GoodExit => {};
        BadExit => tokenKind ¬ tokenERROR
      ENDLOOP;

    -- Convert literal text to a value. The u/l/f suffix is part of the token text
    -- but not of the number, so it is removed before calling Convert.
    digits ¬ token.Substr[0, token.Length[] - suffixLen];
    SELECT tokenKind FROM
      tokenDECIMAL => tokenValue ¬ NEW[INT ¬ Convert.IntFromRope[digits, 10]];
      tokenHEX => {
        -- skip the two-character hex prefix
        tokenValue ¬ NEW[INT ¬ Convert.IntFromRope[digits.Substr[2, digits.Length[]-2], 16]];
        tokenKind ¬ tokenDECIMAL
        };
      tokenOCTAL => {
        tokenValue ¬ NEW[INT ¬ Convert.IntFromRope[digits, 8]];
        tokenKind ¬ tokenDECIMAL
        };
      tokenREAL => tokenValue ¬ NEW[REAL ¬ Convert.RealFromRope[digits]];
      tokenCHAR => tokenValue ¬ NEW[CHAR ¬ Rope.Fetch[token, 1]];
      tokenROPE => tokenValue ¬ NEW[Rope.ROPE ¬ token.Substr[1, token.Length[]-2]];
      ENDCASE => {};
    RETURN [tokenKind, token, charsSkipped, tokenValue]
    };

  -- Decodes the escape sequence that follows a backslash already consumed by the
  -- caller. Returns a one-character rope holding the denoted character, or NIL
  -- for a backslash-newline pair and for an unrecognised or malformed escape.
  EscapeChar: PROC [] RETURNS [Rope.ROPE] ~ {
    first, second, third: Rope.ROPE;
    number: Rope.ROPE;
    newChar: CHAR;
    newCharValid: BOOL ¬ TRUE;
    c: CHAR ¬ GetNextChar[];
    SELECT c FROM
      'n => newChar ¬ Ascii.LF;
      't => newChar ¬ Ascii.TAB;
      'v => newChar ¬ VAL[CARDINAL[11]];	-- vertical tab
      'b => newChar ¬ Ascii.BS;
      'r => newChar ¬ Ascii.CR;
      'f => newChar ¬ Ascii.FF;
      'a => newChar ¬ Ascii.BEL;
      '\\ => newChar ¬ '\\;
      '' => newChar ¬ '';
      '" => newChar ¬ '";
      '? => newChar ¬ '?;
      'x => {
        -- One or two hex digits. Check the first one before consuming it so a
        -- malformed escape is reported as invalid rather than failing in Convert.
        IF IsHexDigit[PeekNextChar[]] THEN {
          first ¬ Rope.FromChar[GetNextChar[]];
          IF IsHexDigit[PeekNextChar[]]
            THEN second ¬ Rope.FromChar[GetNextChar[]]
            ELSE second ¬ NIL;
          number ¬ Rope.Concat[first, second];
          newChar ¬ VAL[CARDINAL[Convert.CardFromRope[number, 16]]]
          }
        ELSE newCharValid ¬ FALSE;
        };
      Ascii.LF => newCharValid ¬ FALSE;	-- line continuation contributes no character
      ENDCASE => {
        IF IsOctalDigit[c] THEN {
          -- One to three octal digits, the first of which is c.
          first ¬ Rope.FromChar[c];
          IF IsOctalDigit[PeekNextChar[]] THEN {
            second ¬ Rope.FromChar[GetNextChar[]];
            IF IsOctalDigit[PeekNextChar[]]
              THEN third ¬ Rope.FromChar[GetNextChar[]]
              ELSE third ¬ NIL
            }
          ELSE second ¬ third ¬ NIL;
          number ¬ Rope.Cat[first, second, third];
          newChar ¬ VAL[CARDINAL[Convert.CardFromRope[number, 8]]]
          }
        ELSE newCharValid ¬ FALSE;
        };
    IF newCharValid THEN RETURN [Rope.FromChar[newChar]] ELSE RETURN [NIL];
    };

  -- Commander command: takes a file name from the command line, tokenizes the
  -- file and prints one line per token to cmd.out. The source stream is closed
  -- when scanning finishes, whether by tokenEOF or by IO.EndOfStream.
  Test: Commander.CommandProc ~ {
    charsSkipped: INT;
    sourceStream: IO.STREAM;
    token: Rope.ROPE;
    tokenKind: IO.TokenKind ¬ tokenID;
    commandLineStream: IO.STREAM ¬ IO.RIS[cmd.commandLine];
    filename: PFSNames.PATH ¬ PFS.PathFromRope[IO.GetTokenRope[commandLineStream, IO.IDProc].token];
    IO.Close[commandLineStream];
    sourceStream ¬ PFS.StreamOpen[filename];
    GetCTokenInit[];
    WHILE tokenKind # tokenEOF DO
      [tokenKind, token, charsSkipped] ¬ GetCTokenRope[sourceStream, TRUE ! IO.EndOfStream => GOTO EOS];
      SELECT tokenKind FROM
        tokenID => IO.PutF1[cmd.out,"Identifier = /%g/\n",IO.rope[token]];
        tokenROPE => IO.PutF1[cmd.out,"String constant= /%g/\n",IO.rope[token]];
        tokenCHAR => IO.PutF1[cmd.out,"Character constant = /%g/\n",IO.rope[token]];
        tokenSINGLE => IO.PutF1[cmd.out,"Single punctuation = /%g/\n",IO.rope[token]];
        tokenDOUBLE => IO.PutF1[cmd.out,"Double punctuation = /%g/\n",IO.rope[token]];
        -- NOTE(review): disabled cases stood here in the original; their text
        -- was not recoverable.
        tokenCOMMENT => IO.PutF1[cmd.out,"Comment = /%g/\n",IO.rope[token]];
        tokenHEX => IO.PutF1[cmd.out,"Hexidecimal constant = /%g/\n",IO.rope[token]];
        tokenDECIMAL => IO.PutF1[cmd.out,"Integer constant = /%g/\n",IO.rope[token]];
        tokenREAL => IO.PutF1[cmd.out,"Real constant = /%g/\n",IO.rope[token]];
        tokenOCTAL => IO.PutF1[cmd.out,"Octal constant = /%g/\n",IO.rope[token]];
        tokenEOF => IO.PutRope[cmd.out,"End of file\n"];
        tokenERROR => IO.PutRope[cmd.out,"Misformed token\n"];
        ENDCASE => IO.PutF1[cmd.out,"Bad token kind %g\n",IO.int[ORD[tokenKind]]];
      REPEAT
        EOS => IO.PutRope[cmd.out,"End of Stream\n"]
      ENDLOOP;
    -- Reached on both the tokenEOF and the end-of-stream path: release the file.
    IO.Close[sourceStream];
    };

  Commander.Register["Test",Test];

  }.