-- C-language token scanner.  Exports the Lexer interface and registers a
-- "Test" command that prints every token found in a source file.

DIRECTORY
  Ascii USING [SP, CR, LF, FF, TAB, BS, BEL],
  Commander USING [CommandProc, Register],
  Convert USING [CardFromRope, IntFromRope, RealFromRope],
  PFS USING [PathFromRope, StreamOpen],
  PFSNames USING [PATH],
  IO USING [Close, EndOfStream, GetChar, GetTokenRope, int, IDProc, PeekChar, PutF, RIS, rope, STREAM, TokenKind],
  Lexer USING [],
  Rope USING [Cat, Concat, Fetch, FromChar, Length, ROPE, Substr];

LexerImpl: CEDAR PROGRAM
  IMPORTS Commander, Convert, IO, PFS, Rope
  EXPORTS Lexer
~ {

  << TokenKind: TYPE = { tokenERROR, tokenID, tokenDECIMAL, tokenOCTAL, tokenHEX, tokenREAL, tokenROPE, tokenCHAR, tokenATOM, tokenSINGLE, tokenDOUBLE, tokenTRIPLE, tokenCOMMENT, tokenEOF, tokenNUMBER };>>

  -- States of the scanner automaton driven by GetCTokenRopeAndValue.
  State: TYPE = {
    LeadingSpace, StartComment, Comment, EndComment, Char, String,
    CharStringIdentifier, FloatIntHexOct, Identifier, Punctuation,
    FloatHexOctal, FloatOctal, Hex, FloatInt, IntModUORL, IntModU, IntModL,
    Float, Fraction, FractionOrDot, ExponentSign, Exponent, FloatModFOrL,
    Done, Error
    };

  -- Module-level scanner state.  inputStream is reset on every call of
  -- GetCTokenRopeAndValue; haveAChar is only written by GetCTokenInit.
  inputStream: IO.STREAM;
  haveAChar: BOOL;
  thisChar: CHAR;

  -- Consumes and returns the next character of inputStream.
  -- IO.EndOfStream raised by the stream propagates to the caller.
  GetNextChar: PROC [] RETURNS [CHAR] ~ {
    RETURN [inputStream.GetChar[]]
    };

  -- Returns the next character of inputStream without consuming it.
  -- IO.EndOfStream raised by the stream propagates to the caller.
  PeekNextChar: PROC [] RETURNS [CHAR] ~ {
    RETURN [inputStream.PeekChar[]]
    };

  -- TRUE for every character that can start a punctuation token.
  IsPunctuation: PROC [c: CHAR] RETURNS [BOOL] ~ {
    RETURN [SELECT c FROM
      '!, '*, '%, '/, '^, '&, '(, '), ',, '., ':, '?, '[, '], '{, '}, '~,
      '+, '-, '<, '>, ';, '=, '', '", '| => TRUE
      ENDCASE => FALSE]
    };

  IsDecimalDigit: PROC [c: CHAR] RETURNS [BOOL] = INLINE {
    RETURN [c IN ['0 .. '9]]
    };

  Is1To9: PROC [c: CHAR] RETURNS [BOOL] = INLINE {
    RETURN [c IN ['1 .. '9]]
    };

  IsHexDigit: PROC [c: CHAR] RETURNS [BOOL] = INLINE {
    RETURN [c IN ['0 .. '9] OR c IN ['A .. 'F] OR c IN ['a .. 'f]]
    };

  IsOctalDigit: PROC [c: CHAR] RETURNS [BOOL] = INLINE {
    RETURN [c IN ['0 .. '7]]
    };

  -- Letters, digits and underscore: characters allowed inside an identifier.
  IsAlphaNum: PROC [c: CHAR] RETURNS [BOOL] = INLINE {
    RETURN [c IN ['0 .. '9] OR c IN ['a .. 'z] OR c IN ['A .. 'Z] OR c = '_]
    };

  -- Letters and underscore: characters allowed to start an identifier.
  IsNonDigit: PROC [c: CHAR] RETURNS [BOOL] = INLINE {
    RETURN [c IN ['a .. 'z] OR c IN ['A .. 'Z] OR c = '_]
    };

  -- White space between tokens.  Form feed is included because C treats it
  -- as white space; without it a form feed produced an error token.
  IsWhite: PROC [c: CHAR] RETURNS [BOOL] = INLINE {
    RETURN [c = Ascii.SP OR c = Ascii.CR OR c = Ascii.LF OR c = Ascii.TAB OR c = Ascii.FF]
    };

  -- Removes a trailing C constant suffix so that Convert can parse the rest.
  -- For real constants (real = TRUE) the suffix letters are f, F, l, L; for
  -- integer constants they are u, U, l, L.  The letters f and F are left
  -- alone on integers because they are hexadecimal digits.
  StripSuffix: PROC [r: Rope.ROPE, real: BOOL] RETURNS [Rope.ROPE] ~ {
    len: INT _ r.Length[];
    WHILE len > 0 DO
      SELECT r.Fetch[len-1] FROM
        'l, 'L => len _ len - 1;
        'u, 'U => IF real THEN EXIT ELSE len _ len - 1;
        'f, 'F => IF real THEN len _ len - 1 ELSE EXIT;
        ENDCASE => EXIT;
      ENDLOOP;
    RETURN [r.Substr[0, len]]
    };

  -- Resets the module-level look-ahead flag.
  GetCTokenInit: PUBLIC PROC [] RETURNS [] ~ {
    haveAChar _ FALSE;
    };

  -- Same as GetCTokenRopeAndValue, with the converted value discarded.
  GetCTokenRope: PUBLIC PROC [inStream: IO.STREAM, junk: BOOL]
    RETURNS [tokenKind: IO.TokenKind, token: Rope.ROPE, charsSkipped: INT] ~ {
    tokenValue: REF ANY;
    [tokenKind, token, charsSkipped, tokenValue] _ GetCTokenRopeAndValue[inStream, junk];
    RETURN [tokenKind, token, charsSkipped]
    };

  -- Scans one C token from inStream.
  --   inStream:     stream to read; it becomes the module-level inputStream.
  --   junk:         not used by this procedure.
  --   tokenKind:    kind of the token; tokenHEX and tokenOCTAL are reported
  --                 as tokenDECIMAL after conversion, tokenEOF at end of
  --                 input, tokenERROR for a malformed or truncated token.
  --   token:        text of the token (escape sequences in character and
  --                 string constants are already replaced by their value).
  --   charsSkipped: number of white-space characters consumed before the
  --                 token (previously a constant 0 was returned).
  --   tokenValue:   REF INT, REF REAL, REF CHAR or REF Rope.ROPE for
  --                 constants, NIL otherwise.
  -- IO.EndOfStream is handled here: it ends the token normally when the
  -- characters read so far form a valid token and yields tokenERROR otherwise.
  -- NOTE(review): errors raised by Convert (for instance on overflow) are not
  -- caught here, as in the original code.
  GetCTokenRopeAndValue: PUBLIC PROC [inStream: IO.STREAM, junk: BOOL]
    RETURNS [tokenKind: IO.TokenKind, token: Rope.ROPE, charsSkipped: INT, tokenValue: REF ANY] ~ {
    state: State _ LeadingSpace;
    tokenValid: BOOL _ FALSE;  -- TRUE while the text read so far is a complete token

    -- Consumes one character and appends it to the token text.
    AppendNext: PROC ~ {
      token _ token.Concat[Rope.FromChar[GetNextChar[]]];
      };

    inputStream _ inStream;
    token _ NIL;
    charsSkipped _ 0;
    WHILE state # Done DO
      BEGIN
      ENABLE IO.EndOfStream => {
        IF tokenValid THEN GOTO GoodExit ELSE GOTO BadExit
        };
      SELECT state FROM

        LeadingSpace => {
          -- End of input while skipping white space is a clean tokenEOF.
          tokenKind _ tokenEOF;
          tokenValid _ TRUE;
          WHILE IsWhite[PeekNextChar[]] DO
            [] _ GetNextChar[];
            charsSkipped _ charsSkipped + 1;
            ENDLOOP;
          tokenValid _ FALSE;
          thisChar _ PeekNextChar[];
          state _ SELECT thisChar FROM
            '' => Char,
            '" => String,
            'L => CharStringIdentifier,
            '/ => StartComment,
            '. => FractionOrDot,
            ENDCASE => SELECT TRUE FROM
              IsDecimalDigit[thisChar] => FloatIntHexOct,
              IsNonDigit[thisChar] => Identifier,
              IsPunctuation[thisChar] => Punctuation,
              ENDCASE => Error;
          };

        Char => {
          [] _ GetNextChar[];  -- opening quote
          token _ "'";
          IF (thisChar _ GetNextChar[]) = '\\
            THEN token _ token.Concat[EscapeChar[]]
            ELSE token _ token.Concat[Rope.FromChar[thisChar]];
          tokenKind _ tokenCHAR;
          IF GetNextChar[] # '' THEN state _ Error
          ELSE {
            token _ token.Concat["'"];
            tokenValid _ TRUE;
            state _ Done
            }
          };

        String => {
          [] _ GetNextChar[];  -- opening quote
          token _ "\"";
          WHILE PeekNextChar[] # '" DO
            thisChar _ GetNextChar[];
            IF thisChar = '\\
              THEN token _ token.Concat[EscapeChar[]]
              ELSE token _ token.Concat[Rope.FromChar[thisChar]]
            ENDLOOP;
          tokenKind _ tokenROPE;
          IF GetNextChar[] # '" THEN state _ Error
          ELSE {
            token _ token.Concat["\""];
            tokenValid _ TRUE;
            state _ Done
            }
          };

        CharStringIdentifier => {
          -- An L may prefix a character or string constant, or simply begin
          -- an identifier.
          [] _ GetNextChar[];
          token _ "L";
          tokenKind _ tokenID;
          tokenValid _ TRUE;
          thisChar _ PeekNextChar[];
          SELECT thisChar FROM
            '' => { tokenValid _ FALSE; state _ Char };
            '" => { tokenValid _ FALSE; state _ String };
            ENDCASE => {
              tokenKind _ tokenID;
              token _ "L";
              WHILE IsAlphaNum[PeekNextChar[]] DO AppendNext[] ENDLOOP;
              state _ Done;
              };
          };

        Identifier => {
          tokenKind _ tokenID;
          tokenValid _ TRUE;
          [] _ GetNextChar[];  -- thisChar was peeked in LeadingSpace
          token _ Rope.FromChar[thisChar];
          WHILE IsAlphaNum[PeekNextChar[]] DO AppendNext[] ENDLOOP;
          state _ Done
          };

        StartComment => {
          -- A slash is a divide operator, a divide-assign operator, or the
          -- start of a comment.
          tokenKind _ tokenSINGLE;
          token _ "/";
          tokenValid _ TRUE;
          [] _ GetNextChar[];
          IF (thisChar _ PeekNextChar[]) = '* THEN {
            tokenValid _ FALSE;
            [] _ GetNextChar[];
            state _ Comment;
            }
          ELSE IF thisChar = '= THEN {
            [] _ GetNextChar[];
            token _ "/=";
            tokenKind _ tokenDOUBLE;
            state _ Done
            }
          ELSE state _ Done
          };

        Comment => {
          -- NOTE(review): the token text of a comment is the slash left over
          -- from StartComment followed by the comment body; the delimiting
          -- stars and the closing slash are not included.  Kept as is.
          WHILE PeekNextChar[] # '* DO AppendNext[] ENDLOOP;
          state _ EndComment;
          };

        EndComment => {
          [] _ GetNextChar[];  -- the star that stopped the Comment loop
          IF PeekNextChar[] = '/ THEN {
            [] _ GetNextChar[];
            tokenKind _ tokenCOMMENT;
            tokenValid _ TRUE;
            state _ Done;
            }
          ELSE {
            token _ token.Concat["*"];
            state _ Comment;
            };
          };

        Punctuation => {
          thisChar _ GetNextChar[];
          token _ Rope.FromChar[thisChar];
          tokenKind _ tokenSINGLE;
          tokenValid _ TRUE;  -- end of input after one character is still a token
          state _ Done;
          SELECT thisChar FROM
            '!, '*, '%, '/, '=, '^ =>
              IF PeekNextChar[] = '= THEN { AppendNext[]; tokenKind _ tokenDOUBLE };
            '| =>
              IF PeekNextChar[] = '= OR PeekNextChar[] = '| THEN { AppendNext[]; tokenKind _ tokenDOUBLE };
            '& =>
              IF PeekNextChar[] = '= OR PeekNextChar[] = '& THEN { AppendNext[]; tokenKind _ tokenDOUBLE };
            '(, '), ',, '., ':, ';, '?, '[, '], '{, '}, '~ =>
              tokenKind _ tokenSINGLE;
            '+ =>
              IF PeekNextChar[] = '= OR PeekNextChar[] = '+ THEN { AppendNext[]; tokenKind _ tokenDOUBLE };
            '- =>
              IF PeekNextChar[] = '= OR PeekNextChar[] = '- OR PeekNextChar[] = '> THEN { AppendNext[]; tokenKind _ tokenDOUBLE };
            -- NOTE(review): the three-character shift-assign operators are
            -- reported as tokenDOUBLE, exactly as before, although tokenTRIPLE
            -- exists in the enumeration.  Confirm what callers expect.
            '< =>
              IF PeekNextChar[] = '= THEN { AppendNext[]; tokenKind _ tokenDOUBLE }
              ELSE IF PeekNextChar[] = '< THEN {
                AppendNext[];
                tokenKind _ tokenDOUBLE;
                IF PeekNextChar[] = '= THEN AppendNext[];
                };
            '> =>
              IF PeekNextChar[] = '= THEN { AppendNext[]; tokenKind _ tokenDOUBLE }
              ELSE IF PeekNextChar[] = '> THEN {
                AppendNext[];
                tokenKind _ tokenDOUBLE;
                IF PeekNextChar[] = '= THEN AppendNext[];
                };
            ENDCASE => { tokenValid _ FALSE; state _ Error }
          };

        FloatIntHexOct => {
          thisChar _ PeekNextChar[];
          state _ IF thisChar = '0 THEN FloatHexOctal
            ELSE IF thisChar = '. THEN Fraction
            ELSE IF Is1To9[thisChar] THEN FloatInt
            ELSE Error;
          };

        FloatHexOctal => {
          AppendNext[];  -- the leading zero
          tokenKind _ tokenOCTAL;
          tokenValid _ TRUE;
          thisChar _ PeekNextChar[];
          -- Both spellings of the hexadecimal prefix are accepted.
          IF thisChar = 'x OR thisChar = 'X THEN { tokenValid _ FALSE; state _ Hex }
          ELSE IF thisChar = '8 OR thisChar = '9 THEN { tokenKind _ tokenREAL; state _ Float }
          ELSE IF IsOctalDigit[thisChar] THEN state _ FloatOctal
          ELSE IF thisChar = '. THEN { tokenKind _ tokenREAL; state _ Fraction }
          ELSE IF thisChar = 'E OR thisChar = 'e THEN { tokenValid _ FALSE; state _ ExponentSign }
          ELSE { tokenKind _ tokenOCTAL; state _ IntModUORL; }
          };

        FloatOctal => {
          AppendNext[];
          thisChar _ PeekNextChar[];
          IF thisChar = '8 OR thisChar = '9 THEN { tokenKind _ tokenREAL; state _ Float }
          ELSE IF IsOctalDigit[thisChar] THEN state _ FloatOctal
          ELSE IF thisChar = '. THEN { tokenKind _ tokenREAL; state _ Fraction }
          ELSE IF thisChar = 'E OR thisChar = 'e THEN { tokenValid _ FALSE; state _ ExponentSign }
          ELSE { state _ IntModUORL; tokenKind _ tokenOCTAL };
          };

        Hex => {
          AppendNext[];  -- the x or X
          tokenKind _ tokenHEX;
          WHILE IsHexDigit[PeekNextChar[]] DO
            tokenValid _ TRUE;
            AppendNext[];
            ENDLOOP;
          -- A prefix with no digits is malformed; it used to be passed on to
          -- Convert as an empty rope.
          state _ IF tokenValid THEN IntModUORL ELSE Error;
          };

        FloatInt => {
          tokenKind _ tokenDECIMAL;
          tokenValid _ TRUE;
          AppendNext[];
          thisChar _ PeekNextChar[];
          IF thisChar = '. THEN state _ Fraction
          ELSE IF thisChar = 'e OR thisChar = 'E THEN { tokenValid _ FALSE; state _ ExponentSign }
          ELSE IF NOT IsDecimalDigit[thisChar] THEN { tokenKind _ tokenDECIMAL; state _ IntModUORL; };
          };

        IntModUORL => {
          -- Optional integer suffix: u then optional l, or l then optional u.
          thisChar _ PeekNextChar[];
          IF thisChar = 'u OR thisChar = 'U THEN { AppendNext[]; state _ IntModL; }
          ELSE IF thisChar = 'l OR thisChar = 'L THEN { AppendNext[]; state _ IntModU; }
          ELSE state _ Done;
          };

        IntModU => {
          thisChar _ PeekNextChar[];
          IF thisChar = 'u OR thisChar = 'U THEN AppendNext[];
          state _ Done
          };

        IntModL => {
          thisChar _ PeekNextChar[];
          IF thisChar = 'l OR thisChar = 'L THEN AppendNext[];
          state _ Done
          };

        Float => {
          AppendNext[];
          thisChar _ PeekNextChar[];
          IF thisChar = '. THEN state _ Fraction
          ELSE IF thisChar = 'e OR thisChar = 'E THEN { tokenValid _ FALSE; state _ ExponentSign }
          ELSE IF NOT IsDecimalDigit[thisChar] THEN { tokenKind _ tokenREAL; state _ FloatModFOrL; };
          };

        FractionOrDot => {
          -- A dot is either the member operator or the start of a fraction.
          token _ ".";
          [] _ GetNextChar[];
          tokenKind _ tokenSINGLE;
          tokenValid _ TRUE;
          thisChar _ PeekNextChar[];
          IF IsDecimalDigit[thisChar] THEN { tokenKind _ tokenREAL; state _ Fraction }
          ELSE { tokenKind _ tokenSINGLE; state _ Done }
          };

        Fraction => {
          AppendNext[];  -- the dot or one fraction digit
          tokenKind _ tokenREAL;
          thisChar _ PeekNextChar[];
          IF thisChar = 'e OR thisChar = 'E THEN { tokenValid _ FALSE; state _ ExponentSign }
          ELSE IF NOT IsDecimalDigit[thisChar] THEN { state _ FloatModFOrL; };
          };

        ExponentSign => {
          tokenKind _ tokenREAL;
          AppendNext[];  -- the e or E
          thisChar _ PeekNextChar[];
          IF thisChar = '+ OR thisChar = '- THEN AppendNext[];
          IF IsDecimalDigit[PeekNextChar[]] THEN { tokenValid _ TRUE; state _ Exponent }
          ELSE state _ Error;
          };

        Exponent => {
          WHILE IsDecimalDigit[PeekNextChar[]] DO AppendNext[] ENDLOOP;
          tokenKind _ tokenREAL;
          state _ FloatModFOrL;
          };

        FloatModFOrL => {
          thisChar _ PeekNextChar[];
          IF thisChar = 'f OR thisChar = 'F OR thisChar = 'l OR thisChar = 'L THEN AppendNext[];
          state _ Done;
          };

        Error => {
          [] _ GetNextChar[];  -- skip the offending character
          tokenKind _ tokenERROR;
          state _ Done;
          };

        ENDCASE => state _ Error;
      END
      REPEAT
        GoodExit => {};
        BadExit => tokenKind _ tokenERROR
      ENDLOOP;

    -- Convert constants to values.  Suffix letters are removed first because
    -- Convert does not accept them.
    SELECT tokenKind FROM
      tokenDECIMAL =>
        tokenValue _ NEW[INT _ Convert.IntFromRope[StripSuffix[token, FALSE], 10]];
      tokenHEX => {
        -- Skip the two-character prefix before converting.
        tokenValue _ NEW[INT _ Convert.IntFromRope[StripSuffix[token.Substr[2, token.Length[]-2], FALSE], 16]];
        tokenKind _ tokenDECIMAL
        };
      tokenOCTAL => {
        tokenValue _ NEW[INT _ Convert.IntFromRope[StripSuffix[token, FALSE], 8]];
        tokenKind _ tokenDECIMAL
        };
      tokenREAL =>
        tokenValue _ NEW[REAL _ Convert.RealFromRope[StripSuffix[token, TRUE]]];
      tokenCHAR =>
        tokenValue _ NEW[CHAR _ Rope.Fetch[token, 1]];
      tokenROPE =>
        tokenValue _ NEW[Rope.ROPE _ token.Substr[1, token.Length[]-2]];
      ENDCASE => {};
    RETURN [tokenKind, token, charsSkipped, tokenValue]
    };

  -- Reads the remainder of an escape sequence (the backslash has already been
  -- consumed) and returns the character it denotes as a one-character rope.
  -- Returns NIL for a backslash followed by a line feed and for an
  -- unrecognized or malformed escape.
  EscapeChar: PROC [] RETURNS [Rope.ROPE] ~ {
    first, second, third: Rope.ROPE;
    number: Rope.ROPE;
    newChar: CHAR;
    newCharValid: BOOL _ TRUE;
    c: CHAR _ GetNextChar[];
    SELECT c FROM
      'n => newChar _ Ascii.LF;
      't => newChar _ Ascii.TAB;
      'v => newChar _ VAL[11];  -- vertical tab; this used to yield a caret
      'b => newChar _ Ascii.BS;
      'r => newChar _ Ascii.CR;
      'f => newChar _ Ascii.FF;
      'a => newChar _ Ascii.BEL;
      '\\ => newChar _ '\\;
      '' => newChar _ '';
      '" => newChar _ '";
      '? => newChar _ '?;
      'x => {
        -- One or two hexadecimal digits.  The first one is checked before it
        -- is consumed so that Convert is never given a non-digit.
        IF IsHexDigit[PeekNextChar[]] THEN {
          first _ Rope.FromChar[GetNextChar[]];
          IF IsHexDigit[PeekNextChar[]]
            THEN second _ Rope.FromChar[GetNextChar[]]
            ELSE second _ NIL;
          number _ Rope.Cat[first, second];
          newChar _ VAL[CARDINAL[Convert.CardFromRope[number, 16]]]
          }
        ELSE newCharValid _ FALSE;
        };
      Ascii.LF => newCharValid _ FALSE;
      ENDCASE => {
        -- One to three octal digits.
        IF IsOctalDigit[c] THEN {
          first _ Rope.FromChar[c];
          IF IsOctalDigit[PeekNextChar[]] THEN {
            second _ Rope.FromChar[GetNextChar[]];
            IF IsOctalDigit[PeekNextChar[]]
              THEN third _ Rope.FromChar[GetNextChar[]]
              ELSE third _ NIL
            }
          ELSE second _ third _ NIL;
          number _ Rope.Cat[first, second, third];
          newChar _ VAL[CARDINAL[Convert.CardFromRope[number, 8]]]
          }
        ELSE newCharValid _ FALSE;
        };
    IF newCharValid THEN RETURN [Rope.FromChar[newChar]] ELSE RETURN [NIL];
    };

  -- Command procedure: takes a file name from the command line, scans the
  -- file and prints one line per token to cmd.out.
  Test: Commander.CommandProc ~ {
    charsSkipped: INT;
    sourceStream: IO.STREAM;
    token: Rope.ROPE;
    tokenKind: IO.TokenKind _ tokenID;
    commandLineStream: IO.STREAM _ IO.RIS[cmd.commandLine];
    filename: PFSNames.PATH _ PFS.PathFromRope[IO.GetTokenRope[commandLineStream, IO.IDProc].token];
    IO.Close[commandLineStream];
    sourceStream _ PFS.StreamOpen[filename];
    GetCTokenInit[];
    {
      -- The loop lives in its own block so that both the normal exit and
      -- the EOS exit fall through to the Close below.
      WHILE tokenKind # tokenEOF DO
        [tokenKind, token, charsSkipped] _ GetCTokenRope[sourceStream, TRUE ! IO.EndOfStream => GOTO EOS];
        SELECT tokenKind FROM
          tokenID => IO.PutF[cmd.out, "Identifier = /%g/\n", IO.rope[token]];
          tokenROPE => IO.PutF[cmd.out, "String constant= /%g/\n", IO.rope[token]];
          tokenCHAR => IO.PutF[cmd.out, "Character constant = /%g/\n", IO.rope[token]];
          tokenSINGLE => IO.PutF[cmd.out, "Single punctuation = /%g/\n", IO.rope[token]];
          tokenDOUBLE => IO.PutF[cmd.out, "Double punctuation = /%g/\n", IO.rope[token]];
          tokenCOMMENT => IO.PutF[cmd.out, "Comment = /%g/\n", IO.rope[token]];
          tokenHEX => IO.PutF[cmd.out, "Hexidecimal constant = /%g/\n", IO.rope[token]];
          tokenDECIMAL => IO.PutF[cmd.out, "Integer constant = /%g/\n", IO.rope[token]];
          tokenREAL => IO.PutF[cmd.out, "Real constant = /%g/\n", IO.rope[token]];
          tokenOCTAL => IO.PutF[cmd.out, "Octal constant = /%g/\n", IO.rope[token]];
          tokenEOF => IO.PutF[cmd.out, "End of file\n"];
          tokenERROR => IO.PutF[cmd.out, "Misformed token\n"];
          ENDCASE => IO.PutF[cmd.out, "Bad token kind %g\n", IO.int[ORD[tokenKind]]];
        ENDLOOP;
      EXITS
        EOS => IO.PutF[cmd.out, "End of Stream\n"];
      };
    IO.Close[sourceStream];  -- the source stream used to be left open
    };

  Commander.Register["Test", Test];

  }.