<> <> <> <> DIRECTORY Convert, IO, PPLeaves USING [HTIndex, HTNode, LTIndex, LTNode], PPP1 USING [Token, Value, NullValue], PPParseTable USING [Handle, HashIndex, TSymbol, VocabHashEntry, EndMarker, tokenARROW, tokenATOM, tokenCHAR, tokenDOTS, tokenGE, tokenID, tokenLE, tokenFLNUM, tokenLNUM, tokenSTR], Real USING [RealException], Rope USING [Fetch, Flatten, Length, ROPE, Size]; PPScanner: PROGRAM IMPORTS Convert, IO, Real, Rope EXPORTS PPP1 SHARES Rope = BEGIN OPEN PPLeaves, PPParseTable, P1: PPP1; ROPE: TYPE = Rope.ROPE; STREAM: TYPE = IO.STREAM; <> hashTab: LONG POINTER TO ARRAY HashIndex OF VocabHashEntry _ NIL; scanTab: LONG POINTER TO ARRAY CHAR [40C..177C] OF TSymbol _ NIL; vocab: LONG STRING _ NIL; vocabIndex: LONG POINTER TO ARRAY TSymbol OF CARDINAL _ NIL; rf: ROPE _ NIL; -- the source rs: STREAM _ NIL; -- the source as stream toklen: NAT _ 0; -- current token length tokpos: INT _ 0; -- source index for start of token nTokens: CARDINAL; -- token count nErrors: CARDINAL; -- lexical errors lastToken: INT _ 0; IdFromRope: PROC [r: ROPE, index: INT] RETURNS [HTIndex] = { RETURN [NEW[HTNode _ [index: index, name: r]]]}; IdFirst: HTIndex _ IdFromRope["first", LAST[INT]]; IDLock: HTIndex _ IdFromRope["LOCK", LAST[INT]]; IDRest: HTIndex _ IdFromRope["rest", LAST[INT]]; IdOfFirst: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IdFirst]}; IdOfLock: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IDLock]}; IdOfRest: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IDRest]}; Atom: PUBLIC SAFE PROC [errPut: IO.STREAM] RETURNS [token: P1.Token] = TRUSTED { tokenKind: IO.TokenKind; rope: ROPE; charsSkipped: INT; DO peek: CHAR _ 0C; [] _ IO.SkipWhitespace[rs, TRUE ! IO.EndOfStream => EXIT]; peek _ IO.PeekChar[rs ! IO.EndOfStream => EXIT]; tokpos _ IO.GetIndex[rs]; SELECT peek FROM '%, '& => { <> DO [] _ IO.GetChar[rs]; peek _ IO.PeekChar[rs ! IO.EndOfStream => EXIT]; SELECT peek FROM IN ['a..'z], IN ['A..'Z], IN ['0..'9] => {}; ENDCASE => EXIT; ENDLOOP; toklen _ IO.GetIndex[rs] - tokpos; token.class _ tokenID; token.value.r _ IdFromRope[rf.Flatten[tokpos, toklen], tokpos]; EXIT; }; ENDCASE => NULL; [tokenKind, rope, charsSkipped] _ IO.GetCedarTokenRope[rs ! IO.Error => { ErrorContext["Syntax error", IO.GetIndex[rs], errPut]; EXIT}; IO.EndOfStream => { ErrorContext["Unexpected end of stream", IO.GetIndex[rs], errPut]; EXIT}]; toklen _ rope.Length[]; tokpos _ rs.GetIndex[] - toklen; SELECT tokenKind FROM tokenID => { <> allcaps: BOOL _ TRUE; r: ROPE _ NIL; token.class _ tokenID; FOR i: INT IN [0..rope.Size[]) DO IF rope.Fetch[i] NOT IN ['A..'Z] THEN {allcaps _ FALSE; EXIT}; ENDLOOP; IF allcaps THEN { <> first: CARDINAL _ LOOPHOLE[rope.Fetch[0], CARDINAL]; last: CARDINAL _ LOOPHOLE[rope.Fetch[rope.Size[]-1], CARDINAL]; h: CARDINAL _ (first * 128 - first + last) MOD LAST[HashIndex] + 1; j: CARDINAL _ 0; len: NAT _ rope.Size[]; WHILE (j _ hashTab[h].symbol) # 0 DO s2: CARDINAL _ vocabIndex[j - 1]; IF vocabIndex[j] - s2 = len THEN FOR s1: CARDINAL IN [0..len) DO IF rope.Fetch[s1] # vocab[s2] THEN EXIT; s2 _ s2 + 1; REPEAT FINISHED => {token.class _ j; GO TO CheckEnd}; ENDLOOP; IF (h _ hashTab[h].link) = 0 THEN EXIT ENDLOOP; }; token.value.r _ IdFromRope[rope, tokpos]; }; tokenDECIMAL => { <> ENABLE Convert.Error => GO TO badNumber; token.class _ tokenLNUM; token.value.r _ WrapLit[NEW[INT _ LOOPHOLE[Convert.CardFromDecimalLiteral[rope]]]]; }; tokenOCTAL => { <> ENABLE Convert.Error => GO TO badNumber; token.class _ tokenLNUM; token.value.r _ WrapLit[NEW[INT _ LOOPHOLE[Convert.CardFromOctalLiteral[rope]]]]; }; tokenHEX => { <> token.class _ tokenLNUM; token.value.r _ WrapLit[NEW[INT _ LOOPHOLE[Convert.CardFromHexLiteral[rope]]]]; }; tokenREAL => { <> ENABLE Convert.Error, Real.RealException => TRUSTED{GO TO badNumber}; token.class _ tokenFLNUM; token.value.r _ WrapLit[NEW[REAL _ Convert.RealFromLiteral[rope]]]; }; tokenROPE => { <> token.class _ tokenSTR; token.value.r _ WrapLit[NEW[ROPE _ Convert.RopeFromLiteral[rope]]]; }; tokenCHAR => { <> token.class _ tokenCHAR; token.value.r _ WrapLit[NEW[CHAR _ Convert.CharFromLiteral[rope]]]; }; tokenATOM => { <> token.class _ tokenATOM; token.value.r _ WrapLit[NEW[ATOM _ Convert.AtomFromRope[rope]]]; }; tokenSINGLE => { <> token.class _ scanTab[rf.Fetch[tokpos]]; }; tokenDOUBLE => { <> c1: CHAR _ rf.Fetch[tokpos]; SELECT c1 FROM '= => token.class _ tokenARROW; '< => token.class _ tokenLE; '> => token.class _ tokenGE; '. => token.class _ tokenDOTS ENDCASE => ERROR; }; tokenCOMMENT => { <> LOOP; }; tokenEOF => { token.class _ EndMarker; token.value _ P1.NullValue; }; tokenERROR => { <> ErrorContext["Syntax error", tokpos, errPut]; }; ENDCASE => ERROR; -- all cases should have been covered EXIT; REPEAT badNumber => { ErrorContext["invalid number", tokpos, errPut]; }; CheckEnd => {}; ENDLOOP; <> nTokens _ nTokens + 1; lastToken _ tokpos; RETURN; }; <> WrapLit: PROC [r: REF ANY] RETURNS [LTIndex] = { RETURN [NEW[LTNode _ [index: tokpos, value: r, literal: rf.Flatten[tokpos, toklen]]]]}; <> ScanInit: PUBLIC SAFE PROC [table: PPParseTable.Handle, source: ROPE] = TRUSTED { hashTab _ @table.scanTable.hashTab; scanTab _ @table.scanTable.scanTab; vocab _ LOOPHOLE[@table.scanTable.vocabBody]; vocabIndex _ @table.scanTable.vocabIndex; rf _ source; rs _ IO.RIS[rf]; tokpos _ 0; lastToken _ 0; nTokens _ nErrors _ 0}; ScanReset: PUBLIC SAFE PROC RETURNS [CARDINAL, CARDINAL] = TRUSTED { rf _ NIL; rs _ NIL; RETURN [nTokens, nErrors]}; <> ResetScanIndex: PUBLIC SAFE PROC [index: INT] RETURNS [success: BOOL] = TRUSTED { tokpos _ index; RETURN [TRUE]}; ErrorContext: PUBLIC SAFE PROC [message: ROPE, tokenIndex: INT, put: IO.STREAM] = TRUSTED { low: INT _ tokenIndex - 40; high: INT _ tokenIndex + 40; nErrors _ nErrors + 1; IF low < 0 THEN low _ 0; IF high >= rf.Size[] THEN high _ rf.Size[]-1; put.PutChar['\n]; IF low > 0 THEN put.PutRope["..."]; FOR i: INT IN [low..high] DO c: CHAR _ rf.Fetch[i]; IF i = tokenIndex THEN put.PutRope[" *^* "]; put.PutChar[c]; ENDLOOP; IF high < rf.Size[]-1 THEN put.PutRope["..."]; put.PutChar['\n]; put.PutRope[message]; put.PutChar['\n]; }; END.