DIRECTORY Convert, IO, PPLeaves USING [HTIndex, HTNode, LTIndex, LTNode], PPP1 USING [Token, Value, NullValue], PPParseTable USING [Handle, HashIndex, TSymbol, VocabHashEntry, EndMarker, tokenARROW, tokenATOM, tokenCHAR, tokenDOTS, tokenGE, tokenID, tokenLE, tokenFLNUM, tokenLNUM, tokenSTR], Real USING [RealException], Rope USING [Fetch, Flatten, Length, ROPE, Size]; PPScanner: PROGRAM IMPORTS Convert, IO, Real, Rope EXPORTS PPP1 SHARES Rope = BEGIN OPEN PPLeaves, PPParseTable, P1: PPP1; ROPE: TYPE = Rope.ROPE; STREAM: TYPE = IO.STREAM; hashTab: LONG POINTER TO ARRAY HashIndex OF VocabHashEntry _ NIL; scanTab: LONG POINTER TO ARRAY CHAR [40C..177C] OF TSymbol _ NIL; vocab: LONG STRING _ NIL; vocabIndex: LONG POINTER TO ARRAY TSymbol OF CARDINAL _ NIL; rf: ROPE _ NIL; -- the source rs: STREAM _ NIL; -- the source as stream toklen: NAT _ 0; -- current token length tokpos: INT _ 0; -- source index for start of token nTokens: CARDINAL; -- token count nErrors: CARDINAL; -- lexical errors lastToken: INT _ 0; IdFromRope: PROC [r: ROPE, index: INT] RETURNS [HTIndex] = { RETURN [NEW[HTNode _ [index: index, name: r]]]}; IdFirst: HTIndex _ IdFromRope["first", LAST[INT]]; IDLock: HTIndex _ IdFromRope["LOCK", LAST[INT]]; IDRest: HTIndex _ IdFromRope["rest", LAST[INT]]; IdOfFirst: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IdFirst]}; IdOfLock: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IDLock]}; IdOfRest: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IDRest]}; Atom: PUBLIC SAFE PROC [errPut: IO.STREAM] RETURNS [token: P1.Token] = TRUSTED { tokenKind: IO.TokenKind; rope: ROPE; charsSkipped: INT; DO peek: CHAR _ 0C; [] _ IO.SkipWhitespace[rs, TRUE ! IO.EndOfStream => EXIT]; peek _ IO.PeekChar[rs ! IO.EndOfStream => EXIT]; tokpos _ IO.GetIndex[rs]; SELECT peek FROM '%, '& => { DO [] _ IO.GetChar[rs]; peek _ IO.PeekChar[rs ! IO.EndOfStream => EXIT]; SELECT peek FROM IN ['a..'z], IN ['A..'Z], IN ['0..'9] => {}; ENDCASE => EXIT; ENDLOOP; toklen _ IO.GetIndex[rs] - tokpos; token.class _ tokenID; token.value.r _ IdFromRope[rf.Flatten[tokpos, toklen], tokpos]; EXIT; }; ENDCASE => NULL; [tokenKind, rope, charsSkipped] _ IO.GetCedarTokenRope[rs ! IO.Error => { ErrorContext["Syntax error", IO.GetIndex[rs], errPut]; EXIT}; IO.EndOfStream => { ErrorContext["Unexpected end of stream", IO.GetIndex[rs], errPut]; EXIT}]; toklen _ rope.Length[]; tokpos _ rs.GetIndex[] - toklen; SELECT tokenKind FROM tokenID => { allcaps: BOOL _ TRUE; r: ROPE _ NIL; token.class _ tokenID; FOR i: INT IN [0..rope.Size[]) DO IF rope.Fetch[i] NOT IN ['A..'Z] THEN {allcaps _ FALSE; EXIT}; ENDLOOP; IF allcaps THEN { first: CARDINAL _ LOOPHOLE[rope.Fetch[0], CARDINAL]; last: CARDINAL _ LOOPHOLE[rope.Fetch[rope.Size[]-1], CARDINAL]; h: CARDINAL _ (first * 128 - first + last) MOD LAST[HashIndex] + 1; j: CARDINAL _ 0; len: NAT _ rope.Size[]; WHILE (j _ hashTab[h].symbol) # 0 DO s2: CARDINAL _ vocabIndex[j - 1]; IF vocabIndex[j] - s2 = len THEN FOR s1: CARDINAL IN [0..len) DO IF rope.Fetch[s1] # vocab[s2] THEN EXIT; s2 _ s2 + 1; REPEAT FINISHED => {token.class _ j; GO TO CheckEnd}; ENDLOOP; IF (h _ hashTab[h].link) = 0 THEN EXIT ENDLOOP; }; token.value.r _ IdFromRope[rope, tokpos]; }; tokenDECIMAL => { ENABLE Convert.Error => GO TO badNumber; token.class _ tokenLNUM; token.value.r _ WrapLit[NEW[INT _ LOOPHOLE[Convert.CardFromDecimalLiteral[rope]]]]; }; tokenOCTAL => { ENABLE Convert.Error => GO TO badNumber; token.class _ tokenLNUM; token.value.r _ WrapLit[NEW[INT _ LOOPHOLE[Convert.CardFromOctalLiteral[rope]]]]; }; tokenHEX => { token.class _ tokenLNUM; token.value.r _ WrapLit[NEW[INT _ LOOPHOLE[Convert.CardFromHexLiteral[rope]]]]; }; tokenREAL => { ENABLE Convert.Error, Real.RealException => TRUSTED{GO TO badNumber}; token.class _ tokenFLNUM; token.value.r _ WrapLit[NEW[REAL _ Convert.RealFromLiteral[rope]]]; }; tokenROPE => { token.class _ tokenSTR; token.value.r _ WrapLit[NEW[ROPE _ Convert.RopeFromLiteral[rope]]]; }; tokenCHAR => { token.class _ tokenCHAR; token.value.r _ WrapLit[NEW[CHAR _ Convert.CharFromLiteral[rope]]]; }; tokenATOM => { token.class _ tokenATOM; token.value.r _ WrapLit[NEW[ATOM _ Convert.AtomFromRope[rope]]]; }; tokenSINGLE => { token.class _ scanTab[rf.Fetch[tokpos]]; }; tokenDOUBLE => { c1: CHAR _ rf.Fetch[tokpos]; SELECT c1 FROM '= => token.class _ tokenARROW; '< => token.class _ tokenLE; '> => token.class _ tokenGE; '. => token.class _ tokenDOTS ENDCASE => ERROR; }; tokenCOMMENT => { LOOP; }; tokenEOF => { token.class _ EndMarker; token.value _ P1.NullValue; }; tokenERROR => { ErrorContext["Syntax error", tokpos, errPut]; }; ENDCASE => ERROR; -- all cases should have been covered EXIT; REPEAT badNumber => { ErrorContext["invalid number", tokpos, errPut]; }; CheckEnd => {}; ENDLOOP; nTokens _ nTokens + 1; lastToken _ tokpos; RETURN; }; WrapLit: PROC [r: REF ANY] RETURNS [LTIndex] = { RETURN [NEW[LTNode _ [index: tokpos, value: r, literal: rf.Flatten[tokpos, toklen]]]]}; ScanInit: PUBLIC SAFE PROC [table: PPParseTable.Handle, source: ROPE] = TRUSTED { hashTab _ @table.scanTable.hashTab; scanTab _ @table.scanTable.scanTab; vocab _ LOOPHOLE[@table.scanTable.vocabBody]; vocabIndex _ @table.scanTable.vocabIndex; rf _ source; rs _ IO.RIS[rf]; tokpos _ 0; lastToken _ 0; nTokens _ nErrors _ 0}; ScanReset: PUBLIC SAFE PROC RETURNS [CARDINAL, CARDINAL] = TRUSTED { rf _ NIL; rs _ NIL; RETURN [nTokens, nErrors]}; ResetScanIndex: PUBLIC SAFE PROC [index: INT] RETURNS [success: BOOL] = TRUSTED { tokpos _ index; RETURN [TRUE]}; ErrorContext: PUBLIC SAFE PROC [message: ROPE, tokenIndex: INT, put: IO.STREAM] = TRUSTED { low: INT _ tokenIndex - 40; high: INT _ tokenIndex + 40; nErrors _ nErrors + 1; IF low < 0 THEN low _ 0; IF high >= rf.Size[] THEN high _ rf.Size[]-1; put.PutChar['\n]; IF low > 0 THEN put.PutRope["..."]; FOR i: INT IN [low..high] DO c: CHAR _ rf.Fetch[i]; IF i = tokenIndex THEN put.PutRope[" *^* "]; put.PutChar[c]; ENDLOOP; IF high < rf.Size[]-1 THEN put.PutRope["..."]; put.PutChar['\n]; put.PutRope[message]; put.PutChar['\n]; }; END. lPPScanner.Mesa Ed Satterthwaite, January 12, 1981 12:37 PM Russ Atkinson, October 5, 1983 4:40 pm Paul Rovner, September 26, 1983 1:06 pm stuff supporting the scanner special case of identifier, don't let IO.GetCedarTokenRope see it! an identifier or reserved word This could be a reserved word... a DECIMAL literal an OCTAL literal a HEX literal a REAL literal a ROPE literal a CHAR literal an ATOM literal a single-character token a double-character token a comment token.msg describes the scanning error Every token return must come through here numerical conversion initialization/finalization error handling Ê g˜šœ™Jšœ,™,Jšœ&™&Jšœ'™'—J˜šÏk ˜ Jšœ ˜ Jšœ˜Jšœ œ$˜2Jšœœ˜&šœ ˜Jšœ¡˜¡—Jšœœ˜Jšœœœ˜0J˜—šœ ˜Jšœ œ ˜Jšœ˜ Jšœ˜ Jšœœœ"˜.J˜Jšœœœ˜Jšœœœœ˜J˜šœ™Jš œ œœœœ œœ˜BJšœ œœœœœ œ œ˜BJšœœœœ˜Jšœ œœœœ œœœ˜=J˜JšœœœÏc ˜Jšœœœž˜*J˜Jšœœ ž˜-Jšœœ ž"˜6Jšœ œž˜$Jšœ œž˜'Jšœ œ˜J˜—š Ïn œœœ œœ˜Jšœ˜—šœ œ˜Jšœ ™ Jšœœœœ˜4Jšœœœœ˜?Jšœœ œœ˜CJšœœ˜Jšœœ˜šœ˜$Jšœœ˜!šœœ˜!šœœœ ˜Jšœœœ˜)J˜ š˜Jšœœœ ˜.—Jšœ˜ ——Jšœœ˜&Jšœ˜ —J˜—Jšœ)˜)Jšœ˜—šœ˜Jšœ™Jšœœœ ˜(Jšœ˜šœœœ˜!Jšœ)˜1—Jšœ˜—šœ˜Jšœ™Jšœœœ ˜(Jšœ˜šœœœ˜!Jšœ'˜/—Jšœ˜—šœ ˜ Jšœ ™ Jšœ˜šœœœ˜!Jšœ%˜-—Jšœ˜—šœ˜Jšœ™Jšœ&œœœ ˜EJšœ˜Jšœœœ#˜CJšœ˜—šœ˜Jšœ™Jšœ˜Jšœœœ#˜CJšœ˜—šœ˜Jšœ™Jšœ˜Jšœœœ#˜CJšœ˜—šœ˜Jšœ™Jšœ˜Jšœœœ ˜@Jšœ˜—šœ˜Jšœ™Jšœ(˜(Jšœ˜—šœ˜Jšœ™Jšœœ˜šœ˜Jšœ˜Jšœ˜Jšœ˜Jšœ˜Jšœœ˜—Jšœ˜—šœ˜Jšœ ™ Jšœ˜J˜—šœ ˜ Jšœ˜J˜Jšœ˜—šœ˜Jšœ&™&Jšœ-˜-J˜—Jšœœž%˜8—Jšœ˜š˜˜Jšœ/˜/J˜—J˜—Jšœ˜ —Jšœ)™)J˜J˜Jšœ˜Jšœ˜J˜—Jšœ™J˜š Ÿœœœœœ˜0JšœœM˜XJ˜—Jšœ™J˜š Ÿœœœœ&œœ˜QJ˜$J˜$Jšœœ˜.J˜)Jšœ ˜ Jšœœœ˜J˜ J˜J˜J˜—šŸ œœœ˜Jšœœœœ˜(Jšœœ˜ Jšœœ˜ Jšœ˜J˜—Jšœ™J˜šŸœœœ˜ Jš œœœ œœ˜0J˜Jšœœ˜J˜—šŸ œœœ˜Jš œ œœœœœ˜