DIRECTORY Convert, IO, PPLeaves USING [HTIndex, HTNode, LTIndex, LTNode], PPP1 USING [Token, Value, NullValue], PPParseTable USING [Handle, HashIndex, TSymbol, VocabHashEntry, EndMarker, tokenARROW, tokenATOM, tokenCHAR, tokenDOTS, tokenGE, tokenID, tokenLE, tokenFLNUM, tokenLNUM, tokenSTR], Real USING [RealException], Rope USING [Fetch, Flatten, Length, ROPE, Size]; PPScanner: PROGRAM IMPORTS Convert, IO, Real, Rope EXPORTS PPP1 SHARES Rope = BEGIN OPEN PPLeaves, PPParseTable, P1: PPP1; ROPE: TYPE = Rope.ROPE; STREAM: TYPE = IO.STREAM; hashTab: LONG POINTER TO ARRAY HashIndex OF VocabHashEntry _ NIL; scanTab: LONG POINTER TO ARRAY CHAR [40C..177C] OF TSymbol _ NIL; vocab: LONG STRING _ NIL; vocabIndex: LONG POINTER TO ARRAY TSymbol OF CARDINAL _ NIL; rf: ROPE _ NIL; -- the source rs: STREAM _ NIL; -- the source as stream toklen: NAT _ 0; -- current token length tokpos: INT _ 0; -- source index for start of token nTokens: CARDINAL; -- token count nErrors: CARDINAL; -- lexical errors lastToken: INT _ 0; IdFromRope: PROC [r: ROPE, index: INT] RETURNS [HTIndex] = { RETURN [NEW[HTNode _ [index: index, name: r]]]}; IdFirst: HTIndex _ IdFromRope["first", LAST[INT]]; IDLock: HTIndex _ IdFromRope["LOCK", LAST[INT]]; IDRest: HTIndex _ IdFromRope["rest", LAST[INT]]; IdOfFirst: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IdFirst]}; IdOfLock: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IDLock]}; IdOfRest: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IDRest]}; Atom: PUBLIC SAFE PROC [errPut: IO.STREAM] RETURNS [token: P1.Token] = TRUSTED { tokenKind: IO.TokenKind; rope: ROPE; charsSkipped: INT; DO peek: CHAR _ 0C; [] _ IO.SkipWhitespace[rs, TRUE ! IO.EndOfStream => EXIT]; peek _ IO.PeekChar[rs ! IO.EndOfStream => EXIT]; tokpos _ IO.GetIndex[rs]; SELECT peek FROM '%, '& => { DO [] _ IO.GetChar[rs]; peek _ IO.PeekChar[rs ! IO.EndOfStream => EXIT]; SELECT peek FROM IN ['a..'z], IN ['A..'Z], IN ['0..'9] => {}; ENDCASE => EXIT; ENDLOOP; toklen _ IO.GetIndex[rs] - tokpos; token.class _ tokenID; token.value.r _ IdFromRope[rf.Flatten[tokpos, toklen], tokpos]; EXIT; }; ENDCASE => NULL; [tokenKind, rope, charsSkipped] _ IO.GetCedarTokenRope[rs ! IO.Error => { ErrorContext["Syntax error", IO.GetIndex[rs], errPut]; EXIT}; IO.EndOfStream => { ErrorContext["Unexpected end of stream", IO.GetIndex[rs], errPut]; EXIT}]; toklen _ rope.Length[]; tokpos _ rs.GetIndex[] - toklen; SELECT tokenKind FROM tokenID => { allcaps: BOOL _ TRUE; r: ROPE _ NIL; token.class _ tokenID; FOR i: INT IN [0..rope.Size[]) DO IF rope.Fetch[i] NOT IN ['A..'Z] THEN {allcaps _ FALSE; EXIT}; ENDLOOP; IF allcaps THEN { first: CARDINAL _ LOOPHOLE[rope.Fetch[0], CARDINAL]; last: CARDINAL _ LOOPHOLE[rope.Fetch[rope.Size[]-1], CARDINAL]; h: CARDINAL _ (first * 128 - first + last) MOD LAST[HashIndex] + 1; j: CARDINAL _ 0; len: NAT _ rope.Size[]; WHILE (j _ hashTab[h].symbol) # 0 DO s2: CARDINAL _ vocabIndex[j - 1]; IF vocabIndex[j] - s2 = len THEN FOR s1: CARDINAL IN [0..len) DO IF rope.Fetch[s1] # vocab[s2] THEN EXIT; s2 _ s2 + 1; REPEAT FINISHED => {token.class _ j; GO TO CheckEnd}; ENDLOOP; IF (h _ hashTab[h].link) = 0 THEN EXIT ENDLOOP; }; token.value.r _ IdFromRope[rope, tokpos]; }; tokenDECIMAL => { ENABLE Convert.Error => GO TO badNumber; token.class _ tokenLNUM; token.value.r _ WrapLit[NEW[INT _ LOOPHOLE[Convert.CardFromDecimalLiteral[rope]]]]; }; tokenOCTAL => { ENABLE Convert.Error => GO TO badNumber; token.class _ tokenLNUM; token.value.r _ WrapLit[NEW[INT _ LOOPHOLE[Convert.CardFromOctalLiteral[rope]]]]; }; tokenHEX => { token.class _ tokenLNUM; token.value.r _ WrapLit[NEW[INT _ LOOPHOLE[Convert.CardFromHexLiteral[rope]]]]; }; tokenREAL => { ENABLE Convert.Error, Real.RealException => TRUSTED{GO TO badNumber}; token.class _ tokenFLNUM; token.value.r _ WrapLit[NEW[REAL _ Convert.RealFromLiteral[rope]]]; }; tokenROPE => { token.class _ tokenSTR; token.value.r _ WrapLit[NEW[ROPE _ Convert.RopeFromLiteral[rope]]]; }; tokenCHAR => { token.class _ tokenCHAR; token.value.r _ WrapLit[NEW[CHAR _ Convert.CharFromLiteral[rope]]]; }; tokenATOM => { token.class _ tokenATOM; token.value.r _ WrapLit[NEW[ATOM _ Convert.AtomFromRope[rope]]]; }; tokenSINGLE => { token.class _ scanTab[rf.Fetch[tokpos]]; }; tokenDOUBLE => { c1: CHAR _ rf.Fetch[tokpos]; SELECT c1 FROM '= => token.class _ tokenARROW; '< => token.class _ tokenLE; '> => token.class _ tokenGE; '. => token.class _ tokenDOTS ENDCASE => ERROR; }; tokenCOMMENT => { LOOP; }; tokenEOF => { token.class _ EndMarker; token.value _ P1.NullValue; }; tokenERROR => { ErrorContext["Syntax error", tokpos, errPut]; }; ENDCASE => ERROR; -- all cases should have been covered EXIT; REPEAT badNumber => { ErrorContext["invalid number", tokpos, errPut]; }; CheckEnd => {}; ENDLOOP; nTokens _ nTokens + 1; lastToken _ tokpos; RETURN; }; WrapLit: PROC [r: REF ANY] RETURNS [LTIndex] = { RETURN [NEW[LTNode _ [index: tokpos, value: r, literal: rf.Flatten[tokpos, toklen]]]]}; ScanInit: PUBLIC SAFE PROC [table: PPParseTable.Handle, source: ROPE] = TRUSTED { hashTab _ @table.scanTable.hashTab; scanTab _ @table.scanTable.scanTab; vocab _ LOOPHOLE[@table.scanTable.vocabBody]; vocabIndex _ @table.scanTable.vocabIndex; rf _ source; rs _ IO.RIS[rf]; tokpos _ 0; lastToken _ 0; nTokens _ nErrors _ 0}; ScanReset: PUBLIC SAFE PROC RETURNS [CARDINAL, CARDINAL] = TRUSTED { rf _ NIL; rs _ NIL; RETURN [nTokens, nErrors]}; ResetScanIndex: PUBLIC SAFE PROC [index: INT] RETURNS [success: BOOL] = TRUSTED { tokpos _ index; RETURN [TRUE]}; ErrorContext: PUBLIC SAFE PROC [message: ROPE, tokenIndex: INT, put: IO.STREAM] = TRUSTED { low: INT _ tokenIndex - 40; high: INT _ tokenIndex + 40; nErrors _ nErrors + 1; IF low < 0 THEN low _ 0; IF high >= rf.Size[] THEN high _ rf.Size[]-1; put.PutChar['\n]; IF low > 0 THEN put.PutRope["..."]; FOR i: INT IN [low..high] DO c: CHAR _ rf.Fetch[i]; IF i = tokenIndex THEN put.PutRope[" *^* "]; put.PutChar[c]; ENDLOOP; IF high < rf.Size[]-1 THEN put.PutRope["..."]; put.PutChar['\n]; put.PutRope[message]; put.PutChar['\n]; }; END. ²PPScanner.Mesa Copyright c 1985 by Xerox Corporation. All rights reserved. Ed Satterthwaite, January 12, 1981 12:37 PM Russ Atkinson, February 12, 1985 3:38:03 pm PST Paul Rovner, September 26, 1983 1:06 pm stuff supporting the scanner special case of identifier, don't let IO.GetCedarTokenRope see it! an identifier or reserved word This could be a reserved word... a DECIMAL literal an OCTAL literal a HEX literal a REAL literal a ROPE literal a CHAR literal an ATOM literal a single-character token a double-character token a comment token.msg describes the scanning error Every token return must come through here numerical conversion initialization/finalization error handling Κ Q˜codešœ™Kšœ Οmœ1™Kšžœ˜—šžœ žœ˜Kšœ ™ Kšœžœžœžœ˜4Kšœžœžœžœ˜?Kšœžœ žœžœ˜CKšœžœ˜Kšœžœ˜šžœž˜$Kšœžœ˜!šžœžœ˜!šžœžœžœ ž˜Kšžœžœžœ˜)K˜ šž˜Kšžœžœžœ ˜.—Kšžœ˜ ——Kšžœžœž˜&Kšžœ˜ —K˜—Kšœ)˜)Kšœ˜—šœ˜Kšœ™Kšžœžœžœ ˜(Kšœ˜šœžœžœ˜!Kšžœ)˜1—Kšœ˜—šœ˜Kšœ™Kšžœžœžœ ˜(Kšœ˜šœžœžœ˜!Kšžœ'˜/—Kšœ˜—šœ ˜ Kšœ ™ Kšœ˜šœžœžœ˜!Kšžœ%˜-—Kšœ˜—šœ˜Kšœ™Kšžœ&žœžœžœ ˜EKšœ˜Kšœžœžœ#˜CKšœ˜—šœ˜Kšœ™Kšœ˜Kšœžœžœ#˜CKšœ˜—šœ˜Kšœ™Kšœ˜Kšœžœžœ#˜CKšœ˜—šœ˜Kšœ™Kšœ˜Kšœžœžœ ˜@Kšœ˜—šœ˜Kšœ™Kšœ(˜(Kšœ˜—šœ˜Kšœ™Kšœžœ˜šžœž˜Kšœ˜Kšœ˜Kšœ˜Kšœ˜Kšžœžœ˜—Kšœ˜—šœ˜Kšœ ™ Kšžœ˜K˜—šœ ˜ Kšœ˜K˜Kšœ˜—šœ˜Kšœ&™&Kšœ-˜-K˜—KšžœžœŸ%˜8—Kšžœ˜šž˜˜Kšœ/˜/K˜—K˜—Kšžœ˜ —Kšœ)™)K˜K˜Kšžœ˜Kšœ˜K˜—Kšœ™K˜š  œžœžœžœžœ˜0KšžœžœM˜XK˜—Kšœ™K˜š  œžœžœžœ&žœžœ˜QK˜$K˜$Kšœžœ˜.K˜)Kšœ ˜ Kšœžœžœ˜K˜ K˜K˜K˜—š  œžœžœž˜Kšžœžœžœžœ˜(Kšœžœ˜ Kšœžœ˜ Kšžœ˜K˜—Kšœ™K˜š œžœžœžœ žœžœ žœžœ˜QK˜Kšžœžœ˜K˜—š  œžœžœžœ žœžœžœžœžœ˜[Kšœžœ˜Kšœžœ˜K˜Kšžœ žœ ˜Kšžœžœ˜-Kšœ˜Kšžœ žœ˜#šžœžœžœ ž˜Kšœžœ˜Kšžœžœ˜,Kšœ˜Kšžœ˜—Kšžœžœ˜.Kšœ˜Kšœ˜Kšœ˜K˜K˜—šžœ˜K˜K˜K˜——…—θ$λ