-- PPScanner.Mesa
-- Lexical scanner for the Cedar pretty-printer: turns the source Rope into
-- parser tokens (P1.Token), records comments / blank lines / page breaks in
-- PPCommentTable, and keeps token and lexical-error counts.

DIRECTORY
  BBZones USING [GetPrefixedZone],
  CedarScanner USING [CharFromToken, ContentsFromToken, GetClosure, GetProc, GetToken, IntegerOverflow, IntFromToken, RealFromToken, RopeFromToken, Token],
  PPComData USING [],
  PPCommentTable USING [AddComment, AddBreakHint, Reset, SetEnding],
  PPLeaves USING [HTIndex, HTNode, ISEIndex, LTIndex, LTNode],
  PPOps USING [GetSource],
  PPP1 USING [Token, Value, NullValue],
  PPParseTable USING [Handle, HashIndex, TSymbol, VocabHashEntry, EndMarker, tokenARROW, tokenATOM, tokenCHAR, tokenDOTS, tokenGE, tokenID, tokenLE, tokenFLNUM, tokenLNUM, tokenSTR],
  PPUtil USING [ShowChar, ShowCR, ShowRope],
  Real USING [RealException],
  Rope USING [Equal, Fetch, Flatten, ROPE, Run, Size, Text];

PPScanner: PROGRAM
  IMPORTS BBZones, CedarScanner, PPCommentTable, PPOps, PPUtil, Real, Rope
  EXPORTS PPComData, PPP1
  SHARES Rope
  = BEGIN OPEN PPLeaves, PPParseTable, P1: PPP1, PPUtil, Rope;

  -- exported to PPComData: well-known identifier spellings
  idANY: PUBLIC PPLeaves.ISEIndex _ "UNSPECIFIED";
  idINT: PUBLIC PPLeaves.ISEIndex _ "INTEGER";
  idLOCK: PUBLIC PPLeaves.ISEIndex _ "LOCK";

  -- no-op; presumably exists only so clients can force module start-up -- TODO confirm
  Init: PUBLIC SAFE PROC = TRUSTED {};

  -- stuff supporting the scanner: parse-table views installed by ScanInit
  hashTab: LONG POINTER TO ARRAY HashIndex OF VocabHashEntry;  -- reserved-word hash chains
  scanTab: LONG POINTER TO ARRAY CHAR [40C..177C] OF TSymbol;  -- single-char token classes
  vocab: LONG STRING;  -- concatenated reserved-word spellings
  vocabIndex: LONG POINTER TO ARRAY TSymbol OF CARDINAL;  -- vocab[vocabIndex[j-1]..vocabIndex[j]) spells symbol j
  rf: ROPE _ NIL; -- the source
  tLimit: INT _ 0;  -- rf.Size[]: first index past the source
  pz: ZONE _ BBZones.GetPrefixedZone[];  -- allocation zone for leaves and literals
  toklen: NAT _ 0; -- current token length
  tokpos: INT _ 0; -- source index for start of token
  nTokens: CARDINAL; -- token count
  nErrors: CARDINAL; -- lexical errors
  lastToken: INT _ 0;  -- start index of the previous token (comment attachment point)

  -- Allocates a hash-table leaf for identifier r occurring at index.
  IdFromRope: PROC [r: ROPE, index: INT] RETURNS [HTIndex] = {
    RETURN [pz.NEW[HTNode _ [index: index, name: r]]]};

  -- Preallocated leaves for identifiers the parser asks about by name.
  IdFirst: HTIndex _ IdFromRope["first", LAST[INT]];
  IDLock: HTIndex _ IdFromRope["LOCK", LAST[INT]];
  IDRest: HTIndex _ IdFromRope["rest", LAST[INT]];

  IdOfFirst: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IdFirst]};
  IdOfLock: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IDLock]};
  IdOfRest: PUBLIC SAFE PROC RETURNS [HTIndex] = TRUSTED {RETURN [IDRest]};

  -- Returns the next parser token starting at tokpos.
  -- Comments, blank-line runs, page breaks, and post-parser keywords are
  -- consumed here (recorded in PPCommentTable) and never reach the parser.
  -- Every token return must come through here.
  Atom: PUBLIC SAFE PROC RETURNS [token: P1.Token] = TRUSTED {
    CRcount: NAT _ 0;  -- newlines seen by getChar1 since last reset
    formLim: INT _ -1;  -- highest index already reported to PPCommentTable (avoids duplicates on rescan)
    allcaps: BOOL _ TRUE;  -- NOTE(review): shadowed by the tokenID-local allcaps below; this one appears unused
    -- Char fetch for GetToken: also counts newlines, records break hints and
    -- form feeds, and maps '& to 'a -- presumably so '& scans as an
    -- identifier character; TODO confirm.
    getChar1: CedarScanner.GetProc = TRUSTED {
      c: CHAR _ 0C;  -- 0C past end of source
      IF index < tLimit THEN
        SELECT (c _ rf.Fetch[index]) FROM
          '\n => {
            CRcount _ CRcount + 1;
            IF index > formLim THEN {
              formLim _ index;
              PPCommentTable.AddBreakHint[index]; }; };
          '\f => IF index > formLim THEN {
            formLim _ index;
            PPCommentTable.AddComment[index, "\f", lastToken, CRcount];
            CRcount _ 0; };
          '& => c _ 'a;
          ENDCASE;
      RETURN [c]; };
    -- Plain char fetch (no side effects) for re-reading token contents.
    getChar2: CedarScanner.GetProc = TRUSTED {
      IF index < tLimit THEN RETURN[rf.Fetch[index]] ELSE RETURN [0C]; };
    get1: CedarScanner.GetClosure _ [getChar1];
    get2: CedarScanner.GetClosure _ [getChar2];
    ctok: CedarScanner.Token;
    -- Loop until a real token is found; comments and skipped keywords LOOP.
    DO
      CRcount _ 0;
      ctok _ CedarScanner.GetToken[get1, tokpos];
      token.index _ tokpos _ ctok.start;
      toklen _ ctok.next - tokpos;
      IF CRcount > 0 THEN {
        -- newlines already reported (rescan) don't count
        IF tokpos < formLim THEN CRcount _ CRcount - 1;
        IF CRcount > 1 THEN {
          -- record a run of blank lines as a NIL comment before this token
          PPCommentTable.AddComment[tokpos, NIL, lastToken, CRcount];
          CRcount _ 0; }};
      SELECT ctok.kind FROM
        tokenID => {
          -- an identifier or reserved word
          allcaps: BOOL _ TRUE;
          limit: INT _ ctok.next-1;
          r: ROPE _ NIL;
          token.class _ tokenID;
          -- only all-upper-case identifiers can be reserved words
          FOR i: INT IN [tokpos..limit] DO
            c: CHAR _ rf.Fetch[i];
            IF c NOT IN ['A..'Z] THEN {allcaps _ FALSE; EXIT};
            ENDLOOP;
          IF allcaps THEN {
            -- This could be a reserved word: probe the vocabulary hash table.
            -- Hash is built from the first and last characters only.
            first: CARDINAL _ LOOPHOLE[rf.Fetch[tokpos]];
            last: CARDINAL _ LOOPHOLE[rf.Fetch[limit]];
            h: CARDINAL _ (first * 128 - first + last) MOD LAST[HashIndex] + 1;
            j: CARDINAL _ 0;
            len: NAT _ toklen;
            WHILE (j _ hashTab[h].symbol) # 0 DO
              s2: CARDINAL _ vocabIndex[j - 1];  -- start of symbol j's spelling in vocab
              IF vocabIndex[j] - s2 = len THEN
                -- lengths match; compare spellings character by character
                FOR s1: CARDINAL IN [0..len) DO
                  IF rf.Fetch[tokpos+s1] # vocab[s2] THEN EXIT;
                  s2 _ s2 + 1
                  REPEAT
                    -- full match: token class is the reserved-word symbol
                    FINISHED => {token.class _ j; GO TO CheckEnd};
                  ENDLOOP;
              IF (h _ hashTab[h].link) = 0 THEN EXIT  -- end of hash chain
              ENDLOOP;
            };
          token.value.r _ IdFromRope[ r _ CedarScanner.ContentsFromToken[get2, ctok], tokpos];
          IF allcaps THEN
            SELECT TRUE FROM
              -- language additions since our parser was created: treat as comments
              Rope.Equal[r, "CEDAR"], Rope.Equal[r, "TRUSTED"], Rope.Equal[r, "SAFE"], Rope.Equal[r, "UNSAFE"], Rope.Equal[r, "CHECKED"] => {
                tokpos _ ctok.next; LOOP};
              ENDCASE;
          };
        tokenINT => {
          -- an INT literal
          ENABLE CedarScanner.IntegerOverflow => GO TO badNumber;
          token.class _ tokenLNUM;
          token.value.r _ WrapLit[pz.NEW[INT _ CedarScanner.IntFromToken[get2, ctok]]]; };
        tokenREAL => {
          -- a REAL literal
          r: REAL _ 0.0;
          r _ CedarScanner.RealFromToken[ get2, ctok ! Real.RealException => TRUSTED {GO TO badNumber}];
          token.class _ tokenFLNUM;
          token.value.r _ WrapLit[pz.NEW[REAL _ r]]; };
        tokenROPE => {
          -- a ROPE literal
          token.class _ tokenSTR;
          token.value.r _ WrapLit[pz.NEW[ROPE _ CedarScanner.RopeFromToken[get2, ctok]]]; };
        tokenCHAR => {
          -- a CHAR literal
          token.class _ tokenCHAR;
          token.value.r _ WrapLit[pz.NEW[CHAR _ CedarScanner.CharFromToken[get2, ctok]]]; };
        tokenATOM => {
          -- an ATOM literal
          token.class _ tokenATOM;
          token.value.r _ CedarScanner.ContentsFromToken[get2, ctok]; };
        tokenSINGLE => {
          -- a single-character token: class comes straight from the scan table
          token.class _ scanTab[rf.Fetch[tokpos]]; };
        tokenDOUBLE => {
          -- a double-character token (=> <= >= ..), classified by its first char
          c1: CHAR _ rf.Fetch[tokpos];
          SELECT c1 FROM
            '= => token.class _ tokenARROW;
            '< => token.class _ tokenLE;
            '> => token.class _ tokenGE;
            '. => token.class _ tokenDOTS
            ENDCASE => ERROR; };
        tokenCOMMENT => {
          -- a comment: record it and keep scanning
          comment: Rope.Text _ CedarScanner.ContentsFromToken[get2, ctok].Flatten[];
          PPCommentTable.AddComment[tokpos, comment, lastToken, CRcount];
          tokpos _ ctok.next;
          LOOP; };
        tokenEOF => {
          token.class _ EndMarker;
          token.value _ P1.NullValue; };
        tokenERROR => {
          -- ctok.msg describes the scanning error
          ErrorContext[ctok.msg, tokpos]; };
        ENDCASE => ERROR; -- all cases should have been covered
      EXIT;
      REPEAT
        badNumber => { ErrorContext["invalid number", tokpos]; };
        -- "END" followed by '.: accumulate everything after it as the ending comment
        CheckEnd =>
          IF rf.Fetch[ctok.next] = '. AND toklen = 3 AND Rope.Run[rf, tokpos, "END", 0] = 3 THEN {
            pos: INT _ ctok.next+1;
            PPCommentTable.AddComment[pos, rf.Flatten[pos], tLimit, 0];
            PPCommentTable.SetEnding[pos]; };
      ENDLOOP;
    nTokens _ nTokens + 1;
    lastToken _ tokpos;
    tokpos _ ctok.next;
    RETURN};

  -- Wraps a literal value in an LTNode, capturing its source spelling.
  -- Relies on tokpos/toklen still describing the current token.
  WrapLit: PROC [r: REF ANY] RETURNS [LTIndex] = {
    RETURN [pz.NEW[LTNode _ [index: tokpos, value: r, literal: rf.Flatten[tokpos, toklen]]]]};

  -- initialization/finalization

  -- Installs the parse tables and (re)binds the scanner to the current source.
  ScanInit: PUBLIC SAFE PROC [table: PPParseTable.Handle] = TRUSTED {
    hashTab _ @table.scanTable.hashTab;
    scanTab _ @table.scanTable.scanTab;
    vocab _ LOOPHOLE[@table.scanTable.vocabBody];
    vocabIndex _ @table.scanTable.vocabIndex;
    rf _ PPOps.GetSource[];
    tokpos _ 0;
    tLimit _ rf.Size[];
    PPCommentTable.Reset[];
    lastToken _ 0;
    nTokens _ nErrors _ 0};

  -- Drops the source and reports [token count, lexical error count].
  ScanReset: PUBLIC SAFE PROC RETURNS [CARDINAL, CARDINAL] = TRUSTED {
    rf _ NIL;
    RETURN [nTokens, nErrors]};

  -- Repositions the scanner; always succeeds.
  ResetScanIndex: PUBLIC SAFE PROC [index: INT] RETURNS [success: BOOL] = TRUSTED {
    tokpos _ index;
    RETURN [TRUE]};

  -- error handling

  -- Displays message with up to 40 characters of source context on each side
  -- of tokenIndex, marking the error position with " *^* ".
  ErrorContext: PUBLIC SAFE PROC [message: ROPE, tokenIndex: INT] = TRUSTED {
    low: INT _ tokenIndex - 40;
    high: INT _ tokenIndex + 40;
    nErrors _ nErrors + 1;
    IF low < 0 THEN low _ 0;
    IF high >= rf.Size[] THEN high _ rf.Size[]-1;
    ShowCR[];
    IF low > 0 THEN ShowRope["..."];  -- ellipsis when context is truncated
    FOR i: INT IN [low..high] DO
      c: CHAR _ rf.Fetch[i];
      IF i = tokenIndex THEN ShowRope[" *^* "];  -- marker just before the offending char
      ShowChar[c];
      ENDLOOP;
    IF high < rf.Size[]-1 THEN ShowRope["..."];
    ShowCR[];
    ShowRope[message];
    ShowCR[]};

  END.

-- PPScanner.Mesa
-- Ed Satterthwaite, January 12, 1981 12:37 PM
-- Russ Atkinson, May 19, 1983 4:10 pm: added export of PPComData stuff
language additions since our parser was created, treat as comments an INT literal a REAL literal a ROPE literal a CHAR literal an ATOM literal a single-character token a double-character token a comment token.msg describes the scanning error accumulate the ending comment Every token return must come through here numerical conversion initialization/finalization error handling Ê “˜šœ™Jšœ,™,šœ#™#J™——J˜šÏk ˜ Jšœœ˜ šœ ˜Jšœ‡˜‡—Jšœ œ˜Jšœœ/˜CJšœ œ.˜