-- file: PasScanner.mesa
-- modified by Ramshaw, January 20, 1984 2:58 pm
-- written by McCreight, September 16, 1980 4:54 PM
-- Last Edited by: Plass, December 30, 1982 1:08 pm
DIRECTORY
FS USING [StreamOpen],
IO USING [Close, EndOfStream, GetChar, PutChar],
PasPrivate,
PasPrivateVars,
RefText USING [AppendChar],
Rope USING [Equal, Fetch, FromRefText, Length];
PasScanner: CEDAR PROGRAM
IMPORTS FS, IO, PasPrivate, PasPrivateVars, RefText, Rope
EXPORTS PasPrivate =
BEGIN
OPEN PasPrivate, PasPrivateVars;
-- Types
-- Index type for resWordIndex.  The range runs one character past 'Z so that
-- the lookup in InSymbol can use the entry for the following letter as the
-- exclusive upper bound of a letter's group.
ResWordFirstChar: TYPE = CHARACTER ['A..SUCC['Z]];
-- One reserved word: its spelling, the symbol it scans as, and the operator
-- (noOp when the word is not an operator).
ReservedWord: TYPE = RECORD [string: ROPE, sy: Symbol, op: Operator];
-- Index into resWordTable, which has 41 entries.
ResWordIndex: TYPE = [0..40];
-- Symbol and operator for a single punctuation character; any entry that is
-- not assigned explicitly keeps the defaults otherSy and noOp.
CharTabEntry: TYPE = RECORD [sy: Symbol ← otherSy, op: Operator ← noOp];
-- Constants
-- resWordIndex[c] is the position in resWordTable of the first reserved word
-- whose initial letter is not less than c.  It is filled in by the module
-- start code, not here.
resWordIndex: ARRAY ResWordFirstChar OF ResWordIndex;
-- The reserved words, ordered so that words with the same first letter are
-- adjacent and first letters ascend; the start code and the lookup in InSymbol
-- both depend on that ordering.  The final entry has an empty spelling and is
-- never compared against: the start code stops at LAST[ResWordIndex] before
-- fetching from it.
resWordTable: ARRAY ResWordIndex OF ReservedWord =
[["AND", mulOpSy, andOp], ["ARRAY", arraySy, noOp],
["BEGIN", beginSy, noOp], ["CASE", caseSy, noOp],
["CONST", constSy, noOp], ["DIV", mulOpSy, iDivOp], ["DO", doSy, noOp],
["DOWNTO", downToSy, noOp], ["ELSE", elseSy, noOp], ["END", endSy, noOp],
["EXIT", exitSy, noOp], ["EXTERN", externSy, noOp],
["EXTERNAL", externSy, noOp], ["FILE", fileSy, noOp],
["FOR", forSy, noOp], ["FORWARD", forwardSy, noOp],
["FUNCTION", functionSy, noOp], ["GOTO", gotoSy, noOp],
["IF", ifSy, noOp], ["IN", relOpSy, inOp], ["LABEL", labelSy, noOp],
["LOOP", loopSy, noOp], ["MOD", mulOpSy, modOp], ["NOT", notSy, noOp],
["OF", ofSy, noOp], ["OR", addOpSy, orOp], ["OTHERS", othersSy, noOp],
["PACKED", packedSy, noOp], ["PROCEDURE", procedureSy, noOp],
["PROGRAM", programSy, noOp], ["RECORD", recordSy, noOp],
["REPEAT", repeatSy, noOp], ["SET", setSy, noOp], ["THEN", thenSy, noOp],
["TO", toSy, noOp], ["TYPE", typeSy, noOp], ["UNTIL", untilSy, noOp],
["VAR", varSy, noOp], ["WHILE", whileSy, noOp], ["WITH", withSy, noOp],
["", eofSy, noOp]];
-- Variables
-- Module state of the scanner.  The initialisers below are written with an
-- explicit assignment arrow; without it the type name and the initial value
-- run together into one identifier and the declaration does not parse.

-- The character the scanner is looking at.  NextCh stores the raw character in
-- ch and the possibly capitalised one here.
scanCh: CHARACTER;
-- When TRUE, NextCh maps lower-case letters to upper case outside strings and
-- comments.
capitalizeAlphabetic: BOOLEAN ← TRUE;
-- When TRUE, the contents of string constants are capitalised as well.
capitalizeStringConstants: BOOLEAN ← FALSE;
-- Accumulates the text of the lexeme being scanned.
-- NOTE(review): Z is not declared in this file; presumably it is a zone
-- supplied by one of the opened interfaces.  Confirm.
string: REF TEXT ← Z.NEW[TEXT[200]]; -- must be long enough for one lexeme
-- TRUE while inside a string constant or a comment; suppresses capitalisation
-- in NextCh.
readingString: BOOLEAN;
-- The stream currently being read.
source: STREAM;
-- Loop variables shared by the reserved-word lookup in InSymbol and by the
-- module start code.
i: ResWordIndex;
j: CARDINAL;
-- When TRUE, NextCh echoes every character it reads to commandHandle.out.
traceInput: BOOLEAN ← FALSE;
-- Symbol and operator for single-character tokens; filled in by the start code.
charTable: ARRAY CHARACTER OF CharTabEntry;
-- Procedures
-- EndOfSource: raised by NextCh when it is called after sy has become eofSy.
-- BadInteger: raised by StringToPascalInteger on a character that is not a
-- digit of the selected radix.
EndOfSource, BadInteger: ERROR = CODE;
InitContextBuffer: PROCEDURE = {
  -- Rewind the context buffer's write position and fill every slot of the
  -- buffer with a blank.
  contextBufferIndex ← 0;
  FOR slot: NAT IN [0..contextBufferLength) DO
    contextBuffer[slot] ← ' ;
    ENDLOOP;
  };
SourceFromStream: PUBLIC PROCEDURE [stream: STREAM, name: ROPE] = {
  -- Make stream the scanner's input and reset all per-file scanning state.
  --   stream: the stream subsequent NextCh calls will read from.
  --   name:   recorded as nameOfInputFile.
  nameOfInputFile ← name;
  positionInInputFile ← 0;
  source ← stream;
  InitContextBuffer[];
  -- Both the raw and the scanned current character start out as a blank.
  scanCh ← ' ;
  ch ← scanCh;
  sy ← lBrackSy; -- anything but eofSy
  }; -- of SourceFromStream
SourceFromNextStream: PUBLIC PROCEDURE =
  -- Close the current source stream and continue with the next file queued in
  -- sourceFileSeq.  Raises IO.EndOfStream (passing the stream just closed) when
  -- the queue is empty.  On success the head of the queue is opened and
  -- removed, and the input position, the input file name, the context buffer
  -- and sy are reset for the new file.
  BEGIN
  source.Close[]; -- close previous file
  IF sourceFileSeq = NIL THEN
    ERROR IO.EndOfStream[source]
  ELSE
    BEGIN
    os: SourceFileSeqPtr ← sourceFileSeq;
    source ← FS.StreamOpen[os.name];
    positionInInputFile ← 0;
    nameOfInputFile ← os.name;
    sourceFileSeq ← os.next;
    sy ← lBrackSy; -- anything but eofSy
    InitContextBuffer[];
    END
  END; -- of SourceFromNextStream
NextCh: PROCEDURE =
  -- Read the next character of the input file into ch, and into scanCh with
  -- optional capitalisation.
  -- Raises EndOfSource if sy is already eofSy.  When the current stream is
  -- exhausted and more files are queued in sourceFileSeq, switches to the next
  -- file and retries the read; when none are queued, IO.EndOfStream propagates
  -- to the caller.
  BEGIN
  IF sy = eofSy THEN ERROR EndOfSource;
  ch ← scanCh ← source.GetChar[ !
    IO.EndOfStream =>
      IF sourceFileSeq # NIL THEN
        {SourceFromNextStream[]; RETRY}
    ];
  positionInInputFile ← positionInInputFile + 1;
  -- Record the raw character in the circular context buffer.
  contextBuffer[contextBufferIndex] ← scanCh;
  contextBufferIndex ← contextBufferIndex + 1;
  IF contextBufferIndex = contextBufferLength THEN contextBufferIndex ← 0;
  IF traceInput THEN commandHandle.out.PutChar[ch];
  -- Only scanCh is capitalised; ch keeps the character exactly as read.
  IF ~readingString AND capitalizeAlphabetic AND (scanCh IN ['a..'z]) THEN
    scanCh ← scanCh - 'a + 'A;
  END; -- of NextCh
InSymbol: PUBLIC PROCEDURE [stopAtCR: BOOLEAN ← FALSE] =
  -- Read the next basic symbol of the source program and return its
  -- description in the variables sy, op, and ident.
  --   stopAtCR: when TRUE, a newline is returned as the symbol CRSy instead of
  --             being skipped as white space.
  -- White space and comments are copied to the output (via Say and SayCh) as
  -- they are skipped.  For numeric constants, string constants and
  -- identifiers, ident receives the text of the lexeme.  When the input is
  -- exhausted, sy becomes eofSy and scanCh is set to a blank.
  BEGIN

  SkipComment: PROCEDURE [oneChar: BOOLEAN, endChar: CHARACTER] =
    -- Pass through a comment and code it into Mesa.
    --   oneChar: TRUE when the comment is closed by the single character
    --            endChar; FALSE when it is closed by an asterisk followed by
    --            a right parenthesis.
    BEGIN -- looking at the last char of the opening bracket
    lastCommentCh: CHARACTER;
    SayCommentCh: PROCEDURE [c: CHARACTER] =
      -- Emit c, first inserting a blank if that would otherwise put two
      -- hyphens next to each other in the output.
      BEGIN
      IF lastCommentCh = '- AND c = '- THEN SayCh[' ];
      SayCh[c];
      lastCommentCh ← c
      END; -- of SayCommentCh
    readingString ← TRUE; -- no capitalisation inside the comment
    Say["--"];
    lastCommentCh ← '-; -- start of comment
    NextCh[];
    IF oneChar THEN
      WHILE scanCh # endChar DO
        SayCommentCh[scanCh];
        -- Re-open the comment at the start of every continuation line.
        IF scanCh = '\n THEN {Say["-- "]; lastCommentCh ← ' };
        NextCh[];
        ENDLOOP
    ELSE
      DO
        UNTIL scanCh = '* DO
          SayCommentCh[scanCh];
          IF scanCh = '\n THEN {Say["-- "]; lastCommentCh ← ' };
          NextCh[];
          ENDLOOP;
        NextCh[];
        IF scanCh = ') THEN EXIT;
        -- The asterisk did not close the comment, so it is ordinary text.
        SayCh['*];
        lastCommentCh ← '*;
        ENDLOOP;
    readingString ← FALSE;
    NextCh[];
    IF scanCh # '\n THEN {SayCommentCh['-]; SayCh['-]}; -- end of comment
    END; -- of SkipComment

  AppendToString: PROCEDURE [c: CHARACTER] = INLINE
    -- Append c to the lexeme buffer.
    BEGIN
    string ← RefText.AppendChar[to: string, from: c];
    END;

  -- Append the current character to the lexeme buffer and advance.
  AppendChToString: PROCEDURE = INLINE BEGIN AppendToString[scanCh]; NextCh END;

  AppendDigitsToString: PROCEDURE =
    -- Append a possibly empty run of decimal digits to the lexeme buffer.
    BEGIN
    WHILE scanCh IN ['0..'9] DO
      string ← RefText.AppendChar[to: string, from: scanCh];
      NextCh
      ENDLOOP;
    END;

  AppendHexDigitsToString: PROCEDURE =
    -- Append a possibly empty run of digits and upper-case letters to the
    -- lexeme buffer.
    BEGIN
    WHILE scanCh IN ['0..'9] OR scanCh IN ['A..'Z] DO
      string ← RefText.AppendChar[to: string, from: scanCh];
      NextCh
      ENDLOOP;
    END;

  readingString ← FALSE; -- beginning of InSymbol
  string.length ← 0;
  op ← noOp;
  DO
    ENABLE
      IO.EndOfStream => GO TO SourceExhausted; -- SIGNAL catch
    SELECT scanCh FROM
      ' , '\t, '\f => {SayCh[scanCh]; NextCh};
      '\n =>
        BEGIN
        SayCh[scanCh];
        NextCh;
        IF stopAtCR THEN {sy ← CRSy; GOTO GotSymbol};
        END;
      -- A Control-Z discards the rest of its line.
      '\032 --Control-Z-- => UNTIL scanCh = '\n DO NextCh ENDLOOP;
      '{ => SkipComment[TRUE, '}];
      '( =>
        BEGIN
        NextCh;
        IF scanCh = '* THEN SkipComment[FALSE, ')]
        ELSE BEGIN sy ← lParentSy; GO TO GotSymbol END
        END;
      ': => -- colon or assignment
        BEGIN
        NextCh;
        IF scanCh = '= THEN BEGIN sy ← becomesSy; NextCh END ELSE sy ← colonSy;
        GO TO GotSymbol
        END;
      '. => -- period or "sideways colon"
        BEGIN
        -- A period may be the last character of the input; substitute a
        -- newline as the lookahead so that periodSy is still delivered.
        NextCh[
          !
          IO.EndOfStream => {scanCh ← '\n; CONTINUE}];
        IF scanCh = '. THEN BEGIN sy ← colonSy; NextCh END ELSE sy ← periodSy;
        GO TO GotSymbol
        END;
      '< =>
        BEGIN
        sy ← relOpSy;
        op ← ltOp;
        NextCh;
        IF scanCh = '> THEN BEGIN op ← neOp; NextCh END
        ELSE IF scanCh = '= THEN BEGIN op ← leOp; NextCh END;
        GO TO GotSymbol;
        END;
      '> =>
        BEGIN
        sy ← relOpSy;
        op ← gtOp;
        NextCh;
        IF scanCh = '= THEN BEGIN op ← geOp; NextCh END;
        GO TO GotSymbol;
        END;
      IN ['0..'9] =>
        -- numeric constant, of form d{ddd}{B|{.{ddd}}{E{+|-}d{dd}}}
        BEGIN
        sy ← intConstSy;
        AppendDigitsToString;
        SELECT scanCh FROM
          'b, 'B => BEGIN AppendToString['B]; NextCh; END;
          '. =>
            BEGIN
            NextCh;
            -- Two periods after the digits are not a fraction.  The second
            -- period is replaced by a colon, so the next call takes the
            -- colon case above.
            IF scanCh = '. THEN scanCh ← ':
            ELSE
              BEGIN
              sy ← realConstSy;
              AppendToString['.];
              AppendDigitsToString;
              IF scanCh = 'E OR scanCh = 'e THEN
                BEGIN
                AppendToString['E];
                NextCh;
                IF scanCh = '+ OR scanCh = '- THEN AppendChToString;
                AppendDigitsToString
                END;
              END;
            END;
          ENDCASE => NULL;
        GO TO AlterIdent;
        END;
      '! => -- hexadecimal integer constant
        BEGIN
        sy ← intConstSy;
        NextCh;
        AppendHexDigitsToString;
        AppendToString['H];
        GO TO AlterIdent;
        END;
      '' => -- string constant
        BEGIN
        sy ← stringConstSy;
        readingString ← NOT capitalizeStringConstants;
        NextCh;
        WHILE scanCh # '' DO AppendChToString; ENDLOOP;
        NextCh; -- move beyond terminating quote
        WHILE scanCh = '' DO
          -- double quote means string contains a quote
          AppendChToString;
          WHILE scanCh # '' DO AppendChToString; ENDLOOP;
          NextCh; -- move beyond terminating quote
          ENDLOOP;
        readingString ← FALSE;
        GO TO AlterIdent;
        END;
      IN ['a..'z], IN ['A..'Z], '$, '← => -- symbol
        BEGIN
        WHILE
          (SELECT scanCh FROM
            IN ['a..'z], IN ['A..'Z], '$, '←, IN ['0..'9] => TRUE,
            ENDCASE => FALSE) DO AppendChToString; ENDLOOP;
        -- Compare against the reserved words that share the first letter;
        -- resWordIndex gives the bounds of that group in resWordTable.
        IF string.length <= 9 -- the longest reserved word -- AND string[0] IN
          ['A..'Z] THEN
          FOR i IN [resWordIndex[string[0]]..resWordIndex[string[0] + 1]) DO
            BEGIN -- for EXITS
            IF string.length # resWordTable[i].string.Length[] THEN
              GO TO MisMatch;
            FOR j IN [0..string.length) DO
              IF string[j] # resWordTable[i].string.Fetch[j] THEN GO TO MisMatch
              ENDLOOP;
            GO TO ReservedWord;
            EXITS MisMatch => NULL;
            END;
            ENDLOOP;
        sy ← identSy; -- not a reserved word
        GO TO AlterIdent;
        EXITS
          ReservedWord =>
            BEGIN
            sy ← resWordTable[i].sy;
            op ← resWordTable[i].op;
            GO TO GotSymbol
            END;
        END;
      ENDCASE =>
        -- Any other character is a one-character symbol looked up in charTable.
        BEGIN
        sy ← charTable[scanCh].sy;
        op ← charTable[scanCh].op;
        NextCh;
        GO TO GotSymbol
        END;
    ENDLOOP;
  EXITS
    AlterIdent => ident ← Rope.FromRefText[string];
    GotSymbol => NULL;
    SourceExhausted => {scanCh ← ' ; sy ← eofSy};
  END; -- of InSymbol
CouldBe: PUBLIC PROCEDURE [testSy: Symbol, string: ROPE ← NIL]
  RETURNS [BOOLEAN] =
  -- If the current symbol is testSy, emit string (when one is given), advance
  -- to the next symbol and return TRUE.  Otherwise return FALSE and leave the
  -- scanner where it is.
  -- The parameter string hides the module-level lexeme buffer of the same
  -- name inside this procedure.
  BEGIN
  IF sy = testSy THEN {IF string # NIL THEN Say[string]; InSymbol; RETURN[TRUE]}
  ELSE RETURN[FALSE];
  END; -- of CouldBe
MustBe: PUBLIC PROCEDURE [testSy: Symbol, string: ROPE ← NIL, e: Errors] =
  -- Like CouldBe, but calls Error[e] when the current symbol is not testSy.
  BEGIN
  IF NOT CouldBe[testSy: testSy, string: string] THEN Error[e];
  END; -- of MustBe
SequenceOf: PUBLIC PROCEDURE [
  p: PROCEDURE, separatorSy: Symbol ← semiColonSy,
  separatorString: ROPE ← NIL] =
  -- Call p once for each element of a list whose elements are separated by
  -- separatorSy.
  --   p:               called for each element.
  --   separatorSy:     the symbol that separates elements.
  --   separatorString: text emitted between elements; when NIL it is ";" for
  --                    semiColonSy, "," for commaSy and empty otherwise.
  -- An element is attempted only while the current symbol is not endSy,
  -- untilSy or othersSy, so a separator just before one of those symbols is
  -- consumed without being emitted.
  BEGIN
  LooksPlausible: PROCEDURE RETURNS [BOOLEAN] = {
    RETURN[SELECT sy FROM endSy, untilSy, othersSy => FALSE,
      ENDCASE => TRUE]};
  IF separatorString = NIL THEN
    separatorString ←
      SELECT separatorSy FROM
        semiColonSy => ";",
        commaSy => ",",
        ENDCASE => "";
  IF LooksPlausible[] THEN
    BEGIN
    p;
    WHILE sy = separatorSy DO
      q: OutputQueuePtr;
      -- Capture whatever InSymbol emits while skipping past the separator,
      -- so that it can be placed after the separator text.
      PushOut[];
      InSymbol[];
      q ← CopyAndPopOut[]; -- get comments
      IF LooksPlausible[] THEN {Say[separatorString]; MergeQueue[from: q]; p}
      ELSE {MergeQueue[from: q]; EXIT};
      ENDLOOP;
    END;
  END; -- of SequenceOf
StringToPascalInteger: PUBLIC PROCEDURE [s: ROPE] RETURNS [PascalInteger] =
  -- Convert the text of an integer constant to its value.
  --   s: optional leading non-digit characters (each minus sign among them
  --      flips the sign), then digits, then an optional B or b suffix that
  --      selects octal instead of decimal.
  -- Returns 0 for an empty rope and for a rope that contains no digits.
  -- Raises BadInteger when a character after the first digit is not a digit
  -- of the selected radix.
  BEGIN
  radix: INTEGER ← 10;
  sign: INTEGER ← 1;
  v: PascalInteger ← 0;
  i: CARDINAL;
  start: CARDINAL ← 0;
  end: CARDINAL ← s.Length[];
  IF end = 0 THEN RETURN[v];
  SELECT s.Fetch[end - 1] FROM
    'B, 'b => {radix ← 8; end ← end - 1}; -- octal
    ENDCASE => NULL;
  -- Skip the prefix with an explicit index so that start is well defined
  -- (equal to end) even when the rope holds no digit at all; the value of a
  -- FOR control variable should not be relied on once its range is exhausted.
  WHILE start < end AND s.Fetch[start] NOT IN ['0..'9] DO
    IF s.Fetch[start] = '- THEN sign ← -sign;
    start ← start + 1;
    ENDLOOP;
  -- Leading zeros contribute nothing.
  WHILE start < end AND s.Fetch[start] = '0 DO start ← start + 1 ENDLOOP;
  FOR i IN [start..end) DO
    IF s.Fetch[i] NOT IN ['0..'0 + radix) THEN ERROR BadInteger;
    v ← radix*v + (s.Fetch[i] - '0);
    ENDLOOP;
  -- we should check here that v <= LAST[PascalInteger]
  RETURN[IF sign > 0 THEN v ELSE -v];
  END; -- of StringToPascalInteger
SayPascalInteger: PUBLIC PROCEDURE [i: PascalInteger] = {
  -- Emit the decimal representation of i through SayCh, with a leading minus
  -- sign when i is negative.
  magnitude: PascalInteger ← i;
  IF magnitude < 0 THEN {
    SayCh['-];
    magnitude ← -magnitude;
    };
  -- The higher-order digits are produced first by the recursive call.
  IF magnitude >= 10 THEN SayPascalInteger[magnitude/10];
  SayCh['0 + NARROW[magnitude MOD 10, INT]];
  }; -- of SayPascalInteger
SayIdent: PUBLIC PROCEDURE [s: ROPE ← NIL] =
  -- Emit an identifier with its capitalisation normalised.
  --   s: the identifier; when NIL the current ident is used.
  -- The first letter, and each letter that follows a left-arrow character, is
  -- emitted in upper case and every other letter in lower case; the
  -- left-arrow characters themselves are dropped.  The identifiers spelled
  -- exactly FALSE, TRUE and NIL are emitted entirely in upper case.
  BEGIN
  i: CARDINAL;
  allCaps: BOOLEAN;
  capitalize: BOOLEAN ← TRUE; -- whether the next letter is to be upper case
  IF s = NIL THEN s ← ident;
  IF s.Length[] <= 0 THEN RETURN;
  allCaps ←
    SELECT s.Fetch[0] FROM
      'f, 'F => Rope.Equal[s, "FALSE"],
      't, 'T => Rope.Equal[s, "TRUE"],
      'n, 'N => Rope.Equal[s, "NIL"],
      ENDCASE => FALSE;
  FOR i IN [0..NAT[s.Length[]]) DO
    c: CHARACTER = s.Fetch[i];
    SELECT TRUE FROM
      c IN ['a..'z] AND capitalize => {
        SayCh[c + ('A - 'a)]; capitalize ← allCaps};
      c IN ['A..'Z] AND NOT capitalize => {
        SayCh[c + ('a - 'A)]; capitalize ← allCaps};
      c = '← => capitalize ← TRUE;
      ENDCASE => {SayCh[c]; capitalize ← allCaps};
    ENDLOOP;
  END; -- of SayIdent
RopeSayIdent: PUBLIC PROCEDURE [s: ROPE ← NIL] RETURNS [r: ROPE] =
  -- Return what SayIdent[s] would emit as a rope, instead of adding it to the
  -- current output: the output is redirected with PushOut around the call and
  -- the captured contents are returned.
  BEGIN
  PushOut[];
  SayIdent[s];
  RETURN[CopyAndPopOut[].contents];
  END; -- of RopeSayIdent
-- Module start code
-- Build resWordIndex: for every letter, record the position of the first
-- reserved word whose initial letter is not less than that letter.  The
-- module-level variables i and scanCh serve as the loop variables.
i ← FIRST[ResWordIndex]; -- set up reserved word index
FOR scanCh IN ResWordFirstChar DO
-- The test against LAST[ResWordIndex] stops the scan before the final table
-- entry, whose spelling is empty and has no first character to fetch.
WHILE i < LAST[ResWordIndex] AND resWordTable[i].string.Fetch[0] < scanCh DO
i ← i + 1; ENDLOOP;
resWordIndex[scanCh] ← i;
ENDLOOP;
-- set up character table
-- Single-character symbols.  An omitted op field takes the default declared
-- in CharTabEntry.
-- Operators:
charTable['+] ← [sy: addOpSy, op: plusOp];
charTable['-] ← [sy: addOpSy, op: minusOp];
charTable['*] ← [sy: mulOpSy, op: mulOp];
charTable['/] ← [sy: mulOpSy, op: rDivOp];
charTable['=] ← [sy: relOpSy, op: eqOp];
charTable['<] ← [sy: relOpSy, op: ltOp];
charTable['>] ← [sy: relOpSy, op: gtOp];
-- Brackets:
charTable['(] ← [sy: lParentSy];
charTable[')] ← [sy: rParentSy];
charTable['[] ← [sy: lBrackSy];
charTable[']] ← [sy: rBrackSy];
-- Punctuation:
charTable[':] ← [sy: colonSy];
charTable[';] ← [sy: semiColonSy];
charTable['^] ← [sy: arrowSy];
charTable['.] ← [sy: periodSy];
charTable[',] ← [sy: commaSy];
END. -- of PasScanner --