OneCasabaParserImpl.Mesa (September 18, 1984 9:00:29 am PDT)
Copyright © 1987, 1988, 1989 by Xerox Corporation. All rights reserved.
Sturgis, June 3, 1986 3:07:05 pm PDT
Bill Jackson (bj) September 13, 1989 10:22:29 pm PDT
DIRECTORY
Ascii USING [ SP ],
IO USING [ BreakProc, card, CharClass, Close, GetCard, GetCedarTokenRope, GetTokenRope, int, PutF, PutRope, RIS, rope, RopeFromROS, ROS, SkipWhitespace, STREAM, TokenKind ],
KipperSupport USING [ CreateKipperer, CreateUnKipperer, Kipperer ],
OneCasabaParser,
OneCasabaParserData USING [ KipperParserTableBodyRef, UnKipperParserTableBodyRef ],
OneCasabaParserPrivate USING [ Action, ActionCell, ActionCellBody, ActionTable, ActionTableBody, ParserTableBody, SymbolCell, SymbolCellBody, SymbolTable, SymbolTableBody, TokenCase, TokenCell, TokenCellBody, TokenTable, TokenTableBody ],
Rope USING [ Equal, Fetch, Length, ROPE, Substr ];
OneCasabaParserImpl: CEDAR PROGRAM
IMPORTS IO, KipperSupport, OneCasabaParserData, Rope
EXPORTS OneCasabaParser ~ {
OPEN OneCasabaParser, OneCasabaParserPrivate;
ROPE: TYPE ~ Rope.ROPE;
first the procedures for creating parser info on streams
RecordKipperedParserTableOnStream: PUBLIC PROC [ s: IO.STREAM,
genInfo: OneCasabaParser.GenInfoProc,
getSourceToken: OneCasabaParser.LexicalAnalysisProc ← NIL ] ~ {
-- Builds a parser table from genInfo and writes it to s in kippered (binary,
-- machine-readable) form, then closes s.
-- NOTE(review): getSourceToken is accepted for interface compatibility but is
-- never used here; the original computed an unused local lexProc from it,
-- which has been removed as dead code.
table: ParserTable ← BuildParserTable[genInfo];
kipperer: KipperSupport.Kipperer ← KipperSupport.CreateKipperer[s];
OneCasabaParserData.KipperParserTableBodyRef[kipperer, table];
s.Close[];
};
RecordReadableParserTableOnStream: PUBLIC PROC [ s: IO.STREAM,
genInfo: OneCasabaParser.GenInfoProc,
getSourceToken: OneCasabaParser.LexicalAnalysisProc ← NIL ] ~ {
-- Writes the parser description produced by genInfo to s as readable text:
-- a sequence of keyword-prefixed records ("sy", "ut", "gt", "sh", "rd",
-- "ac", "start") terminated by " end ".  This is exactly the format read
-- back by BuildParserTableFromReadableStream.
-- NOTE(review): getSourceToken is accepted for interface compatibility but is
-- never used here; the unused local lexProc derived from it has been removed.
LocalRecordSymbol: SymbolProc ~
{ s.PutF["sy %g ", IO.rope[name]] };
LocalRecordUniqueToken: UniqueTokenProc ~
{ s.PutF["ut %g %g ", IO.rope[name], IO.rope[spelling]] };
LocalRecordGenericToken: GenericTokenProc ~
{ s.PutF["gt %g %g ", IO.rope[name], IO.rope[class]] };
LocalRecordShift: ShiftProc ~
{ s.PutF["sh %g <%g> %g ", IO.card[state], IO.rope[terminalSeq], IO.card[newState]] };
LocalRecordReduction: ReductionProc ~
{ s.PutF["rd %g <%g> %g %g %g ", IO.card[state], IO.rope[terminalSeq], IO.rope[leftSide], IO.card[ruleNumber], IO.card[ruleSize]] };
LocalRecordAcceptance: AcceptanceProc ~
{ s.PutF["ac %g <%g> ", IO.card[state], IO.rope[terminalSeq]] };
LocalRecordStartState: StartStateProc ~ { s.PutF["start %g ", IO.card[state]] };
genInfo[LocalRecordSymbol, LocalRecordUniqueToken, LocalRecordGenericToken, LocalRecordShift, LocalRecordReduction, LocalRecordAcceptance, LocalRecordStartState];
s.PutRope[" end "];
};
now the types and procedures for building an internal parsetable from parser descriptions on a stream
-- ParserTable is the internal, fully analyzed form of a grammar's parse tables.
ParserTable: TYPE ~ REF ParserTableBody;
-- The concrete body is declared in OneCasabaParserPrivate and re-exported here.
ParserTableBody: PUBLIC TYPE ~ OneCasabaParserPrivate.ParserTableBody;
BuildParserTableFromKipperedStream: PUBLIC PROC [ s: IO.STREAM,
getSourceToken: LexicalAnalysisProc ← NIL ] RETURNS [ table: ParserTable ] ~ {
-- Reconstructs a parser table from the kippered (binary) form written by
-- RecordKipperedParserTableOnStream.
-- NOTE(review): getSourceToken is accepted for interface compatibility but is
-- never used here; the unused local lexProc derived from it has been removed.
table ← OneCasabaParserData.UnKipperParserTableBodyRef[ KipperSupport.CreateUnKipperer[s]];
};
BuildParserTableFromReadableStream: PUBLIC PROC [ s: IO.STREAM,
getSourceToken: LexicalAnalysisProc ← NIL ] RETURNS [ table: ParserTable ] ~ {
-- Parses the readable table format written by RecordReadableParserTableOnStream
-- ("sy"/"ut"/"gt"/"sh"/"rd"/"ac"/"start" records terminated by "end") and
-- rebuilds the internal parser table from it via BuildParserTable.
-- NOTE(review): getSourceToken is accepted for interface compatibility but is
-- never used here; the unused local lexProc derived from it has been removed.
GenInfo: GenInfoProc ~ {
-- Tokens in the readable format are separated by single spaces.
Break: IO.BreakProc ~ {
class: IO.CharClass ← IF ( char = Ascii.SP ) THEN sepr ELSE other;
RETURN[class];
};
DO
-- Reads a terminal sequence of the form <...> and strips the angle brackets.
GetTermSeq: PROC RETURNS [ text: ROPE ] ~ {
[] ← s.SkipWhitespace[];
text ← s.GetTokenRope[Break].token;
text ← text.Substr[1, text.Length[] - 2];
};
-- Reads the next cardinal, skipping leading whitespace.
SkipAndGetCard: PROC RETURNS [ c: CARD16 ] ~ {
[] ← s.SkipWhitespace[];
c ← s.GetCard[];
};
firstToken: ROPE ← s.GetTokenRope[Break].token;
-- Dispatch on the record keyword; ERROR on anything unrecognized.
SELECT TRUE FROM
firstToken.Equal["sy"] => {
recordSymbol[s.GetTokenRope[Break].token];
};
firstToken.Equal["ut"] => {
name: ROPE ~ s.GetTokenRope[Break].token;
spelling: ROPE ~ s.GetTokenRope[Break].token;
recordUniqueToken[name, spelling]
};
firstToken.Equal["gt"] => {
name: ROPE ~ s.GetTokenRope[Break].token;
class: ROPE ~ s.GetTokenRope[Break].token;
recordGenericToken[name, class];
};
firstToken.Equal["sh"] => {
state: CARD16 ~ SkipAndGetCard[];
terms: ROPE ~ GetTermSeq[];
newState: CARD16 ← SkipAndGetCard[];
recordShift[state, terms, newState];
};
firstToken.Equal["rd"] => {
state: CARD16 ~ SkipAndGetCard[];
terms: ROPE ~ GetTermSeq[];
leftSide: ROPE ~ s.GetTokenRope[Break].token;
ruleNumber: CARD16 ~ SkipAndGetCard[];
ruleSize: CARD16 ~ SkipAndGetCard[];
recordReduction[state, terms, leftSide, ruleNumber, ruleSize];
};
firstToken.Equal["ac"] => {
state: CARD16 ~ SkipAndGetCard[];
terms: ROPE ~ GetTermSeq[];
recordAcceptance[state, terms];
};
firstToken.Equal["start"] => {
recordStartState[SkipAndGetCard[]];
};
firstToken.Equal["end"] => { EXIT };
ENDCASE => { ERROR };
ENDLOOP;
};
table ← BuildParserTable[GenInfo];
};
-- Drives genInfo with callbacks that record each declaration into a freshly
-- created table, then resolves action conflicts with AnalyzeActions.
-- The Local* procedures close over `table`, which is assigned before genInfo
-- runs, so the closures see the created table.
BuildParserTable: PROC [ genInfo: GenInfoProc ] RETURNS [ table: ParserTable ] ~ {
LocalRecordSymbol: SymbolProc ~
{ RecordSymbol[table, name] };
LocalRecordUniqueToken: UniqueTokenProc ~
{ RecordUniqueToken[table, name, spelling] };
LocalRecordGenericToken: GenericTokenProc ~
{ RecordGenericToken[table, name, class] };
LocalRecordShift: ShiftProc ~
{ RecordShift[table, state, terminalSeq, newState] };
LocalRecordReduction: ReductionProc ~
{ RecordReduction[table, state, terminalSeq, leftSide, ruleNumber, ruleSize] };
LocalRecordAcceptance: AcceptanceProc ~
{ RecordAcceptance[table, state, terminalSeq] };
LocalRecordStartState: StartStateProc ~
{ RecordStartState[table, state] };
table ← CreateParserTable[];
genInfo[LocalRecordSymbol, LocalRecordUniqueToken, LocalRecordGenericToken, LocalRecordShift, LocalRecordReduction, LocalRecordAcceptance, LocalRecordStartState];
AnalyzeActions[table];
};
-- Allocates an empty table pre-seeded with the empty symbol (code 0) and the
-- end-of-file token, both of which every grammar needs.
CreateParserTable: PROC RETURNS [ table: ParserTable ] ~ {
table ← NEW[ParserTableBody];
RecordSymbol[table, ""]; -- the empty seq
RecordTokenInfo[table, "", tokenEOF, "", generic];
};
-- Moves every cell on table.unAnalyzedActions into a hash table keyed on
-- [state, terminal], then removes conflicting entries in place:
-- shift/reduce resolves to shift, reduce/reduce to the lowest rule number,
-- anything involving accept keeps the accept.  Counts of removed conflicts
-- are reported on the stream obtained from GetReportStream.
AnalyzeActions: PROC [ table: ParserTable ] ~ {
actionTable: ActionTable;
next: ActionCell;
nShiftReduceConflicts: INT32 ← 0;
nReduceReduceConflicts: INT32 ← 0;
-- The four Note* procedures just tally conflicts for the final report.
NoteShiftReduce: PROC [ terminal: CARD16, ruleNumber: CARD16 ] ~
{ nShiftReduceConflicts ← nShiftReduceConflicts.SUCC };
NoteShiftAccept: PROC [ terminal: CARD16 ] ~
{ nShiftReduceConflicts ← nShiftReduceConflicts.SUCC };
NoteReduceAccept: PROC [ terminal: CARD16, ruleNumber: CARD16 ] ~
{ nReduceReduceConflicts ← nReduceReduceConflicts.SUCC };
NoteReduceReduce: PROC [ terminal: CARD16, ruleNumber1, ruleNumber2: CARD16 ] ~
{ nReduceReduceConflicts ← nReduceReduceConflicts.SUCC };
IF table.actionTable # NIL THEN ERROR; -- actions may be analyzed only once
actionTable ← table.actionTable ← NEW[ActionTableBody[table.nActions]];
FOR I: CARD16 IN [0..actionTable.nSlots) DO actionTable.actions[I] ← NIL ENDLOOP;
-- Rehash every un-analyzed cell into its bucket (prepending, so list order reverses).
FOR action: ActionCell ← table.unAnalyzedActions, next WHILE action # NIL DO
hash: CARD16 ← HashOneAction[action.state, action.terminal];
hashCode: CARD16 ← hash MOD actionTable.nSlots;
next ← action.next;
action.next ← actionTable.actions[hashCode];
actionTable.actions[hashCode] ← action;
ENDLOOP;
table.unAnalyzedActions ← NIL;
-- now remove any ambiguities
-- resolve shift/reduce by shift
-- resolve reduce/reduce to the lowest production number
FOR I: CARD16 IN [0..actionTable.nSlots) DO
previousTo1: ActionCell ← NIL;
action1: ActionCell← NIL;
action1Next: ActionCell ← NIL;
-- For each pair (action1, action2) in the same bucket with equal [state, terminal],
-- delete one of the pair according to the resolution policy above.
FOR action1 ← actionTable.actions[I], action1Next WHILE ( action1 # NIL ) DO
previousTo2: ActionCell ← action1;
action2: ActionCell ← NIL;
action2Next: ActionCell ← NIL;
action1Next ← action1.next;
FOR action2 ← action1.next, action2Next WHILE ( action2 # NIL ) DO
action2Next ← action2.next;
IF action1.state = action2.state
AND action1.terminal = action2.terminal
THEN { -- we have a conflict
SELECT action1.action FROM
shift => {
SELECT action2.action FROM
shift => {
ERROR; -- two shifts for the same [state, terminal] cannot happen
};
reduce => {
NoteShiftReduce[action1.terminal, action2.ruleNumber];
GOTO remove2; -- prefer the shift
};
accept => {
NoteShiftAccept[action1.terminal];
GOTO remove2; -- prefer the shift
};
ENDCASE => {
ERROR;
};
};
reduce => {
SELECT action2.action FROM
shift => {
NoteShiftReduce[action1.terminal, action1.ruleNumber];
GOTO remove1; -- prefer the shift
};
reduce => {
NoteReduceReduce[action1.terminal, action1.ruleNumber, action2.ruleNumber];
IF action1.ruleNumber < action2.ruleNumber
THEN GOTO remove2
ELSE GOTO remove1; -- keep the lower-numbered production
};
accept => {
NoteReduceAccept[action1.terminal, action1.ruleNumber];
GOTO remove1; -- prefer the accept
};
ENDCASE => {
ERROR;
};
};
accept => {
SELECT action2.action FROM
shift => {
NoteShiftAccept[action1.terminal];
GOTO remove2; -- prefer the accept
};
reduce => {
NoteReduceAccept[action1.terminal, action2.ruleNumber];
GOTO remove2; -- prefer the accept
};
accept => {
ERROR; -- two accepts for the same [state, terminal] cannot happen
};
ENDCASE => {
ERROR;
};
};
ENDCASE => ERROR;
EXITS
-- remove1: unlink action1 from the bucket and continue with its successor.
remove1 => {
IF previousTo2 = action1 THEN previousTo2 ← previousTo1;
action1 ← action1Next;
action1Next ← action1.next;
IF previousTo1 # NIL
THEN previousTo1.next ← action1
ELSE actionTable.actions[I] ← action1;
};
-- remove2: unlink action2 from the bucket and continue the inner scan.
remove2 => {
action2 ← action2Next;
action2Next ← IF action2 # NIL THEN action2.next ELSE NIL;
previousTo2.next ← action2;
};
};
previousTo2 ← action2;
ENDLOOP;
previousTo1 ← action1;
ENDLOOP;
ENDLOOP;
-- now report any conflicts that have been artificially removed
IF nReduceReduceConflicts # 0 OR nShiftReduceConflicts # 0 THEN {
reportStream: IO.STREAM ← GetReportStream[];
-- using GetReportStream for a slightly bastardized purpose
-- the definitions file says it will only be called during parse, and I don't want to change the definitions file
reportStream.PutF["\n\nWARNING\n"];
IF nReduceReduceConflicts # 0
THEN reportStream.PutF["\t%g reduce/reduce conflicts removed\n", IO.int[nReduceReduceConflicts]];
IF nShiftReduceConflicts # 0
THEN reportStream.PutF["\t%g shift/reduce conflicts removed\n\n", IO.int[nShiftReduceConflicts]];
};
};
-- Prepends a new symbol cell to the un-analyzed list, assigning it the next
-- sequential code.  Symbols may not be added after AnalyzeSymbols has run.
RecordSymbol: PROC [ table: ParserTable, name: ROPE ] ~ {
IF table.symbolTable # NIL THEN ERROR; -- must record all symbols before any actions
table.unAnalyzedSymbols ← NEW[SymbolCellBody←[name, table.nSymbols, table.unAnalyzedSymbols]];
table.nSymbols ← table.nSymbols.SUCC;
};
-- Classifies the literal spelling of a unique token by running the Cedar
-- scanner over it, then records it in the table.  Unique tokens must be
-- single-spelling kinds (IDs, single/double delimiters); anything else is an error.
RecordUniqueToken: PROC [ table: ParserTable, name: ROPE, spelling: ROPE ] ~ {
spellingStream: IO.STREAM ← IO.RIS[spelling]; -- fixed: the "←" was lost from this declaration in the source
kind: IO.TokenKind;
-- the following hack is because GetCedarToken fails on |
-- of course, the scanner will still fail if it ever sees a |
kind ← IF spelling.Equal["|"]
THEN tokenSINGLE
ELSE spellingStream.GetCedarTokenRope[].tokenKind;
spellingStream.Close[];
IF ( ( kind # tokenID ) AND ( NOT HashKind[kind].includeRope ) ) THEN ERROR;
RecordTokenInfo[table, name, kind, spelling, unique];
};
-- Records a generic token (one standing for a whole class, e.g. any ID or any
-- number).  Generic tokens must NOT be of a kind whose spelling participates
-- in hashing, except tokenID which is handled specially in AnalyzeTokens.
RecordGenericToken: PROC [table: ParserTable, name: ROPE, class: ROPE] ~ {
kind: IO.TokenKind ← DecodeTokenKind[class];
IF ( ( kind # tokenID ) AND ( HashKind[kind].includeRope ) ) THEN ERROR;
RecordTokenInfo[table, name, kind, "", generic];
};
-- Adds a shift action [state, terminalSeq] -> newState to the un-analyzed
-- action list.  The first action recorded triggers symbol analysis, so that
-- terminal names can be translated to symbol codes.
RecordShift: PROC [ table: ParserTable, state: State, terminalSeq: ROPE, newState: State ] ~ {
IF ( table.symbolTable = NIL ) THEN AnalyzeSymbols[table];
table.unAnalyzedActions ← NEW[ActionCellBody ← [
state: state,
terminal: LookUpSymbol[table.symbolTable, terminalSeq],
action: shift,
nextState: newState,
ruleNumber: 0, -- unused for shifts
leftSide: 0,
ruleSize: 0,
next: table.unAnalyzedActions
]];
table.nActions ← table.nActions.SUCC;
};
-- Adds a reduce action: in `state`, looking at `terminalSeq`, reduce by rule
-- `ruleNumber`, popping `ruleSize` states and pushing `leftSide`.
RecordReduction: PROC [ table: ParserTable, state: State, terminalSeq: ROPE, leftSide: ROPE, ruleNumber: CARD16, ruleSize: CARD16 ] ~ {
IF ( table.symbolTable = NIL ) THEN AnalyzeSymbols[table];
table.unAnalyzedActions ← NEW[ActionCellBody ← [
state: state,
terminal: LookUpSymbol[table.symbolTable, terminalSeq],
action: reduce,
nextState: 0, -- unused for reductions
ruleNumber: ruleNumber,
leftSide: LookUpSymbol[table.symbolTable, leftSide],
ruleSize: ruleSize,
next: table.unAnalyzedActions
]];
table.nActions ← table.nActions.SUCC;
};
-- Adds an accept action for [state, terminalSeq].
RecordAcceptance: PROC [ table: ParserTable, state: State, terminalSeq: ROPE ] ~ {
IF ( table.symbolTable = NIL ) THEN AnalyzeSymbols[table];
table.unAnalyzedActions ← NEW[ActionCellBody ← [
state: state,
terminal: LookUpSymbol[table.symbolTable, terminalSeq],
action: accept,
nextState: 0, -- remaining fields unused for acceptance
ruleNumber: 0,
leftSide: 0,
ruleSize: 0,
next: table.unAnalyzedActions
]];
table.nActions ← table.nActions.SUCC;
};
-- Records the parser's initial state.
RecordStartState: PROC [ table: ParserTable, state: State ] ~ {
table.startState ← state;
};
-- Prepends a token cell (unique or generic) to the un-analyzed token list.
-- The symbolCode field is left 0 here; AnalyzeTokens fills it in later.
RecordTokenInfo: PROC [ table: ParserTable, name: ROPE, kind: IO.TokenKind, spelling: ROPE, case: TokenCase ] ~ {
IF ( table.tokenTable # NIL ) THEN ERROR; -- must record all tokens before any actions
table.unAnalyzedTokens ← NEW[TokenCellBody ←
[name, 0, kind, spelling, case, table.unAnalyzedTokens]];
table.nTokens ← table.nTokens.SUCC;
};
-- Hash for the action table: product of state and terminal code, folded to 8 bits.
HashOneAction: PROC [ state: CARD16, terminal: CARD16 ]
RETURNS [ hash: CARD16 ] ~ INLINE { hash ← (state * terminal) MOD 256 };
-- Maps a token kind to a small hash seed, and says whether the token's text
-- also participates in hashing (TRUE only for IDs and fixed delimiters,
-- whose spelling distinguishes them; FALSE for kinds matched generically).
HashKind: PROC [ kind: IO.TokenKind ] RETURNS [ hash: CARD16, includeRope: BOOL ] ~ {
SELECT kind FROM
tokenERROR => RETURN[0, FALSE];
tokenID => RETURN[1, TRUE];
tokenDECIMAL => RETURN[2, FALSE];
tokenOCTAL => RETURN[3, FALSE];
tokenHEX => RETURN[4, FALSE];
tokenREAL => RETURN[5, FALSE];
tokenROPE => RETURN[6, FALSE];
tokenCHAR => RETURN[7, FALSE];
tokenATOM => RETURN[8, FALSE];
tokenSINGLE => RETURN[9, TRUE];
tokenDOUBLE => RETURN[10, TRUE];
tokenCOMMENT => RETURN[11, FALSE];
tokenEOF => RETURN[12, FALSE];
ENDCASE => ERROR;
};
-- Translates the textual name of a token kind (as written in the readable
-- table format) back to the IO.TokenKind enumeration; ERROR on unknown names.
DecodeTokenKind: PROC [ class: ROPE ] RETURNS [ kind: IO.TokenKind ] ~ {
SELECT TRUE FROM
class.Equal["tokenERROR"] => RETURN[tokenERROR];
class.Equal["tokenID"] => RETURN[tokenID];
class.Equal["tokenDECIMAL"] => RETURN[tokenDECIMAL];
class.Equal["tokenOCTAL"] => RETURN[tokenOCTAL];
class.Equal["tokenHEX"] => RETURN[tokenHEX];
class.Equal["tokenREAL"] => RETURN[tokenREAL];
class.Equal["tokenROPE"] => RETURN[tokenROPE];
class.Equal["tokenCHAR"] => RETURN[tokenCHAR];
class.Equal["tokenATOM"] => RETURN[tokenATOM];
class.Equal["tokenSINGLE"] => RETURN[tokenSINGLE];
class.Equal["tokenDOUBLE"] => RETURN[tokenDOUBLE];
class.Equal["tokenCOMMENT"] => RETURN[tokenCOMMENT];
class.Equal["tokenEOF"] => RETURN[tokenEOF];
ENDCASE => ERROR;
};
-- Moves every cell on table.unAnalyzedSymbols into a hash table keyed on the
-- symbol's name, then analyzes the tokens (which need symbol codes).
AnalyzeSymbols: PROC [ table: ParserTable ] ~ {
symbolTable: SymbolTable;
next: SymbolCell;
IF table.symbolTable # NIL THEN ERROR; -- symbols may be analyzed only once
symbolTable ← table.symbolTable ← NEW[SymbolTableBody[table.nSymbols]];
FOR I: CARD16 IN [0..symbolTable.nSlots) DO symbolTable.symbols[I] ← NIL ENDLOOP;
FOR symbol: SymbolCell ← table.unAnalyzedSymbols, next WHILE symbol # NIL DO
hash: CARD16 ← HashOneSymbol[symbol.name];
hashCode: CARD16 ← hash MOD symbolTable.nSlots;
next ← symbol.next;
symbol.next ← symbolTable.symbols[hashCode];
symbolTable.symbols[hashCode] ← symbol;
ENDLOOP;
table.unAnalyzedSymbols ← NIL;
AnalyzeTokens[table];
};
-- Returns the code of the named symbol; ERROR if it was never recorded.
LookUpSymbol: PROC [ symbolTable: SymbolTable, name: ROPE ]
RETURNS [ code: CARD16 ] ~ {
hash: CARD16 ← HashOneSymbol[name];
hashCode: CARD16 ← hash MOD symbolTable.nSlots;
FOR symbol: SymbolCell ← symbolTable.symbols[hashCode], symbol.next WHILE symbol # NIL DO
IF symbol.name.Equal[name] THEN RETURN[symbol.code];
ENDLOOP;
ERROR; -- unknown symbol
};
-- Hashes a rope by summing its character codes.
-- NOTE(review): the sum is CARD16 arithmetic; presumably symbol names are
-- short enough that overflow never occurs — confirm against the grammar sizes used.
HashOneSymbol: PROC [ rope: ROPE ] RETURNS [ hash: CARD16 ] ~ {
hash ← 0;
FOR I: INT32 IN [0..rope.Length[]) DO
hash ← hash + LOOPHOLE[rope.Fetch[I], CARD16];
ENDLOOP;
};
-- Moves every cell on table.unAnalyzedTokens into a hash table keyed on
-- [kind, spelling], resolving each token's symbolCode via the (already
-- analyzed) symbol table.  The single generic ID token, if any, is also
-- remembered in tokenTable.idToken as the fallback for unknown identifiers.
AnalyzeTokens: PROC [ table: ParserTable ] ~ {
tokenTable: TokenTable;
next: TokenCell;
IF table.tokenTable # NIL THEN ERROR; -- tokens may be analyzed only once
tokenTable ← table.tokenTable ← NEW[TokenTableBody[table.nTokens]];
FOR token: TokenCell ← table.unAnalyzedTokens, next WHILE token # NIL DO
spellingHash: CARD16 ← TokenHash[token.kind, token.spelling].hash;
spellingHashCode: CARD16 ← spellingHash MOD tokenTable.nSlots;
next ← token.next;
token.symbolCode ← LookUpSymbol[table.symbolTable, token.name];
token.next ← tokenTable.tokens[spellingHashCode];
tokenTable.tokens[spellingHashCode] ← token;
IF token.kind = tokenID AND token.case = generic THEN {
IF tokenTable.idToken # NIL THEN ERROR; -- at most one generic ID token
tokenTable.idToken ← token;
};
ENDLOOP;
table.unAnalyzedTokens ← NIL;
};
-- following routines are intended for the output of the GetCedarToken routine in IO.mesa
-- Maps a scanned source token [kind, text] to its terminal symbol code.
-- For spelling-hashed kinds (IDs, delimiters) the text must match exactly;
-- for generic kinds any text of the right kind matches and the text is
-- returned as info.  An identifier not found literally falls back to the
-- generic ID token.  ERROR if the token cannot appear in this grammar.
LookUpCedarToken: PROC [ tokenTable: TokenTable, kind: IO.TokenKind, text: ROPE ]
RETURNS [ code: CARD16, info: ROPE, case: TokenCase ] ~ {
includeTextInHash: BOOL;
hash: CARD16;
hashCode: CARD16;
[hash, includeTextInHash] ← TokenHash[kind, text];
hashCode ← hash MOD tokenTable.nSlots;
IF ( includeTextInHash )
THEN
FOR token: TokenCell ← tokenTable.tokens[hashCode], token.next WHILE token # NIL DO
IF ( ( token.kind = kind ) AND ( token.spelling.Equal[text] ) )
THEN RETURN[token.symbolCode, NIL, token.case];
ENDLOOP
ELSE
FOR token: TokenCell ← tokenTable.tokens[hashCode], token.next WHILE token # NIL DO
IF ( token.kind = kind ) THEN RETURN[token.symbolCode, text, token.case];
ENDLOOP;
IF ( ( kind = tokenID ) AND ( tokenTable.idToken # NIL ) )
THEN RETURN[tokenTable.idToken.symbolCode, text, tokenTable.idToken.case];
ERROR; -- token cannot occur in this grammar
};
-- Combines the kind's hash seed with the text's character-code sum when the
-- kind's spelling is significant (see HashKind).
TokenHash: PROC [ kind: IO.TokenKind, text: ROPE ]
RETURNS [ hash: CARD16, includeTextInHash: BOOL ] ~ {
[hash, includeTextInHash] ← HashKind[kind];
IF ( ( includeTextInHash ) AND ( text # NIL ) ) THEN
FOR I: INT32 IN [0..text.Length[]) DO
hash ← hash + LOOPHOLE[text.Fetch[I], CARD16];
ENDLOOP;
};
-- Returns the action cell for [state, terminalCode], or NIL if the parser
-- has no action there (i.e. a syntax error).
LookUpAction: PROC [ actionTable: ActionTable, state: State, terminalCode: CARD16 ]
RETURNS [ cell: ActionCell ← NIL ] ~ {
hash: CARD16 ← HashOneAction[state, terminalCode];
hashCode: CARD16 ← hash MOD actionTable.nSlots;
FOR action: ActionCell ← actionTable.actions[hashCode], action.next
WHILE ( action # NIL ) DO
IF ( ( action.state = state ) AND ( action.terminal = terminalCode ) )
THEN { cell ← action; RETURN };
ENDLOOP;
};
-- finally the code for actually performing a parse
-- One cell of the parse-state stack; records the source span it covers.
StackCell: TYPE ~ REF StackCellBody;
StackCellBody: TYPE ~ RECORD [
state: State,
firstPosition, length: INT32,
next: StackCell
];
-- A recorded parse step (reduction, shift, or acceptance), saved so steps can
-- be replayed to the caller only after they are known to survive error recovery.
ReductionCell: TYPE ~ REF ReductionCellBody;
ReductionCellBody: TYPE ~ RECORD [
case: ReductionCellCase,
firstCharPosition: INT32,
-- if reduce case
rule: CARD16,
length: INT32,
-- if shift
code: CARD16,
text: ROPE,
tokenCase: TokenCase,
tokenKind: IO.TokenKind,
previous: ReductionCell, -- always a shift
stack: StackCell, -- as it appears just after applying this shift
-- if accept
-- nothing
-- all cases
next: ReductionCell
];
ReductionCellCase: TYPE ~ { reduction, shift, accept };
SimpleTryTokenCase: TYPE ~ { reduction, shift, accept, fail };
-- A saved source token, kept so input can be backed up and replayed.
SourceTokenCell: TYPE ~ REF SourceTokenCellBody;
SourceTokenCellBody: TYPE ~ RECORD [
kind: IO.TokenKind,
text: ROPE,
position: INT32,
next: SourceTokenCell
];
-- A token tentatively inserted during error recovery.
TrialTokenCell: TYPE ~ REF TrialTokenCellBody;
TrialTokenCellBody: TYPE ~ RECORD [
kind: IO.TokenKind,
text: ROPE,
next: TrialTokenCell
];
-- Error-recovery tuning parameters.
MaxTrialInserts: INT32 ← 4; -- NOTE(review): declared but not referenced in this file
MaxSkip: INT32 ← 4; -- maximum deletions tried per recovery
MinSuccesses: INT32 ← 4; -- minimum parse-ahead for a recovery to be recordable
DesiredSuccesses: INT32 ← 12; -- parse-ahead that counts as a confirmed fix
FoundAFix: ERROR ~ CODE; -- raised to unwind the recovery search on success
GetReportStream: PUBLIC SIGNAL RETURNS [ s: IO.STREAM ] ~ CODE;
UnRecoverableSyntaxError: PUBLIC ERROR ~ CODE;
-- Parses the token stream delivered by getSourceToken against table, replaying
-- reductions/shifts to the caller's Show* procedures; returns whether the
-- input was accepted.  Performs automatic error recovery by trial deletion
-- and insertion of tokens.
Parse: PUBLIC PROC [ table: ParserTable, getSourceToken: GetSourceTokenProc, showReduce: ShowReduceProc ← NIL, showGenericShift: ShowGenericShiftProc ← NIL, showNonGenericShift: ShowNonGenericShiftProc ← NIL ] RETURNS [ accepted: BOOL ] ~ {
-- this uses a scheme obtained orally from Ed Satterthwaite on Nov 7, 1985.
-- I am begining to suspect I have not got the search strategy right,
-- I am breadth first on insertions, but not on deletions?
-- state stack and saved reductions for later play to caller
-- old stack is as the stack appears just before applying firstRed
-- current stack is as the stack appears just after appling the last reduction chained from firstRed
oldStack: StackCell ← NEW[StackCellBody ← [table.startState, 0, 0, NIL]];
currentStack: StackCell ← oldStack;
-- these reduction cells form a linear list from firstRed to LastRed
-- last shift points into this list.
firstRed: ReductionCell ← NIL;
lastShift: ReductionCell ← NIL; -- always a shift
lastRed: ReductionCell ← NIL;
-- results in a shift, a single reduction, an acceptance, or a failure
-- Applies one parser step for the given token: performs a shift, a single
-- reduction (with its goto), or an acceptance, recording the step on the
-- firstRed..lastRed list; returns fail (recording nothing) when the table
-- has no action for [currentStack.state, token].
SimpleTryToken: PROC [ tokenKind: IO.TokenKind, tokenText: ROPE, position: INT32 ] RETURNS [ SimpleTryTokenCase ] ~ {
terminalSeqCode: CARD16;
terminalSeqInfo: ROPE;
tokenCase: TokenCase;
action: ActionCell;
[terminalSeqCode, terminalSeqInfo, tokenCase] ← LookUpCedarToken[table.tokenTable, tokenKind, tokenText];
action ← LookUpAction[table.actionTable, currentStack.state, terminalSeqCode];
IF action = NIL THEN RETURN[fail];
SELECT action.action FROM
shift => {
redCell: ReductionCell;
currentStack ← NEW[StackCellBody ← [action.nextState, position, Rope.Length[tokenText], currentStack]];
redCell ← NEW[ReductionCellBody ← [shift, position, , , terminalSeqCode, tokenText, tokenCase, tokenKind, lastShift, currentStack, NIL]];
IF lastRed # NIL THEN lastRed.next ← redCell ELSE firstRed ← redCell;
lastRed ← redCell;
lastShift ← redCell;
RETURN[shift];
};
reduce => {
shift: ActionCell;
firstPosition: INT32 ← position;
-- fixed: the "←" was lost from the following declaration in the source
limitPosition: INT32 ← IF action.ruleSize = 0
THEN position
ELSE currentStack.firstPosition + currentStack.length;
redCell: ReductionCell;
-- pop ruleSize states; the popped span determines the reduction's extent
FOR I: CARD16 IN [0..action.ruleSize) DO
firstPosition ← currentStack.firstPosition;
currentStack ← currentStack.next;
ENDLOOP;
redCell ← NEW[ReductionCellBody ← [reduction, firstPosition, action.ruleNumber, limitPosition-firstPosition, , , , , , , NIL]];
IF lastRed # NIL THEN lastRed.next ← redCell ELSE firstRed ← redCell;
lastRed ← redCell;
-- the goto: shifting the rule's left side from the uncovered state
shift ← LookUpAction[table.actionTable, currentStack.state, action.leftSide];
IF shift.action # shift THEN ERROR; -- NOTE(review): local `shift` shadows the enum literal here; presumably the compiler resolves the right-hand `shift` by type — confirm before touching
currentStack ← NEW[StackCellBody ← [shift.nextState, firstPosition, limitPosition-firstPosition, currentStack]];
RETURN[reduction]
};
accept => {
redCell: ReductionCell ← NEW[ReductionCellBody ← [accept, , , , , , , , , , NIL]];
IF lastRed # NIL THEN lastRed.next ← redCell ELSE firstRed ← redCell;
lastRed ← redCell;
RETURN[accept];
};
ENDCASE => ERROR;
};
-- repeats calls on SimpleTryToken, so that all reductions are done, up to and including a shift, acceptance, or failure.
-- On failure the recorded steps since the last shift are discarded (ResetToShift).
FullTryToken: PROC [ tokenKind: IO.TokenKind, tokenText: ROPE, position: INT32 ]
RETURNS [ SimpleTryTokenCase ] ~ {
DO
SELECT SimpleTryToken[tokenKind, tokenText, position] FROM
reduction => LOOP;
shift => RETURN[shift];
accept => RETURN[accept];
fail => {
ResetToShift[];
RETURN[fail];
};
ENDCASE => ERROR;
ENDLOOP;
};
-- Discards all unplayed steps and rewinds the parse stack to oldStack.
ResetStack: PROC ~ {
currentStack ← oldStack;
firstRed ← lastShift ← lastRed ← NIL;
};
-- plays successive reductions up to and including a shift or acceptance
-- Reports each recorded step to the caller's Show* procedures and advances
-- firstRed past it; returns TRUE when an acceptance was played.
Play1TokenWorth: PROC RETURNS [ accepted: BOOL ] ~ {
DO
SELECT firstRed.case FROM
reduction => {
IF showReduce # NIL
THEN showReduce[firstRed.rule, firstRed.firstCharPosition, firstRed.length];
firstRed ← firstRed.next;
IF firstRed = NIL
THEN lastShift ← lastRed ← NIL;
};
shift => {
IF firstRed.tokenCase = generic
THEN {
IF showGenericShift # NIL
THEN showGenericShift[firstRed.code, firstRed.tokenKind, firstRed.text, firstRed.firstCharPosition];
}
ELSE {
IF showNonGenericShift # NIL
THEN showNonGenericShift[firstRed.text, firstRed.firstCharPosition];
};
firstRed.previous ← NIL; -- this shift is now committed; cut the back link
oldStack ← firstRed.stack;
firstRed ← firstRed.next;
IF firstRed = NIL
THEN lastShift ← lastRed ← NIL;
RETURN[FALSE];
};
accept => {
firstRed ← firstRed.next;
IF firstRed = NIL
THEN lastShift ← lastRed ← NIL;
RETURN[TRUE];
};
ENDCASE => ERROR;
ENDLOOP;
};
-- Discards recorded steps after the most recent shift (or all of them when no
-- shift has been recorded), restoring the matching parse stack.
ResetToShift: PROC ~ {
IF lastShift # NIL
THEN {
lastRed ← lastShift;
lastRed.next ← NIL;
currentStack ← lastRed.stack;
}
ELSE {
lastRed ← NIL;
currentStack ← oldStack;
};
};
-- Backs the recorded-step list up by one shifted token (and its following
-- reductions), restoring the stack to just after the previous shift.
Reset1TokenWorth: PROC ~ {
lastRed ← lastShift;
lastShift ← lastRed.previous;
lastRed.previous ← NIL;
ResetToShift[];
};
-- Backs up n shifted tokens' worth of recorded steps.
ResetNTokensWorth: PROC [n: CARD16] ~ {
FOR I: CARD16 IN [0..n) DO Reset1TokenWorth[] ENDLOOP;
};
-- now we deal with delivery of tokens. this mechanism allows back up and replay of tokens without any assumptions about the ultimate source of tokens
firstToken: SourceTokenCell ← NIL; -- head of the saved-token list
currentToken: SourceTokenCell ← NIL; -- next saved token to deliver; NIL means fetch a fresh one
lastToken: SourceTokenCell ← NIL; -- tail of the saved-token list
-- Delivers the next token, fetching from getSourceToken only when the replay
-- list is exhausted; every fetched token is appended to the list for replay.
GetToken: PROC RETURNS [ IO.TokenKind, ROPE, INT32 ] ~ {
t: SourceTokenCell;
IF currentToken = NIL THEN {
currentToken ← NEW[SourceTokenCellBody];
[currentToken.kind, currentToken.text, currentToken.position] ← getSourceToken[];
IF firstToken # NIL
THEN lastToken.next ← currentToken
ELSE firstToken ← currentToken;
lastToken ← currentToken;
};
t ← currentToken;
currentToken ← currentToken.next;
RETURN[t.kind, t.text, t.position];
};
-- Rewinds token delivery to the start of the saved list.
ResetTokens: PROC ~ {
currentToken ← firstToken;
};
-- Permanently discards the oldest saved token.
DeleteOneToken: PROC ~ {
IF currentToken = firstToken THEN currentToken ← firstToken.next;
firstToken ← firstToken.next;
};
-- syntactic error recovery involves trying the deletion of some incoming tokens and the insertion of other plausible tokens. The following data records the current best such attempt as well as the currently explored attempt.
currentInsertions: TrialTokenCell ← NIL; -- insertion list of the attempt being explored (most recent first)
bestInsertions: TrialTokenCell ← NIL; -- insertion list of the best attempt seen so far
bestDeletions: CARD16 ← 0; -- deletions of the best attempt
bestScore: INT32 ← 0; -- parse-ahead achieved by the best attempt
-- Pops the most recently tried insertion.
BackUpOneTrialInsertion: PROC ~ {
currentInsertions ← currentInsertions.next;
};
-- Pushes a token onto the current trial-insertion list.
AddOneTrialInsertion: PROC [ kind: IO.TokenKind, text: ROPE ] ~ {
cell: TrialTokenCell ← NEW[TrialTokenCellBody ← [kind, text, currentInsertions]];
currentInsertions ← cell;
};
-- Remembers the current attempt if it parsed further ahead than any before it.
RecordTrialScore: PROC [ nDeletions: INT32, score: INT32 ] ~ {
IF score > bestScore THEN {
bestInsertions ← currentInsertions;
bestDeletions ← nDeletions;
bestScore ← score;
};
};
-- Remembers the current attempt as a confirmed fix (maximum score).
RecordFoundFix: PROC [ nDeletions: INT32 ] ~ {
bestInsertions ← currentInsertions;
bestDeletions ← nDeletions;
bestScore ← DesiredSuccesses;
};
-- Clears all recovery bookkeeping for a fresh exploration.
ResetTrials: PROC ~ {
currentInsertions ← bestInsertions ← NIL;
bestDeletions ← bestScore ← 0;
};
-- Calls `for` on the best attempt's insertions in the order they were tried
-- (the list is stored most-recent-first, hence the recursive reversal).
PlayTrialInsertions: PROC [ for: PROC [ IO.TokenKind, ROPE ] ] ~ {
SubPlayTrialInsertions: PROC [ cell: TrialTokenCell ] ~ {
IF cell.next # NIL THEN SubPlayTrialInsertions[cell.next];
for[cell.kind, cell.text];
};
IF bestInsertions # NIL THEN SubPlayTrialInsertions[bestInsertions];
};
-- Recursively searches for the best [deletions, insertions] repair: first
-- tries deleting tryNDeletions tokens and parsing ahead up to
-- DesiredSuccesses tokens; parsing that far (or accepting) raises FoundAFix.
-- Otherwise records the score achieved, then recurs with each plausible
-- inserted token and one fewer deletion.
ExploreForBestRecovery: PROC [ nInsertsSoFar: INT32, tryNDeletions: INT32 ] ~ {
-- first see if we can get away by trying an appropriate number of deletions without any insertions
FOR I: INT32 IN [0..tryNDeletions) DO [] ← GetToken[] ENDLOOP;
FOR I: INT32 IN [0..DesiredSuccesses) DO
tKind: IO.TokenKind; tText: ROPE; tPosition: INT32;
[tKind, tText, tPosition] ← GetToken[];
SELECT FullTryToken[tKind, tText, tPosition] FROM
shift => IF I.SUCC # DesiredSuccesses THEN LOOP
ELSE -- we parsed ahead as far as we wanted, go with this
{RecordFoundFix[tryNDeletions]; ResetNTokensWorth[I.SUCC]; FoundAFix[]};
accept => {RecordFoundFix[tryNDeletions]; ResetNTokensWorth[I]; FoundAFix[]};
fail => {
-- we couldn't parse as many input tokens as we wanted
-- but we got enough to record
IF I >= MinSuccesses
THEN RecordTrialScore[tryNDeletions, I];
ResetNTokensWorth[I];
EXIT;
};
ENDCASE => ERROR;
ENDLOOP; -- success loop
-- we didn't parse ahead as far as we wanted, so reset
ResetTokens[];
-- now try each plausible inserted token, but with one fewer deletions
IF tryNDeletions > 0 THEN {
SeeOneTokenToInsert: PROC [ kind: IO.TokenKind, text: ROPE ] ~ {
AddOneTrialInsertion[kind, text];
SELECT FullTryToken[kind, text, 0] FROM
shift => {
ExploreForBestRecovery[nInsertsSoFar.SUCC, tryNDeletions-1];
Reset1TokenWorth[];
};
fail => NULL;
accept => ResetToShift[];
ENDCASE => ERROR;
BackUpOneTrialInsertion[];
};
GenPlausableInsertedTokens[table, currentStack.state, SeeOneTokenToInsert];
-- this might generate FoundAFix, which will be caught by our ultimate caller
};
};
-- following is the main loop that drives the parser
-- A token is only committed (played to the caller and removed from the replay
-- list) after the NEXT token has also parsed successfully, so there is always
-- one token of lookahead available for error recovery.
successfulTrys: CARD16 ← 0;
DO
kind: IO.TokenKind;
text: ROPE;
position: INT32;
[kind, text, position] ← GetToken[];
SELECT FullTryToken[kind, text, position] FROM
shift => {
IF successfulTrys = 0
THEN successfulTrys ← 1
ELSE {
-- two source tokens in a row have worked, so show first to caller
IF Play1TokenWorth[] THEN ERROR; -- can't produce an acceptance
DeleteOneToken[]; -- from the saved input tokens
};
};
accept => {
IF Play1TokenWorth[] THEN {
IF successfulTrys = 1 THEN ERROR ELSE RETURN[TRUE];
};
IF successfulTrys # 1 THEN ERROR;
IF Play1TokenWorth[] THEN RETURN[TRUE];
ERROR; -- one of them should have produced an acceptance
};
fail => {
-- first forget what we have seen
successfulTrys ← 0;
ResetStack[];
ResetTokens[];
ResetTrials[];
-- now conduct an exploration for a replacement
FOR diag: INT32 IN [0..MaxSkip] DO
ExploreForBestRecovery[0, diag
! FoundAFix => {
-- found a replacement that results in acceptance or good match
-- we could try to replay the current stack, but it is easier to program using the code for "go with the best".
EXIT
}
];
ENDLOOP;
-- did not find a replacement that resulted in acceptance or good match
-- so, go with the best
-- (or, did find one, and are defaulting to this code)
{
reportStream: IO.STREAM ← GetReportStream[];
msgStream: IO.STREAM ← IO.ROS[]; -- fixed: the "←" was lost from this declaration in the source
nInsertions: CARD16 ← 0;
ReportOneInsertion: PROC [ kind: IO.TokenKind, text: ROPE ] ~ {
IO.PutF[msgStream, " %g", IO.rope[text]];
};
TryOneTrialInsertion: PROC [ kind: IO.TokenKind, text: ROPE ] ~ {
nInsertions ← nInsertions.SUCC;
SELECT FullTryToken[kind, text, 0] FROM
shift, accept => RETURN;
fail => ERROR; -- the best repair was verified to parse; it cannot fail now
ENDCASE;
};
ResetStack[];
ResetTokens[];
IO.PutF[msgStream, "\n\nSyntaxError at %g\n", IF firstToken=NIL THEN IO.rope["<unknown place>"] ELSE IO.int[firstToken.position]];
IF bestDeletions # 0 THEN IO.PutF[msgStream, "\tdeleting"];
FOR I: CARD16 IN [0..bestDeletions) DO
text: ROPE;
[, text, ] ← GetToken[];
IO.PutF[msgStream, " %g", IO.rope[text]];
ENDLOOP;
ResetTokens[];
IO.PutF[msgStream, "\tinserting"];
PlayTrialInsertions[ReportOneInsertion];
IO.PutF[msgStream, "\n\n"];
reportStream.PutF["%g", IO.rope[IO.RopeFromROS[msgStream]]];
IF bestScore < MinSuccesses THEN UnRecoverableSyntaxError[];
-- apply the repair: drop the deleted tokens, then shift the insertions
FOR I: CARD16 IN [0..bestDeletions) DO DeleteOneToken[] ENDLOOP;
PlayTrialInsertions[TryOneTrialInsertion];
FOR I: CARD16 IN [0..nInsertions) DO
IF Play1TokenWorth[] THEN {
IF I = (nInsertions - 1) THEN RETURN[TRUE] ELSE ERROR;
};
ENDLOOP;
successfulTrys ← 0;
};
};
ENDCASE => ERROR;
ENDLOOP;
};
-- Calls `for` once for each terminal token that has some action in `state`,
-- i.e. each token whose insertion could allow the parse to continue.
GenPlausableInsertedTokens: PROC [ table: ParserTable, state: State,
for: PROC [ IO.TokenKind, ROPE ] ] ~ {
-- we try each possible terminal token, and generate those that result in a non NIL actionCell
-- eventually we want to control the order in which the terminal tokens are tried
FOR cellX: CARD16 IN [0..table.tokenTable.nSlots) DO
FOR trialTokenCell: TokenCell ← table.tokenTable.tokens[cellX], trialTokenCell.next WHILE trialTokenCell # NIL DO
actionCell: ActionCell ← LookUpAction[table.actionTable, state, trialTokenCell.symbolCode];
IF actionCell # NIL THEN {
-- fixed: the "←" was lost from the following declaration in the source
text: ROPE ← IF trialTokenCell.case = unique THEN trialTokenCell.spelling ELSE
SELECT trialTokenCell.kind FROM
tokenID => "id",
tokenDECIMAL, tokenOCTAL, tokenHEX, tokenREAL => "num",
tokenROPE => "rope",
tokenCHAR => "char",
tokenATOM => "atom",
tokenEOF => "eof",
ENDCASE => ERROR;
for[trialTokenCell.kind, text];
};
ENDLOOP;
ENDLOOP;
};
}.