OneCasabaParserImpl.Mesa (September 18, 1984 9:00:29 am PDT)
Copyright © 1987, 1988, 1989 by Xerox Corporation. All rights reserved.
Sturgis, June 3, 1986 3:07:05 pm PDT
Bill Jackson (bj) September 13, 1989 10:22:29 pm PDT
DIRECTORY
Ascii USING [ SP ],
IO USING [ BreakProc, card, CharClass, Close, GetCard, GetCedarTokenRope, GetTokenRope, int, PutF, PutRope, RIS, rope, RopeFromROS, ROS, SkipWhitespace, STREAM, TokenKind ],
KipperSupport USING [ CreateKipperer, CreateUnKipperer, Kipperer ],
OneCasabaParser,
OneCasabaParserData USING [ KipperParserTableBodyRef, UnKipperParserTableBodyRef ],
OneCasabaParserPrivate USING [ Action, ActionCell, ActionCellBody, ActionTable, ActionTableBody, ParserTableBody, SymbolCell, SymbolCellBody, SymbolTable, SymbolTableBody, TokenCase, TokenCell, TokenCellBody, TokenTable, TokenTableBody ],
Rope USING [ Equal, Fetch, Length, ROPE, Substr ];
OneCasabaParserImpl: CEDAR PROGRAM
IMPORTS IO, KipperSupport, OneCasabaParserData, Rope
EXPORTS OneCasabaParser ~ {
OPEN OneCasabaParser, OneCasabaParserPrivate;
ROPE: TYPE ~ Rope.ROPE;
first the procedures for creating parser info on streams
RecordKipperedParserTableOnStream: PUBLIC PROC [ s: IO.STREAM,
genInfo: OneCasabaParser.GenInfoProc,
getSourceToken: OneCasabaParser.LexicalAnalysisProc ← NIL ] ~ {
-- Builds a parser table from genInfo and writes it to s in kippered (binary,
-- machine-readable) form, then closes s.
-- NOTE(review): getSourceToken is accepted for interface compatibility but is
-- never used here; the original computed an unused local lexProc from it,
-- which has been removed as dead code.
table: ParserTable ← BuildParserTable[genInfo];
kipperer: KipperSupport.Kipperer ← KipperSupport.CreateKipperer[s];
OneCasabaParserData.KipperParserTableBodyRef[kipperer, table];
s.Close[];
};
RecordReadableParserTableOnStream: PUBLIC PROC [ s: IO.STREAM,
genInfo: OneCasabaParser.GenInfoProc,
getSourceToken: OneCasabaParser.LexicalAnalysisProc ← NIL ] ~ {
-- Writes the parser description produced by genInfo to s as readable text:
-- a sequence of keyword-prefixed records ("sy", "ut", "gt", "sh", "rd",
-- "ac", "start") terminated by " end ".  This is exactly the format read
-- back by BuildParserTableFromReadableStream.
-- NOTE(review): getSourceToken is accepted for interface compatibility but is
-- never used here; the unused local lexProc derived from it has been removed.
LocalRecordSymbol: SymbolProc ~
{ s.PutF["sy %g ", IO.rope[name]] };
LocalRecordUniqueToken: UniqueTokenProc ~
{ s.PutF["ut %g %g ", IO.rope[name], IO.rope[spelling]] };
LocalRecordGenericToken: GenericTokenProc ~
{ s.PutF["gt %g %g ", IO.rope[name], IO.rope[class]] };
LocalRecordShift: ShiftProc ~
{ s.PutF["sh %g <%g> %g ", IO.card[state], IO.rope[terminalSeq], IO.card[newState]] };
LocalRecordReduction: ReductionProc ~
{ s.PutF["rd %g <%g> %g %g %g ", IO.card[state], IO.rope[terminalSeq], IO.rope[leftSide], IO.card[ruleNumber], IO.card[ruleSize]] };
LocalRecordAcceptance: AcceptanceProc ~
{ s.PutF["ac %g <%g> ", IO.card[state], IO.rope[terminalSeq]] };
LocalRecordStartState: StartStateProc ~ { s.PutF["start %g ", IO.card[state]] };
genInfo[LocalRecordSymbol, LocalRecordUniqueToken, LocalRecordGenericToken, LocalRecordShift, LocalRecordReduction, LocalRecordAcceptance, LocalRecordStartState];
s.PutRope[" end "];
};
now the types and procedures for building an internal parsetable from parser descriptions on a stream
-- ParserTable is the internal, fully analyzed form of a grammar's parse tables.
ParserTable: TYPE ~ REF ParserTableBody;
-- The concrete body is declared in OneCasabaParserPrivate and re-exported here.
ParserTableBody: PUBLIC TYPE ~ OneCasabaParserPrivate.ParserTableBody;
BuildParserTableFromKipperedStream: PUBLIC PROC [ s: IO.STREAM,
getSourceToken: LexicalAnalysisProc ← NIL ] RETURNS [ table: ParserTable ] ~ {
-- Reconstructs a parser table from the kippered (binary) form written by
-- RecordKipperedParserTableOnStream.
-- NOTE(review): getSourceToken is accepted for interface compatibility but is
-- never used here; the unused local lexProc derived from it has been removed.
table ← OneCasabaParserData.UnKipperParserTableBodyRef[ KipperSupport.CreateUnKipperer[s]];
};
BuildParserTableFromReadableStream: PUBLIC PROC [ s: IO.STREAM,
getSourceToken: LexicalAnalysisProc ← NIL ] RETURNS [ table: ParserTable ] ~ {
-- Parses the readable table format written by RecordReadableParserTableOnStream
-- ("sy"/"ut"/"gt"/"sh"/"rd"/"ac"/"start" records terminated by "end") and
-- rebuilds the internal parser table from it via BuildParserTable.
-- NOTE(review): getSourceToken is accepted for interface compatibility but is
-- never used here; the unused local lexProc derived from it has been removed.
GenInfo: GenInfoProc ~ {
-- Tokens in the readable format are separated by single spaces.
Break: IO.BreakProc ~ {
class: IO.CharClass ← IF ( char = Ascii.SP ) THEN sepr ELSE other;
RETURN[class];
};
DO
-- Reads a terminal sequence of the form <...> and strips the angle brackets.
GetTermSeq: PROC RETURNS [ text: ROPE ] ~ {
[] ← s.SkipWhitespace[];
text ← s.GetTokenRope[Break].token;
text ← text.Substr[1, text.Length[] - 2];
};
-- Reads the next cardinal, skipping leading whitespace.
SkipAndGetCard: PROC RETURNS [ c: CARD16 ] ~ {
[] ← s.SkipWhitespace[];
c ← s.GetCard[];
};
firstToken: ROPE ← s.GetTokenRope[Break].token;
-- Dispatch on the record keyword; ERROR on anything unrecognized.
SELECT TRUE FROM
firstToken.Equal["sy"] => {
recordSymbol[s.GetTokenRope[Break].token];
};
firstToken.Equal["ut"] => {
name: ROPE ~ s.GetTokenRope[Break].token;
spelling: ROPE ~ s.GetTokenRope[Break].token;
recordUniqueToken[name, spelling]
};
firstToken.Equal["gt"] => {
name: ROPE ~ s.GetTokenRope[Break].token;
class: ROPE ~ s.GetTokenRope[Break].token;
recordGenericToken[name, class];
};
firstToken.Equal["sh"] => {
state: CARD16 ~ SkipAndGetCard[];
terms: ROPE ~ GetTermSeq[];
newState: CARD16 ← SkipAndGetCard[];
recordShift[state, terms, newState];
};
firstToken.Equal["rd"] => {
state: CARD16 ~ SkipAndGetCard[];
terms: ROPE ~ GetTermSeq[];
leftSide: ROPE ~ s.GetTokenRope[Break].token;
ruleNumber: CARD16 ~ SkipAndGetCard[];
ruleSize: CARD16 ~ SkipAndGetCard[];
recordReduction[state, terms, leftSide, ruleNumber, ruleSize];
};
firstToken.Equal["ac"] => {
state: CARD16 ~ SkipAndGetCard[];
terms: ROPE ~ GetTermSeq[];
recordAcceptance[state, terms];
};
firstToken.Equal["start"] => {
recordStartState[SkipAndGetCard[]];
};
firstToken.Equal["end"] => { EXIT };
ENDCASE => { ERROR };
ENDLOOP;
};
table ← BuildParserTable[GenInfo];
};
-- Drives genInfo with callbacks that record each declaration into a freshly
-- created table, then resolves action conflicts with AnalyzeActions.
-- The Local* procedures close over `table`, which is assigned before genInfo
-- runs, so the closures see the created table.
BuildParserTable: PROC [ genInfo: GenInfoProc ] RETURNS [ table: ParserTable ] ~ {
LocalRecordSymbol: SymbolProc ~
{ RecordSymbol[table, name] };
LocalRecordUniqueToken: UniqueTokenProc ~
{ RecordUniqueToken[table, name, spelling] };
LocalRecordGenericToken: GenericTokenProc ~
{ RecordGenericToken[table, name, class] };
LocalRecordShift: ShiftProc ~
{ RecordShift[table, state, terminalSeq, newState] };
LocalRecordReduction: ReductionProc ~
{ RecordReduction[table, state, terminalSeq, leftSide, ruleNumber, ruleSize] };
LocalRecordAcceptance: AcceptanceProc ~
{ RecordAcceptance[table, state, terminalSeq] };
LocalRecordStartState: StartStateProc ~
{ RecordStartState[table, state] };
table ← CreateParserTable[];
genInfo[LocalRecordSymbol, LocalRecordUniqueToken, LocalRecordGenericToken, LocalRecordShift, LocalRecordReduction, LocalRecordAcceptance, LocalRecordStartState];
AnalyzeActions[table];
};
-- Allocates an empty table pre-seeded with the empty symbol (code 0) and the
-- end-of-file token, both of which every grammar needs.
CreateParserTable: PROC RETURNS [ table: ParserTable ] ~ {
table ← NEW[ParserTableBody];
RecordSymbol[table, ""]; -- the empty seq
RecordTokenInfo[table, "", tokenEOF, "", generic];
};
-- Moves every cell on table.unAnalyzedActions into a hash table keyed on
-- [state, terminal], then removes conflicting entries in place:
-- shift/reduce resolves to shift, reduce/reduce to the lowest rule number,
-- anything involving accept keeps the accept.  Counts of removed conflicts
-- are reported on the stream obtained from GetReportStream.
AnalyzeActions: PROC [ table: ParserTable ] ~ {
actionTable: ActionTable;
next: ActionCell;
nShiftReduceConflicts: INT32 ← 0;
nReduceReduceConflicts: INT32 ← 0;
-- The four Note* procedures just tally conflicts for the final report.
NoteShiftReduce: PROC [ terminal: CARD16, ruleNumber: CARD16 ] ~
{ nShiftReduceConflicts ← nShiftReduceConflicts.SUCC };
NoteShiftAccept: PROC [ terminal: CARD16 ] ~
{ nShiftReduceConflicts ← nShiftReduceConflicts.SUCC };
NoteReduceAccept: PROC [ terminal: CARD16, ruleNumber: CARD16 ] ~
{ nReduceReduceConflicts ← nReduceReduceConflicts.SUCC };
NoteReduceReduce: PROC [ terminal: CARD16, ruleNumber1, ruleNumber2: CARD16 ] ~
{ nReduceReduceConflicts ← nReduceReduceConflicts.SUCC };
IF table.actionTable # NIL THEN ERROR; -- actions may be analyzed only once
actionTable ← table.actionTable ← NEW[ActionTableBody[table.nActions]];
FOR I: CARD16 IN [0..actionTable.nSlots) DO actionTable.actions[I] ← NIL ENDLOOP;
-- Rehash every un-analyzed cell into its bucket (prepending, so list order reverses).
FOR action: ActionCell ← table.unAnalyzedActions, next WHILE action # NIL DO
hash: CARD16 ← HashOneAction[action.state, action.terminal];
hashCode: CARD16 ← hash MOD actionTable.nSlots;
next ← action.next;
action.next ← actionTable.actions[hashCode];
actionTable.actions[hashCode] ← action;
ENDLOOP;
table.unAnalyzedActions ← NIL;
-- now remove any ambiguities
-- resolve shift/reduce by shift
-- resolve reduce/reduce to the lowest production number
FOR I: CARD16 IN [0..actionTable.nSlots) DO
previousTo1: ActionCell ← NIL;
action1: ActionCell← NIL;
action1Next: ActionCell ← NIL;
-- For each pair (action1, action2) in the same bucket with equal [state, terminal],
-- delete one of the pair according to the resolution policy above.
FOR action1 ← actionTable.actions[I], action1Next WHILE ( action1 # NIL ) DO
previousTo2: ActionCell ← action1;
action2: ActionCell ← NIL;
action2Next: ActionCell ← NIL;
action1Next ← action1.next;
FOR action2 ← action1.next, action2Next WHILE ( action2 # NIL ) DO
action2Next ← action2.next;
IF action1.state = action2.state
AND action1.terminal = action2.terminal
THEN { -- we have a conflict
SELECT action1.action FROM
shift => {
SELECT action2.action FROM
shift => {
ERROR; -- two shifts for the same [state, terminal] cannot happen
};
reduce => {
NoteShiftReduce[action1.terminal, action2.ruleNumber];
GOTO remove2; -- prefer the shift
};
accept => {
NoteShiftAccept[action1.terminal];
GOTO remove2; -- prefer the shift
};
ENDCASE => {
ERROR;
};
};
reduce => {
SELECT action2.action FROM
shift => {
NoteShiftReduce[action1.terminal, action1.ruleNumber];
GOTO remove1; -- prefer the shift
};
reduce => {
NoteReduceReduce[action1.terminal, action1.ruleNumber, action2.ruleNumber];
IF action1.ruleNumber < action2.ruleNumber
THEN GOTO remove2
ELSE GOTO remove1; -- keep the lower-numbered production
};
accept => {
NoteReduceAccept[action1.terminal, action1.ruleNumber];
GOTO remove1; -- prefer the accept
};
ENDCASE => {
ERROR;
};
};
accept => {
SELECT action2.action FROM
shift => {
NoteShiftAccept[action1.terminal];
GOTO remove2; -- prefer the accept
};
reduce => {
NoteReduceAccept[action1.terminal, action2.ruleNumber];
GOTO remove2; -- prefer the accept
};
accept => {
ERROR; -- two accepts for the same [state, terminal] cannot happen
};
ENDCASE => {
ERROR;
};
};
ENDCASE => ERROR;
EXITS
-- remove1: unlink action1 from the bucket and continue with its successor.
remove1 => {
IF previousTo2 = action1 THEN previousTo2 ← previousTo1;
action1 ← action1Next;
action1Next ← action1.next;
IF previousTo1 # NIL
THEN previousTo1.next ← action1
ELSE actionTable.actions[I] ← action1;
};
-- remove2: unlink action2 from the bucket and continue the inner scan.
remove2 => {
action2 ← action2Next;
action2Next ← IF action2 # NIL THEN action2.next ELSE NIL;
previousTo2.next ← action2;
};
};
previousTo2 ← action2;
ENDLOOP;
previousTo1 ← action1;
ENDLOOP;
ENDLOOP;
-- now report any conflicts that have been artificially removed
IF nReduceReduceConflicts # 0 OR nShiftReduceConflicts # 0 THEN {
reportStream: IO.STREAM ← GetReportStream[];
-- using GetReportStream for a slightly bastardized purpose
-- the definitions file says it will only be called during parse, and I don't want to change the definitions file
reportStream.PutF["\n\nWARNING\n"];
IF nReduceReduceConflicts # 0
THEN reportStream.PutF["\t%g reduce/reduce conflicts removed\n", IO.int[nReduceReduceConflicts]];
IF nShiftReduceConflicts # 0
THEN reportStream.PutF["\t%g shift/reduce conflicts removed\n\n", IO.int[nShiftReduceConflicts]];
};
};
-- Prepends a new symbol cell to the un-analyzed list, assigning it the next
-- sequential code.  Symbols may not be added after AnalyzeSymbols has run.
RecordSymbol: PROC [ table: ParserTable, name: ROPE ] ~ {
IF table.symbolTable # NIL THEN ERROR; -- must record all symbols before any actions
table.unAnalyzedSymbols ← NEW[SymbolCellBody←[name, table.nSymbols, table.unAnalyzedSymbols]];
table.nSymbols ← table.nSymbols.SUCC;
};
-- Classifies the literal spelling of a unique token by running the Cedar
-- scanner over it, then records it in the table.  Unique tokens must be
-- single-spelling kinds (IDs, single/double delimiters); anything else is an error.
RecordUniqueToken: PROC [ table: ParserTable, name: ROPE, spelling: ROPE ] ~ {
spellingStream: IO.STREAM ← IO.RIS[spelling]; -- fixed: the "←" was lost from this declaration in the source
kind: IO.TokenKind;
-- the following hack is because GetCedarToken fails on |
-- of course, the scanner will still fail if it ever sees a |
kind ← IF spelling.Equal["|"]
THEN tokenSINGLE
ELSE spellingStream.GetCedarTokenRope[].tokenKind;
spellingStream.Close[];
IF ( ( kind # tokenID ) AND ( NOT HashKind[kind].includeRope ) ) THEN ERROR;
RecordTokenInfo[table, name, kind, spelling, unique];
};
-- Records a generic token (one standing for a whole class, e.g. any ID or any
-- number).  Generic tokens must NOT be of a kind whose spelling participates
-- in hashing, except tokenID which is handled specially in AnalyzeTokens.
RecordGenericToken: PROC [table: ParserTable, name: ROPE, class: ROPE] ~ {
kind: IO.TokenKind ← DecodeTokenKind[class];
IF ( ( kind # tokenID ) AND ( HashKind[kind].includeRope ) ) THEN ERROR;
RecordTokenInfo[table, name, kind, "", generic];
};
-- Adds a shift action [state, terminalSeq] -> newState to the un-analyzed
-- action list.  The first action recorded triggers symbol analysis, so that
-- terminal names can be translated to symbol codes.
RecordShift: PROC [ table: ParserTable, state: State, terminalSeq: ROPE, newState: State ] ~ {
IF ( table.symbolTable = NIL ) THEN AnalyzeSymbols[table];
table.unAnalyzedActions ← NEW[ActionCellBody ← [
state: state,
terminal: LookUpSymbol[table.symbolTable, terminalSeq],
action: shift,
nextState: newState,
ruleNumber: 0, -- unused for shifts
leftSide: 0,
ruleSize: 0,
next: table.unAnalyzedActions
]];
table.nActions ← table.nActions.SUCC;
};
-- Adds a reduce action: in `state`, looking at `terminalSeq`, reduce by rule
-- `ruleNumber`, popping `ruleSize` states and pushing `leftSide`.
RecordReduction: PROC [ table: ParserTable, state: State, terminalSeq: ROPE, leftSide: ROPE, ruleNumber: CARD16, ruleSize: CARD16 ] ~ {
IF ( table.symbolTable = NIL ) THEN AnalyzeSymbols[table];
table.unAnalyzedActions ← NEW[ActionCellBody ← [
state: state,
terminal: LookUpSymbol[table.symbolTable, terminalSeq],
action: reduce,
nextState: 0, -- unused for reductions
ruleNumber: ruleNumber,
leftSide: LookUpSymbol[table.symbolTable, leftSide],
ruleSize: ruleSize,
next: table.unAnalyzedActions
]];
table.nActions ← table.nActions.SUCC;
};
-- Adds an accept action for [state, terminalSeq].
RecordAcceptance: PROC [ table: ParserTable, state: State, terminalSeq: ROPE ] ~ {
IF ( table.symbolTable = NIL ) THEN AnalyzeSymbols[table];
table.unAnalyzedActions ← NEW[ActionCellBody ← [
state: state,
terminal: LookUpSymbol[table.symbolTable, terminalSeq],
action: accept,
nextState: 0, -- remaining fields unused for acceptance
ruleNumber: 0,
leftSide: 0,
ruleSize: 0,
next: table.unAnalyzedActions
]];
table.nActions ← table.nActions.SUCC;
};
-- Records the parser's initial state.
RecordStartState: PROC [ table: ParserTable, state: State ] ~ {
table.startState ← state;
};
-- Prepends a token cell (unique or generic) to the un-analyzed token list.
-- The symbolCode field is left 0 here; AnalyzeTokens fills it in later.
RecordTokenInfo: PROC [ table: ParserTable, name: ROPE, kind: IO.TokenKind, spelling: ROPE, case: TokenCase ] ~ {
IF ( table.tokenTable # NIL ) THEN ERROR; -- must record all tokens before any actions
table.unAnalyzedTokens ← NEW[TokenCellBody ←
[name, 0, kind, spelling, case, table.unAnalyzedTokens]];
table.nTokens ← table.nTokens.SUCC;
};
-- Hash for the action table: product of state and terminal code, folded to 8 bits.
HashOneAction: PROC [ state: CARD16, terminal: CARD16 ]
RETURNS [ hash: CARD16 ] ~ INLINE { hash ← (state * terminal) MOD 256 };
-- Maps a token kind to a small hash seed, and says whether the token's text
-- also participates in hashing (TRUE only for IDs and fixed delimiters,
-- whose spelling distinguishes them; FALSE for kinds matched generically).
HashKind: PROC [ kind: IO.TokenKind ] RETURNS [ hash: CARD16, includeRope: BOOL ] ~ {
SELECT kind FROM
tokenERROR => RETURN[0, FALSE];
tokenID => RETURN[1, TRUE];
tokenDECIMAL => RETURN[2, FALSE];
tokenOCTAL => RETURN[3, FALSE];
tokenHEX => RETURN[4, FALSE];
tokenREAL => RETURN[5, FALSE];
tokenROPE => RETURN[6, FALSE];
tokenCHAR => RETURN[7, FALSE];
tokenATOM => RETURN[8, FALSE];
tokenSINGLE => RETURN[9, TRUE];
tokenDOUBLE => RETURN[10, TRUE];
tokenCOMMENT => RETURN[11, FALSE];
tokenEOF => RETURN[12, FALSE];
ENDCASE => ERROR;
};
-- Translates the textual name of a token kind (as written in the readable
-- table format) back to the IO.TokenKind enumeration; ERROR on unknown names.
DecodeTokenKind: PROC [ class: ROPE ] RETURNS [ kind: IO.TokenKind ] ~ {
SELECT TRUE FROM
class.Equal["tokenERROR"] => RETURN[tokenERROR];
class.Equal["tokenID"] => RETURN[tokenID];
class.Equal["tokenDECIMAL"] => RETURN[tokenDECIMAL];
class.Equal["tokenOCTAL"] => RETURN[tokenOCTAL];
class.Equal["tokenHEX"] => RETURN[tokenHEX];
class.Equal["tokenREAL"] => RETURN[tokenREAL];
class.Equal["tokenROPE"] => RETURN[tokenROPE];
class.Equal["tokenCHAR"] => RETURN[tokenCHAR];
class.Equal["tokenATOM"] => RETURN[tokenATOM];
class.Equal["tokenSINGLE"] => RETURN[tokenSINGLE];
class.Equal["tokenDOUBLE"] => RETURN[tokenDOUBLE];
class.Equal["tokenCOMMENT"] => RETURN[tokenCOMMENT];
class.Equal["tokenEOF"] => RETURN[tokenEOF];
ENDCASE => ERROR;
};
-- Moves every cell on table.unAnalyzedSymbols into a hash table keyed on the
-- symbol's name, then analyzes the tokens (which need symbol codes).
AnalyzeSymbols: PROC [ table: ParserTable ] ~ {
symbolTable: SymbolTable;
next: SymbolCell;
IF table.symbolTable # NIL THEN ERROR; -- symbols may be analyzed only once
symbolTable ← table.symbolTable ← NEW[SymbolTableBody[table.nSymbols]];
FOR I: CARD16 IN [0..symbolTable.nSlots) DO symbolTable.symbols[I] ← NIL ENDLOOP;
FOR symbol: SymbolCell ← table.unAnalyzedSymbols, next WHILE symbol # NIL DO
hash: CARD16 ← HashOneSymbol[symbol.name];
hashCode: CARD16 ← hash MOD symbolTable.nSlots;
next ← symbol.next;
symbol.next ← symbolTable.symbols[hashCode];
symbolTable.symbols[hashCode] ← symbol;
ENDLOOP;
table.unAnalyzedSymbols ← NIL;
AnalyzeTokens[table];
};
-- Returns the code of the named symbol; ERROR if it was never recorded.
LookUpSymbol: PROC [ symbolTable: SymbolTable, name: ROPE ]
RETURNS [ code: CARD16 ] ~ {
hash: CARD16 ← HashOneSymbol[name];
hashCode: CARD16 ← hash MOD symbolTable.nSlots;
FOR symbol: SymbolCell ← symbolTable.symbols[hashCode], symbol.next WHILE symbol # NIL DO
IF symbol.name.Equal[name] THEN RETURN[symbol.code];
ENDLOOP;
ERROR; -- unknown symbol
};
-- Hashes a rope by summing its character codes.
-- NOTE(review): the sum is CARD16 arithmetic; presumably symbol names are
-- short enough that overflow never occurs — confirm against the grammar sizes used.
HashOneSymbol: PROC [ rope: ROPE ] RETURNS [ hash: CARD16 ] ~ {
hash ← 0;
FOR I: INT32 IN [0..rope.Length[]) DO
hash ← hash + LOOPHOLE[rope.Fetch[I], CARD16];
ENDLOOP;
};
-- Moves every cell on table.unAnalyzedTokens into a hash table keyed on
-- [kind, spelling], resolving each token's symbolCode via the (already
-- analyzed) symbol table.  The single generic ID token, if any, is also
-- remembered in tokenTable.idToken as the fallback for unknown identifiers.
AnalyzeTokens: PROC [ table: ParserTable ] ~ {
tokenTable: TokenTable;
next: TokenCell;
IF table.tokenTable # NIL THEN ERROR; -- tokens may be analyzed only once
tokenTable ← table.tokenTable ← NEW[TokenTableBody[table.nTokens]];
FOR token: TokenCell ← table.unAnalyzedTokens, next WHILE token # NIL DO
spellingHash: CARD16 ← TokenHash[token.kind, token.spelling].hash;
spellingHashCode: CARD16 ← spellingHash MOD tokenTable.nSlots;
next ← token.next;
token.symbolCode ← LookUpSymbol[table.symbolTable, token.name];
token.next ← tokenTable.tokens[spellingHashCode];
tokenTable.tokens[spellingHashCode] ← token;
IF token.kind = tokenID AND token.case = generic THEN {
IF tokenTable.idToken # NIL THEN ERROR; -- at most one generic ID token
tokenTable.idToken ← token;
};
ENDLOOP;
table.unAnalyzedTokens ← NIL;
};
-- following routines are intended for the output of the GetCedarToken routine in IO.mesa
-- Maps a scanned source token [kind, text] to its terminal symbol code.
-- For spelling-hashed kinds (IDs, delimiters) the text must match exactly;
-- for generic kinds any text of the right kind matches and the text is
-- returned as info.  An identifier not found literally falls back to the
-- generic ID token.  ERROR if the token cannot appear in this grammar.
LookUpCedarToken: PROC [ tokenTable: TokenTable, kind: IO.TokenKind, text: ROPE ]
RETURNS [ code: CARD16, info: ROPE, case: TokenCase ] ~ {
includeTextInHash: BOOL;
hash: CARD16;
hashCode: CARD16;
[hash, includeTextInHash] ← TokenHash[kind, text];
hashCode ← hash MOD tokenTable.nSlots;
IF ( includeTextInHash )
THEN
FOR token: TokenCell ← tokenTable.tokens[hashCode], token.next WHILE token # NIL DO
IF ( ( token.kind = kind ) AND ( token.spelling.Equal[text] ) )
THEN RETURN[token.symbolCode, NIL, token.case];
ENDLOOP
ELSE
FOR token: TokenCell ← tokenTable.tokens[hashCode], token.next WHILE token # NIL DO
IF ( token.kind = kind ) THEN RETURN[token.symbolCode, text, token.case];
ENDLOOP;
IF ( ( kind = tokenID ) AND ( tokenTable.idToken # NIL ) )
THEN RETURN[tokenTable.idToken.symbolCode, text, tokenTable.idToken.case];
ERROR; -- token cannot occur in this grammar
};
-- Combines the kind's hash seed with the text's character-code sum when the
-- kind's spelling is significant (see HashKind).
TokenHash: PROC [ kind: IO.TokenKind, text: ROPE ]
RETURNS [ hash: CARD16, includeTextInHash: BOOL ] ~ {
[hash, includeTextInHash] ← HashKind[kind];
IF ( ( includeTextInHash ) AND ( text # NIL ) ) THEN
FOR I: INT32 IN [0..text.Length[]) DO
hash ← hash + LOOPHOLE[text.Fetch[I], CARD16];
ENDLOOP;
};
-- Returns the action cell for [state, terminalCode], or NIL if the parser
-- has no action there (i.e. a syntax error).
LookUpAction: PROC [ actionTable: ActionTable, state: State, terminalCode: CARD16 ]
RETURNS [ cell: ActionCell ← NIL ] ~ {
hash: CARD16 ← HashOneAction[state, terminalCode];
hashCode: CARD16 ← hash MOD actionTable.nSlots;
FOR action: ActionCell ← actionTable.actions[hashCode], action.next
WHILE ( action # NIL ) DO
IF ( ( action.state = state ) AND ( action.terminal = terminalCode ) )
THEN { cell ← action; RETURN };
ENDLOOP;
};
-- finally the code for actually performing a parse
-- One cell of the parse-state stack; records the source span it covers.
StackCell: TYPE ~ REF StackCellBody;
StackCellBody: TYPE ~ RECORD [
state: State,
firstPosition, length: INT32,
next: StackCell
];
-- A recorded parse step (reduction, shift, or acceptance), saved so steps can
-- be replayed to the caller only after they are known to survive error recovery.
ReductionCell: TYPE ~ REF ReductionCellBody;
ReductionCellBody: TYPE ~ RECORD [
case: ReductionCellCase,
firstCharPosition: INT32,
-- if reduce case
rule: CARD16,
length: INT32,
-- if shift
code: CARD16,
text: ROPE,
tokenCase: TokenCase,
tokenKind: IO.TokenKind,
previous: ReductionCell, -- always a shift
stack: StackCell, -- as it appears just after applying this shift
-- if accept
-- nothing
-- all cases
next: ReductionCell
];
ReductionCellCase: TYPE ~ { reduction, shift, accept };
SimpleTryTokenCase: TYPE ~ { reduction, shift, accept, fail };
-- A saved source token, kept so input can be backed up and replayed.
SourceTokenCell: TYPE ~ REF SourceTokenCellBody;
SourceTokenCellBody: TYPE ~ RECORD [
kind: IO.TokenKind,
text: ROPE,
position: INT32,
next: SourceTokenCell
];
-- A token tentatively inserted during error recovery.
TrialTokenCell: TYPE ~ REF TrialTokenCellBody;
TrialTokenCellBody: TYPE ~ RECORD [
kind: IO.TokenKind,
text: ROPE,
next: TrialTokenCell
];
-- Error-recovery tuning parameters.
MaxTrialInserts: INT32 ← 4; -- NOTE(review): declared but not referenced in this file
MaxSkip: INT32 ← 4; -- maximum deletions tried per recovery
MinSuccesses: INT32 ← 4; -- minimum parse-ahead for a recovery to be recordable
DesiredSuccesses: INT32 ← 12; -- parse-ahead that counts as a confirmed fix
FoundAFix: ERROR ~ CODE; -- raised to unwind the recovery search on success
GetReportStream: PUBLIC SIGNAL RETURNS [ s: IO.STREAM ] ~ CODE;
UnRecoverableSyntaxError: PUBLIC ERROR ~ CODE;
-- Parses the token stream delivered by getSourceToken against table, replaying
-- reductions/shifts to the caller's Show* procedures; returns whether the
-- input was accepted.  Performs automatic error recovery by trial deletion
-- and insertion of tokens.
Parse: PUBLIC PROC [ table: ParserTable, getSourceToken: GetSourceTokenProc, showReduce: ShowReduceProc ← NIL, showGenericShift: ShowGenericShiftProc ← NIL, showNonGenericShift: ShowNonGenericShiftProc ← NIL ] RETURNS [ accepted: BOOL ] ~ {
-- this uses a scheme obtained orally from Ed Satterthwaite on Nov 7, 1985.
-- I am begining to suspect I have not got the search strategy right,
-- I am breadth first on insertions, but not on deletions?
-- state stack and saved reductions for later play to caller
-- old stack is as the stack appears just before applying firstRed
-- current stack is as the stack appears just after appling the last reduction chained from firstRed
oldStack: StackCell ← NEW[StackCellBody ← [table.startState, 0, 0, NIL]];
currentStack: StackCell ← oldStack;
-- these reduction cells form a linear list from firstRed to LastRed
-- last shift points into this list.
firstRed: ReductionCell ← NIL;
lastShift: ReductionCell ← NIL; -- always a shift
lastRed: ReductionCell ← NIL;
-- results in a shift, a single reduction, an acceptance, or a failure
-- Applies one parser step for the given token: performs a shift, a single
-- reduction (with its goto), or an acceptance, recording the step on the
-- firstRed..lastRed list; returns fail (recording nothing) when the table
-- has no action for [currentStack.state, token].
SimpleTryToken: PROC [ tokenKind: IO.TokenKind, tokenText: ROPE, position: INT32 ] RETURNS [ SimpleTryTokenCase ] ~ {
terminalSeqCode: CARD16;
terminalSeqInfo: ROPE;
tokenCase: TokenCase;
action: ActionCell;
[terminalSeqCode, terminalSeqInfo, tokenCase] ← LookUpCedarToken[table.tokenTable, tokenKind, tokenText];
action ← LookUpAction[table.actionTable, currentStack.state, terminalSeqCode];
IF action = NIL THEN RETURN[fail];
SELECT action.action FROM
shift => {
redCell: ReductionCell;
currentStack ← NEW[StackCellBody ← [action.nextState, position, Rope.Length[tokenText], currentStack]];
redCell ← NEW[ReductionCellBody ← [shift, position, , , terminalSeqCode, tokenText, tokenCase, tokenKind, lastShift, currentStack, NIL]];
IF lastRed # NIL THEN lastRed.next ← redCell ELSE firstRed ← redCell;
lastRed ← redCell;
lastShift ← redCell;
RETURN[shift];
};
reduce => {
shift: ActionCell;
firstPosition: INT32 ← position;
-- fixed: the "←" was lost from the following declaration in the source
limitPosition: INT32 ← IF action.ruleSize = 0
THEN position
ELSE currentStack.firstPosition + currentStack.length;
redCell: ReductionCell;
-- pop ruleSize states; the popped span determines the reduction's extent
FOR I: CARD16 IN [0..action.ruleSize) DO
firstPosition ← currentStack.firstPosition;
currentStack ← currentStack.next;
ENDLOOP;
redCell ← NEW[ReductionCellBody ← [reduction, firstPosition, action.ruleNumber, limitPosition-firstPosition, , , , , , , NIL]];
IF lastRed # NIL THEN lastRed.next ← redCell ELSE firstRed ← redCell;
lastRed ← redCell;
-- the goto: shifting the rule's left side from the uncovered state
shift ← LookUpAction[table.actionTable, currentStack.state, action.leftSide];
IF shift.action # shift THEN ERROR; -- NOTE(review): local `shift` shadows the enum literal here; presumably the compiler resolves the right-hand `shift` by type — confirm before touching
currentStack ← NEW[StackCellBody ← [shift.nextState, firstPosition, limitPosition-firstPosition, currentStack]];
RETURN[reduction]
};
accept => {
redCell: ReductionCell ← NEW[ReductionCellBody ← [accept, , , , , , , , , , NIL]];
IF lastRed # NIL THEN lastRed.next ← redCell ELSE firstRed ← redCell;
lastRed ← redCell;
RETURN[accept];
};
ENDCASE => ERROR;
};
-- repeats calls on SimpleTryToken, so that all reductions are done, up to and including a shift, acceptance, or failure.
-- On failure the recorded steps since the last shift are discarded (ResetToShift).
FullTryToken: PROC [ tokenKind: IO.TokenKind, tokenText: ROPE, position: INT32 ]
RETURNS [ SimpleTryTokenCase ] ~ {
DO
SELECT SimpleTryToken[tokenKind, tokenText, position] FROM
reduction => LOOP;
shift => RETURN[shift];
accept => RETURN[accept];
fail => {
ResetToShift[];
RETURN[fail];
};
ENDCASE => ERROR;
ENDLOOP;
};
-- Discards all unplayed steps and rewinds the parse stack to oldStack.
ResetStack: PROC ~ {
currentStack ← oldStack;
firstRed ← lastShift ← lastRed ← NIL;
};
-- plays successive reductions up to and including a shift or acceptance
-- Reports each recorded step to the caller's Show* procedures and advances
-- firstRed past it; returns TRUE when an acceptance was played.
Play1TokenWorth: PROC RETURNS [ accepted: BOOL ] ~ {
DO
SELECT firstRed.case FROM
reduction => {
IF showReduce # NIL
THEN showReduce[firstRed.rule, firstRed.firstCharPosition, firstRed.length];
firstRed ← firstRed.next;
IF firstRed = NIL
THEN lastShift ← lastRed ← NIL;
};
shift => {
IF firstRed.tokenCase = generic
THEN {
IF showGenericShift # NIL
THEN showGenericShift[firstRed.code, firstRed.tokenKind, firstRed.text, firstRed.firstCharPosition];
}
ELSE {
IF showNonGenericShift # NIL
THEN showNonGenericShift[firstRed.text, firstRed.firstCharPosition];
};
firstRed.previous ← NIL; -- this shift is now committed; cut the back link
oldStack ← firstRed.stack;
firstRed ← firstRed.next;
IF firstRed = NIL
THEN lastShift ← lastRed ← NIL;
RETURN[FALSE];
};
accept => {
firstRed ← firstRed.next;
IF firstRed = NIL
THEN lastShift ← lastRed ← NIL;
RETURN[TRUE];
};
ENDCASE => ERROR;
ENDLOOP;
};
-- Discards recorded steps after the most recent shift (or all of them when no
-- shift has been recorded), restoring the matching parse stack.
ResetToShift: PROC ~ {
IF lastShift # NIL
THEN {
lastRed ← lastShift;
lastRed.next ← NIL;
currentStack ← lastRed.stack;
}
ELSE {
lastRed ← NIL;
currentStack ← oldStack;
};
};
-- Backs the recorded-step list up by one shifted token (and its following
-- reductions), restoring the stack to just after the previous shift.
Reset1TokenWorth: PROC ~ {
lastRed ← lastShift;
lastShift ← lastRed.previous;
lastRed.previous ← NIL;
ResetToShift[];
};
-- Backs up n shifted tokens' worth of recorded steps.
ResetNTokensWorth: PROC [n: CARD16] ~ {
FOR I: CARD16 IN [0..n) DO Reset1TokenWorth[] ENDLOOP;
};
-- now we deal with delivery of tokens. this mechanism allows back up and replay of tokens without any assumptions about the ultimate source of tokens
firstToken: SourceTokenCell ← NIL; -- head of the saved-token list
currentToken: SourceTokenCell ← NIL; -- next saved token to deliver; NIL means fetch a fresh one
lastToken: SourceTokenCell ← NIL; -- tail of the saved-token list
-- Delivers the next token, fetching from getSourceToken only when the replay
-- list is exhausted; every fetched token is appended to the list for replay.
GetToken: PROC RETURNS [ IO.TokenKind, ROPE, INT32 ] ~ {
t: SourceTokenCell;
IF currentToken = NIL THEN {
currentToken ← NEW[SourceTokenCellBody];
[currentToken.kind, currentToken.text, currentToken.position] ← getSourceToken[];
IF firstToken # NIL
THEN lastToken.next ← currentToken
ELSE firstToken ← currentToken;
lastToken ← currentToken;
};
t ← currentToken;
currentToken ← currentToken.next;
RETURN[t.kind, t.text, t.position];
};
-- Rewinds token delivery to the start of the saved list.
ResetTokens: PROC ~ {
currentToken ← firstToken;
};
-- Permanently discards the oldest saved token.
DeleteOneToken: PROC ~ {
IF currentToken = firstToken THEN currentToken ← firstToken.next;
firstToken ← firstToken.next;
};
-- syntactic error recovery involves trying the deletion of some incoming tokens and the insertion of other plausible tokens. The following data records the current best such attempt as well as the currently explored attempt.
currentInsertions: TrialTokenCell ← NIL; -- insertion list of the attempt being explored (most recent first)
bestInsertions: TrialTokenCell ← NIL; -- insertion list of the best attempt seen so far
bestDeletions: CARD16 ← 0; -- deletions of the best attempt
bestScore: INT32 ← 0; -- parse-ahead achieved by the best attempt
-- Pops the most recently tried insertion.
BackUpOneTrialInsertion: PROC ~ {
currentInsertions ← currentInsertions.next;
};
-- Pushes a token onto the current trial-insertion list.
AddOneTrialInsertion: PROC [ kind: IO.TokenKind, text: ROPE ] ~ {
cell: TrialTokenCell ← NEW[TrialTokenCellBody ← [kind, text, currentInsertions]];
currentInsertions ← cell;
};
-- Remembers the current attempt if it parsed further ahead than any before it.
RecordTrialScore: PROC [ nDeletions: INT32, score: INT32 ] ~ {
IF score > bestScore THEN {
bestInsertions ← currentInsertions;
bestDeletions ← nDeletions;
bestScore ← score;
};
};
-- Remembers the current attempt as a confirmed fix (maximum score).
RecordFoundFix: PROC [ nDeletions: INT32 ] ~ {
bestInsertions ← currentInsertions;
bestDeletions ← nDeletions;
bestScore ← DesiredSuccesses;
};
-- Clears all recovery bookkeeping for a fresh exploration.
ResetTrials: PROC ~ {
currentInsertions ← bestInsertions ← NIL;
bestDeletions ← bestScore ← 0;
};
-- Calls `for` on the best attempt's insertions in the order they were tried
-- (the list is stored most-recent-first, hence the recursive reversal).
PlayTrialInsertions: PROC [ for: PROC [ IO.TokenKind, ROPE ] ] ~ {
SubPlayTrialInsertions: PROC [ cell: TrialTokenCell ] ~ {
IF cell.next # NIL THEN SubPlayTrialInsertions[cell.next];
for[cell.kind, cell.text];
};
IF bestInsertions # NIL THEN SubPlayTrialInsertions[bestInsertions];
};
-- Recursively searches for the best [deletions, insertions] repair: first
-- tries deleting tryNDeletions tokens and parsing ahead up to
-- DesiredSuccesses tokens; parsing that far (or accepting) raises FoundAFix.
-- Otherwise records the score achieved, then recurs with each plausible
-- inserted token and one fewer deletion.
ExploreForBestRecovery: PROC [ nInsertsSoFar: INT32, tryNDeletions: INT32 ] ~ {
-- first see if we can get away by trying an appropriate number of deletions without any insertions
FOR I: INT32 IN [0..tryNDeletions) DO [] ← GetToken[] ENDLOOP;
FOR I: INT32 IN [0..DesiredSuccesses) DO
tKind: IO.TokenKind; tText: ROPE; tPosition: INT32;
[tKind, tText, tPosition] ← GetToken[];
SELECT FullTryToken[tKind, tText, tPosition] FROM
shift => IF I.SUCC # DesiredSuccesses THEN LOOP
ELSE -- we parsed ahead as far as we wanted, go with this
{RecordFoundFix[tryNDeletions]; ResetNTokensWorth[I.SUCC]; FoundAFix[]};
accept => {RecordFoundFix[tryNDeletions]; ResetNTokensWorth[I]; FoundAFix[]};
fail => {
-- we couldn't parse as many input tokens as we wanted
-- but we got enough to record
IF I >= MinSuccesses
THEN RecordTrialScore[tryNDeletions, I];
ResetNTokensWorth[I];
EXIT;
};
ENDCASE => ERROR;
ENDLOOP; -- success loop
-- we didn't parse ahead as far as we wanted, so reset
ResetTokens[];
-- now try each plausible inserted token, but with one fewer deletions
IF tryNDeletions > 0 THEN {
SeeOneTokenToInsert: PROC [ kind: IO.TokenKind, text: ROPE ] ~ {
AddOneTrialInsertion[kind, text];
SELECT FullTryToken[kind, text, 0] FROM
shift => {
ExploreForBestRecovery[nInsertsSoFar.SUCC, tryNDeletions-1];
Reset1TokenWorth[];
};
fail => NULL;
accept => ResetToShift[];
ENDCASE => ERROR;
BackUpOneTrialInsertion[];
};
GenPlausableInsertedTokens[table, currentStack.state, SeeOneTokenToInsert];
-- this might generate FoundAFix, which will be caught by our ultimate caller
};
};
-- following is the main loop that drives the parser
-- A token is only committed (played to the caller and removed from the replay
-- list) after the NEXT token has also parsed successfully, so there is always
-- one token of lookahead available for error recovery.
successfulTrys: CARD16 ← 0;
DO
kind: IO.TokenKind;
text: ROPE;
position: INT32;
[kind, text, position] ← GetToken[];
SELECT FullTryToken[kind, text, position] FROM
shift => {
IF successfulTrys = 0
THEN successfulTrys ← 1
ELSE {
-- two source tokens in a row have worked, so show first to caller
IF Play1TokenWorth[] THEN ERROR; -- can't produce an acceptance
DeleteOneToken[]; -- from the saved input tokens
};
};
accept => {
IF Play1TokenWorth[] THEN {
IF successfulTrys = 1 THEN ERROR ELSE RETURN[TRUE];
};
IF successfulTrys # 1 THEN ERROR;
IF Play1TokenWorth[] THEN RETURN[TRUE];
ERROR; -- one of them should have produced an acceptance
};
fail => {
-- first forget what we have seen
successfulTrys ← 0;
ResetStack[];
ResetTokens[];
ResetTrials[];
-- now conduct an exploration for a replacement
FOR diag: INT32 IN [0..MaxSkip] DO
ExploreForBestRecovery[0, diag
! FoundAFix => {
-- found a replacement that results in acceptance or good match
-- we could try to replay the current stack, but it is easier to program using the code for "go with the best".
EXIT
}
];
ENDLOOP;
-- did not find a replacement that resulted in acceptance or good match
-- so, go with the best
-- (or, did find one, and are defaulting to this code)
{
reportStream: IO.STREAM ← GetReportStream[];
msgStream: IO.STREAM ← IO.ROS[]; -- fixed: the "←" was lost from this declaration in the source
nInsertions: CARD16 ← 0;
ReportOneInsertion: PROC [ kind: IO.TokenKind, text: ROPE ] ~ {
IO.PutF[msgStream, " %g", IO.rope[text]];
};
TryOneTrialInsertion: PROC [ kind: IO.TokenKind, text: ROPE ] ~ {
nInsertions ← nInsertions.SUCC;
SELECT FullTryToken[kind, text, 0] FROM
shift, accept => RETURN;
fail => ERROR; -- the best repair was verified to parse; it cannot fail now
ENDCASE;
};
ResetStack[];
ResetTokens[];
IO.PutF[msgStream, "\n\nSyntaxError at %g\n", IF firstToken=NIL THEN IO.rope["<unknown place>"] ELSE IO.int[firstToken.position]];
IF bestDeletions # 0 THEN IO.PutF[msgStream, "\tdeleting"];
FOR I: CARD16 IN [0..bestDeletions) DO
text: ROPE;
[, text, ] ← GetToken[];
IO.PutF[msgStream, " %g", IO.rope[text]];
ENDLOOP;
ResetTokens[];
IO.PutF[msgStream, "\tinserting"];
PlayTrialInsertions[ReportOneInsertion];
IO.PutF[msgStream, "\n\n"];
reportStream.PutF["%g", IO.rope[IO.RopeFromROS[msgStream]]];
IF bestScore < MinSuccesses THEN UnRecoverableSyntaxError[];
-- apply the repair: drop the deleted tokens, then shift the insertions
FOR I: CARD16 IN [0..bestDeletions) DO DeleteOneToken[] ENDLOOP;
PlayTrialInsertions[TryOneTrialInsertion];
FOR I: CARD16 IN [0..nInsertions) DO
IF Play1TokenWorth[] THEN {
IF I = (nInsertions - 1) THEN RETURN[TRUE] ELSE ERROR;
};
ENDLOOP;
successfulTrys ← 0;
};
};
ENDCASE => ERROR;
ENDLOOP;
};
-- Calls `for` once for each terminal token that has some action in `state`,
-- i.e. each token whose insertion could allow the parse to continue.
GenPlausableInsertedTokens: PROC [ table: ParserTable, state: State,
for: PROC [ IO.TokenKind, ROPE ] ] ~ {
-- we try each possible terminal token, and generate those that result in a non NIL actionCell
-- eventually we want to control the order in which the terminal tokens are tried
FOR cellX: CARD16 IN [0..table.tokenTable.nSlots) DO
FOR trialTokenCell: TokenCell ← table.tokenTable.tokens[cellX], trialTokenCell.next WHILE trialTokenCell # NIL DO
actionCell: ActionCell ← LookUpAction[table.actionTable, state, trialTokenCell.symbolCode];
IF actionCell # NIL THEN {
-- fixed: the "←" was lost from the following declaration in the source
text: ROPE ← IF trialTokenCell.case = unique THEN trialTokenCell.spelling ELSE
SELECT trialTokenCell.kind FROM
tokenID => "id",
tokenDECIMAL, tokenOCTAL, tokenHEX, tokenREAL => "num",
tokenROPE => "rope",
tokenCHAR => "char",
tokenATOM => "atom",
tokenEOF => "eof",
ENDCASE => ERROR;
for[trialTokenCell.kind, text];
};
ENDLOOP;
ENDLOOP;
};
}.