-- NewParserImpl.mesa
-- last edit September 18, 1984 9:00:29 am PDT
-- Sturgis, January 2, 1986 11:03:23 am PST
DIRECTORY
BasicTime USING[GMT, Period],
Commander USING[CommandProc, Handle, Register],
FS USING[Close, ComponentPositions, Error, ExpandName, GetInfo, GetName, Open, OpenFile, OpenFileFromStream, StreamFromOpenFile, StreamOpen],
IO USING[STREAM, CharClass, Close, EndOfStream, Error, GetCard, GetCedarTokenRope, GetTokenRope, IDProc, int, PutF, PutRope, card, RIS, rope, RopeFromROS, ROS, SP, SkipWhitespace, TokenKind],
NewParserData USING[KipperParserTable, UnKipperParserTable],
NewParserPrivate USING[ParserTableBody, SymbolTable, SymbolTableBody, SymbolCell, SymbolCellBody, TokenTable, TokenTableBody, TokenCell, TokenCellBody, TokenCase, ActionTable, ActionTableBody, ActionCell, ActionCellBody, Action],
KipperSupport USING[CreateKipperer, CreateUnKipperer, Kipperer, UnKipperer],
Parser USING[State],
ParserExtra USING[],
ProcessProps USING[GetProp],
Rope USING[Cat, Equal, Fetch, Length, ROPE, Substr];
NewParserImpl: CEDAR PROGRAM IMPORTS BasicTime, Commander, FS, IO, KipperSupport, NewParserData, ProcessProps, Rope EXPORTS Parser, ParserExtra =
BEGIN OPEN IO, NewParserPrivate, Parser, Rope;
-- ParserTable is the concrete representation behind the opaque type declared in NewParserPrivate.
ParserTable: TYPE = REF ParserTableBody;
ParserTableBody: PUBLIC TYPE = NewParserPrivate.ParserTableBody;
-- parser table procedures
-- issue: want to use either EOF or client specified terminator
-- Allocates a fresh, empty parser table, pre-loaded with the empty symbol
-- (which gets symbol code 0) and the EOF token that maps to it.
CreateParserTable: PROC RETURNS[ParserTable] =
{
newTable: ParserTable ← NEW[ParserTableBody];
RecordSymbol[newTable, ""]; -- symbol 0: the empty seq
RecordTokenInfo[newTable, "", tokenEOF, "", generic]; -- EOF token maps to symbol 0
RETURN[newTable]
};
-- Adds one grammar symbol; symbols receive consecutive codes in recording order
-- (code 0 is the empty sequence recorded by CreateParserTable).
RecordSymbol: PROC[table: ParserTable, name: ROPE] =
BEGIN
IF table.symbolTable # NIL THEN ERROR; -- must record all symbols before any actions
table.unAnalyzedSymbols ← NEW[SymbolCellBody←[name, table.nSymbols, table.unAnalyzedSymbols]];
table.nSymbols ← table.nSymbols+1;
END;
-- Records a token with a fixed spelling (a keyword or punctuation mark); the
-- spelling is scanned once with GetCedarTokenRope to classify its TokenKind.
RecordUniqueToken: PROC[table: ParserTable, name: ROPE, spelling: ROPE] =
BEGIN
spellingStream: STREAM ← RIS[spelling];
kind: TokenKind;
-- the following hack is because GetCedarToken fails on |
-- of course, the scanner will still fail if it ever sees a |
IF Equal[spelling, "|"] THEN
BEGIN
kind ← tokenSINGLE;
END
ELSE
BEGIN
kind ← GetCedarTokenRope[spellingStream].tokenKind;
END;
Close[spellingStream];
IF kind # tokenID AND NOT HashKind[kind].includeRope THEN ERROR; -- kind must admit per-spelling lookup
RecordTokenInfo[table, name, kind, spelling, unique];
END;
-- Records a generic token class (e.g. any identifier); class names the
-- TokenKind literally, e.g. "tokenID" (see DecodeTokenKind).
RecordGenericToken: PROC[table: ParserTable, name: ROPE, class: ROPE] =
BEGIN
kind: TokenKind ← DecodeTokenKind[class];
IF kind # tokenID AND HashKind[kind].includeRope THEN ERROR; -- spelling-hashed kinds must be recorded as unique tokens
RecordTokenInfo[table, name, kind, "", generic];
END;
-- Common tail of the token-recording procedures: pushes an un-analyzed token
-- cell. The symbolCode field (second initializer, 0) is filled in later by AnalyzeTokens.
RecordTokenInfo: PROC[table: ParserTable, name: ROPE, kind: TokenKind, spelling: ROPE, case: TokenCase] =
BEGIN
IF table.tokenTable # NIL THEN ERROR; -- must record all tokens before any actions
table.unAnalyzedTokens ← NEW[TokenCellBody←[name, 0, kind, spelling, case, table.unAnalyzedTokens]];
table.nTokens ← table.nTokens+1;
END;
-- Records a shift action: in state, seeing terminalSeq, push newState.
-- The first action recorded freezes the symbol/token sets (AnalyzeSymbols).
RecordShift: PROC[table: ParserTable, state: State, terminalSeq: ROPE, newState: State] =
BEGIN
IF table.symbolTable = NIL THEN AnalyzeSymbols[table];
table.unAnalyzedActions ← NEW[ActionCellBody ←[
state: state,
terminal: LookUpSymbol[table.symbolTable, terminalSeq],
action: shift,
nextState: newState,
ruleNumber: 0, -- rule fields unused for shifts
leftSide: 0,
ruleSize: 0,
next: table.unAnalyzedActions]];
table.nActions ← table.nActions+1;
END;
-- Records a reduction action: in state, seeing terminalSeq, reduce by ruleNumber,
-- popping ruleSize stack entries and then shifting on leftSide.
RecordReduction: PROC[table: ParserTable, state: State, terminalSeq: ROPE, leftSide: ROPE, ruleNumber: CARDINAL, ruleSize: CARDINAL] =
BEGIN
IF table.symbolTable = NIL THEN AnalyzeSymbols[table];
table.unAnalyzedActions ← NEW[ActionCellBody ←[
state: state,
terminal: LookUpSymbol[table.symbolTable, terminalSeq],
action: reduce,
nextState: 0, -- unused for reductions
ruleNumber: ruleNumber,
leftSide: LookUpSymbol[table.symbolTable, leftSide],
ruleSize: ruleSize,
next: table.unAnalyzedActions]];
table.nActions ← table.nActions+1;
END;
-- Records an acceptance action for (state, terminalSeq).
RecordAcceptance: PROC[table: ParserTable, state: State, terminalSeq: ROPE] =
BEGIN
IF table.symbolTable = NIL THEN AnalyzeSymbols[table];
table.unAnalyzedActions ← NEW[ActionCellBody ←[
state: state,
terminal: LookUpSymbol[table.symbolTable, terminalSeq],
action: accept,
nextState: 0, -- remaining fields unused for acceptance
ruleNumber: 0,
leftSide: 0,
ruleSize: 0,
next: table.unAnalyzedActions]];
table.nActions ← table.nActions+1;
END;
-- Records the parser's start state.
RecordStartState: PROC[table: ParserTable, state: State] =
{table.startState ← state};
-- Symbol Table Procedures
-- Moves all recorded symbols from the un-analyzed list into the hashed symbol
-- table, then freezes tokens as well. Called lazily by the first Record*Action.
AnalyzeSymbols: PROC[table: ParserTable] =
BEGIN
symbolTable: SymbolTable;
next: SymbolCell;
IF table.symbolTable # NIL THEN ERROR; -- analyze exactly once
symbolTable ← table.symbolTable ← NEW[SymbolTableBody[table.nSymbols]];
FOR I: CARDINAL IN [0..symbolTable.nSlots) DO symbolTable.symbols[I] ← NIL ENDLOOP;
FOR symbol: SymbolCell ← table.unAnalyzedSymbols, next WHILE symbol # NIL DO
hash: CARDINAL ← HashOneSymbol[symbol.name];
hashCode: CARDINAL ← hash MOD symbolTable.nSlots;
next ← symbol.next; -- save: the cell is re-linked in place onto its hash chain
symbol.next ← symbolTable.symbols[hashCode];
symbolTable.symbols[hashCode] ← symbol;
ENDLOOP;
table.unAnalyzedSymbols ← NIL;
AnalyzeTokens[table];
END;
-- Moves all recorded tokens into the hashed token table, resolving each token's
-- name to its symbol code; also notes the single generic identifier token.
AnalyzeTokens: PROC[table: ParserTable] =
BEGIN
tokenTable: TokenTable;
next: TokenCell;
IF table.tokenTable # NIL THEN ERROR;
tokenTable ← table.tokenTable ← NEW[TokenTableBody[table.nTokens]];
FOR token: TokenCell ← table.unAnalyzedTokens, next WHILE token # NIL DO
spellingHash: CARDINAL ← TokenHash[token.kind, token.spelling].hash;
spellingHashCode: CARDINAL ← spellingHash MOD tokenTable.nSlots;
next ← token.next;
token.symbolCode ← LookUpSymbol[table.symbolTable, token.name];
token.next ← tokenTable.tokens[spellingHashCode];
tokenTable.tokens[spellingHashCode] ← token;
IF token.kind = tokenID AND token.case = generic THEN
BEGIN
IF tokenTable.idToken # NIL THEN ERROR; -- at most one generic identifier token
tokenTable.idToken ← token;
END;
ENDLOOP;
table.unAnalyzedTokens ← NIL;
END;
-- Returns the code of a symbol, found by exact name match; ERROR if absent.
LookUpSymbol: PROC[symbolTable: SymbolTable, name: ROPE] RETURNS[code: CARDINAL] =
BEGIN
hash: CARDINAL ← HashOneSymbol[name];
hashCode: CARDINAL ← hash MOD symbolTable.nSlots;
FOR symbol: SymbolCell ← symbolTable.symbols[hashCode], symbol.next WHILE symbol # NIL DO
IF Equal[symbol.name, name] THEN RETURN[symbol.code];
ENDLOOP;
ERROR; -- unknown symbol
END;
-- Simple additive hash over the rope's characters.
-- NOTE(review): assumes CARDINAL addition here never faults for the symbol names in use — confirm.
HashOneSymbol: PROC[rope: ROPE] RETURNS[hash: CARDINAL] =
BEGIN
hash ← 0;
FOR I: INT IN [0..Length[rope]) DO
hash ← hash + LOOPHOLE[Fetch[rope, I], CARDINAL];
ENDLOOP;
END;
-- following routines are intended for the output of the GetCedarToken routine in IO.mesa
-- Maps a scanned token (kind + text) to its terminal symbol code.
-- For kinds whose spelling participates in hashing (IDs, SINGLE/DOUBLE
-- punctuation), the exact spelling must match; otherwise the kind alone
-- selects the token and the text is returned as info. Unmatched identifiers
-- fall back to the generic id token; anything else unmatched is an ERROR.
LookUpCedarToken: PROC[tokenTable: TokenTable, kind: TokenKind, text: ROPE] RETURNS[code: CARDINAL, info: ROPE, case: TokenCase] =
BEGIN
includeTextInHash: BOOLEAN;
hash: CARDINAL;
hashCode: CARDINAL;
[hash, includeTextInHash] ← TokenHash[kind, text];
hashCode ← hash MOD tokenTable.nSlots;
IF includeTextInHash THEN
FOR token: TokenCell ← tokenTable.tokens[hashCode], token.next WHILE token # NIL DO
IF token.kind = kind AND Equal[token.spelling, text] THEN RETURN[token.symbolCode, NIL, token.case];
ENDLOOP
ELSE
FOR token: TokenCell ← tokenTable.tokens[hashCode], token.next WHILE token # NIL DO
IF token.kind = kind THEN RETURN[token.symbolCode, text, token.case];
ENDLOOP;
IF kind = tokenID AND tokenTable.idToken # NIL THEN RETURN[tokenTable.idToken.symbolCode, text, tokenTable.idToken.case];
ERROR; -- token not known to this grammar
END;
-- Hash for a token: the kind's base hash, plus the characters of text when
-- the kind is spelling-discriminated. Must agree with AnalyzeTokens' hashing.
TokenHash: PROC[kind: TokenKind, text: ROPE] RETURNS[hash: CARDINAL, includeTextInHash: BOOLEAN] =
BEGIN
[hash, includeTextInHash] ← HashKind[kind];
IF includeTextInHash AND text # NIL THEN
FOR I: INT IN [0..Length[text]) DO
hash ← hash + LOOPHOLE[Fetch[text, I], CARDINAL];
ENDLOOP;
END;
-- Base hash per TokenKind; includeRope says whether the token's spelling
-- further discriminates (identifiers and single/double-character tokens).
HashKind: PROC[kind: TokenKind] RETURNS[hash: CARDINAL, includeRope: BOOLEAN] =
BEGIN
SELECT kind FROM
tokenERROR => RETURN[0, FALSE];
tokenID => RETURN[1, TRUE];
tokenDECIMAL => RETURN[2, FALSE];
tokenOCTAL => RETURN[3, FALSE];
tokenHEX => RETURN[4, FALSE];
tokenREAL => RETURN[5, FALSE];
tokenROPE => RETURN[6, FALSE];
tokenCHAR => RETURN[7, FALSE];
tokenATOM => RETURN[8, FALSE];
tokenSINGLE => RETURN[9, TRUE];
tokenDOUBLE => RETURN[10, TRUE];
tokenCOMMENT => RETURN[11, FALSE];
tokenEOF => RETURN[12, FALSE];
ENDCASE => ERROR;
END;
-- Parses the literal name of a TokenKind (e.g. "tokenID") as used in the
-- textual table format; ERROR on an unrecognized name.
DecodeTokenKind: PROC[class: ROPE] RETURNS[TokenKind] =
BEGIN
SELECT TRUE FROM
Equal[class, "tokenERROR"] => RETURN[tokenERROR];
Equal[class, "tokenID"] => RETURN[tokenID];
Equal[class, "tokenDECIMAL"] => RETURN[tokenDECIMAL];
Equal[class, "tokenOCTAL"] => RETURN[tokenOCTAL];
Equal[class, "tokenHEX"] => RETURN[tokenHEX];
Equal[class, "tokenREAL"] => RETURN[tokenREAL];
Equal[class, "tokenROPE"] => RETURN[tokenROPE];
Equal[class, "tokenCHAR"] => RETURN[tokenCHAR];
Equal[class, "tokenATOM"] => RETURN[tokenATOM];
Equal[class, "tokenSINGLE"] => RETURN[tokenSINGLE];
Equal[class, "tokenDOUBLE"] => RETURN[tokenDOUBLE];
Equal[class, "tokenCOMMENT"] => RETURN[tokenCOMMENT];
Equal[class, "tokenEOF"] => RETURN[tokenEOF];
ENDCASE => ERROR;
END;
-- exported Parser procedures
-- A parser instance: an analyzed table plus the current LR state stack.
ParserInstance: TYPE = REF ParserBody;
ParserBody: PUBLIC TYPE = RECORD[
table: ParserTable,
stack: StackCell];
-- One LR stack entry: the state, and the source span it covers.
StackCell: TYPE = REF StackCellBody;
StackCellBody: TYPE = RECORD[
state: State,
firstPosition, length: INT,
next: StackCell];
-- Builds a complete, analyzed parser table by calling genInfo with recording
-- callbacks bound to a fresh table; the caller's generator drives the
-- record* procedures, after which the actions are hashed (AnalyzeActions).
BuildParserTable: PROC[genInfo: PROC[
recordSymbol: PROC[name: ROPE],
recordUniqueToken: PROC[name: ROPE, spelling: ROPE],
recordGenericToken: PROC[name: ROPE, class: ROPE],
recordShift: PROC[state: State, terminalSeq: ROPE, newState: State],
recordReduction: PROC[state: State, terminalSeq: ROPE, leftSide: ROPE, ruleNumber: CARDINAL, ruleSize: CARDINAL],
recordAcceptance: PROC[state: State, terminalSeq: ROPE],
recordStartState: PROC[state: State]]] RETURNS[ParserTable] =
BEGIN
table: ParserTable ← CreateParserTable[];
-- each Local* callback simply curries the shared table into the table-level procedure
LocalRecordSymbol: PROC[symbol: ROPE] = {RecordSymbol[table, symbol]};
LocalRecordUniqueToken: PROC[name: ROPE, spelling: ROPE] ={RecordUniqueToken[table, name, spelling]};
LocalRecordGenericToken: PROC[name: ROPE, class: ROPE] = {RecordGenericToken[table, name, class]};
LocalRecordShift: PROC[state: State, terminalSeq: ROPE, newState: State] =
{RecordShift[table, state, terminalSeq, newState]};
LocalRecordReduction: PROC[state: State, terminalSeq: ROPE, leftSide: ROPE, ruleNumber: CARDINAL, ruleSize: CARDINAL] =
{RecordReduction[table, state, terminalSeq, leftSide, ruleNumber, ruleSize]};
LocalRecordAcceptance: PROC[state: State, terminalSeq: ROPE] =
{RecordAcceptance[table, state, terminalSeq]};
LocalRecordStartState: PROC[state: State] = {RecordStartState[table, state]};
genInfo[LocalRecordSymbol, LocalRecordUniqueToken, LocalRecordGenericToken, LocalRecordShift, LocalRecordReduction, LocalRecordAcceptance, LocalRecordStartState];
AnalyzeActions[table];
RETURN[table]
END;
-- Exported: builds a table via genInfo (see BuildParserTable) and wraps it in a parser.
CreateParser: PUBLIC PROC[genInfo: PROC[
recordSymbol: PROC[name: ROPE],
recordUniqueToken: PROC[name: ROPE, spelling: ROPE],
recordGenericToken: PROC[name: ROPE, class: ROPE],
recordShift: PROC[state: State, terminalSeq: ROPE, newState: State],
recordReduction: PROC[state: State, terminalSeq: ROPE, leftSide: ROPE, ruleNumber: CARDINAL, ruleSize: CARDINAL],
recordAcceptance: PROC[state: State, terminalSeq: ROPE],
recordStartState: PROC[state: State]]] RETURNS[ParserInstance] =
{RETURN[BuildParserFromTable[BuildParserTable[genInfo]]]};
-- Exported: wraps an analyzed table in a fresh parser whose stack holds
-- just the table's start state (covering an empty source span).
BuildParserFromTable: PUBLIC PROC[table: ParserTable] RETURNS[ParserInstance] =
{
bottom: StackCell ← NEW[StackCellBody ← [table.startState, 0, 0, NIL]];
RETURN[NEW[ParserBody ← [table, bottom]]]
};
-- Exported: writes the textual (old-format) table representation to s by
-- driving genInfo with callbacks that print one record per call; the format
-- is what BuildParserTableFromStreamFallBack reads back ("sy", "ut", "gt",
-- "sh", "rd", "ac", "start", terminated by "end").
RecordParserTableOnStream: PUBLIC PROC[s: STREAM, genInfo: PROC[
recordSymbol: PROC[symbol: ROPE],
recordUniqueToken: PROC[name: ROPE, spelling: ROPE],
recordGenericToken: PROC[name: ROPE, class: ROPE],
recordShift: PROC[state: State, terminalSeq: ROPE, newState: State],
recordReduction: PROC[state: State, terminalSeq: ROPE, leftSide: ROPE, ruleNumber: CARDINAL, ruleSize: CARDINAL],
recordAcceptance: PROC[state: State, terminalSeq: ROPE],
recordStartState: PROC[state: State]]] =
BEGIN
LocalRecordSymbol: PROC[symbol: ROPE] =
{PutF[s, "sy %g ", rope[symbol]]};
LocalRecordUniqueToken: PROC[name: ROPE, spelling: ROPE] =
{PutF[s, "ut %g %g ", rope[name], rope[spelling]]};
LocalRecordGenericToken: PROC[name: ROPE, class: ROPE] =
{PutF[s, "gt %g %g ", rope[name], rope[class]]};
LocalRecordShift: PROC[state: State, terminalSeq: ROPE, newState: State] =
{PutF[s, "sh %g <%g> %g ", card[state], rope[terminalSeq], card[newState]]};
LocalRecordReduction: PROC[state: State, terminalSeq: ROPE, leftSide: ROPE, ruleNumber: CARDINAL, ruleSize: CARDINAL] =
{PutF[s, "rd %g <%g> %g %g %g ", card[state], rope[terminalSeq], rope[leftSide], card[ruleNumber], card[ruleSize]]};
LocalRecordAcceptance: PROC[state: State, terminalSeq: ROPE] =
{PutF[s, "ac %g <%g> ", card[state], rope[terminalSeq]]};
LocalRecordStartState: PROC[state: State] = {PutF[s, "start %g ", card[state]]};
genInfo[LocalRecordSymbol, LocalRecordUniqueToken, LocalRecordGenericToken, LocalRecordShift, LocalRecordReduction, LocalRecordAcceptance, LocalRecordStartState];
PutRope[s, " end "];
END;
-- Exported: reads a table from s (kippered or textual) and wraps it in a parser.
CreateParserFromStream: PUBLIC PROC[s: STREAM] RETURNS[ParserInstance] =
{RETURN[BuildParserFromTable[BuildParserTableFromStream[s]]]};
-- Following two procedures are used for building parser tables from info recorded on files. BuildParserTableFromStreamFallBack uses the old style tables, and BuildParserTableFromStream tries to use the new kippered format. It does so in a very awkward manner that will eventually be replaced. This awkwardness is designed to support old interfaces that are used by machine generated code. BuildParserTableFromStream assumes that it has been handed a stream backed by the old format file; it tries to find a new format file of a later date, and if successful uses the new format file. One fix would be to do away with the old format entirely, but I am not sure I will take that route.
-- Reads the textual (old-format) table representation from s — the format
-- written by RecordParserTableOnStream — and builds an analyzed table.
BuildParserTableFromStreamFallBack: PROC[s: STREAM] RETURNS[ParserTable] =
BEGIN
GenInfo: PROC[
recordSymbol: PROC[ROPE],
recordUniqueToken: PROC[ROPE, ROPE],
recordGenericToken: PROC[ROPE, ROPE],
recordShift: PROC[State, ROPE, State],
recordReduction: PROC[State, ROPE, ROPE, CARDINAL, CARDINAL],
recordAcceptance: PROC[State, ROPE],
recordStartState: PROC[State]] =
BEGIN
-- tokens are separated by single spaces only
Break: PROC [char: CHAR] RETURNS [CharClass] =
{RETURN[IF char = SP THEN sepr ELSE other]};
WHILE TRUE DO
firstToken: ROPE ← GetTokenRope[s, Break].token; -- the record tag: sy/ut/gt/sh/rd/ac/start/end
-- reads a <...>-bracketed terminal sequence, stripping the angle brackets
GetTermSeq: PROC RETURNS[ROPE] =
BEGIN
text: ROPE;
[] ← SkipWhitespace[s];
text ← GetTokenRope[s, Break].token;
RETURN[Substr[text, 1, Length[text]-2]];
END;
SkipAndGetCard: PROC RETURNS[CARDINAL] =
BEGIN
[] ← SkipWhitespace[s];
RETURN[GetCard[s]];
END;
SELECT TRUE FROM
Equal[firstToken, "sy"] =>
recordSymbol[GetTokenRope[s, Break].token];
Equal[firstToken, "ut"] =>
BEGIN
name: ROPE ← GetTokenRope[s, Break].token;
spelling: ROPE ← GetTokenRope[s, Break].token;
recordUniqueToken[name, spelling]
END;
Equal[firstToken, "gt"] =>
BEGIN
name: ROPE ← GetTokenRope[s, Break].token;
class: ROPE ← GetTokenRope[s, Break].token;
recordGenericToken[name, class];
END;
Equal[firstToken, "sh"] =>
BEGIN
state: CARDINAL ← SkipAndGetCard[];
terms: ROPE ← GetTermSeq[];
newState: CARDINAL ← SkipAndGetCard[];
recordShift[state, terms, newState];
END;
Equal[firstToken, "rd"] =>
BEGIN
state: CARDINAL ← SkipAndGetCard[];
terms: ROPE ← GetTermSeq[];
leftSide: ROPE ← GetTokenRope[s, Break].token;
ruleNumber: CARDINAL ← SkipAndGetCard[];
ruleSize: CARDINAL ← SkipAndGetCard[];
recordReduction[state, terms, leftSide, ruleNumber, ruleSize];
END;
Equal[firstToken, "ac"] =>
BEGIN
state: CARDINAL ← SkipAndGetCard[];
terms: ROPE ← GetTermSeq[];
recordAcceptance[state, terms];
END;
Equal[firstToken, "start"] =>
recordStartState[SkipAndGetCard[]];
Equal[firstToken, "end"] => EXIT; -- "end" terminates the table
ENDCASE => ERROR; -- malformed record tag
ENDLOOP;
END;
RETURN[BuildParserTable[GenInfo]];
END;
-- Exported: given a stream backed by a .parseTables file, prefers a newer
-- sibling .kipperedTables file (deserialized via NewParserData); on any
-- failure falls back to parsing the textual stream (see comment above).
BuildParserTableFromStream: PUBLIC PROC[s: STREAM] RETURNS[ParserTable] =
BEGIN
unKipperedName: Rope.ROPE;
kipperedName: Rope.ROPE;
fullName: Rope.ROPE; components: FS.ComponentPositions;
TRUSTED BEGIN
backingFile: FS.OpenFile ← FS.OpenFileFromStream[s
! IO.Error => GOTO dontTry]; -- stream not backed by a file
backingFileCreateDate: BasicTime.GMT ← FS.GetInfo[backingFile].created;
kipperedFile: FS.OpenFile;
kipperedFileCreateDate: BasicTime.GMT;
kipperedStream: IO.STREAM;
unkipperer: KipperSupport.UnKipperer;
table: ParserTable;
--cmd: Commander.Handle;
unKipperedName ← FS.GetName[backingFile].fullFName;
[fullName, components] ← FS.ExpandName[unKipperedName];
IF NOT Rope.Equal["parseTables", Rope.Substr[fullName, components.ext.start, components.ext.length]] THEN GOTO dontTry;
kipperedName ← Rope.Cat[Rope.Substr[fullName, components.base.start, components.base.length], ".kipperedTables"];
kipperedFile ← FS.Open[kipperedName
! FS.Error => GOTO dontTry]; -- no kippered sibling exists
kipperedFileCreateDate ← FS.GetInfo[kipperedFile].created;
IF BasicTime.Period[backingFileCreateDate, kipperedFileCreateDate] <= 0 THEN
BEGIN -- dont try: kippered file is stale (older than the textual file)
FS.Close[kipperedFile];
GOTO dontTry;
END;
kipperedStream ← FS.StreamFromOpenFile[kipperedFile];
unkipperer ← KipperSupport.CreateUnKipperer[kipperedStream];
table ← NewParserData.UnKipperParserTable[unkipperer];
--cmd ← NARROW[ProcessProps.GetProp[$CommanderHandle]];
--IO.PutF[cmd.out, "\N\Ncongratulations, you are about to parse using Kippered tables\N\N"];
IO.Close[kipperedStream];
-- FS.Close[kipperedFile]; seems to be accomplished by the IO.Close?
RETURN[table];
EXITS
dontTry =>
BEGIN
-- NOTE(review): if dontTry was reached before ExpandName/Cat ran, fullName and
-- kipperedName are still NIL here — presumably tolerated downstream; confirm.
rootName: Rope.ROPE ← Rope.Cat[Rope.Substr[fullName, 0, components.ext.start-1]];
cmd: Commander.Handle ← NARROW[ProcessProps.GetProp[$CommanderHandle]];
IO.PutF[cmd.out, "\N\NWarning\N\NFailed to find a file named %g with an appropriate date, so will fall back on un kippered parser tables in %g\N", IO.rope[kipperedName], IO.rope[unKipperedName]];
IO.PutF[cmd.out, "\NFor future use, you might want to contruct a file with kippered parser tables by executing:\N\NConvertParserToKippered %g\N\N", IO.rope[rootName]];
END;
END;
-- failed to find kippered tables, so fall back case
RETURN[BuildParserTableFromStreamFallBack[s]];
END;
-- Exported: feeds one scanned token to the parser, performing all enabled
-- reductions (reporting each via showReduce) until the token is shifted
-- (reporting generic shifts via showGenericShift) or the input is accepted.
-- Returns TRUE on acceptance; raises SyntaxError if no action applies.
AbsorbCedarToken: PUBLIC PROC[parser: ParserInstance, tokenKind: TokenKind, tokenText: ROPE, position: INT, showGenericShift: PROC[code: CARDINAL, text: ROPE, firstCharPosition: INT], showReduce: PROC[rule: CARDINAL, firstCharPosition: INT, length: INT]] RETURNS[accepted: BOOLEAN] =
BEGIN
terminalSeqCode: CARDINAL;
terminalSeqInfo: ROPE;
tokenCase: TokenCase;
[terminalSeqCode, terminalSeqInfo, tokenCase] ← LookUpCedarToken[parser.table.tokenTable, tokenKind, tokenText];
WHILE TRUE DO
action: ActionCell ← LookUpAction[parser.table.actionTable, parser.stack.state, terminalSeqCode];
IF action = NIL THEN ERROR SyntaxError[position];
SELECT action.action FROM
shift =>
BEGIN
parser.stack ← NEW[StackCellBody ← [action.nextState, position, Rope.Length[tokenText], parser.stack]];
IF tokenCase = generic THEN showGenericShift[terminalSeqCode, tokenText, position];
RETURN[FALSE]; -- token consumed, not yet accepted
END;
reduce =>
BEGIN
shift: ActionCell;
firstPosition: INT ← position;
-- span of the reduced phrase: empty rules sit at the current position
limitPosition: INT ← IF action.ruleSize = 0 THEN position ELSE parser.stack.firstPosition + parser.stack.length;
FOR I: CARDINAL IN [0..action.ruleSize) DO
firstPosition ← parser.stack.firstPosition;
parser.stack ← parser.stack.next;
ENDLOOP;
showReduce[action.ruleNumber, firstPosition, limitPosition-firstPosition];
shift ← LookUpAction[parser.table.actionTable, parser.stack.state, action.leftSide];
IF shift.action # shift THEN ERROR; -- NOTE(review): local "shift" shadows the Action literal here; presumably the literal is meant — confirm
parser.stack ← NEW[StackCellBody ← [shift.nextState, firstPosition, limitPosition-firstPosition, parser.stack]];
END;
accept => RETURN[TRUE];
ENDCASE => ERROR;
ENDLOOP;
END;
-- Raised (with the offending source position) when no action applies.
SyntaxError: PUBLIC ERROR[position: INT] = CODE;
-- Action Table procedures
-- Moves all recorded actions from the un-analyzed list into the hashed
-- action table; called once by BuildParserTable after genInfo finishes.
AnalyzeActions: PROC[table: ParserTable] =
BEGIN
actionTable: ActionTable;
next: ActionCell;
IF table.actionTable # NIL THEN ERROR; -- analyze exactly once
actionTable ← table.actionTable ← NEW[ActionTableBody[table.nActions]];
FOR I: CARDINAL IN [0..actionTable.nSlots) DO actionTable.actions[I] ← NIL ENDLOOP;
FOR action: ActionCell ← table.unAnalyzedActions, next WHILE action # NIL DO
hash: CARDINAL ← HashOneAction[action.state, action.terminal];
hashCode: CARDINAL ← hash MOD actionTable.nSlots;
next ← action.next; -- cells are re-linked in place onto hash chains
action.next ← actionTable.actions[hashCode];
actionTable.actions[hashCode] ← action;
ENDLOOP;
table.unAnalyzedActions ← NIL;
END;
-- Finds the action for (state, terminalCode); NIL means syntax error.
LookUpAction: PROC[actionTable: ActionTable, state: State, terminalCode: CARDINAL] RETURNS[ActionCell] =
BEGIN
hash: CARDINAL ← HashOneAction[state, terminalCode];
hashCode: CARDINAL ← hash MOD actionTable.nSlots;
FOR action: ActionCell ← actionTable.actions[hashCode], action.next WHILE action # NIL DO
IF action.state = state AND action.terminal = terminalCode THEN
RETURN[action];
ENDLOOP;
RETURN[NIL];
END;
-- Hash for an (state, terminal) pair. NOTE(review): terminal 0 maps every
-- state to bucket 0, and the product may be large before MOD — presumably
-- acceptable in practice; do not change without considering kippered tables.
HashOneAction: PROC[state: CARDINAL, terminal: CARDINAL] RETURNS[hash: CARDINAL] = INLINE
{RETURN[(state*terminal) MOD 256]};
-- fancy parser stuff
-- A pending parser event (reduction, shift, or acceptance), buffered during
-- error-recovery exploration and replayed to the caller once confirmed.
ReductionCell: TYPE = REF ReductionCellBody;
ReductionCellBody: TYPE = RECORD[
case: ReductionCellCase,
firstCharPosition: INT,
-- if reduce case
rule: CARDINAL,
length: INT,
-- if shift
code: CARDINAL,
text: ROPE,
tokenCase: TokenCase,
tokenKind: TokenKind,
previous: ReductionCell, -- always a shift
stack: StackCell, -- as it appears just after applying this shift
-- if accept
-- nothing
-- all cases
next: ReductionCell];
ReductionCellCase: TYPE = {reduction, shift, accept};
SimpleTryTokenCase: TYPE = {reduction, shift, accept, fail};
-- A buffered source token, kept so input can be backed up and replayed.
SourceTokenCell: TYPE = REF SourceTokenCellBody;
SourceTokenCellBody: TYPE = RECORD[
kind: TokenKind,
text: ROPE,
position: INT,
next: SourceTokenCell];
-- A token tentatively inserted during error recovery.
TrialTokenCell: TYPE = REF TrialTokenCellBody;
TrialTokenCellBody: TYPE = RECORD[
kind: TokenKind,
text: ROPE,
next: TrialTokenCell];
-- error-recovery tuning knobs
MaxTrialInserts: INT ← 4;
MaxSkip: INT ← 4; -- maximum tokens deleted in a repair
MinSuccesses: INT ← 4; -- minimum parse-ahead to record a candidate repair
DesiredSuccesses: INT ← 12; -- parse-ahead at which a repair is taken immediately
FoundAFix: ERROR = CODE; -- internal: unwinds the recovery search
GetReportStream: PUBLIC SIGNAL RETURNS[IO.STREAM] = CODE; -- asks the client where to report repairs
UnRecoverableSyntaxError: PUBLIC ERROR = CODE;
-- Exported: error-recovering parse driver. Pulls tokens from getSourceToken,
-- buffering events one token behind so that a failing token can trigger a
-- search for a small repair (delete up to MaxSkip tokens, insert plausible
-- tokens) before events are replayed to the caller via showGenericShift/showReduce.
-- Returns TRUE on acceptance; raises UnRecoverableSyntaxError when no
-- adequate repair is found, and GetReportStream to obtain a report sink.
FancyParse: PUBLIC PROC[
 table: ParserTable,
 getSourceToken: PROC RETURNS[tokenKind: TokenKind, tokenText: ROPE, position: INT],
 showGenericShift: PROC[code: CARDINAL, kind: IO.TokenKind, text: ROPE, firstCharPosition: INT],
 showReduce: PROC[rule: CARDINAL, firstCharPosition: INT, length: INT]] RETURNS[accepted: BOOLEAN] =
BEGIN
-- this uses a scheme obtained orally from Ed Satterthwaite on Nov 7, 1985.
-- I am beginning to suspect I have not got the search strategy right, I am breadth first on insertions, but not on deletions?
-- state stack and saved reductions for later play to caller
-- old stack is as the stack appears just before applying firstRed
-- current stack is as the stack appears just after applying the last reduction chained from firstRed
oldStack: StackCell ← NEW[StackCellBody ← [table.startState, 0, 0, NIL]];
currentStack: StackCell ← oldStack;
-- these reduction cells form a linear list from firstRed to LastRed
 -- last shift points into this list.
firstRed: ReductionCell ← NIL;
lastShift: ReductionCell ← NIL; -- always a shift
lastRed: ReductionCell ← NIL;
-- SimpleTryToken results in a shift, a single reduction, an acceptance, or a failure
SimpleTryToken: PROC[tokenKind: TokenKind, tokenText: ROPE, position: INT] RETURNS[SimpleTryTokenCase] =
BEGIN
terminalSeqCode: CARDINAL;
terminalSeqInfo: ROPE;
tokenCase: TokenCase;
action: ActionCell;
[terminalSeqCode, terminalSeqInfo, tokenCase] ← LookUpCedarToken[table.tokenTable, tokenKind, tokenText];
action ← LookUpAction[table.actionTable, currentStack.state, terminalSeqCode];
IF action = NIL THEN RETURN[fail];
SELECT action.action FROM
shift =>
BEGIN
redCell: ReductionCell;
currentStack ← NEW[StackCellBody ← [action.nextState, position, Rope.Length[tokenText], currentStack]];
redCell ← NEW[ReductionCellBody← [
shift, position,
, ,
terminalSeqCode, tokenText, tokenCase, tokenKind, lastShift, currentStack,
NIL]];
IF lastRed # NIL THEN lastRed.next ← redCell ELSE firstRed ← redCell;
lastRed ← redCell;
lastShift ← redCell;
RETURN[shift];
END;
reduce =>
BEGIN
shift: ActionCell;
firstPosition: INT ← position;
limitPosition: INT ← IF action.ruleSize = 0 THEN position ELSE currentStack.firstPosition + currentStack.length;
redCell: ReductionCell;
FOR I: CARDINAL IN [0..action.ruleSize) DO
firstPosition ← currentStack.firstPosition;
currentStack ← currentStack.next;
ENDLOOP;
redCell ← NEW[ReductionCellBody←[
reduction, firstPosition,
action.ruleNumber, limitPosition-firstPosition,
, , , , , ,
NIL]];
IF lastRed # NIL THEN lastRed.next ← redCell ELSE firstRed ← redCell;
lastRed ← redCell;
shift ← LookUpAction[table.actionTable, currentStack.state, action.leftSide];
IF shift.action # shift THEN ERROR; -- NOTE(review): local "shift" shadows the Action literal here; presumably the literal is meant — confirm
currentStack ← NEW[StackCellBody ← [shift.nextState, firstPosition, limitPosition-firstPosition, currentStack]];
RETURN[reduction]
END;
accept =>
BEGIN
redCell: ReductionCell ← NEW[ReductionCellBody←[
accept, ,
, ,
, , , , , ,
NIL]];
IF lastRed # NIL THEN lastRed.next ← redCell ELSE firstRed ← redCell;
lastRed ← redCell;
RETURN[accept];
END;
ENDCASE => ERROR;
END;
-- FullTryToken repeats calls on SimpleTryToken, so that all reductions are done, up to and including a shift, acceptance, or failure.
FullTryToken: PROC[tokenKind: TokenKind, tokenText: ROPE, position: INT] RETURNS[SimpleTryTokenCase] =
BEGIN
DO
SELECT SimpleTryToken[tokenKind, tokenText, position] FROM
reduction => LOOP;
shift => RETURN[shift];
accept => RETURN[accept];
fail =>
BEGIN
ResetToShift[]; -- undo any reductions performed before the failure
RETURN[fail];
END;
ENDCASE => ERROR;
ENDLOOP;
END;
-- discards everything buffered since oldStack
ResetStack: PROC =
{currentStack ← oldStack; firstRed ← lastShift ← lastRed ← NIL};
-- Play1TokenWorth plays successive reductions up to and including a shift or acceptance
Play1TokenWorth: PROC RETURNS[accepted: BOOLEAN] =
BEGIN
DO
SELECT firstRed.case FROM
reduction =>
BEGIN
showReduce[firstRed.rule, firstRed.firstCharPosition, firstRed.length];
firstRed ← firstRed.next;
IF firstRed = NIL THEN
lastShift ← lastRed ← NIL;
END;
shift =>
BEGIN
IF firstRed.tokenCase = generic THEN
showGenericShift[firstRed.code, firstRed.tokenKind, firstRed.text, firstRed.firstCharPosition];
oldStack ← firstRed.stack; -- commit: the played shift becomes the new baseline
firstRed ← firstRed.next;
IF firstRed = NIL THEN
lastShift ← lastRed ← NIL;
RETURN[FALSE];
END;
accept =>
BEGIN
firstRed ← firstRed.next;
IF firstRed = NIL THEN
lastShift ← lastRed ← NIL;
RETURN[TRUE];
END;
ENDCASE => ERROR;
ENDLOOP;
END;
-- truncates the buffered events back to the most recent shift
ResetToShift: PROC =
BEGIN
IF lastShift # NIL THEN
{lastRed ← lastShift; lastRed.next ← NIL; currentStack ← lastRed.stack}
ELSE
{lastRed ← NIL; currentStack ← oldStack};
END;
-- backs out the most recent shifted token (and its following reductions)
Reset1TokenWorth: PROC =
BEGIN
lastRed ← lastShift;
lastShift ← lastRed.previous;
ResetToShift[];
END;
ResetNTokensWorth: PROC[n: CARDINAL] =
{FOR I: CARDINAL IN [0..n) DO Reset1TokenWorth[] ENDLOOP};
-- now we deal with delivery of tokens. this mechanism allows back up and replay of tokens without any assumptions about the ultimate source of tokens
firstToken: SourceTokenCell ← NIL;
currentToken: SourceTokenCell ← NIL;
lastToken: SourceTokenCell ← NIL;
-- returns the next token, pulling from getSourceToken only when the replay buffer is exhausted
GetToken: PROC RETURNS[TokenKind, ROPE, INT] =
BEGIN
t: SourceTokenCell;
IF currentToken = NIL THEN
BEGIN
currentToken ← NEW[SourceTokenCellBody];
[currentToken.kind, currentToken.text, currentToken.position] ← getSourceToken[];
IF firstToken # NIL THEN lastToken.next ← currentToken ELSE firstToken ← currentToken;
lastToken ← currentToken;
END;
t ← currentToken;
currentToken ← currentToken.next;
RETURN[t.kind, t.text, t.position];
END;
ResetTokens: PROC =
{currentToken ← firstToken};
-- permanently drops the oldest buffered token (used for committed tokens and deletions)
DeleteOneToken: PROC =
BEGIN
IF currentToken = firstToken THEN currentToken ← firstToken.next;
firstToken ← firstToken.next;
END;
-- syntactic error recovery involves trying the deletion of some incoming tokens and the insertion of other plausible tokens. The following data records the current best such attempt as well as the currently explored attempt.
currentInsertions: TrialTokenCell ← NIL;
bestInsertions: TrialTokenCell ← NIL;
bestDeletions: CARDINAL ← 0;
bestScore: INT ← 0;
BackUpOneTrialInsertion: PROC =
{currentInsertions ← currentInsertions.next};
AddOneTrialInsertion: PROC[kind: TokenKind, text: ROPE] =
BEGIN
cell: TrialTokenCell ← NEW[TrialTokenCellBody←[kind, text, currentInsertions]];
currentInsertions ← cell;
END;
-- remembers the current attempt iff it beats the best so far
RecordTrialScore: PROC[nDeletions: INT, score: INT] =
BEGIN
IF score > bestScore THEN
BEGIN
bestInsertions ← currentInsertions;
bestDeletions ← nDeletions;
bestScore ← score;
END;
END;
-- remembers the current attempt as a full success (score = DesiredSuccesses)
RecordFoundFix: PROC[nDeletions: INT] =
BEGIN
bestInsertions ← currentInsertions;
bestDeletions ← nDeletions;
bestScore ← DesiredSuccesses;
END;
ResetTrials: PROC =
BEGIN
currentInsertions ← bestInsertions ← NIL;
bestDeletions ← bestScore ← 0;
END;
-- calls for on each best insertion, oldest first (the list is linked newest-first)
PlayTrialInsertions: PROC[for: PROC[TokenKind, ROPE]] =
BEGIN
SubPlayTrialInsertions: PROC[cell: TrialTokenCell] =
BEGIN
IF cell.next # NIL THEN SubPlayTrialInsertions[cell.next];
for[cell.kind, cell.text];
END;
IF bestInsertions # NIL THEN SubPlayTrialInsertions[bestInsertions];
END;
-- recursive search over (insertions, deletions) repairs; raises FoundAFix on a full success
ExploreForBestRecovery: PROC[nInsertsSoFar: INT, tryNDeletions: INT] =
BEGIN
-- first see if we can get away by trying an appropriate number of deletions without any insertions
FOR I: INT IN [0..tryNDeletions) DO [] ← GetToken[] ENDLOOP;
FOR I: INT IN [0..DesiredSuccesses) DO
tKind: TokenKind; tText: ROPE; tPosition: INT;
[tKind, tText, tPosition] ← GetToken[];
SELECT FullTryToken[tKind, tText, tPosition] FROM
shift => IF I+1 # DesiredSuccesses THEN LOOP
ELSE -- we parsed ahead as far as we wanted, go with this
{RecordFoundFix[tryNDeletions]; ResetNTokensWorth[I+1]; FoundAFix[]};
accept => {RecordFoundFix[tryNDeletions]; ResetNTokensWorth[I+1]; FoundAFix[]};
fail =>
BEGIN -- we couldn't parse as many input tokens as we wanted
IF I >= MinSuccesses THEN -- but we got enough to record
RecordTrialScore[tryNDeletions, I];
ResetNTokensWorth[I];
EXIT;
END;
ENDCASE => ERROR;
ENDLOOP; -- success loop
-- we didn't parse ahead as far as we wanted, so reset
ResetTokens[];
--now try each plausible inserted token, but with one fewer deletions
IF tryNDeletions > 0 THEN
BEGIN
SeeOneTokenToInsert: PROC[kind: TokenKind, text: ROPE] =
BEGIN
AddOneTrialInsertion[kind, text];
SELECT FullTryToken[kind, text, 0] FROM
shift =>
BEGIN
ExploreForBestRecovery[nInsertsSoFar+1, tryNDeletions-1];
Reset1TokenWorth[];
END;
fail => NULL;
accept => ResetToShift[];
ENDCASE => ERROR;
BackUpOneTrialInsertion[];
END;
GenPlausableInsertedTokens[table, currentStack.state, SeeOneTokenToInsert];
-- this might generate FoundAFix, which will be caught by our ultimate caller
END;
END;
-- following is the main loop that drives the parser
successfulTrys: CARDINAL ← 0;
DO
kind: TokenKind; text: Rope.ROPE; position: INT;
[kind, text, position] ← GetToken[];
SELECT FullTryToken[kind, text, position] FROM
shift =>
BEGIN
IF successfulTrys = 0 THEN successfulTrys ← 1
ELSE
BEGIN -- two source tokens in a row have worked, so show first to caller
IF Play1TokenWorth[] THEN ERROR; -- can't produce an acceptance
DeleteOneToken[]; -- from the saved input tokens
END;
END;
accept =>
BEGIN
IF Play1TokenWorth[] THEN
{IF successfulTrys = 1 THEN ERROR ELSE RETURN[TRUE]};
IF successfulTrys # 1 THEN ERROR;
IF Play1TokenWorth[] THEN RETURN[TRUE];
ERROR; -- one of them should have produced an acceptance
END;
fail =>
BEGIN
-- first forget what we have seen
successfulTrys ← 0;
ResetStack[];
ResetTokens[];
ResetTrials[];
-- now conduct an exploration for a replacement
FOR diag: INT IN [0..MaxSkip] DO
ExploreForBestRecovery[0, diag
! FoundAFix => -- found a replacement that results in acceptance or good match
BEGIN
-- we could try to replay the current stack, but it is easier to program using the code for "go with the best".
EXIT
END]
ENDLOOP;
-- did not find a replacement that resulted in acceptance or good match
-- so, go with the best
-- (or, did find one, and are defaulting to this code)
BEGIN
reportStream: IO.STREAM ← GetReportStream[];
msgStream: IO.STREAM ← IO.ROS[];
nInsertions: CARDINAL ← 0;
ReportOneInsertion: PROC[kind: TokenKind, text: Rope.ROPE] =
{IO.PutF[msgStream, " %g", IO.rope[text]]};
TryOneTrialInsertion: PROC[kind: TokenKind, text: Rope.ROPE] =
BEGIN
nInsertions ← nInsertions+1;
SELECT FullTryToken[kind, text, 0] FROM
shift, accept => RETURN;
fail => ERROR; -- the recorded best repair must replay cleanly
ENDCASE;
END;
ResetStack[];
ResetTokens[];
IO.PutF[msgStream, "\N\NSyntaxError at %g\N", IF firstToken=NIL THEN IO.rope["<unknown place>"] ELSE IO.int[firstToken.position]];
IF bestDeletions # 0 THEN IO.PutF[msgStream, "\Tdeleting"];
FOR I: CARDINAL IN [0..bestDeletions) DO
text: Rope.ROPE;
[, text, ] ← GetToken[];
IO.PutF[msgStream, " %g", IO.rope[text]];
ENDLOOP;
ResetTokens[];
IO.PutF[msgStream, "\Tinserting"];
PlayTrialInsertions[ReportOneInsertion];
IO.PutF[msgStream, "\N\N"];
IO.PutF[reportStream, "%g", IO.rope[IO.RopeFromROS[msgStream]]];
IF bestScore < MinSuccesses THEN UnRecoverableSyntaxError[];
FOR I: CARDINAL IN [0..bestDeletions) DO DeleteOneToken[] ENDLOOP;
PlayTrialInsertions[TryOneTrialInsertion];
FOR I: CARDINAL IN [0..nInsertions) DO
IF Play1TokenWorth[] THEN
{IF I = (nInsertions - 1) THEN RETURN[TRUE] ELSE ERROR};
ENDLOOP;
successfulTrys ← 0;
END;
END;
ENDCASE => ERROR;
ENDLOOP;
END;
-- Calls for with every token that has some action in the given state,
-- supplying a representative spelling for generic tokens.
GenPlausableInsertedTokens: PROC[table: ParserTable, state: State, for: PROC[TokenKind, ROPE]] =
BEGIN
-- we try each possible terminal token, and generate those that result in a non NIL actionCell
-- eventually we want to control the order in which the terminal tokens are tried
FOR cellX: CARDINAL IN [0..table.tokenTable.nSlots) DO
FOR trialTokenCell: TokenCell ← table.tokenTable.tokens[cellX], trialTokenCell.next WHILE trialTokenCell # NIL DO
actionCell: ActionCell ← LookUpAction[table.actionTable, state, trialTokenCell.symbolCode];
IF actionCell # NIL THEN
BEGIN
-- unique tokens use their own spelling; generic ones get a placeholder text
-- NOTE(review): a generic token of any kind not listed below hits ERROR — confirm that cannot occur.
text: Rope.ROPE ← IF trialTokenCell.case = unique THEN trialTokenCell.spelling ELSE
SELECT trialTokenCell.kind FROM
tokenID => "id",
tokenDECIMAL, tokenOCTAL, tokenHEX, tokenREAL => "num",
tokenROPE => "rope",
tokenCHAR => "char",
tokenATOM => "atom",
tokenEOF => "eof",
ENDCASE => ERROR;
for[trialTokenCell.kind, text];
END;
ENDLOOP;
ENDLOOP;
END;
-- temporary stuff to test kippering of parser tables
-- Command proc: reads <root>.parseTables (the old textual format), builds the
-- analyzed table, and serializes it to <root>.kipperedTables via KipperSupport.
-- The root name is taken from the command line; EndOfStream leaves it NIL.
ConvertParserToKippered: Commander.CommandProc = TRUSTED
BEGIN
commandLineStream: STREAM ← RIS[cmd.commandLine]; -- fixed: was "STREAMRIS[...]", missing the "← " initializer
rootName: ROPE;
sourceStream: STREAM;
table: ParserTable;
kipperer: KipperSupport.Kipperer;
outStream: STREAM;
rootName ← GetTokenRope[commandLineStream, IO.IDProc
! EndOfStream => {rootName ← NIL; CONTINUE}].token;
IO.Close[commandLineStream];
sourceStream ← FS.StreamOpen[Rope.Cat[rootName, ".parseTables"]];
table ← BuildParserTableFromStreamFallBack[sourceStream];
IO.Close[sourceStream];
outStream ← FS.StreamOpen[Rope.Cat[rootName, ".kipperedTables"], create];
kipperer ← KipperSupport.CreateKipperer[outStream];
NewParserData.KipperParserTable[kipperer, table];
IO.Close[outStream];
END;
-- module initialization: make the conversion command available
Commander.Register["///Commands/ConvertParserToKippered", ConvertParserToKippered];
END..