-- NOTE(review): This Cedar/Mesa (Tioga) source was flattened during extraction: original line breaks are gone, so end-of-line "--" comments now visually run into following code. Code tokens below are left byte-identical; only free-standing review comment lines have been added.
-- NewParserImpl.mesa -- last edit September 18, 1984 9:00:29 am PDT -- Sturgis, January 2, 1986 11:03:23 am PST DIRECTORY BasicTime USING[GMT, Period], Commander USING[CommandProc, Handle, Register], FS USING[Close, ComponentPositions, Error, ExpandName, GetInfo, GetName, Open, OpenFile, OpenFileFromStream, StreamFromOpenFile, StreamOpen], IO USING[STREAM, CharClass, Close, EndOfStream, Error, GetCard, GetCedarTokenRope, GetTokenRope, IDProc, int, PutF, PutRope, card, RIS, rope, RopeFromROS, ROS, SP, SkipWhitespace, TokenKind], NewParserData USING[KipperParserTable, UnKipperParserTable], NewParserPrivate USING[ParserTableBody, SymbolTable, SymbolTableBody, SymbolCell, SymbolCellBody, TokenTable, TokenTableBody, TokenCell, TokenCellBody, TokenCase, ActionTable, ActionTableBody, ActionCell, ActionCellBody, Action], KipperSupport USING[CreateKipperer, CreateUnKipperer, Kipperer, UnKipperer], Parser USING[State], ParserExtra USING[], ProcessProps USING[GetProp], Rope USING[Cat, Equal, Fetch, Length, ROPE, Substr]; NewParserImpl: CEDAR PROGRAM IMPORTS BasicTime, Commander, FS, IO, KipperSupport, NewParserData, ProcessProps, Rope EXPORTS Parser, ParserExtra = BEGIN OPEN IO, NewParserPrivate, Parser, Rope; ParserTable: TYPE = REF ParserTableBody; ParserTableBody: PUBLIC TYPE = NewParserPrivate.ParserTableBody; -- parser table procedures -- issue: want to use either EOF or client specified terminator CreateParserTable: PROC RETURNS[ParserTable] = BEGIN table: ParserTable _ NEW[ParserTableBody]; RecordSymbol[table, ""]; -- the empty seq RecordTokenInfo[table, "", tokenEOF, "", generic]; RETURN[table]; END; RecordSymbol: PROC[table: ParserTable, name: ROPE] = BEGIN IF table.symbolTable # NIL THEN ERROR; -- must record all symbols before any actions table.unAnalyzedSymbols _ NEW[SymbolCellBody_[name, table.nSymbols, table.unAnalyzedSymbols]]; table.nSymbols _ table.nSymbols+1; END; RecordUniqueToken: PROC[table: ParserTable, name: ROPE, spelling: ROPE] = BEGIN 
-- (continues RecordUniqueToken) classifies the spelling via GetCedarTokenRope ("|" special-cased, see original comment); then RecordGenericToken, RecordTokenInfo, RecordShift, RecordReduction, and the head of RecordAcceptance.
spellingStream: STREAM _ RIS[spelling]; kind: TokenKind; -- the following hack is because GetCedarToken fails on | -- of course, the scanner will still fail if it ever sees a | IF Equal[spelling, "|"] THEN BEGIN kind _ tokenSINGLE; END ELSE BEGIN kind _ GetCedarTokenRope[spellingStream].tokenKind; END; Close[spellingStream]; IF kind # tokenID AND NOT HashKind[kind].includeRope THEN ERROR; RecordTokenInfo[table, name, kind, spelling, unique]; END; RecordGenericToken: PROC[table: ParserTable, name: ROPE, class: ROPE] = BEGIN kind: TokenKind _ DecodeTokenKind[class]; IF kind # tokenID AND HashKind[kind].includeRope THEN ERROR; RecordTokenInfo[table, name, kind, "", generic]; END; RecordTokenInfo: PROC[table: ParserTable, name: ROPE, kind: TokenKind, spelling: ROPE, case: TokenCase] = BEGIN IF table.tokenTable # NIL THEN ERROR; -- must record all tokens before any actions table.unAnalyzedTokens _ NEW[TokenCellBody_[name, 0, kind, spelling, case, table.unAnalyzedTokens]]; table.nTokens _ table.nTokens+1; END; RecordShift: PROC[table: ParserTable, state: State, terminalSeq: ROPE, newState: State] = BEGIN IF table.symbolTable = NIL THEN AnalyzeSymbols[table]; table.unAnalyzedActions _ NEW[ActionCellBody _[ state: state, terminal: LookUpSymbol[table.symbolTable, terminalSeq], action: shift, nextState: newState, ruleNumber: 0, leftSide: 0, ruleSize: 0, next: table.unAnalyzedActions]]; table.nActions _ table.nActions+1; END; RecordReduction: PROC[table: ParserTable, state: State, terminalSeq: ROPE, leftSide: ROPE, ruleNumber: CARDINAL, ruleSize: CARDINAL] = BEGIN IF table.symbolTable = NIL THEN AnalyzeSymbols[table]; table.unAnalyzedActions _ NEW[ActionCellBody _[ state: state, terminal: LookUpSymbol[table.symbolTable, terminalSeq], action: reduce, nextState: 0, ruleNumber: ruleNumber, leftSide: LookUpSymbol[table.symbolTable, leftSide], ruleSize: ruleSize, next: table.unAnalyzedActions]]; table.nActions _ table.nActions+1; END; RecordAcceptance: PROC[table: ParserTable, 
-- (continues RecordAcceptance) records an accept action for (state, terminalSeq); then RecordStartState, AnalyzeSymbols and AnalyzeTokens (which build the chained hash tables), and the head of LookUpSymbol.
state: State, terminalSeq: ROPE] = BEGIN IF table.symbolTable = NIL THEN AnalyzeSymbols[table]; table.unAnalyzedActions _ NEW[ActionCellBody _[ state: state, terminal: LookUpSymbol[table.symbolTable, terminalSeq], action: accept, nextState: 0, ruleNumber: 0, leftSide: 0, ruleSize: 0, next: table.unAnalyzedActions]]; table.nActions _ table.nActions+1; END; RecordStartState: PROC[table: ParserTable, state: State] = {table.startState _ state}; -- Symbol Table Procedures AnalyzeSymbols: PROC[table: ParserTable] = BEGIN symbolTable: SymbolTable; next: SymbolCell; IF table.symbolTable # NIL THEN ERROR; symbolTable _ table.symbolTable _ NEW[SymbolTableBody[table.nSymbols]]; FOR I: CARDINAL IN [0..symbolTable.nSlots) DO symbolTable.symbols[I] _ NIL ENDLOOP; FOR symbol: SymbolCell _ table.unAnalyzedSymbols, next WHILE symbol # NIL DO hash: CARDINAL _ HashOneSymbol[symbol.name]; hashCode: CARDINAL _ hash MOD symbolTable.nSlots; next _ symbol.next; symbol.next _ symbolTable.symbols[hashCode]; symbolTable.symbols[hashCode] _ symbol; ENDLOOP; table.unAnalyzedSymbols _ NIL; AnalyzeTokens[table]; END; AnalyzeTokens: PROC[table: ParserTable] = BEGIN tokenTable: TokenTable; next: TokenCell; IF table.tokenTable # NIL THEN ERROR; tokenTable _ table.tokenTable _ NEW[TokenTableBody[table.nTokens]]; FOR token: TokenCell _ table.unAnalyzedTokens, next WHILE token # NIL DO spellingHash: CARDINAL _ TokenHash[token.kind, token.spelling].hash; spellingHashCode: CARDINAL _ spellingHash MOD tokenTable.nSlots; next _ token.next; token.symbolCode _ LookUpSymbol[table.symbolTable, token.name]; token.next _ tokenTable.tokens[spellingHashCode]; tokenTable.tokens[spellingHashCode] _ token; IF token.kind = tokenID AND token.case = generic THEN BEGIN IF tokenTable.idToken # NIL THEN ERROR; tokenTable.idToken _ token; END; ENDLOOP; table.unAnalyzedTokens _ NIL; END; LookUpSymbol: PROC[symbolTable: SymbolTable, name: ROPE] RETURNS[code: CARDINAL] = BEGIN hash: CARDINAL _ HashOneSymbol[name]; hashCode: 
-- (continues LookUpSymbol) chained-hash probe, ERROR if the name was never recorded; then HashOneSymbol (byte-sum hash over the rope), LookUpCedarToken (maps a scanned token to its grammar symbol code), TokenHash, and most of HashKind.
CARDINAL _ hash MOD symbolTable.nSlots; FOR symbol: SymbolCell _ symbolTable.symbols[hashCode], symbol.next WHILE symbol # NIL DO IF Equal[symbol.name, name] THEN RETURN[symbol.code]; ENDLOOP; ERROR; END; HashOneSymbol: PROC[rope: ROPE] RETURNS[hash: CARDINAL] = BEGIN hash _ 0; FOR I: INT IN [0..Length[rope]) DO hash _ hash + LOOPHOLE[Fetch[rope, I], CARDINAL]; ENDLOOP; END; -- following routines are intended for the output of the GetCedarToken routine in IO.mesa LookUpCedarToken: PROC[tokenTable: TokenTable, kind: TokenKind, text: ROPE] RETURNS[code: CARDINAL, info: ROPE, case: TokenCase] = BEGIN includeTextInHash: BOOLEAN; hash: CARDINAL; hashCode: CARDINAL; [hash, includeTextInHash] _ TokenHash[kind, text]; hashCode _ hash MOD tokenTable.nSlots; IF includeTextInHash THEN FOR token: TokenCell _ tokenTable.tokens[hashCode], token.next WHILE token # NIL DO IF token.kind = kind AND Equal[token.spelling, text] THEN RETURN[token.symbolCode, NIL, token.case]; ENDLOOP ELSE FOR token: TokenCell _ tokenTable.tokens[hashCode], token.next WHILE token # NIL DO IF token.kind = kind THEN RETURN[token.symbolCode, text, token.case]; ENDLOOP; IF kind = tokenID AND tokenTable.idToken # NIL THEN RETURN[tokenTable.idToken.symbolCode, text, tokenTable.idToken.case]; ERROR; END; TokenHash: PROC[kind: TokenKind, text: ROPE] RETURNS[hash: CARDINAL, includeTextInHash: BOOLEAN] = BEGIN [hash, includeTextInHash] _ HashKind[kind]; IF includeTextInHash AND text # NIL THEN FOR I: INT IN [0..Length[text]) DO hash _ hash + LOOPHOLE[Fetch[text, I], CARDINAL]; ENDLOOP; END; HashKind: PROC[kind: TokenKind] RETURNS[hash: CARDINAL, includeRope: BOOLEAN] = BEGIN SELECT kind FROM tokenERROR => RETURN[0, FALSE]; tokenID => RETURN[1, TRUE]; tokenDECIMAL => RETURN[2, FALSE]; tokenOCTAL => RETURN[3, FALSE]; tokenHEX => RETURN[4, FALSE]; tokenREAL => RETURN[5, FALSE]; tokenROPE => RETURN[6, FALSE]; tokenCHAR => RETURN[7, FALSE]; tokenATOM => RETURN[8, FALSE]; tokenSINGLE => RETURN[9, TRUE]; tokenDOUBLE => 
-- (continues HashKind) remaining TokenKind cases; then DecodeTokenKind (ROPE class name -> TokenKind), the ParserInstance/StackCell types, and BuildParserTable with its Local* callback wrappers that feed the Record* procedures.
RETURN[10, TRUE]; tokenCOMMENT => RETURN[11, FALSE]; tokenEOF => RETURN[12, FALSE]; ENDCASE => ERROR; END; DecodeTokenKind: PROC[class: ROPE] RETURNS[TokenKind] = BEGIN SELECT TRUE FROM Equal[class, "tokenERROR"] => RETURN[tokenERROR]; Equal[class, "tokenID"] => RETURN[tokenID]; Equal[class, "tokenDECIMAL"] => RETURN[tokenDECIMAL]; Equal[class, "tokenOCTAL"] => RETURN[tokenOCTAL]; Equal[class, "tokenHEX"] => RETURN[tokenHEX]; Equal[class, "tokenREAL"] => RETURN[tokenREAL]; Equal[class, "tokenROPE"] => RETURN[tokenROPE]; Equal[class, "tokenCHAR"] => RETURN[tokenCHAR]; Equal[class, "tokenATOM"] => RETURN[tokenATOM]; Equal[class, "tokenSINGLE"] => RETURN[tokenSINGLE]; Equal[class, "tokenDOUBLE"] => RETURN[tokenDOUBLE]; Equal[class, "tokenCOMMENT"] => RETURN[tokenCOMMENT]; Equal[class, "tokenEOF"] => RETURN[tokenEOF]; ENDCASE => ERROR; END; -- exported Parser procedures ParserInstance: TYPE = REF ParserBody; ParserBody: PUBLIC TYPE = RECORD[ table: ParserTable, stack: StackCell]; StackCell: TYPE = REF StackCellBody; StackCellBody: TYPE = RECORD[ state: State, firstPosition, length: INT, next: StackCell]; BuildParserTable: PROC[genInfo: PROC[ recordSymbol: PROC[name: ROPE], recordUniqueToken: PROC[name: ROPE, spelling: ROPE], recordGenericToken: PROC[name: ROPE, class: ROPE], recordShift: PROC[state: State, terminalSeq: ROPE, newState: State], recordReduction: PROC[state: State, terminalSeq: ROPE, leftSide: ROPE, ruleNumber: CARDINAL, ruleSize: CARDINAL], recordAcceptance: PROC[state: State, terminalSeq: ROPE], recordStartState: PROC[state: State]]] RETURNS[ParserTable] = BEGIN table: ParserTable _ CreateParserTable[]; LocalRecordSymbol: PROC[symbol: ROPE] = {RecordSymbol[table, symbol]}; LocalRecordUniqueToken: PROC[name: ROPE, spelling: ROPE] ={RecordUniqueToken[table, name, spelling]}; LocalRecordGenericToken: PROC[name: ROPE, class: ROPE] = {RecordGenericToken[table, name, class]}; LocalRecordShift: PROC[state: State, terminalSeq: ROPE, newState: State] = 
-- (continues BuildParserTable's local helpers) then CreateParser, BuildParserFromTable, and the head of RecordParserTableOnStream, which writes the table in the old textual "sy/ut/gt/sh/rd/ac/start ... end" record format.
{RecordShift[table, state, terminalSeq, newState]}; LocalRecordReduction: PROC[state: State, terminalSeq: ROPE, leftSide: ROPE, ruleNumber: CARDINAL, ruleSize: CARDINAL] = {RecordReduction[table, state, terminalSeq, leftSide, ruleNumber, ruleSize]}; LocalRecordAcceptance: PROC[state: State, terminalSeq: ROPE] = {RecordAcceptance[table, state, terminalSeq]}; LocalRecordStartState: PROC[state: State] = {RecordStartState[table, state]}; genInfo[LocalRecordSymbol, LocalRecordUniqueToken, LocalRecordGenericToken, LocalRecordShift, LocalRecordReduction, LocalRecordAcceptance, LocalRecordStartState]; AnalyzeActions[table]; RETURN[table] END; CreateParser: PUBLIC PROC[genInfo: PROC[ recordSymbol: PROC[name: ROPE], recordUniqueToken: PROC[name: ROPE, spelling: ROPE], recordGenericToken: PROC[name: ROPE, class: ROPE], recordShift: PROC[state: State, terminalSeq: ROPE, newState: State], recordReduction: PROC[state: State, terminalSeq: ROPE, leftSide: ROPE, ruleNumber: CARDINAL, ruleSize: CARDINAL], recordAcceptance: PROC[state: State, terminalSeq: ROPE], recordStartState: PROC[state: State]]] RETURNS[ParserInstance] = {RETURN[BuildParserFromTable[BuildParserTable[genInfo]]]}; BuildParserFromTable: PUBLIC PROC[table: ParserTable] RETURNS[ParserInstance] = BEGIN cell: StackCell; parser: ParserInstance; cell _ NEW[StackCellBody _ [table.startState, 0, 0, NIL]]; parser _ NEW[ParserBody _ [table, cell]]; RETURN[parser] END; RecordParserTableOnStream: PUBLIC PROC[s: STREAM, genInfo: PROC[ recordSymbol: PROC[symbol: ROPE], recordUniqueToken: PROC[name: ROPE, spelling: ROPE], recordGenericToken: PROC[name: ROPE, class: ROPE], recordShift: PROC[state: State, terminalSeq: ROPE, newState: State], recordReduction: PROC[state: State, terminalSeq: ROPE, leftSide: ROPE, ruleNumber: CARDINAL, ruleSize: CARDINAL], recordAcceptance: PROC[state: State, terminalSeq: ROPE], recordStartState: PROC[state: State]]] = BEGIN LocalRecordSymbol: PROC[symbol: ROPE] = {PutF[s, "sy %g ", rope[symbol]]}; 
-- (continues RecordParserTableOnStream) remaining record writers; then CreateParserFromStream and BuildParserTableFromStreamFallBack, which parses the textual record format back into a ParserTable.
LocalRecordUniqueToken: PROC[name: ROPE, spelling: ROPE] = {PutF[s, "ut %g %g ", rope[name], rope[spelling]]}; LocalRecordGenericToken: PROC[name: ROPE, class: ROPE] = {PutF[s, "gt %g %g ", rope[name], rope[class]]}; LocalRecordShift: PROC[state: State, terminalSeq: ROPE, newState: State] = {PutF[s, "sh %g <%g> %g ", card[state], rope[terminalSeq], card[newState]]}; LocalRecordReduction: PROC[state: State, terminalSeq: ROPE, leftSide: ROPE, ruleNumber: CARDINAL, ruleSize: CARDINAL] = {PutF[s, "rd %g <%g> %g %g %g ", card[state], rope[terminalSeq], rope[leftSide], card[ruleNumber], card[ruleSize]]}; LocalRecordAcceptance: PROC[state: State, terminalSeq: ROPE] = {PutF[s, "ac %g <%g> ", card[state], rope[terminalSeq]]}; LocalRecordStartState: PROC[state: State] = {PutF[s, "start %g ", card[state]]}; genInfo[LocalRecordSymbol, LocalRecordUniqueToken, LocalRecordGenericToken, LocalRecordShift, LocalRecordReduction, LocalRecordAcceptance, LocalRecordStartState]; PutRope[s, " end "]; END; CreateParserFromStream: PUBLIC PROC[s: STREAM] RETURNS[ParserInstance] = {RETURN[BuildParserFromTable[BuildParserTableFromStream[s]]]}; BuildParserTableFromStreamFallBack: PROC[s: STREAM] RETURNS[ParserTable] = BEGIN GenInfo: PROC[ recordSymbol: PROC[ROPE], recordUniqueToken: PROC[ROPE, ROPE], recordGenericToken: PROC[ROPE, ROPE], recordShift: PROC[State, ROPE, State], recordReduction: PROC[State, ROPE, ROPE, CARDINAL, CARDINAL], recordAcceptance: PROC[State, ROPE], recordStartState: PROC[State]] = BEGIN Break: PROC [char: CHAR] RETURNS [CharClass] = {RETURN[IF char = SP THEN sepr ELSE other]}; WHILE TRUE DO firstToken: ROPE _ GetTokenRope[s, Break].token; GetTermSeq: PROC RETURNS[ROPE] = BEGIN text: ROPE; [] _ SkipWhitespace[s]; text _ GetTokenRope[s, Break].token; RETURN[Substr[text, 1, Length[text]-2]]; END; SkipAndGetCard: PROC RETURNS[CARDINAL] = BEGIN [] _ SkipWhitespace[s]; RETURN[GetCard[s]]; END; SELECT TRUE FROM Equal[firstToken, "sy"] => recordSymbol[GetTokenRope[s, 
-- (continues BuildParserTableFromStreamFallBack) dispatch on the record tag rebuilds the table; "end" exits the loop. Then the head of BuildParserTableFromStream, which prefers a newer ".kipperedTables" sibling of the backing file when one exists.
Break].token]; Equal[firstToken, "ut"] => BEGIN name: ROPE _ GetTokenRope[s, Break].token; spelling: ROPE _ GetTokenRope[s, Break].token; recordUniqueToken[name, spelling] END; Equal[firstToken, "gt"] => BEGIN name: ROPE _ GetTokenRope[s, Break].token; class: ROPE _ GetTokenRope[s, Break].token; recordGenericToken[name, class]; END; Equal[firstToken, "sh"] => BEGIN state: CARDINAL _ SkipAndGetCard[]; terms: ROPE _ GetTermSeq[]; newState: CARDINAL _ SkipAndGetCard[]; recordShift[state, terms, newState]; END; Equal[firstToken, "rd"] => BEGIN state: CARDINAL _ SkipAndGetCard[]; terms: ROPE _ GetTermSeq[]; leftSide: ROPE _ GetTokenRope[s, Break].token; ruleNumber: CARDINAL _ SkipAndGetCard[]; ruleSize: CARDINAL _ SkipAndGetCard[]; recordReduction[state, terms, leftSide, ruleNumber, ruleSize]; END; Equal[firstToken, "ac"] => BEGIN state: CARDINAL _ SkipAndGetCard[]; terms: ROPE _ GetTermSeq[]; recordAcceptance[state, terms]; END; Equal[firstToken, "start"] => recordStartState[SkipAndGetCard[]]; Equal[firstToken, "end"] => EXIT; ENDCASE => ERROR; ENDLOOP; END; RETURN[BuildParserTable[GenInfo]]; END; BuildParserTableFromStream: PUBLIC PROC[s: STREAM] RETURNS[ParserTable] = BEGIN unKipperedName: Rope.ROPE; kipperedName: Rope.ROPE; fullName: Rope.ROPE; components: FS.ComponentPositions; TRUSTED BEGIN backingFile: FS.OpenFile _ FS.OpenFileFromStream[s ! 
-- (continues BuildParserTableFromStream) requires extension "parseTables", compares file create dates via BasicTime.Period, and un-kippers the table from the newer kippered file; any failure GOTOs dontTry.
IO.Error => GOTO dontTry]; backingFileCreateDate: BasicTime.GMT _ FS.GetInfo[backingFile].created; kipperedFile: FS.OpenFile; kipperedFileCreateDate: BasicTime.GMT; kipperedStream: IO.STREAM; unkipperer: KipperSupport.UnKipperer; table: ParserTable; --cmd: Commander.Handle; unKipperedName _ FS.GetName[backingFile].fullFName; [fullName, components] _ FS.ExpandName[unKipperedName]; IF NOT Rope.Equal["parseTables", Rope.Substr[fullName, components.ext.start, components.ext.length]] THEN GOTO dontTry; kipperedName _ Rope.Cat[Rope.Substr[fullName, components.base.start, components.base.length], ".kipperedTables"]; kipperedFile _ FS.Open[kipperedName ! FS.Error => GOTO dontTry]; kipperedFileCreateDate _ FS.GetInfo[kipperedFile].created; IF BasicTime.Period[backingFileCreateDate, kipperedFileCreateDate] <= 0 THEN BEGIN -- dont try FS.Close[kipperedFile]; GOTO dontTry; END; kipperedStream _ FS.StreamFromOpenFile[kipperedFile]; unkipperer _ KipperSupport.CreateUnKipperer[kipperedStream]; table _ NewParserData.UnKipperParserTable[unkipperer]; --cmd _ NARROW[ProcessProps.GetProp[$CommanderHandle]]; --IO.PutF[cmd.out, "\N\Ncongratulations, you are about to parse using Kippered tables\N\N"]; IO.Close[kipperedStream]; -- FS.Close[kipperedFile]; seems to be accomplished by the IO.Close? 
-- (continues BuildParserTableFromStream) the dontTry exit reports a warning on the Commander stream and falls through to the textual fall-back reader; then AbsorbCedarToken, the exported one-token-at-a-time LR parse step.
RETURN[table]; EXITS dontTry => BEGIN rootName: Rope.ROPE _ Rope.Cat[Rope.Substr[fullName, 0, components.ext.start-1]]; cmd: Commander.Handle _ NARROW[ProcessProps.GetProp[$CommanderHandle]]; IO.PutF[cmd.out, "\N\NWarning\N\NFailed to find a file named %g with an appropriate date, so will fall back on un kippered parser tables in %g\N", IO.rope[kipperedName], IO.rope[unKipperedName]]; IO.PutF[cmd.out, "\NFor future use, you might want to contruct a file with kippered parser tables by executing:\N\NConvertParserToKippered %g\N\N", IO.rope[rootName]]; END; END; -- failed to find kippered tables, so fall back case RETURN[BuildParserTableFromStreamFallBack[s]]; END; AbsorbCedarToken: PUBLIC PROC[parser: ParserInstance, tokenKind: TokenKind, tokenText: ROPE, position: INT, showGenericShift: PROC[code: CARDINAL, text: ROPE, firstCharPosition: INT], showReduce: PROC[rule: CARDINAL, firstCharPosition: INT, length: INT]] RETURNS[accepted: BOOLEAN] = BEGIN terminalSeqCode: CARDINAL; terminalSeqInfo: ROPE; tokenCase: TokenCase; [terminalSeqCode, terminalSeqInfo, tokenCase] _ LookUpCedarToken[parser.table.tokenTable, tokenKind, tokenText]; WHILE TRUE DO action: ActionCell _ LookUpAction[parser.table.actionTable, parser.stack.state, terminalSeqCode]; IF action = NIL THEN ERROR SyntaxError[position]; SELECT action.action FROM shift => BEGIN parser.stack _ NEW[StackCellBody _ [action.nextState, position, Rope.Length[tokenText], parser.stack]]; IF tokenCase = generic THEN showGenericShift[terminalSeqCode, tokenText, position]; RETURN[FALSE]; END; reduce => BEGIN shift: ActionCell; firstPosition: INT _ position; limitPosition: INT _ IF action.ruleSize = 0 THEN position ELSE parser.stack.firstPosition + parser.stack.length; FOR I: CARDINAL IN [0..action.ruleSize) DO firstPosition _ parser.stack.firstPosition; parser.stack _ parser.stack.next; ENDLOOP; showReduce[action.ruleNumber, firstPosition, limitPosition-firstPosition]; shift _ LookUpAction[parser.table.actionTable, 
-- (continues AbsorbCedarToken's reduce arm) pops ruleSize stack cells then applies the goto on leftSide; then SyntaxError, AnalyzeActions, LookUpAction, HashOneAction, and the ReductionCell type used by the fancy parser below.
parser.stack.state, action.leftSide]; IF shift.action # shift THEN ERROR; parser.stack _ NEW[StackCellBody _ [shift.nextState, firstPosition, limitPosition-firstPosition, parser.stack]]; END; accept => RETURN[TRUE]; ENDCASE => ERROR; ENDLOOP; END; SyntaxError: PUBLIC ERROR[position: INT] = CODE; -- Action Table procedures AnalyzeActions: PROC[table: ParserTable] = BEGIN actionTable: ActionTable; next: ActionCell; IF table.actionTable # NIL THEN ERROR; actionTable _ table.actionTable _ NEW[ActionTableBody[table.nActions]]; FOR I: CARDINAL IN [0..actionTable.nSlots) DO actionTable.actions[I] _ NIL ENDLOOP; FOR action: ActionCell _ table.unAnalyzedActions, next WHILE action # NIL DO hash: CARDINAL _ HashOneAction[action.state, action.terminal]; hashCode: CARDINAL _ hash MOD actionTable.nSlots; next _ action.next; action.next _ actionTable.actions[hashCode]; actionTable.actions[hashCode] _ action; ENDLOOP; table.unAnalyzedActions _ NIL; END; LookUpAction: PROC[actionTable: ActionTable, state: State, terminalCode: CARDINAL] RETURNS[ActionCell] = BEGIN hash: CARDINAL _ HashOneAction[state, terminalCode]; hashCode: CARDINAL _ hash MOD actionTable.nSlots; FOR action: ActionCell _ actionTable.actions[hashCode], action.next WHILE action # NIL DO IF action.state = state AND action.terminal = terminalCode THEN RETURN[action]; ENDLOOP; RETURN[NIL]; END; HashOneAction: PROC[state: CARDINAL, terminal: CARDINAL] RETURNS[hash: CARDINAL] = INLINE {RETURN[(state*terminal) MOD 256]}; -- fancy parser stuff ReductionCell: TYPE = REF ReductionCellBody; ReductionCellBody: TYPE = RECORD[ case: ReductionCellCase, firstCharPosition: INT, -- if reduce case rule: CARDINAL, length: INT, -- if shift code: CARDINAL, text: ROPE, tokenCase: TokenCase, tokenKind: TokenKind, previous: ReductionCell, -- always a shift stack: StackCell, -- as it appears just after applying this shift -- if accept -- nothing -- all cases next: ReductionCell]; ReductionCellCase: TYPE = {reduction, shift, accept}; 
-- NOTE(review): flattened Tioga text (original line breaks lost during extraction); code tokens below are byte-identical to the original, only free-standing review comment lines have been added. Types and tuning constants for error recovery follow, then FancyParse: a parser that recovers from syntax errors by searching over token deletions and insertions.
SimpleTryTokenCase: TYPE = {reduction, shift, accept, fail}; SourceTokenCell: TYPE = REF SourceTokenCellBody; SourceTokenCellBody: TYPE = RECORD[ kind: TokenKind, text: ROPE, position: INT, next: SourceTokenCell]; TrialTokenCell: TYPE = REF TrialTokenCellBody; TrialTokenCellBody: TYPE = RECORD[ kind: TokenKind, text: ROPE, next: TrialTokenCell]; MaxTrialInserts: INT _ 4; MaxSkip: INT _ 4; MinSuccesses: INT _ 4; DesiredSuccesses: INT _ 12; FoundAFix: ERROR = CODE; GetReportStream: PUBLIC SIGNAL RETURNS[IO.STREAM] = CODE; UnRecoverableSyntaxError: PUBLIC ERROR = CODE; FancyParse: PUBLIC PROC[ table: ParserTable, getSourceToken: PROC RETURNS[tokenKind: TokenKind, tokenText: ROPE, position: INT], showGenericShift: PROC[code: CARDINAL, kind: IO.TokenKind, text: ROPE, firstCharPosition: INT], showReduce: PROC[rule: CARDINAL, firstCharPosition: INT, length: INT]] RETURNS[accepted: BOOLEAN] = BEGIN -- this uses a scheme obtained orally from Ed Satterthwaite on Nov 7, 1985. -- I am begining to suspect I have not got the search strategy right, I am breadth first on insertions, but not on deletions? -- state stack and saved reductions for later play to caller -- old stack is as the stack appears just before applying firstRed -- current stack is as the stack appears just after appling the last reduction chained from firstRed oldStack: StackCell _ NEW[StackCellBody _ [table.startState, 0, 0, NIL]]; currentStack: StackCell _ oldStack; -- these reduction cells form a linear list from firstRed to LastRed -- last shift points into this list. 
-- (continues FancyParse) firstRed/lastShift/lastRed form the pending-reduction list; SimpleTryToken (next) applies one table action for a token, recording a ReductionCell and updating currentStack.
firstRed: ReductionCell _ NIL; lastShift: ReductionCell _ NIL; -- always a shift lastRed: ReductionCell _ NIL; SimpleTryToken: PROC[tokenKind: TokenKind, tokenText: ROPE, position: INT] RETURNS[SimpleTryTokenCase] = BEGIN terminalSeqCode: CARDINAL; terminalSeqInfo: ROPE; tokenCase: TokenCase; action: ActionCell; [terminalSeqCode, terminalSeqInfo, tokenCase] _ LookUpCedarToken[table.tokenTable, tokenKind, tokenText]; action _ LookUpAction[table.actionTable, currentStack.state, terminalSeqCode]; IF action = NIL THEN RETURN[fail]; SELECT action.action FROM shift => BEGIN redCell: ReductionCell; currentStack _ NEW[StackCellBody _ [action.nextState, position, Rope.Length[tokenText], currentStack]]; redCell _ NEW[ReductionCellBody_ [ shift, position, , , terminalSeqCode, tokenText, tokenCase, tokenKind, lastShift, currentStack, NIL]]; IF lastRed # NIL THEN lastRed.next _ redCell ELSE firstRed _ redCell; lastRed _ redCell; lastShift _ redCell; RETURN[shift]; END; reduce => BEGIN shift: ActionCell; firstPosition: INT _ position; limitPosition: INT _ IF action.ruleSize = 0 THEN position ELSE currentStack.firstPosition + currentStack.length; redCell: ReductionCell; FOR I: CARDINAL IN [0..action.ruleSize) DO firstPosition _ currentStack.firstPosition; currentStack _ currentStack.next; ENDLOOP; redCell _ NEW[ReductionCellBody_[ reduction, firstPosition, action.ruleNumber, limitPosition-firstPosition, , , , , , , NIL]]; IF lastRed # NIL THEN lastRed.next _ redCell ELSE firstRed _ redCell; lastRed _ redCell; shift _ LookUpAction[table.actionTable, currentStack.state, action.leftSide]; IF shift.action # shift THEN ERROR; currentStack _ NEW[StackCellBody _ [shift.nextState, firstPosition, limitPosition-firstPosition, currentStack]]; RETURN[reduction] END; accept => BEGIN redCell: ReductionCell _ NEW[ReductionCellBody_[ accept, , , , , , , , , , NIL]]; IF lastRed # NIL THEN lastRed.next _ redCell ELSE firstRed _ redCell; lastRed _ redCell; RETURN[accept]; END; ENDCASE => ERROR; 
-- End of SimpleTryToken. FullTryToken repeats SimpleTryToken through reductions until shift/accept/fail; Play1TokenWorth replays recorded cells to the caller's showReduce/showGenericShift; ResetToShift/Reset1TokenWorth/ResetNTokensWorth back trial parsing out.
END; FullTryToken: PROC[tokenKind: TokenKind, tokenText: ROPE, position: INT] RETURNS[SimpleTryTokenCase] = BEGIN DO SELECT SimpleTryToken[tokenKind, tokenText, position] FROM reduction => LOOP; shift => RETURN[shift]; accept => RETURN[accept]; fail => BEGIN ResetToShift[]; RETURN[fail]; END; ENDCASE => ERROR; ENDLOOP; END; ResetStack: PROC = {currentStack _ oldStack; firstRed _ lastShift _ lastRed _ NIL}; Play1TokenWorth: PROC RETURNS[accepted: BOOLEAN] = BEGIN DO SELECT firstRed.case FROM reduction => BEGIN showReduce[firstRed.rule, firstRed.firstCharPosition, firstRed.length]; firstRed _ firstRed.next; IF firstRed = NIL THEN lastShift _ lastRed _ NIL; END; shift => BEGIN IF firstRed.tokenCase = generic THEN showGenericShift[firstRed.code, firstRed.tokenKind, firstRed.text, firstRed.firstCharPosition]; oldStack _ firstRed.stack; firstRed _ firstRed.next; IF firstRed = NIL THEN lastShift _ lastRed _ NIL; RETURN[FALSE]; END; accept => BEGIN firstRed _ firstRed.next; IF firstRed = NIL THEN lastShift _ lastRed _ NIL; RETURN[TRUE]; END; ENDCASE => ERROR; ENDLOOP; END; ResetToShift: PROC = BEGIN IF lastShift # NIL THEN {lastRed _ lastShift; lastRed.next _ NIL; currentStack _ lastRed.stack} ELSE {lastRed _ NIL; currentStack _ oldStack}; END; Reset1TokenWorth: PROC = BEGIN lastRed _ lastShift; lastShift _ lastRed.previous; ResetToShift[]; END; ResetNTokensWorth: PROC[n: CARDINAL] = {FOR I: CARDINAL IN [0..n) DO Reset1TokenWorth[] ENDLOOP}; -- now we deal with delivery of tokens. 
this mechanism allows back up and replay of tokens without any assumptions about the ultimate source of tokens firstToken: SourceTokenCell _ NIL; currentToken: SourceTokenCell _ NIL; lastToken: SourceTokenCell _ NIL; GetToken: PROC RETURNS[TokenKind, ROPE, INT] = BEGIN t: SourceTokenCell; IF currentToken = NIL THEN BEGIN currentToken _ NEW[SourceTokenCellBody]; [currentToken.kind, currentToken.text, currentToken.position] _ getSourceToken[]; IF firstToken # NIL THEN lastToken.next _ currentToken ELSE firstToken _ currentToken; lastToken _ currentToken; END; t _ currentToken; currentToken _ currentToken.next; RETURN[t.kind, t.text, t.position]; END; ResetTokens: PROC = {currentToken _ firstToken}; DeleteOneToken: PROC = BEGIN IF currentToken = firstToken THEN currentToken _ firstToken.next; firstToken _ firstToken.next; END; -- syntactic error recovery involves trying the deletion of some incomming tokens and the insertion of other plausable tokens. The following data records current best such attempt as well as the currently explored attempt. 
-- Trial-recovery bookkeeping: current vs. best insertion lists, deletion count, and score; ExploreForBestRecovery (below) searches over deletions and plausible insertions, raising FoundAFix when DesiredSuccesses tokens parse ahead.
currentInsertions: TrialTokenCell _ NIL; bestInsertions: TrialTokenCell _ NIL; bestDeletions: CARDINAL _ 0; bestScore: INT _ 0; BackUpOneTrialInsertion: PROC = {currentInsertions _ currentInsertions.next}; AddOneTrialInsertion: PROC[kind: TokenKind, text: ROPE] = BEGIN cell: TrialTokenCell _ NEW[TrialTokenCellBody_[kind, text, currentInsertions]]; currentInsertions _ cell; END; RecordTrialScore: PROC[nDeletions: INT, score: INT] = BEGIN IF score > bestScore THEN BEGIN bestInsertions _ currentInsertions; bestDeletions _ nDeletions; bestScore _ score; END; END; RecordFoundFix: PROC[nDeletions: INT] = BEGIN bestInsertions _ currentInsertions; bestDeletions _ nDeletions; bestScore _ DesiredSuccesses; END; ResetTrials: PROC = BEGIN currentInsertions _ bestInsertions _ NIL; bestDeletions _ bestScore _ 0; END; PlayTrialInsertions: PROC[for: PROC[TokenKind, ROPE]] = BEGIN SubPlayTrialInsertions: PROC[cell: TrialTokenCell] = BEGIN IF cell.next # NIL THEN SubPlayTrialInsertions[cell.next]; for[cell.kind, cell.text]; END; IF bestInsertions # NIL THEN SubPlayTrialInsertions[bestInsertions]; END; ExploreForBestRecovery: PROC[nInsertsSoFar: INT, tryNDeletions: INT] = BEGIN -- first see if we can get away by trying an appropriate number of deletions without any insertions FOR I: INT IN [0..tryNDeletions) DO [] _ GetToken[] ENDLOOP; FOR I: INT IN [0..DesiredSuccesses) DO tKind: TokenKind; tText: ROPE; tPosition: INT; [tKind, tText, tPosition] _ GetToken[]; SELECT FullTryToken[tKind, tText, tPosition] FROM shift => IF I+1 # DesiredSuccesses THEN LOOP ELSE -- we parsed ahead as far as we wanted, go with this {RecordFoundFix[tryNDeletions]; ResetNTokensWorth[I+1]; FoundAFix[]}; accept => {RecordFoundFix[tryNDeletions]; ResetNTokensWorth[I+1]; FoundAFix[]}; fail => BEGIN -- we couldn't parse as many input tokens as we wanted IF I >= MinSuccesses THEN -- but we got enough to record RecordTrialScore[tryNDeletions, I]; ResetNTokensWorth[I]; EXIT; END; ENDCASE => ERROR; ENDLOOP; -- 
success loop -- we didn't parse ahead as far as we wanted, so reset ResetTokens[]; --now try each plausable inserted token, but with one fewer deletions IF tryNDeletions > 0 THEN BEGIN SeeOneTokenToInsert: PROC[kind: TokenKind, text: ROPE] = BEGIN AddOneTrialInsertion[kind, text]; SELECT FullTryToken[kind, text, 0] FROM shift => BEGIN ExploreForBestRecovery[nInsertsSoFar+1, tryNDeletions-1]; Reset1TokenWorth[]; END; fail => NULL; accept => ResetToShift[]; ENDCASE => ERROR; BackUpOneTrialInsertion[]; END; GenPlausableInsertedTokens[table, currentStack.state, SeeOneTokenToInsert]; -- this might generate FoundAFix, which will be caught by our ultimate caller END; END; -- following is the main loop that drives the parser successfulTrys: CARDINAL _ 0; DO kind: TokenKind; text: Rope.ROPE; position: INT; [kind, text, position] _ GetToken[]; SELECT FullTryToken[kind, text, position] FROM shift => BEGIN IF successfulTrys = 0 THEN successfulTrys _ 1 ELSE BEGIN -- two source tokens in a row have worked, so show first to caller IF Play1TokenWorth[] THEN ERROR; -- can't produce an acceptance DeleteOneToken[]; -- from the saved input tokens END; END; accept => BEGIN IF Play1TokenWorth[] THEN {IF successfulTrys = 1 THEN ERROR ELSE RETURN[TRUE]}; IF successfulTrys # 1 THEN ERROR; IF Play1TokenWorth[] THEN RETURN[TRUE]; ERROR; -- one of them should have produced an acceptance END; fail => BEGIN -- first forget what we have seen successfulTrys _ 0; ResetStack[]; ResetTokens[]; ResetTrials[]; -- now conduct an exploration for a replacement FOR diag: INT IN [0..MaxSkip] DO ExploreForBestRecovery[0, diag ! FoundAFix => -- found a replacement that results in acceptance or good match BEGIN -- we could try to replay the current stack, but it is easier to program using the code for "go with the best". 
-- (continues the fail arm of FancyParse's main loop) after the search, reports the chosen deletions/insertions via GetReportStream, raises UnRecoverableSyntaxError if bestScore < MinSuccesses, then replays the chosen fix against the saved token stream.
EXIT END] ENDLOOP; -- didn't not find a replacement that resulted in acceptance or good match -- so, go with the best -- (or, did find one, and are defaulting to this code) BEGIN reportStream: IO.STREAM _ GetReportStream[]; msgStream: IO.STREAM _ IO.ROS[]; nInsertions: CARDINAL _ 0; ReportOneInsertion: PROC[kind: TokenKind, text: Rope.ROPE] = {IO.PutF[msgStream, " %g", IO.rope[text]]}; TryOneTrialInsertion: PROC[kind: TokenKind, text: Rope.ROPE] = BEGIN nInsertions _ nInsertions+1; SELECT FullTryToken[kind, text, 0] FROM shift, accept => RETURN; fail => ERROR; ENDCASE; END; ResetStack[]; ResetTokens[]; IO.PutF[msgStream, "\N\NSyntaxError at %g\N", IF firstToken=NIL THEN IO.rope[""] ELSE IO.int[firstToken.position]]; IF bestDeletions # 0 THEN IO.PutF[msgStream, "\Tdeleting"]; FOR I: CARDINAL IN [0..bestDeletions) DO text: Rope.ROPE; [, text, ] _ GetToken[]; IO.PutF[msgStream, " %g", IO.rope[text]]; ENDLOOP; ResetTokens[]; IO.PutF[msgStream, "\Tinserting"]; PlayTrialInsertions[ReportOneInsertion]; IO.PutF[msgStream, "\N\N"]; IO.PutF[reportStream, "%g", IO.rope[IO.RopeFromROS[msgStream]]]; IF bestScore < MinSuccesses THEN UnRecoverableSyntaxError[]; FOR I: CARDINAL IN [0..bestDeletions) DO DeleteOneToken[] ENDLOOP; PlayTrialInsertions[TryOneTrialInsertion]; FOR I: CARDINAL IN [0..nInsertions) DO IF Play1TokenWorth[] THEN {IF I = (nInsertions - 1) THEN RETURN[TRUE] ELSE ERROR}; ENDLOOP; successfulTrys _ 0; END; END; ENDCASE => ERROR; ENDLOOP; END; GenPlausableInsertedTokens: PROC[table: ParserTable, state: State, for: PROC[TokenKind, ROPE]] = BEGIN -- we try each possible terminal token, and generate those that result in a non NIL actionCell -- eventually we want to control the order in which the terminal tokens are tried FOR cellX: CARDINAL IN [0..table.tokenTable.nSlots) DO FOR trialTokenCell: TokenCell _ table.tokenTable.tokens[cellX], trialTokenCell.next WHILE trialTokenCell # NIL DO actionCell: ActionCell _ LookUpAction[table.actionTable, state, 
-- (continues GenPlausableInsertedTokens) maps each viable token to a representative text; then ConvertParserToKippered, a Commander command that reads <root>.parseTables and writes kippered tables to <root>.kipperedTables; module ends at "END..". The prose after it is a Tioga document node, left untouched.
trialTokenCell.symbolCode]; IF actionCell # NIL THEN BEGIN text: Rope.ROPE _ IF trialTokenCell.case = unique THEN trialTokenCell.spelling ELSE SELECT trialTokenCell.kind FROM tokenID => "id", tokenDECIMAL, tokenOCTAL, tokenHEX, tokenREAL => "num", tokenROPE => "rope", tokenCHAR => "char", tokenATOM => "atom", tokenEOF => "eof", ENDCASE => ERROR; for[trialTokenCell.kind, text]; END; ENDLOOP; ENDLOOP; END; -- temporary stuff to test kippering of parser tables ConvertParserToKippered: Commander.CommandProc = TRUSTED BEGIN commandLineStream: STREAM _ RIS[cmd.commandLine]; rootName: ROPE; sourceStream: STREAM; table: ParserTable; kipperer: KipperSupport.Kipperer; outStream: STREAM; rootName _ GetTokenRope[commandLineStream, IO.IDProc ! EndOfStream => {rootName _ NIL; CONTINUE}].token; IO.Close[commandLineStream]; sourceStream _ FS.StreamOpen[Rope.Cat[rootName, ".parseTables"]]; table _ BuildParserTableFromStreamFallBack[sourceStream]; IO.Close[sourceStream]; outStream _ FS.StreamOpen[Rope.Cat[rootName, ".kipperedTables"], create]; kipperer _ KipperSupport.CreateKipperer[outStream]; NewParserData.KipperParserTable[kipperer, table]; IO.Close[outStream]; END; Commander.Register["///Commands/ConvertParserToKippered", ConvertParserToKippered]; END.. ¶Following two procedures are used for building parser tables from info recorded on files. BuildParserTableFromStreamFallBack uses the old style tables, and BuildParserTableFromStream tries to use the new kippered format. It does so in a very awkward manner that will eventually be replaced. This awkwardness is designed to support old interfaces that are used by machine generated code. BuildParserTableFromStream assumes that it has been handed a stream backed by the old format file, it tries to find a new format file of a later date and if successful uses the new format file. One fix would be to do away with the old format entirely, but I am not sure I will take that route. 
results in a shift, a single reduction, an acceptance, or a failure repeats calls on SimpleTryToken, so that all reductions are done, up to and including a shift, acceptance, or failure. plays successive reductions up to and including a shift or acceptance ʘJšœ˜J˜2J˜+J˜˜ Jšœ˜Jšœ/˜/J˜Jšœ¿˜¿J˜J˜.—J˜MJ˜J˜J˜J˜¢J˜J˜J˜J˜ J˜—J˜˜(J˜J˜4J˜2J˜DJ˜qJ˜8˜@J˜:——J˜˜OJ˜J˜J˜J˜:J˜)J˜J˜—J˜˜@J˜!J˜4J˜2J˜DJ˜qJ˜8J˜(J˜˜(J˜"—˜:J˜3—˜8J˜0—˜JJ˜L—˜wJ˜t—˜>J˜9—J˜PJ˜¢J˜J˜—J˜˜HJ˜>—˜Jšœ­™­—˜JJ˜˜J˜J˜$J˜%J˜&J˜=J˜%˜ J˜šœ.˜.J˜,—˜ J˜1˜ J˜J˜ J˜J˜$J˜(J˜—˜(J˜J˜J˜J˜—˜˜J˜+—˜J˜J˜*J˜.J˜!J˜—˜J˜J˜*J˜+J˜ J˜—˜J˜J˜#J˜J˜&J˜$J˜—˜J˜J˜#J˜J˜.J˜(J˜&J˜>J˜—˜J˜J˜#J˜J˜J˜—˜J˜#—J˜!J˜——J˜—J˜—J˜"J˜—J˜˜IJ˜Jšœ˜Jšœ˜J˜7˜J˜ šœ2˜2J˜—JšœH˜HJ˜Jšœ˜Jšœ&˜&Jšœ˜Jšœ%˜%J˜J˜J˜J˜Jšœ3˜3Jšœ7˜7J˜J˜wJ˜Jšœq˜qJ˜šœ#˜#J˜—J˜Jšœ:˜:J˜šœL˜LJ˜Jšœ˜J˜ J˜—J˜Jšœ5˜5J˜Jšœ<˜J˜1J˜J˜,J˜'J˜—J˜J˜—J˜˜hJ˜J˜4J˜1˜Y˜?J˜—J˜—J˜ J˜J˜—˜YJ˜$J˜—J˜J˜J˜J˜,˜!J˜J˜J˜˜J˜J˜ J˜—˜ J˜J˜ J˜J˜J˜*J˜A—J˜˜ J˜ J˜—J˜ J˜J˜—J˜J˜5J˜J˜J˜˜'J˜J˜J˜—J˜J˜—J˜ J˜J˜‚J˜J˜;˜(J˜J˜J˜)J˜—J˜J˜J˜J˜J˜"J˜(J˜J˜J˜@J˜J˜