PGSTab.mesa
Copyright © 1985 by Xerox Corporation. All rights reserved.
Satterthwaite, October 18, 1985 10:24:14 am PDT
Maxwell, August 10, 1983 10:39 am
Wyatt, March 20, 1984 4:48:28 pm PST
Russ Atkinson (RRA) March 19, 1985 10:03:52 am PST
DIRECTORY
PGSConDefs: TYPE,
PGSTypes: TYPE,
Rope: TYPE USING [ROPE];
PGSTab: PROGRAM
IMPORTS PGSConDefs
EXPORTS PGSConDefs = {
OPEN PGSConDefs;
linewidth, hashval, lastntstate, vocabspace, nlim, tlim: CARDINAL;
offset: CARDINAL;
hashtab: PGSTypes.HashTab;
ttab: PGSTypes.TTab;
ntab: PGSTypes.NTab;
packedProdInfo: BOOL;
statedata: PGSTypes.StateData;
ntdefaults: PGSTypes.NTDefaults;
column: PGSTypes.Column;
renumber: PGSTypes.Renumber;
vocabindex: PGSTypes.VocabIndex;
scantab: ARRAY [40b..177b] OF CARDINAL;
TabGen: PUBLIC PROC[prefix, suffix: PROC] RETURNS[success: BOOLTRUE] = {
Error: PROC[sel: CARDINAL] = {
outstring["\nFAIL - "];
outstring[SELECT sel FROM
1 => "more than 255 terminal symbols",
2 => "more than 254 nonterminal symbols",
3 => "more than 2047 states or productions",
4 => "more than 15 symbols in a production right part",
ENDCASE => "PGS error"];
outeol[1]; success ← FALSE};
IF eofMark > 255 THEN Error[1];
IF totalTokens-eofMark+1 > 255 THEN Error[2];
IF pssLim > 2047 THEN Error[3];
IF rhsLim > 15 THEN Error[4];
IF ~success THEN RETURN [success];
ntab ← MakeTab[ntEntries];
ttab ← MakeTab[tEntries];
packedProdInfo ← (numRules <= 255);
statedata ← MakeStateData[sLim];
ntdefaults ← MakeNTDefaults[totalTokens-eofMark+2];
column ← MakeColumn[ntEntries+1];
TableSetup[]; closewordstream[]; SquashNTab[];
outstring["\nParse Table Data\n"];
IF flags[printLALR] THEN {PrintNDef[]; TableOverlays[]};
vocabindex ← MakeVocabIndex[eofMark+1];
hashval ← 3*eofMark/2;
hashval ← MIN[251, IF hashval MOD 2 = 0 THEN hashval+1 ELSE hashval];
hashtab ← MakeHashTab[hashval+1];
HashTabSetup[];
start output file
openwordstream[scratch:FALSE];
IF prefix # NIL THEN prefix[];
output scanner table relative offsets (see table output below)
offset ← 14; -- number of tables
outword[offset]; offset ← offset + 96;
outword[offset]; offset ← offset + (hashval+1);
outword[offset]; offset ← offset + (eofMark+1);
outword[offset]; offset ← offset + (2 + (vocabspace+cpw-1)/cpw);
output parser table relative offsets (see table output below)
StatePartition[]; -- compute lastntstate
outword[offset]; offset ← offset + (numProd+1)*(IF packedProdInfo THEN 1 ELSE 2);
outword[offset]; offset ← offset + (lastntstate+1);
outword[offset]; offset ← offset + (lastntstate+1);
outword[offset]; offset ← offset + nlim;
outword[offset]; offset ← offset + nlim;
outword[offset]; offset ← offset + (totalTokens-eofMark+2);
outword[offset]; offset ← offset + sLim;
outword[offset]; offset ← offset + sLim;
outword[offset]; offset ← offset + tlim;
outword[offset];
output scanner tables
outblock[scantab.BASE, 96];
outblock[LOOPHOLE[hashtab], hashval+1, PGSTypes.HashTabSeq[0].SIZE];
outblock[LOOPHOLE[vocabindex], eofMark+1, PGSTypes.CardinalsSeq[0].SIZE];
outword[vocabspace]; outword[vocabspace];
outblock[LOOPHOLE[symTab], (vocabspace+cpw-1)/cpw, PGSTypes.SymTabSeq[0].SIZE];
hashtab ← NIL; vocabindex ← NIL;
symInfo ← NIL;
renumber ← MakeRenumber[sLim];
StateRenumber[]; --now output parser tables
OutArray[1,numProd, IF packedProdInfo THEN ProdDataItem1 ELSE ProdDataItem2];
OutArray[1,lastntstate, NStateItem];
OutArray[1,lastntstate, NLenItem];
OutArray[0,nlim-1, NSymItem];
OutArray[0,nlim-1, NActionItem];
OutArray[2,totalTokens-eofMark+1, NTDefaultItem];
OutArray[1,sLim-1, TStateItem];
OutArray[1,sLim-1, TLenItem];
OutArray[0,tlim-1, TSymItem];
OutArray[0,tlim-1, TActionItem];
prodInfo ← NIL; ntab ← NIL;
ntdefaults ← NIL; renumber ← NIL;
statedata ← NIL; ttab ← NIL;
IF suffix # NIL THEN suffix[]};
TableSetup: PROC = {
defaultitem, item: PGSTypes.ItemRec;
defaultprod, i, j, k, symbol, ttix, ntix, six, index, ptr: CARDINAL;
ColumnEntry: PROC = {
j: INTEGER; lastptr: CARDINAL;
lastptr ← 0; ptr ← ntdefaults[symbol].count;
DO
IF ptr=0 THEN GOTO insert;
j ← item.jf-column[ptr].item.jf; IF j=0 THEN j ← item.pss-column[ptr].item.pss;
IF j=0 THEN GOTO countup;
IF j<0 THEN GOTO insert;
lastptr ← ptr; ptr ← column[ptr].link;
REPEAT
countup => column[ptr].count ← column[ptr].count+1;
insert => {
column[index].item ← item;
IF lastptr=0 THEN { --head of list for column "symbol"
column[index].link ← ntdefaults[symbol].count;
ntdefaults[symbol].count ← index}
ELSE {
column[index].link ← column[lastptr].link;
column[lastptr].link ← index};
index ← index+1};
ENDLOOP
};
ntix ← ttix ← 0; index ← 1; statedata[1] ← [0,0,0,0];
FOR six IN [1..sLim) DO
defaultprod ← inword[]; statedata[six] ← [ttix,ntix,0,0];
THROUGH [1..inword[]] DO --set up entries for state six in ttab and ntab
symbol ← inword[]; k ← inword[];
item ← [k MOD 4, k/4, inword[]];
IF item.tag=2 AND item.pss=defaultprod THEN defaultitem ← item ELSE
IF symbol<=eofMark THEN {ttab[ttix] ← [symbol, item]; ttix ← ttix+1}
ELSE {
symbol ← symbol-eofMark+1; ntab[ntix] ← [symbol, item]; ntix ← ntix+1;
ColumnEntry[]};
ENDLOOP;
IF defaultprod # 0 THEN {
ttab[ttix] ← [defaultMarker, defaultitem]; ttix ← ttix+1};
statedata[six].tLink ← ttix-statedata[six].tIndex; --length
statedata[six].ntLink ← ntix-statedata[six].ntIndex; --length
now see if these terminal entries match those of an existing state
FOR i IN [1..six) DO
k ← statedata[six].tIndex-statedata[i].tIndex;
IF statedata[six].tLink=statedata[i].tLink THEN -- same length
FOR j IN [statedata[i].tIndex..statedata[i].tIndex+statedata[i].tLink) DO
IF ttab[j]#ttab[j+k] THEN EXIT;
REPEAT FINISHED => { --there is a matching set of entries
statedata[six].tLink ← -i; -- point state six at state i
ttix ← statedata[six].tIndex; -- back up index
EXIT};
ENDLOOP;
ENDLOOP;
ENDLOOP;
tlim ← ttix;
ttab contains terminal entries; statedata[i].tindex indexes state i entries if
statedata[i].link>0, otherwise statedata[-statedata[i].link].tindex indexes them.
the following code leaves the most frequently occurring item in column "symbol"
of the lalr tables in the appropriate entry of ntdefaults
FOR symbol IN [0..totalTokens-eofMark+1] DO
IF (ptr ← ntdefaults[symbol].count)=0 THEN LOOP;
k ← 0; ntdefaults[symbol].item ← column[ptr].item;
WHILE ptr#0 DO
IF column[ptr].count>k THEN {
k ← column[ptr].count; ntdefaults[symbol].item ← column[ptr].item};
ptr ← column[ptr].link;
ENDLOOP;
ntdefaults[symbol].count ← k;
ENDLOOP
};
SquashNTab: PROC = {
i, j, k, six, ntix: CARDINAL;
nlim ← 0;
FOR six IN [1..sLim) DO
k ← nlim;
FOR ntix IN [statedata[six].ntIndex..statedata[six].ntIndex+statedata[six].ntLink) DO
IF ntab[ntix].item # ntdefaults[ntab[ntix].symbol].item THEN {
ntab[nlim] ← ntab[ntix]; nlim ← nlim+1};
ENDLOOP;
statedata[six].ntIndex ← k; k ← nlim-k; statedata[six].ntLink ← k;
IF k#0 THEN FOR i IN [1..six) DO --do these entries match those of an existing state
k ← statedata[six].ntIndex-statedata[i].ntIndex;
IF statedata[six].ntLink=statedata[i].ntLink THEN -- same length
FOR j IN [statedata[i].ntIndex..statedata[i].ntIndex+statedata[i].ntLink) DO
IF ntab[j]#ntab[j+k] THEN EXIT;
REPEAT FINISHED => { --there is a matching set of entries
statedata[six].ntLink ← -i; -- point state six at state i
nlim ← statedata[six].ntIndex; -- back up index
EXIT};
ENDLOOP;
ENDLOOP;
ENDLOOP
};
PrintNDef: PROC = {
j: CARDINAL; item: PGSTypes.ItemRec;
outstring["\nNonterminal Default Actions\n"]; linewidth ← 0; j ← 0;
FOR i: CARDINAL IN [2..totalTokens-eofMark+1] DO
IF (linewidth ← linewidth+tokenSize+8)>outbufLim THEN {
outeol[1]; linewidth ← tokenSize+8};
item ← ntdefaults[i].item; j ← j+ntdefaults[i].count;
outnum[IF item.tag=0 THEN item.pss ELSE -item.pss, 5]; outchar[' ,1];
outchar[' , tokenSize+2-OutToken[i+eofMark-1]];
ENDLOOP;
outstring["\nEntries removed = "]; outnum[j,5]; outeol[1]};
TableOverlays: PROC = {
j, k: INTEGER;
outstring["\nTable Overlays\n row ttab ntab\n"];
FOR i: CARDINAL IN [1..sLim) DO
j ← statedata[i].tLink; k ← statedata[i].ntLink;
IF j<0 OR k<0 THEN {
outnum[i,4];
IF j<0 THEN outnum[-j, 5] ELSE outchar[' ,5];
IF k<0 THEN outnum[-k, 5];
outeol[1]};
ENDLOOP
};
HashTabSetup: PROC = {
i, j, k, h, p, count, freeptr, code: CARDINAL;
null: CHAR = '\000;
outstring["\nScanner hashtable probe counts (terminal symbol, probecount, hashcode)\n"];
vocabspace ← vocabindex[0] ← 0; freeptr ← hashval; linewidth ← 0;
scantab ← ALL[0];
FOR i IN [1..eofMark] DO -- strip quotes, Repack, build vocabindex, scantab and hashtab
p ← i*tokenSize; k ← symInfo[i].length;
IF k=2 AND symTab[p]='' THEN {p ← p+1; k ← 1}
ELSE IF k>2 AND symTab[p]='" AND symTab[p+k-1]='" THEN {p←p+1; k←k-2};
IF k=1 AND ~(symTab[p] IN ['a..'z] OR symTab[p] IN ['A..'Z]) THEN {
scantab[CARDINAL[symTab[p]-null]] ← i;
the next two statements are redundant, they keep the new tables identical to the old
symTab[vocabspace] ← symTab[p]; vocabspace ← vocabspace+1;
vocabindex[i] ← vocabspace
}
ELSE {
FOR j IN [0..k) DO
symTab[vocabspace] ← symTab[p+j]; vocabspace ← vocabspace+1;
ENDLOOP;
vocabindex[i] ← vocabspace;
IF i=eofMark THEN LOOP;
count ← 1;
code ← h ← (((symTab[p]-null)*127+(symTab[p+k-1]-null)) MOD hashval) + 1;
IF hashtab[h].symPtr#0 THEN {
WHILE h#0 DO j ← h; h ← hashtab[h].link; count ← count+1 ENDLOOP;
WHILE hashtab[freeptr].symPtr#0 DO freeptr ← freeptr-1 ENDLOOP;
hashtab[j].link ← h ← freeptr};
hashtab[h].symPtr ← i;
IF (linewidth ← linewidth+tokenSize+8)>outbufLim THEN {
outeol[1]; linewidth ← tokenSize+8};
FOR j IN [vocabindex[i-1]..vocabindex[i]) DO outchar[symTab[j],1] ENDLOOP;
outchar[' ,tokenSize-k]; outnum[count,2];
outnum[code,4]; outchar[' ,2]};
ENDLOOP;
IF (j ← vocabspace MOD cpw)#0 THEN -- pad to word boundary
THROUGH [j..cpw) DO symTab[vocabspace] ← null ENDLOOP;
outeol[1]};
StatePartition: PROC = {
i, j: CARDINAL;
i𡤂 j←sLim-1;
DO
WHILE statedata[j].ntLink=0 AND i<=j DO j←j-1 ENDLOOP; -- j indexes an ntstate
WHILE statedata[i].ntLink#0 AND i<j DO i←i+1 ENDLOOP; -- i, a tstate unless i>=j
IF i>=j THEN EXIT
ELSE {i←i+1; j←j-1}; -- simulate renumbering
ENDLOOP;
lastntstate←j};
StateRenumber: PROC = {
i, j: CARDINAL; k: INTEGER;
swaprec: PGSTypes.StateDataRec;
i𡤂 j←sLim-1;
DO
WHILE statedata[j].ntLink=0 AND i<=j DO j←j-1 ENDLOOP; -- j indexes an ntstate
WHILE statedata[i].ntLink#0 AND i<j DO i←i+1 ENDLOOP; -- i, a tstate unless i>=j
IF i>=j THEN EXIT
ELSE {renumber[i]←j; renumber[j]←i; i←i+1; j←j-1};
if j<i now, j indexes the renumbered nstate
ENDLOOP;
IF lastntstate # j THEN ERROR;
renumber the transition entries
FOR i IN [0..tlim) DO
IF ttab[i].item.tag=0 AND (j←renumber[ttab[i].item.pss])#0 THEN ttab[i].item.pss←j;
ENDLOOP;
FOR i IN [0..nlim) DO
IF ntab[i].item.tag=0 AND (j←renumber[ntab[i].item.pss])#0 THEN ntab[i].item.pss←j;
ENDLOOP;
FOR i IN [2..totalTokens-eofMark+1] DO
IF ntdefaults[i].item.tag=0 AND (j←renumber[ntdefaults[i].item.pss])#0 THEN
ntdefaults[i].item.pss←j;
ENDLOOP;
remove links and renumber states
FOR i IN [1..sLim) DO
k ← statedata[i].tLink;
IF k<0 THEN {
statedata[i].tLink ← statedata[-k].tLink;
statedata[i].tIndex ← statedata[-k].tIndex};
k ← statedata[i].ntLink;
IF k<0 THEN {
statedata[i].ntLink ← statedata[-k].ntLink;
statedata[i].ntIndex ← statedata[-k].ntIndex};
ENDLOOP;
FOR i IN [2..lastntstate] DO
IF (j←renumber[i])#0 THEN {
swaprec ← statedata[i]; statedata[i] ← statedata[j]; statedata[j] ← swaprec};
ENDLOOP;
IF flags[printLALR] THEN {
outstring["\nRenumbered States\n"];
FOR i IN [2..lastntstate] DO
IF renumber[i]#0 THEN {
outnum[i,4]; outstring[" swapped with"]; outnum[renumber[i],4]; outeol[1]};
ENDLOOP
}
};
OutModule: PUBLIC PROC[typename, modfname: Rope.ROPE, long: BOOL] = {
i,j,k: CARDINAL;
OutRange: PROC[id, cover: Rope.ROPE, ulim: CARDINAL] = {
outchar[' ,2]; outstring[id]; outstring[": TYPE = "]; outstring[cover];
outstring["[0.."]; outnum[ulim,3]; outstring["];\n"]};
OutArray: PROC[id, index, range: Rope.ROPE] = {
outchar[' ,2]; outstring[id]; outstring[": TYPE = ARRAY "];
outstring[index]; outstring[" OF "]; outstring[range];
outchar[';,1]; outeol[1]};
OutRefType: PROC[referent: Rope.ROPE, base: BOOLFALSE] = {
outchar[' ,2]; outstring[referent]; outstring["Ref: TYPE = "];
IF long THEN outstring["LONG "];
IF base THEN outstring["BASE "];
outstring["POINTER TO "]; outstring[referent];
outchar[';,1]; outeol[1]};
outstring["-- file "]; outstring[modfname];
outstring["\n-- created by PGS from "]; outstring[sourceName];
outstring[", "]; outtime[]; outeol[2];
outstring[typename]; outstring[": DEFINITIONS = {\n\n"];
OutRange["Symbol","",maxCharCode];
OutRange["TSymbol","Symbol",eofMark];
OutRange["NTSymbol","Symbol",totalTokens-eofMark+1];
outstring["\n-- token indices for the scanner and parser\n"];
FOR i IN [0..nextAlias) DO
outchar[' ,2]; k ← aliases[i].alias*tokenSize;
FOR j IN [k..k+tokenSize) WHILE symTab[j]#0c DO outchar[symTab[j],1] ENDLOOP;
outstring[": TSymbol ="]; outnum[aliases[i].terminal,3]; outchar[';,1]; outeol[1];
ENDLOOP;
symTab ← NIL; aliases ← NIL;
outstring["\n default"];
outstring["Marker: TSymbol = TSymbol"]; outstring[".FIRST;\n"];
outstring[" end"];
outstring["Marker: TSymbol = TSymbol"]; outstring[".LAST;\n\n"];
OutRange["HashIndex","",hashval]; OutRange["VIndex","",vocabspace-1];
outstring["
VocabHashEntry: TYPE = MACHINE DEPENDENT RECORD [
symbol(0: 0..7): TSymbol, -- symbol index
link(0: 8..15): HashIndex]; -- link to next entry\n\n"];
OutRange["State","",sLim-1];
OutRange["NTState","State",lastntstate];
OutRange["TIndex","",tlim-1];
OutRange["NTIndex","",nlim-1];
OutRange["Production","",numProd];
outstring["
ActionTag: TYPE = MACHINE DEPENDENT RECORD [
reduce(0: 0..0): BOOL,  -- TRUE iff reduce entry
pLength(0: 1..4): [0..15]]; -- number of symbols in production rhs
ActionEntry: TYPE = MACHINE DEPENDENT RECORD [
tag(0: 0..4): ActionTag,  -- [FALSE,0] if a shift entry
transition(0: 5..15): [0..2047]]; -- production number / next state"];
IF packedProdInfo THEN outstring["
ProductionInfo: TYPE = MACHINE DEPENDENT RECORD [
rule(0: 0..7): [0..256),  -- reduction rule
lhs(0: 8..15): NTSymbol]; -- production lhs symbol\n\n"]
ELSE outstring["
ProductionInfo: TYPE = MACHINE DEPENDENT RECORD [
rule(0): CARDINAL, -- reduction rule
lhs(1): Symbol];  -- production lhs symbol (NTSymbol)\n\n"];
OutArray["ScanTable","CHAR['\\040..'\\177]","TSymbol"];
OutArray["HashTable","HashIndex","VocabHashEntry"];
OutArray["IndexTable","TSymbol","CARDINAL"];
outstring[" Vocabulary: TYPE = MACHINE DEPENDENT RECORD [\n"];
outstring[" length(0), maxlength(1): CARDINAL,\n"];
outstring[" text(2): PACKED ARRAY VIndex OF CHAR];\n"];
OutArray["ProdData","Production","ProductionInfo"];
OutArray["NStarts","NTState","NTIndex"];
OutArray["NLengths","NTState","CARDINAL"];
OutArray["NSymbols","NTIndex","NTSymbol"];
OutArray["NActions","NTIndex","ActionEntry"];
OutArray["NTDefaults","NTSymbol","ActionEntry"];
OutArray["TStarts","State","TIndex"];
OutArray["TLengths","State","CARDINAL"];
OutArray["TSymbols","TIndex","TSymbol"];
OutArray["TActions","TIndex","ActionEntry"]; outeol[1];
outstring[" initial"];
outstring["State: State = "]; outnum[1,1]; outchar[';,1]; outeol[1];
outstring[" final"];
outstring["State: State = "]; outnum[0,1]; outchar[';,1]; outeol[1];
outstring["
Table: TYPE = MACHINE DEPENDENT RECORD [
scanTable: RECORD [
scanTab: TableRef RELATIVE POINTER TO ScanTable,
hashTab: TableRef RELATIVE POINTER TO HashTable,
vocabIndex: TableRef RELATIVE POINTER TO IndexTable,
vocabBody: TableRef RELATIVE POINTER TO Vocabulary],
parseTable: RECORD [
prodData: TableRef RELATIVE POINTER TO ProdData,
nStart: TableRef RELATIVE POINTER TO NStarts,
nLength: TableRef RELATIVE POINTER TO NLengths,
nSymbol: TableRef RELATIVE POINTER TO NSymbols,
nAction: TableRef RELATIVE POINTER TO NActions,
ntDefaults: TableRef RELATIVE POINTER TO NTDefaults,
tStart: TableRef RELATIVE POINTER TO TStarts,
tLength: TableRef RELATIVE POINTER TO TLengths,
tSymbol: TableRef RELATIVE POINTER TO TSymbols,
tAction: TableRef RELATIVE POINTER TO TActions]];\n\n"];
OutRefType["Table", TRUE];
OutRefType["ScanTable"]; OutRefType["HashTable"]; OutRefType["IndexTable"];
OutRefType["Vocabulary"];
OutRefType["ProdData"];
OutRefType["NStarts"]; OutRefType["NLengths"];
OutRefType["NSymbols"]; OutRefType["NActions"];
OutRefType["NTDefaults"];
OutRefType["TStarts"]; OutRefType["TLengths"];
OutRefType["TSymbols"]; OutRefType["TActions"];
outstring["\n }.\n"]};
IProc: TYPE = PROC[index: CARDINAL];
OutArray: PROC[llim, ulim: CARDINAL, p: IProc] = {
THROUGH [0..llim) DO outword[0] ENDLOOP;
FOR i: CARDINAL IN [llim..ulim] DO p[i] ENDLOOP};
ProdDataItem1: IProc = {
outword[prodInfo[index].rule*256+(prodInfo[index].lhs-eofMark+1)]};
ProdDataItem2: IProc = {
outword[prodInfo[index].rule]; outword[prodInfo[index].lhs-eofMark+1]};
NStateItem: IProc = {outword[statedata[index].ntIndex]};
NLenItem: IProc = {outword[statedata[index].ntLink]};
NSymItem: IProc = {outword[ntab[index].symbol]};
NActionItem: IProc = {Repack[ntab[index].item]};
NTDefaultItem: IProc = {Repack[ntdefaults[index].item]};
TStateItem: IProc = {outword[statedata[index].tIndex]};
TLenItem: IProc = {outword[statedata[index].tLink]};
TSymItem: IProc = {outword[ttab[index].symbol]};
TActionItem: IProc = {Repack[ttab[index].item]};
Repack: PROC[item: PGSTypes.ItemRec] = {
SELECT item.tag FROM
0 => outword[item.pss];
1 => outword[item.jf*2048+item.pss];
2 => outword[(16+item.jf)*2048+item.pss];
ENDCASE
};
}.