PGSFormat.mesa
Copyright © 1985 by Xerox Corporation. All rights reserved.
Satterthwaite, October 16, 1985 5:38:49 pm PDT
Maxwell, August 9, 1983 9:38 am
Wyatt, March 29, 1984 2:45:26 pm PST
Russ Atkinson (RRA) March 19, 1985 9:57:15 am PST
DIRECTORY
PGSConDefs: TYPE,
PGSTypes: TYPE,
Rope: TYPE USING [Fetch, FromProc, ROPE];
PGSFormat: PROGRAM
IMPORTS PGSConDefs, Rope
EXPORTS PGSConDefs = {
OPEN PGSConDefs;
shared global state (set by Format, used by PrintGrammar)
sInfIndex: CARDINAL;
aliasIndex, lastTerminal: CARDINAL;
symString, aliasText: PGSTypes.SymTab;
sInfo: PGSTypes.SInfo;
pInfo: PGSTypes.PInfo;
rhsText: PGSTypes.RhsChar;
grammar extraction and source rewriting
leftIndex, symIndex, nextProd, nextRhsChar, topSymbol: CARDINAL;
rule, nextRule: CARDINAL;
KeyWord: TYPE = {table, type, export, goal, terminals, aliases, productions};
text: Rope.ROPE = "TABLETYPEEXPORTSGOALTERMINALSALIASESPRODUCTIONS";
textKeys: ARRAY KeyWord OF RECORD[index, len: CARDINAL] =
[[0,5], [5,4], [9,7], [16,4], [20,9], [29,7], [36,11]];
Error: PROC = {
rhsText ← NIL;
sInfo ← NIL; pInfo ← NIL;
symString ← NIL; aliasText ← NIL;
ERROR PGSFail[]};
Directive: PROC[key: KeyWord] RETURNS[BOOL] = {
IF symIndex-leftIndex-1 # textKeys[key].len OR symString[symIndex-1] # ': THEN
RETURN[FALSE];
FOR i: CARDINAL IN [0..textKeys[key].len) DO
IF symString[leftIndex+i] # text.Fetch[textKeys[key].index+i] THEN RETURN[FALSE]
ENDLOOP;
RETURN[TRUE]};
GetItem: PROC RETURNS[Rope.ROPE] = {
i: NAT; p: SAFE PROC RETURNS[c: CHAR] ~ TRUSTED { c ← symString[i]; i ← i+1 };
GetText[]; i ← leftIndex;
RETURN[Rope.FromProc[symIndex-leftIndex, p]]};
hashChain: PGSTypes.HashHeadsRef ← NIL;
FindText: PROC RETURNS[CARDINAL] = {
h, i, j, k: CARDINAL;
h ← (256*(symIndex-leftIndex)+symString[leftIndex].ORD) MOD hashChain^.LENGTH;
j ← hashChain[h];
WHILE j#0 DO
IF symIndex-leftIndex = sInfo[j+1].symPtr-sInfo[j].symPtr THEN { -- same length
i ← sInfo[j].symPtr;
FOR k IN [leftIndex..symIndex) DO
IF symString[k]#symString[i] THEN EXIT;
i ← i+1;
REPEAT
FINISHED => {symIndex ← leftIndex; RETURN[j]};
ENDLOOP};
j ← sInfo[j].link;
ENDLOOP;
new symbol
sInfo[sInfIndex] ← [leftIndex,hashChain[h],0];
hashChain[h] ← sInfIndex; sInfIndex ← sInfIndex+1;
sInfo[sInfIndex].symPtr ← symIndex;
IF sInfIndex=sInfo.length THEN
sInfo ← ExpandSInfo[sInfo, sInfo.length/8];
RETURN[sInfIndex-1]};
Formatter: PROC RETURNS[tableId, typeId, exportId: Rope.ROPENIL] = {
DoTerminals: PROC ~ {
symIndex ← leftIndex;
DO
GetText[];
IF symString[symIndex-1] = ': AND (Directive[aliases] OR Directive[productions])
THEN EXIT;
[] ← FindText[];
ENDLOOP
};
DoAliases: PROC ~ {
append: PROC[char: CHAR] ~ {
IF aliasIndex=aliasText.length THEN
aliasText ← ExpandSymTab[aliasText, aliasText.length/8];
aliasText[aliasIndex] ← char; aliasIndex ← aliasIndex+1;
};
symIndex ← leftIndex;
DO
GetText[];
IF Directive[productions] THEN EXIT;
FOR i: CARDINAL IN [leftIndex..symIndex) DO append[symString[i]] ENDLOOP;
append[' ]; symIndex ← leftIndex;
GetText[];
FOR i: CARDINAL IN [leftIndex..symIndex) DO append[symString[i]] ENDLOOP;
append[' ]; symIndex ← leftIndex;
ENDLOOP;
};
DoProductions: PROC ~ {
chain: BOOL;
symIndex ← leftIndex; nextProd ← 1; nextRhsChar ← 0;
GetText[]; topSymbol ← FindText[];
DO -- exit from this loop on EndOfFIle, topSymbol distinguishes cases
GetText[];
IF symString[leftIndex] = ': AND symString[leftIndex+1] = ':
AND symString[leftIndex+2] = '= THEN {
i, oldi: CARDINAL ← 0;
IF symIndex-leftIndex=4 AND symString[leftIndex+3]='C THEN chain ← TRUE
ELSE IF symIndex-leftIndex=3 THEN chain ← FALSE ELSE GOTO notlhs;
symIndex ← leftIndex;
pInfo[nextProd] ← [rule,chain,0,nextRhsChar];
IF (i ← sInfo[topSymbol].lhsHead)=0 THEN sInfo[topSymbol].lhsHead ← nextProd ELSE {
WHILE i#0 DO oldi ← i; i ← pInfo[i].link ENDLOOP;
pInfo[oldi].link ← nextProd};
nextProd ← nextProd+1;
IF nextProd=pInfo.length THEN
pInfo ← ExpandPInfo[pInfo, pInfo.length/8];
topSymbol ← 0; GetText[]; topSymbol ← FindText[];
LOOP
EXITS notlhs => NULL};
rhsText[nextRhsChar] ← topSymbol; nextRhsChar ← nextRhsChar+1;
IF nextRhsChar = rhsText.length THEN
rhsText ← ExpandRhsChar[rhsText, rhsText.length/8];
topSymbol ← FindText[];
ENDLOOP
};
outstring["-- file "]; outstring[sourceName];
outstring[" rewritten by PGS, "]; outtime[]; outeol[1];
ScanInit[];
DO
GetText[];
IF Directive[$table] THEN tableId ← GetItem[]
ELSE IF Directive[$type] THEN typeId ← GetItem[]
ELSE IF Directive[$export] THEN exportId ← GetItem[]
ELSE EXIT;
ENDLOOP;
IF Directive[$goal] THEN {symIndex ← 0; GetText[]; sInfIndex ← 1; [] ← FindText[]}
ELSE Error[];
GetText[];
IF Directive[$terminals] THEN DoTerminals[];
lastTerminal ← sInfIndex-1;
aliasIndex ← 0;
IF Directive[$aliases] THEN DoAliases[];
IF Directive[$productions] THEN DoProductions[ ! EndOfFile => CONTINUE]
ELSE Error[]};
Format: PUBLIC PROC RETURNS[table, type, export: Rope.ROPE] = {
nextRule ← 0; symIndex ← 0;
hashChain ← NEW[PGSTypes.HashHeads ← ALL[0]];
rhsText ← MakeRhsChar[maxRhsSymbols+1];
sInfo ← MakeSInfo[symTabSize+1];
pInfo ← MakePInfo[maxProd+1];
symString ← MakeSymTab[500];
aliasText ← MakeSymTab[100];
[tableId: table, typeId: type, exportId: export] ← Formatter[! UNWIND => hashChain ← NIL];
hashChain ← NIL;
IF topSymbol#0 THEN {
rhsText[nextRhsChar] ← topSymbol; nextRhsChar ← nextRhsChar+1};
sInfo[sInfIndex].symPtr ← symIndex;
pInfo[nextProd].rhsPtr ← nextRhsChar};
text input routines
char: CHAR; -- current (most recently scanned) character
EndOfFile: SIGNAL = CODE;
GetText: PROC = {
c: CHAR;
WHILE char IN ['\000..' ] DO
IF char='\032--^Z-- THEN
WHILE char#'\n DO outchar[char,1]; NextChar[]; ENDLOOP;
outchar[char,1];
IF char='\n THEN {
WHILE char IN ['\000..' ] DO NextChar[]; outchar[char,1] ENDLOOP;
c ← char; NextChar[]; outchar[char,1];
IF c#char OR c#'- THEN {NextChar[]; FindHeader[]}};
NextChar[];
ENDLOOP;
leftIndex ← symIndex;
WHILE char NOT IN ['\000..' ] DO
outchar[char,1];
IF symIndex=symString.length THEN
symString ← ExpandSymTab[symString, symString.length/8];
symString[symIndex] ← char; symIndex ← symIndex+1; NextChar[];
ENDLOOP
};
FindHeader: PROC = {
bIndex, i, k: CARDINAL;
maxLength: CARDINAL = 2000;
buffer: REF TEXTNEW[TEXT[maxLength]]; -- line assembly area
BufferOverflow: ERROR = CODE;
PutChar: PROC = {
IF bIndex = maxLength THEN ERROR BufferOverflow;
buffer[bIndex] ← char; bIndex ← bIndex+1; NextChar[]};
DO {
bIndex ← 0;
WHILE char IN ['\000..' ] DO
IF char = '\n OR char = '\032 --^Z-- THEN GOTO copyline; PutChar[];
ENDLOOP;
IF char NOT IN ['0..'9] AND char # query THEN GOTO copyline; PutChar[];
WHILE char IN ['0..'9] DO PutChar[] ENDLOOP;
WHILE char IN ['\000..' ] DO
IF char='\n OR char='\032 --^Z-- THEN GOTO copyline; PutChar[];
ENDLOOP;
IF char # '= THEN GOTO copyline; PutChar[];
IF char # '> THEN GOTO copyline; PutChar[];
WHILE char IN ['\000..' ]
DO IF char='\n OR char='\032 --^Z-- THEN GOTO copyline; PutChar[];
ENDLOOP;
IF char # '- THEN GOTO copyline; PutChar[];
IF char # '- THEN GOTO copyline;
FOR i ← bIndex-1, i-1 WHILE buffer[i] # '= DO NULL ENDLOOP;
k ← 0;
FOR j: CARDINAL IN [0..i) DO k ← IF buffer[j] = '\t THEN k+8 ELSE k+1 ENDLOOP;
outnum[nextRule,k-2]; rule ← nextRule; nextRule ← nextRule+1;
outchar[' ,2]; FOR j: CARDINAL IN [i..bIndex) DO outchar[buffer[j],1] ENDLOOP;
outchar['-,1];
RETURN
EXITS
copyline => {
FOR i: CARDINAL IN [0..bIndex) DO outchar[buffer[i],1] ENDLOOP;
outchar[char,1];
WHILE char # '\n DO NextChar[]; outchar[char,1] ENDLOOP;
NextChar[]}};
ENDLOOP
};
NextChar: PROC = {
ended: BOOL;
[char, ended] ← inchar[];
IF ended THEN SIGNAL EndOfFile[]};
ScanInit: PROC = {
[char, ] ← inchar[];
FindHeader[]; NextChar[]};
grammar output
PrintGrammar: PUBLIC PROC = {
i, p, s, listIndex: CARDINAL;
list: PGSTypes.SInfo;
PrintToken: PROC[i: CARDINAL] RETURNS[length: CARDINAL𡤀] = {
FOR j: CARDINAL IN [sInfo[i].symPtr..sInfo[i+1].symPtr) DO
outchar[symString[j],1]; length ← length+1 ENDLOOP;
RETURN};
PrintSymbol: PROC[i: CARDINAL] = {
outnum[s, 3]; s ← s+1; outchar[' , 2]; [] ← PrintToken[i]; outeol[1]};
PrintProd: PROC[i, p: CARDINAL, first: BOOL] = {
outnum[s,3]; s ← s+1;
outstring[IF pInfo[p].chain THEN " C " ELSE " "];
outnum[pInfo[p].rule,3]; outchar[' ,2];
outchar[' ,tokenSize-(IF first THEN PrintToken[i] ELSE 0)];
outstring[IF first THEN " ::= " ELSE " | "];
FOR j: CARDINAL IN [pInfo[p].rhsPtr..pInfo[p+1].rhsPtr) DO
[] ← PrintToken[rhsText[j]]; outchar[' , 1] ENDLOOP;
outeol[1]};
outstring["-- grammar extracted from "]; outstring[sourceName];
outstring[" by PGS, "]; outtime[]; outeol[2];
outstring["||CHAIN ||LISTS\n\n"];
outstring["||TABLE1\n"]; s ← 1;
IF lastTerminal=1 THEN
FOR i IN [2..sInfIndex) DO IF sInfo[i].lhsHead=0 THEN PrintSymbol[i] ENDLOOP
ELSE
FOR i IN [2..lastTerminal] DO PrintSymbol[i] ENDLOOP;
outnum[s, 3]; s ← s+1; outstring[" eof\n\n"];
outstring["||TABLE2\n"];
PrintSymbol[1]; p ← 1;
FOR i IN (lastTerminal..sInfIndex) DO
IF sInfo[i].lhsHead#0 THEN {PrintSymbol[i]; p ← p+1} ENDLOOP;
IF aliasIndex # 0 THEN {
state: {init, id1, sp, id2} ← init;
nc: CARDINAL ← 0;
outstring["\n\n||TABLE3\n"];
FOR i IN [0..aliasIndex) DO
c: CHAR = aliasText[i];
IF c # ' THEN {
outchar[c, 1]; nc ← nc+1;
state ← SELECT state FROM
init => id1, sp => id2, ENDCASE => state}
ELSE SELECT state FROM
id1 => {outchar[' , tokenSize-nc]; nc ← 0; state ← sp};
id2 => {outeol[1]; nc ← 0; state ← init};
ENDCASE;
ENDLOOP;
IF state # init THEN outeol[1]};
outstring["\n\n||TABLE4\n\n"]; s ← 1;
list ← MakeSInfo[p];
p ← sInfo[1].lhsHead; list[0] ← [1,pInfo[p].rule,p]; listIndex ← 1;
FOR i IN (lastTerminal..sInfIndex) DO
IF (p ← sInfo[i].lhsHead)#0 THEN {
list[listIndex] ← [i,pInfo[p].rule,p]; listIndex ← listIndex+1};
ENDLOOP;
FOR i DECREASING IN [0..list.length) DO
noSwap: BOOLTRUE;
FOR p IN [0..i) DO
IF list[p].link>list[p+1].link THEN {
t: PGSTypes.SInfoRec ← list[p];
list[p] ← list[p+1]; list[p+1] ← t; noSwap ← FALSE};
ENDLOOP;
IF noSwap THEN EXIT;
ENDLOOP;
FOR i IN [0..list.length) DO
p ← list[i].lhsHead; PrintProd[list[i].symPtr,p,TRUE]; p ← pInfo[p].link;
WHILE p#0 DO PrintProd[0,p,FALSE]; p ← pInfo[p].link ENDLOOP;
outeol[1];
ENDLOOP;
list ← NIL; rhsText ← NIL;
sInfo ← NIL; pInfo ← NIL;
symString ← NIL; aliasText ← NIL};
}.