StringCompactor.mesa
Copyright © 1985 by Xerox Corporation. All rights reserved.
Satterthwaite, November 2, 1982 11:51 am
Maxwell, August 10, 1983 1:03 pm
Doug Wyatt, April 2, 1984 2:30:44 pm PST
Russ Atkinson (RRA) March 19, 1985 10:05:56 am PST
DIRECTORY
Basics USING [bytesPerWord, charsPerWord],
FS USING [StreamOpen],
IO USING [card, char, Close, EndOf, GetChar, GetIndex, Put, PutChar, PutRope, rope, SetIndex, STREAM, UnsafePutBlock],
Rope USING [Fetch, Length, ROPE],
TableCommand USING [CreateBCDStream, FinishBcdStream];
StringCompactor:
PROGRAM
IMPORTS FS, IO, Rope, TableCommand
EXPORTS TableCommand = {
charsPerWord: CARDINAL = Basics.charsPerWord;
StreamIndex: TYPE = INT; -- FileStream.FileByteIndex
SyntaxError: ERROR = CODE;
CompStrDesc: TYPE = RECORD [offset, length: CARDINAL];
nArrays: CARDINAL;
nStrings: CARDINAL;
nChars: CARDINAL;
nWords: CARDINAL;
in: IO.STREAM;
SLptr: TYPE = REF SL;
SL:
TYPE =
RECORD [
link: SLptr ← NIL,
startIndex: StreamIndex ← 0,
length: CARDINAL ← 0];
ALptr: TYPE = REF AL;
AL:
TYPE =
RECORD [
link: ALptr ← NIL,
name: NL ← [0, 0],
ARRAYindex: StreamIndex ← 0,
needsIndexDef: BOOL ← FALSE,
headSL, tailSL: SLptr ← NIL,
nstrings: CARDINAL ← 0];
NL: TYPE = RECORD [startIndex: StreamIndex, length: CARDINAL];
BackUp:
PROC [s:
IO.
STREAM] = {
IO.SetIndex[s, IO.GetIndex[s] - 1]};
NextString:
PROC [s: SLptr]
RETURNS [found:
BOOL ←
TRUE] = {
nc: CARDINAL ← 0;
quoteFound, collectingChars: BOOL ← FALSE;
DO
c: CHAR;
IF IO.EndOf[in] THEN ERROR SyntaxError;
c ← in.GetChar[];
IF c = '; AND ~collectingChars THEN RETURN [FALSE];
IF c = '"
THEN
IF quoteFound
THEN
IF collectingChars THEN {quoteFound ← FALSE; nc ← nc+1} ELSE ERROR
ELSE
IF collectingChars THEN quoteFound ← TRUE
ELSE {s.startIndex ← IO.GetIndex[in]; collectingChars ← TRUE}
ELSE
IF quoteFound THEN {s.length ← nc; BackUp[in]; EXIT}
ELSE IF collectingChars THEN nc ← nc+1;
ENDLOOP;
nStrings ← nStrings+1;
nChars ← nChars + nc; nWords ← nWords + StringBody[nc].SIZE};
lastCR: StreamIndex;
AllDone: ERROR = CODE;
NextItem:
PROC [a: ALptr] = {
nc: CARDINAL ← 0;
state: {
start, aRray, arRay, arrAy, arraY,
sTring, stRing, strIng, striNg, strinG, Of, oF, end} ← $start;
array: BOOL;
DO
c: CHAR;
IF in.EndOf[] THEN ERROR AllDone;
c ← in.GetChar[]; nc ← nc+1;
SELECT c
FROM
'A =>
state ←
SELECT state
FROM
$start => $aRray,
$arrAy => $arraY,
$stRing => $strIng,
ENDCASE => $start;
'R =>
state ←
SELECT state
FROM
$aRray => $arRay,
$arRay => $arrAy,
$stRing => $strIng,
ENDCASE => $start;
'Y =>
IF state = $arraY
THEN {
array ← TRUE; a.ARRAYindex ← IO.GetIndex[in]; state ← $end}
ELSE state ← $start;
'S =>
IF state = $start THEN {a.name.length ← nc-1; state ← $sTring} ELSE state ← $start;
'T => state ← IF state = $sTring THEN $stRing ELSE $start;
'I => state ← IF state = $strIng THEN $striNg ELSE $start;
'N => state ← IF state = $striNg THEN $strinG ELSE $start;
'G => IF state = $strinG THEN {array ← FALSE; state ← $end} ELSE state ← $start;
'\n => {
IF state = $end THEN EXIT;
lastCR ← IO.GetIndex[in]; nc ← 0; state ← $start};
IN ['\000..' ] => IF state = $end THEN EXIT ELSE state ← $start;
ENDCASE => state ← $start;
ENDLOOP;
a.name.startIndex ← lastCR; a.needsIndexDef ← array;
IF array
THEN {
state ← $Of;
DO
c: CHAR;
IF IO.EndOf[in] THEN ERROR SyntaxError;
c ← in.GetChar[]; nc ← nc+1;
SELECT c
FROM
IN ['\000..' ] =>
SELECT state
FROM
$start => state ← $Of;
$Of => NULL;
$end => EXIT;
ENDCASE => state ← $start;
'O => state ← IF state = $Of THEN $oF ELSE $start;
'F => state ← IF state = $oF THEN $end ELSE $start;
ENDCASE => {a.needsIndexDef ← FALSE; state ← $start};
ENDLOOP;
a.name.length ← nc};
CollectStrings[a];
IF array THEN nArrays ← nArrays + 1};
headAL, tailAL: ALptr;
CollectStrings:
PROC [a: ALptr] = {
s: SLptr;
oldnStrings: CARDINAL = nStrings;
a.headSL ← a.tailSL ← NIL;
WHILE NextString[s ← NEW[SL]] DO AppendSL[a, s] ENDLOOP;
a.nstrings ← nStrings - oldnStrings};
CollectArrays:
PROC = {
a: ALptr;
headAL ← tailAL ← NIL;
nArrays ← nStrings ← nChars ← nWords ← 0;
lastCR ← 0;
DO
NextItem[a ← NEW[AL] ! AllDone => {EXIT}];
AppendAL[a];
ENDLOOP};
AppendSL:
PROC [a: ALptr, s: SLptr] = {
IF a.tailSL = NIL THEN a.headSL ← s ELSE a.tailSL.link ← s;
a.tailSL ← s};
AppendAL:
PROC [a: ALptr] = {
IF tailAL = NIL THEN headAL ← a ELSE tailAL.link ← a;
tailAL ← a};
OutStrings:
PROC [out:
IO.
STREAM, compact:
BOOL] = {
charPos: CARDINAL;
buffer: PACKED ARRAY [0..charsPerWord) OF CHAR;
byte: [0 .. charsPerWord] ← 0;
FlushBuffer: PROC = {UNTIL byte = 0 DO PutChar['\000] ENDLOOP};
PutChar:
PROC [c:
CHAR] = {
buffer[byte] ← c;
IF (byte ← byte+1) = charsPerWord
THEN {
FOR i: [0..charsPerWord)
IN [0..charsPerWord)
DO
out.PutChar[buffer[i]] ENDLOOP;
byte ← 0}};
PutWord:
PROC [stream:
IO.
STREAM, word:
WORD] = {
base: LONG POINTER ~ @word;
stream.UnsafePutBlock[[base: base, count: Basics.bytesPerWord]]
};
IF compact THEN {PutWord[out, nStrings*CompStrDesc.SIZE+1]; charPos ← 0}
ELSE {PutWord[out, nStrings]; charPos ← (nStrings+1)*charsPerWord};
FOR a: ALptr ← headAL, a.link
UNTIL a =
NIL
DO
FOR s: SLptr ← a.headSL, s.link
UNTIL s =
NIL
DO
IF compact
THEN {
PutWord[out, charPos]; PutWord[out, s.length];
charPos ← charPos + s.length}
ELSE {
PutWord[out, charPos/charsPerWord];
charPos ← charPos + StringBody[s.length].SIZE*charsPerWord};
ENDLOOP;
ENDLOOP;
IF compact THEN {PutWord[out, nChars]; PutWord[out, nChars]};
FOR a: ALptr ← headAL, a.link
UNTIL a =
NIL
DO
s: SLptr ← a.headSL;
UNTIL s =
NIL
DO
nextS: SLptr = s.link;
IF ~compact THEN {PutWord[out, s.length]; PutWord[out, s.length]};
IO.SetIndex[in, s.startIndex];
FOR i:
CARDINAL
IN [0 .. s.length)
DO
c: CHAR = in.GetChar[];
PutChar[IF c # '" THEN c ELSE in.GetChar[]];
ENDLOOP;
IF ~compact THEN FlushBuffer[];
s ← nextS;
ENDLOOP;
ENDLOOP;
FlushBuffer[]};
OutRecordDecl:
PROC [
out: IO.STREAM, formatId: Rope.ROPE, compact, altoCode: BOOL] = {
OPEN IO;
a: ALptr ← headAL;
FOR i:
INT
IN [0..formatId.Length[])
DO
char: CHAR = formatId.Fetch[i];
IF char = '. THEN EXIT; out.PutChar[char];
ENDLOOP;
out.PutRope[": DEFINITIONS = {
CSRptr:
TYPE = "];
IF ~altoCode THEN out.PutRope["LONG "];
out.PutRope["BASE POINTER TO CompStrRecord;"];
out.PutRope[IF compact THEN "
CompStrDesc: TYPE = RECORD [offset, length: CARDINAL];
CompStrRecord:
TYPE =
RECORD [
stringOffset: CSRptr RELATIVE POINTER TO StringBody,\n"
ELSE "
StringOffset: TYPE = CSRptr RELATIVE POINTER TO StringBody;
StringRecord:
TYPE =
RECORD [
nStrings: CARDINAL,\n"];
a ← headAL;
DO
nextA: ALptr = a.link;
nonBlank: BOOL ← FALSE;
IO.SetIndex[in, a.name.startIndex];
out.PutRope[" "];
FOR i:
CARDINAL
IN [0..a.name.length)
DO
c: CHAR;
IF a.needsIndexDef
AND
IO.GetIndex[in] = a.ARRAYindex
THEN out.Put[IO.rope[" [0.."], IO.card[a.nstrings], IO.char[')]];
c ← in.GetChar[];
IF nonBlank OR c # ' THEN {out.PutChar[c]; nonBlank ← TRUE};
ENDLOOP;
out.PutRope[IF compact THEN "CompStrDesc" ELSE "StringOffset"];
IF (a ← nextA) = NIL THEN EXIT;
out.PutChar[',]; out.PutChar['\n];
ENDLOOP;
out.PutRope["];\n\n }.\n"]};
CompileStrings:
PUBLIC
PROC [
inputFile: Rope.ROPE, -- the source file
interfaceId: Rope.ROPE, -- exported interface or "SELF"
formatId: Rope.ROPE, -- ASCII record declaration
moduleId: Rope.ROPE, -- output file
compact, altoCode: BOOL]
RETURNS [CARDINAL, CARDINAL] = {
sStream, rStream: IO.STREAM;
in ← FS.StreamOpen[inputFile, $read];
CollectArrays[];
sStream ← TableCommand.CreateBCDStream[
in: in, modId: moduleId,
interfaceId: interfaceId,
altoCode: altoCode];
OutStrings[sStream, compact];
TableCommand.FinishBcdStream[]; IO.Close[sStream];
rStream ← FS.StreamOpen[formatId, $create];
OutRecordDecl[rStream, formatId, compact, altoCode]; IO.Close[rStream];
IO.Close[in];
RETURN [nStrings, nChars]
};
}.