Compress.mesa
Copyright Ó 1987 by Xerox Corporation. All rights reserved.
Heavily stolen from Rick Barth who acquired it from Ed Fiala
Created by: Gasbarro December 7, 1987 3:28:22 pm PST
Last Edited by: Gasbarro December 14, 1987 9:40:15 am PST
DIRECTORY Basics, CommandTool, FS, IO, Rope, TerminalIO, Real, Process;
Compress: CEDAR PROGRAM
IMPORTS Basics, CommandTool, FS, IO, TerminalIO, Real, Process
~ BEGIN
debug: BOOLFALSE;
statistics: BOOLFALSE;
CompressStreamData: TYPE = REF CompressStreamRec;
CompressStreamRec: TYPE = RECORD [
copyCmds, litCmds: ARRAY [0..MAX[maxLitLength, maxCopyLength]) OF CARD,
windowSize: NAT ← 10,
bytesIn, bytesOut, max: NAT ← 0,
min: NATLAST[NAT],
inputStream: IO.STREAMNIL,
outputStream: IO.STREAMNIL,
validBits: MatchBits ← NIL,
matchBits: MatchBits ← NIL,
history: Vectors ← NIL,
outState: INTEGER ← 0, -- number of bits in partial output byte.
outRemainder: CARDINAL ← 0, -- remaining bits in [8..8+outState).
nextInsert: NAT ← 0, -- buffer position at which next input vector goes.
litPos: NAT ← 0, -- buffer position at which literal in progress begins.
matchLen: NAT ← 0, -- number of vectors matched previously.
mostRecentMatchPos: NAT]; -- last vector for match of length matchLen
Vector: TYPE = CARDINAL;
MatchBits: TYPE = REF MatchBitsRec;
MatchBitsRec: TYPE = RECORD [
bits: SEQUENCE size: NAT OF BOOL];
Vectors: TYPE = REF VectorsRec;
VectorsRec: TYPE = RECORD [
v: SEQUENCE size: NAT OF Vector];
bufferSize: NAT ← 64;
maxLitLength: NAT = 64;
maxCopyLength: NAT = 15;
minCopyLength: NAT = 1;
MaskArraySize: TYPE = [0..9);
masks: ARRAY MaskArraySize OF CARD ← [0FFFFh, 0FF00h, 00FFh, 00F0Fh, 0F0F0h, 0CCCCh, 03333h, 0AAAAh, 05555h];
RepeatMask: PROC [inFile, outFile: Rope.ROPE] ~ {
TerminalIO.PutF["\n\nFile: %g", IO.rope[inFile]];
FOR m: MaskArraySize IN MaskArraySize DO
Compress [inFile, outFile, masks[m]];
ENDLOOP;
};
Compress: PROC [inFile, outFile: Rope.ROPE, mask: CARD ← 0FFFFh] ~ {
PutVector: PROC [vector: Vector] = {
match: BOOLFALSE;
IF (match ← MatchVector[vector, h.matchLen=0]) AND (h.matchLen < maxCopyLength) THEN {
WriteVector[vector];
h.matchLen ← h.matchLen + 1;
IF h.matchLen = minCopyLength THEN { --min copy found, flush literal if needed
litLen: NAT ← LiteralLength[];
IF litLen > minCopyLength THEN OutputLiteral[h, litLen-minCopyLength];
};
}
ELSE { --no match or max copy length exceeded
IF h.matchLen = 0 THEN { --literal in progress
WriteVector[vector];
CheckMaxLiteral[];
} ELSE { --literal in progress or end of copy
IF h.matchLen < minCopyLength THEN CheckMaxLiteral[] ELSE {
IF h.matchLen=maxCopyLength AND match THEN h.mostRecentMatchPos ← (h.mostRecentMatchPos + bufferSize - 1) MOD bufferSize;
OutputCopy[h];
};
h.matchLen ← 0;
match ← MatchVector[vector, TRUE];
WriteVector[vector];
IF match THEN h.matchLen ← 1
ELSE CheckMaxLiteral[];
};
};
};
CheckMaxLiteral: PROC = {
litLen: NAT ← LiteralLength[];
IF litLen = maxLitLength THEN OutputLiteral[h, litLen];
};
WriteVector: PROC [vector: Vector] = {
h.history[h.nextInsert] ← vector;
h.validBits[h.nextInsert] ← TRUE;
h.nextInsert ← (h.nextInsert + 1) MOD bufferSize;
};
MatchVector: PROC [vector: Vector, ignore: BOOL] RETURNS [match: BOOLFALSE] = {
address: NAT ← h.nextInsert;
previousMatch: BOOL ← h.matchBits[h.nextInsert];
DO
nextMatch: BOOL;
address ← (address + 1) MOD bufferSize;
nextMatch ← h.matchBits[address];
h.matchBits[address] ← vector=h.history[address] AND (previousMatch OR ignore) AND h.validBits[address];
IF address=h.nextInsert THEN EXIT;
previousMatch ← nextMatch;
ENDLOOP;
DO
IF h.matchBits[address] THEN {
match ← TRUE;
h.mostRecentMatchPos ← address;
EXIT;
};
address ← IF address=0 THEN bufferSize-1 ELSE address - 1;
IF address=h.nextInsert THEN EXIT;
ENDLOOP;
};
LiteralLength: PROC RETURNS [litLen: NAT] = {
litLen ← LOOPHOLE[h.nextInsert - h.litPos, CARDINAL] MOD bufferSize;
};
Flush: PROC = {
IF h.matchLen < minCopyLength THEN { -- Make a literal of all the characters
litLen: NAT ← LiteralLength[];
IF litLen # 0 THEN OutputLiteral[h, litLen];
}
ELSE OutputCopy[h]; -- Make a copy (can't be any literal)
h.matchLen ← 0;
IF h.outState # 0 THEN ERROR;
};
copies, literals: NAT ← 0;
h: CompressStreamData ← NEW[CompressStreamRec];
vector: Vector;
h.inputStream ← FS.StreamOpen[fileName: inFile, accessOptions: $read, wDir: CommandTool.CurrentWorkingDirectory[], streamOptions: FS.binaryStreamOptions];
h.outputStream ← FS.StreamOpen[fileName: outFile, accessOptions: $create, wDir: CommandTool.CurrentWorkingDirectory[], streamOptions: FS.binaryStreamOptions];
h.validBits ← NEW[MatchBitsRec[bufferSize]];
h.matchBits ← NEW[MatchBitsRec[bufferSize]];
h.history ← NEW[VectorsRec[bufferSize]];
h.litCmds ← ALL[0];
h.copyCmds ← ALL[0];
FOR i: NAT IN [0..bufferSize) DO
h.validBits[i] ← FALSE;
ENDLOOP;
DO
vector ← LOOPHOLE[IO.GetHWord[h.inputStream ! IO.EndOfStream => EXIT]];
h.bytesIn ← h.bytesIn+1;
PutVector[LOOPHOLE[Basics.BITAND[LOOPHOLE[vector], mask]]];
Process.CheckForAbort[];
ENDLOOP;
Flush[];
IF statistics THEN TerminalIO.PutF["\nFile: %g, Window: %g, Min: %g, Max %g", IO.rope[inFile], IO.card[h.windowSize], IO.card[h.min], IO.card[h.max]];
FOR i: NAT IN [0..MAX[maxLitLength, maxCopyLength]) DO
copies ← copies + h.copyCmds[i];
literals ← literals + h.litCmds[i];
ENDLOOP;
IF statistics THEN TerminalIO.PutF["\n\nFile: %g", IO.rope[inFile]];
TerminalIO.PutF["\n Mask: %04x, Compression: %5.3f, Copies: %4g, Literals: %4g", IO.card[mask], IO.real[Real.Float[IO.GetIndex[h.outputStream]] / (2.0 * Real.Float[IO.GetIndex[h.inputStream]])], IO.card[copies], IO.card[literals]];
IF statistics THEN FOR i: NAT IN [1..MAX[maxLitLength, maxCopyLength]) DO
IF h.copyCmds[i]#0 OR h.litCmds[i]#0 THEN TerminalIO.PutF["\n Copy[%2g]: %4g, Lit[%2g]: %4g", IO.card[i], IO.card[h.copyCmds[i]], IO.card[i], IO.card[h.litCmds[i]]];
ENDLOOP;
IO.Close[h.outputStream];
IO.Close[h.inputStream];
};
OutputLiteral: PROC [h: CompressStreamData, litLen: NAT] = {
litPos: NAT ← h.litPos;
Output[h, 0, 6]; -- pad
Output[h, 0, 4]; -- cmd
Output[h, litLen, 6]; -- length
IF debug THEN TerminalIO.PutF["\nLit: '"];
FOR i: NAT IN [0..litLen) DO
vector: Vector ← h.history[(litPos + i) MOD bufferSize];
Output[h, 0, 6]; -- pad
Output[h, 0, 4]; -- ctl bits
Output[h, Basics.BITSHIFT[vector, -10], 6]; -- most significant bits
Output[h, 0, 6]; -- pad
Output[h, vector, 10]; -- least significant bits
IF debug THEN TerminalIO.PutF["%x ", IO.card[vector]];
ENDLOOP;
IF debug THEN TerminalIO.PutRope["'"];
h.litPos ← h.nextInsert;
h.litCmds[litLen] ← h.litCmds[litLen]+1;
};
OutputCopy: PROC [h: CompressStreamData] = {
outPos: NATLOOPHOLE[h.mostRecentMatchPos - h.matchLen + 1, CARDINAL] MOD bufferSize;
Output[h, 0, 6]; -- pad
Output[h, h.matchLen, 4]; -- length
Output[h, outPos, 6]; -- position
h.litPos ← h.nextInsert;
IF debug THEN TerminalIO.PutF["\nCopy l:%g, p: %g", IO.card[h.matchLen], IO.card[outPos]];
h.copyCmds[h.matchLen] ← h.copyCmds[h.matchLen]+1;
};
IncCount: PROC [h: CompressStreamData] ~ {
h.bytesOut ← h.bytesOut+1;
IF h.bytesOut = h.windowSize THEN {
IF h.bytesIn<h.min THEN h.min ← h.bytesIn;
IF h.bytesIn>h.max THEN h.max ← h.bytesIn;
h.bytesIn ← h.bytesOut ← 0;
};
};
Output: PROC [h: CompressStreamData, code, nbits: CARDINAL] ~ {
code ← Basics.BITAND[code, Basics.BITNOT[Basics.BITSHIFT[0FFFFh, nbits]]];
IF (h.outState ← nbits + h.outState - 8) >= 0 THEN { --No. bits left over if 1 byte is output
IO.PutChar[h.outputStream, VAL[Basics.BITSHIFT[code, - h.outState] + h.outRemainder]];
IncCount[h];
code ← Basics.BITSHIFT[code, 16 - h.outState]; --left-justify residual bits
IF (h.outState ← h.outState - 8) >= 0 THEN {
IO.PutChar[h.outputStream, VAL[Basics.BITSHIFT[code, - 8]]];
IncCount[h];
code ← Basics.BITSHIFT[code, 8]; --left-justify residual bits
}
ELSE h.outState ← h.outState + 8;
h.outRemainder ← Basics.BITSHIFT[code, - 8];
}
ELSE {
h.outRemainder ← h.outRemainder + Basics.BITSHIFT[code, - h.outState];
h.outState ← h.outState + 8;
};
};
END.