<> <> <> <> <> DIRECTORY Basics, CommandTool, FS, IO, Rope, TerminalIO, Real, Process; Compress: CEDAR PROGRAM IMPORTS Basics, CommandTool, FS, IO, TerminalIO, Real, Process ~ BEGIN debug: BOOL _ FALSE; statistics: BOOL _ FALSE; CompressStreamData: TYPE = REF CompressStreamRec; CompressStreamRec: TYPE = RECORD [ copyCmds, litCmds: ARRAY [0..MAX[maxLitLength, maxCopyLength]) OF CARD, windowSize: NAT _ 10, bytesIn, bytesOut, max: NAT _ 0, min: NAT _ LAST[NAT], inputStream: IO.STREAM _ NIL, outputStream: IO.STREAM _ NIL, validBits: MatchBits _ NIL, matchBits: MatchBits _ NIL, history: Vectors _ NIL, outState: INTEGER _ 0, -- number of bits in partial output byte. outRemainder: CARDINAL _ 0, -- remaining bits in [8..8+outState). nextInsert: NAT _ 0, -- buffer position at which next input vector goes. litPos: NAT _ 0, -- buffer position at which literal in progress begins. matchLen: NAT _ 0, -- number of vectors matched previously. mostRecentMatchPos: NAT]; -- last vector for match of length matchLen Vector: TYPE = CARDINAL; MatchBits: TYPE = REF MatchBitsRec; MatchBitsRec: TYPE = RECORD [ bits: SEQUENCE size: NAT OF BOOL]; Vectors: TYPE = REF VectorsRec; VectorsRec: TYPE = RECORD [ v: SEQUENCE size: NAT OF Vector]; bufferSize: NAT _ 64; maxLitLength: NAT = 64; maxCopyLength: NAT = 15; minCopyLength: NAT = 1; MaskArraySize: TYPE = [0..9); masks: ARRAY MaskArraySize OF CARD _ [0FFFFh, 0FF00h, 00FFh, 00F0Fh, 0F0F0h, 0CCCCh, 03333h, 0AAAAh, 05555h]; RepeatMask: PROC [inFile, outFile: Rope.ROPE] ~ { TerminalIO.PutF["\n\nFile: %g", IO.rope[inFile]]; FOR m: MaskArraySize IN MaskArraySize DO Compress [inFile, outFile, masks[m]]; ENDLOOP; }; Compress: PROC [inFile, outFile: Rope.ROPE, mask: CARD _ 0FFFFh] ~ { PutVector: PROC [vector: Vector] = { match: BOOL _ FALSE; IF (match _ MatchVector[vector, h.matchLen=0]) AND (h.matchLen < maxCopyLength) THEN { WriteVector[vector]; h.matchLen _ h.matchLen + 1; IF h.matchLen = minCopyLength THEN { --min copy found, flush literal if needed litLen: NAT _ LiteralLength[]; IF litLen > minCopyLength THEN OutputLiteral[h, litLen-minCopyLength]; }; } ELSE { --no match or max copy length exceeded IF h.matchLen = 0 THEN { --literal in progress WriteVector[vector]; CheckMaxLiteral[]; } ELSE { --literal in progress or end of copy IF h.matchLen < minCopyLength THEN CheckMaxLiteral[] ELSE { IF h.matchLen=maxCopyLength AND match THEN h.mostRecentMatchPos _ (h.mostRecentMatchPos + bufferSize - 1) MOD bufferSize; OutputCopy[h]; }; h.matchLen _ 0; match _ MatchVector[vector, TRUE]; WriteVector[vector]; IF match THEN h.matchLen _ 1 ELSE CheckMaxLiteral[]; }; }; }; CheckMaxLiteral: PROC = { litLen: NAT _ LiteralLength[]; IF litLen = maxLitLength THEN OutputLiteral[h, litLen]; }; WriteVector: PROC [vector: Vector] = { h.history[h.nextInsert] _ vector; h.validBits[h.nextInsert] _ TRUE; h.nextInsert _ (h.nextInsert + 1) MOD bufferSize; }; MatchVector: PROC [vector: Vector, ignore: BOOL] RETURNS [match: BOOL _ FALSE] = { address: NAT _ h.nextInsert; previousMatch: BOOL _ h.matchBits[h.nextInsert]; DO nextMatch: BOOL; address _ (address + 1) MOD bufferSize; nextMatch _ h.matchBits[address]; h.matchBits[address] _ vector=h.history[address] AND (previousMatch OR ignore) AND h.validBits[address]; IF address=h.nextInsert THEN EXIT; previousMatch _ nextMatch; ENDLOOP; DO IF h.matchBits[address] THEN { match _ TRUE; h.mostRecentMatchPos _ address; EXIT; }; address _ IF address=0 THEN bufferSize-1 ELSE address - 1; IF address=h.nextInsert THEN EXIT; ENDLOOP; }; LiteralLength: PROC RETURNS [litLen: NAT] = { litLen _ LOOPHOLE[h.nextInsert - h.litPos, CARDINAL] MOD bufferSize; }; Flush: PROC = { IF h.matchLen < minCopyLength THEN { -- Make a literal of all the characters litLen: NAT _ LiteralLength[]; IF litLen # 0 THEN OutputLiteral[h, litLen]; } ELSE OutputCopy[h]; -- Make a copy (can't be any literal) h.matchLen _ 0; IF h.outState # 0 THEN ERROR; }; copies, literals: NAT _ 0; h: CompressStreamData _ NEW[CompressStreamRec]; vector: Vector; h.inputStream _ FS.StreamOpen[fileName: inFile, accessOptions: $read, wDir: CommandTool.CurrentWorkingDirectory[], streamOptions: FS.binaryStreamOptions]; h.outputStream _ FS.StreamOpen[fileName: outFile, accessOptions: $create, wDir: CommandTool.CurrentWorkingDirectory[], streamOptions: FS.binaryStreamOptions]; h.validBits _ NEW[MatchBitsRec[bufferSize]]; h.matchBits _ NEW[MatchBitsRec[bufferSize]]; h.history _ NEW[VectorsRec[bufferSize]]; h.litCmds _ ALL[0]; h.copyCmds _ ALL[0]; FOR i: NAT IN [0..bufferSize) DO h.validBits[i] _ FALSE; ENDLOOP; DO vector _ LOOPHOLE[IO.GetHWord[h.inputStream ! IO.EndOfStream => EXIT]]; h.bytesIn _ h.bytesIn+1; PutVector[LOOPHOLE[Basics.BITAND[LOOPHOLE[vector], mask]]]; Process.CheckForAbort[]; ENDLOOP; Flush[]; <> FOR i: NAT IN [0..MAX[maxLitLength, maxCopyLength]) DO copies _ copies + h.copyCmds[i]; literals _ literals + h.litCmds[i]; ENDLOOP; IF statistics THEN TerminalIO.PutF["\n\nFile: %g", IO.rope[inFile]]; TerminalIO.PutF["\n Mask: %04x, Compression: %5.3f, Copies: %4g, Literals: %4g", IO.card[mask], IO.real[Real.Float[IO.GetIndex[h.outputStream]] / (2.0 * Real.Float[IO.GetIndex[h.inputStream]])], IO.card[copies], IO.card[literals]]; IF statistics THEN FOR i: NAT IN [1..MAX[maxLitLength, maxCopyLength]) DO IF h.copyCmds[i]#0 OR h.litCmds[i]#0 THEN TerminalIO.PutF["\n Copy[%2g]: %4g, Lit[%2g]: %4g", IO.card[i], IO.card[h.copyCmds[i]], IO.card[i], IO.card[h.litCmds[i]]]; ENDLOOP; IO.Close[h.outputStream]; IO.Close[h.inputStream]; }; OutputLiteral: PROC [h: CompressStreamData, litLen: NAT] = { litPos: NAT _ h.litPos; Output[h, 0, 6]; -- pad Output[h, 0, 4]; -- cmd Output[h, litLen, 6]; -- length IF debug THEN TerminalIO.PutF["\nLit: '"]; FOR i: NAT IN [0..litLen) DO vector: Vector _ h.history[(litPos + i) MOD bufferSize]; Output[h, 0, 6]; -- pad Output[h, 0, 4]; -- ctl bits Output[h, Basics.BITSHIFT[vector, -10], 6]; -- most significant bits Output[h, 0, 6]; -- pad Output[h, vector, 10]; -- least significant bits IF debug THEN TerminalIO.PutF["%x ", IO.card[vector]]; ENDLOOP; IF debug THEN TerminalIO.PutRope["'"]; h.litPos _ h.nextInsert; h.litCmds[litLen] _ h.litCmds[litLen]+1; }; OutputCopy: PROC [h: CompressStreamData] = { outPos: NAT _ LOOPHOLE[h.mostRecentMatchPos - h.matchLen + 1, CARDINAL] MOD bufferSize; Output[h, 0, 6]; -- pad Output[h, h.matchLen, 4]; -- length Output[h, outPos, 6]; -- position h.litPos _ h.nextInsert; IF debug THEN TerminalIO.PutF["\nCopy l:%g, p: %g", IO.card[h.matchLen], IO.card[outPos]]; h.copyCmds[h.matchLen] _ h.copyCmds[h.matchLen]+1; }; IncCount: PROC [h: CompressStreamData] ~ { h.bytesOut _ h.bytesOut+1; IF h.bytesOut = h.windowSize THEN { IF h.bytesInh.max THEN h.max _ h.bytesIn; h.bytesIn _ h.bytesOut _ 0; }; }; Output: PROC [h: CompressStreamData, code, nbits: CARDINAL] ~ { code _ Basics.BITAND[code, Basics.BITNOT[Basics.BITSHIFT[0FFFFh, nbits]]]; IF (h.outState _ nbits + h.outState - 8) >= 0 THEN { --No. bits left over if 1 byte is output IO.PutChar[h.outputStream, VAL[Basics.BITSHIFT[code, - h.outState] + h.outRemainder]]; IncCount[h]; code _ Basics.BITSHIFT[code, 16 - h.outState]; --left-justify residual bits IF (h.outState _ h.outState - 8) >= 0 THEN { IO.PutChar[h.outputStream, VAL[Basics.BITSHIFT[code, - 8]]]; IncCount[h]; code _ Basics.BITSHIFT[code, 8]; --left-justify residual bits } ELSE h.outState _ h.outState + 8; h.outRemainder _ Basics.BITSHIFT[code, - 8]; } ELSE { h.outRemainder _ h.outRemainder + Basics.BITSHIFT[code, - h.outState]; h.outState _ h.outState + 8; }; }; END.