file GSortImpl.mesa (converted from GPsort 3-Mar-81)
last modified by:
Evans on 30-Dec-80 9:03:08
Sweet on 23-Mar-81 14:30:30
translated from Ed McCreight's BCPL by Jim Frandeen
DIRECTORY
Heap,
GSort,
Inline USING [LongCOPY],
Segments,
Streams;
GSortImpl: PROGRAM IMPORTS Heap, Inline, Segments, Streams EXPORTS GSort =
BEGIN
Fd: TYPE = RECORD [
dh: Streams.Handle ← NIL,
endOfRun: BOOLEANFALSE,
dummyRuns, totalRuns: CARDINALNULL,
head, tail: CARDINALNULL,
len: CARDINALNULL,
buffSize: CARDINAL,
buffer: ItemHandle ← NIL,
record: ItemHandle ← NIL];
FdHandle: TYPE = POINTER TO Fd;
WordsInPage: CARDINAL = 256;
Item: TYPE = RECORD [SEQUENCE COMPUTED CARDINAL OF UNSPECIFIED]; -- the item being sorted
ItemHandle: TYPE = LONG POINTER TO Item;
ItemHeader: TYPE = RECORD [SEQUENCE len: CARDINAL OF UNSPECIFIED];
ItemHeaderHandle: TYPE = LONG POINTER TO ItemHeader;
SortHeap: TYPE = RECORD [
SEQUENCE COMPUTED CARDINAL [0..LAST[CARDINAL]/SIZE[ItemHeaderHandle]) -- to avoid multiply
OF ItemHeaderHandle];
EOR: CARDINAL = LAST[CARDINAL]; -- end of run
NFiles: CARDINAL = 3; -- number of scratch files
RecordTooLong: PUBLIC ERROR = CODE;
systemZone: MDSZone = Heap.systemMDSZone;
Sort: PUBLIC PROCEDURE [
get: GSort.GetProcType, put: GSort.PutProcType,
compare: GSort.CompareProcType, expectedItemSize: CARDINAL ← 30,
maxItemSize: CARDINAL ← 1000, pagesInHeap: CARDINAL ← 100] =
BEGIN
filesArray: ARRAY [0..NFiles] OF FdHandle;
firstFreeEnt: CARDINAL ← 1; -- 1 + end of unsorted part of heap vector
heapArray: LONG POINTER TO SortHeap;
heapSize: CARDINAL; -- end of heap-sorted part of heap vector
inputFinished: BOOLEANFALSE;
itemIsLeftOver: BOOLEANFALSE;
leftoverItem: ItemHandle ← NIL;
leftoverItemLen: CARDINAL;
level: CARDINAL;
maxHeapSize: CARDINAL;
maxItemWords: CARDINAL;
occItemWords: CARDINAL;
recordSize: CARDINAL;
f: FdHandle; -- declared out here to be sorted with locals (compiler shortcoming)
item: ItemHeaderHandle;
fid: STRING = "SORT.SCRATCH0$";
lastChar: CARDINAL ← fid.length - 2;
sortZone: UNCOUNTED ZONE ← Heap.Create[
initial: MAX[pagesInHeap, 10], swapUnit: 10, increment: 0];
Initialize: PROCEDURE [expected, max: CARDINAL] =
BEGIN
blockSize: CARDINAL = MAX[pagesInHeap, 10]*WordsInPage;
bufferSize: CARDINAL = (blockSize)/NFiles - 100;
recordSize ← IF bufferSize > max THEN max ELSE bufferSize;
maxHeapSize ← (blockSize - recordSize)/(expected + 3);
maxItemWords ← blockSize - maxHeapSize - recordSize;
FOR i: CARDINAL IN [1..NFiles] DO
filesArray[i] ← systemZone.NEW[Fd ← [buffSize: bufferSize]]; ENDLOOP;
RETURN;
END;
FreeAllocatedStuff: PROCEDURE =
BEGIN
i: CARDINAL;
FOR i IN [1..NFiles] DO
IF filesArray[i] = NIL THEN LOOP;
IF filesArray[i].buffer # NIL THEN sortZone.FREE[@filesArray[i].buffer];
IF filesArray[i].dh # NIL THEN DeleteFile[filesArray[i].dh];
systemZone.FREE[@filesArray[i]];
ENDLOOP;
IF heapArray # NIL THEN sortZone.FREE[@heapArray];
IF leftoverItem # NIL THEN sortZone.FREE[@leftoverItem];
Heap.Delete[sortZone];
RETURN;
END;
BuildHeap: PROCEDURE =
BEGIN
L: CARDINAL;
heapSize ← 0;
MaintainHeap[];
heapSize ← firstFreeEnt - 1;
L ← (heapSize/2) + 1;
WHILE L > 1 DO LL - 1; SiftDown[L, heapArray[L]]; ENDLOOP;
RETURN;
END;
BuildRuns: PROCEDURE =
BEGIN
Continue reading and sorting, alternating in Fibonacci sequence, until the input is exhausted.
A: CARDINAL;
files: POINTER TO ARRAY [0..NFiles] OF FdHandle = @filesArray;
item: ItemHeaderHandle;
i: CARDINAL;
j: CARDINAL ← 1;
LFile: FdHandle;
NT: CARDINAL;
level ← 1;
DO
f: FdHandle ← files[j];
IF level > 1 THEN Streams.PutWord[f.dh, EOR]; -- end-of-run marker
FOR item ← GetHeap[], GetHeap[] UNTIL item = NIL DO
Streams.PutWord[f.dh, item.len];
[] ← Streams.PutBlock[f.dh, @item[0], item.len];
occItemWords ←
occItemWords - SIZE[ItemHeader [item.len]] - Heap.minimumNodeSize;
sortZone.FREE[@item];
ENDLOOP;
f.dummyRuns ← f.dummyRuns - 1;
IF inputFinished AND (firstFreeEnt = 1) THEN EXIT;
IF f.dummyRuns < files[j + 1].dummyRuns THEN {j ← j + 1; f ← files[j]}
ELSE
BEGIN
j ← 1;
f ← files[1];
IF f.dummyRuns = 0 THEN
BEGIN
level ← level + 1;
A ← files[1].totalRuns;
FOR i IN [1..NFiles - 1] DO
LFile ← files[i];
NTA + files[i + 1].totalRuns;
LFile.dummyRuns ← NT - LFile.totalRuns;
LFile.totalRuns ← NT;
ENDLOOP;
END;
END;
BuildHeap[];
ENDLOOP;
FOR i IN [1..NFiles - 1] DO
f: FdHandle = files[i];
Streams.PutWord[f.dh, EOR];
Streams.SetIndex[f.dh, 0];
ENDLOOP;
RETURN;
END;
GetHeap: PROCEDURE RETURNS [itemHP: ItemHeaderHandle] =
BEGIN
heap: LONG POINTER TO SortHeap = heapArray;
IF heapSize = 0 THEN RETURN[NIL];
MaintainHeap[];
itemHP ← heap[1];
SiftDown[1, heap[heapSize]];
firstFreeEnt ← firstFreeEnt - 1;
heap[heapSize] ← heap[firstFreeEnt];
heapSize ← heapSize - 1;
RETURN;
END;
MaintainHeap: PROCEDURE =
BEGIN
Fill the heap as full as possible
itemHP: ItemHeaderHandle;
heap: LONG POINTER TO SortHeap = heapArray;
IF inputFinished THEN RETURN;
WHILE firstFreeEnt <= maxHeapSize DO
Try adding another heap element
IF NOT itemIsLeftOver THEN
BEGIN
leftoverItemLen ← get[leftoverItem];
IF leftoverItemLen > recordSize THEN ERROR RecordTooLong;
IF leftoverItemLen = 0 THEN BEGIN inputFinished ← TRUE; EXIT; END;
END;
IF occItemWords >= maxItemWords THEN
BEGIN itemIsLeftOver ← TRUE; EXIT; END;
itemHP ← sortZone.NEW[
ItemHeader [leftoverItemLen] !
Heap.Error =>
IF type = insufficientSpace THEN
BEGIN
maxItemWords ← occItemWords - 100;
itemIsLeftOver ← TRUE;
EXIT;
END];
occItemWords ←
occItemWords + SIZE[ItemHeader [leftoverItemLen]] +
Heap.minimumNodeSize;
Inline.LongCOPY[
from: leftoverItem, nwords: leftoverItemLen, to: @itemHP[0]];
heap[firstFreeEnt] ← heap[heapSize + 1];
firstFreeEnt ← firstFreeEnt + 1;
heap[heapSize + 1] ← itemHP;
itemIsLeftOver ← FALSE;
IF heapSize > 0 AND compare[@itemHP[0], @heap[1][0]] > 0 THEN
BEGIN heapSize ← heapSize + 1; SiftUp[]; END;
ENDLOOP;
RETURN;
END;
MergePass: PROCEDURE =
BEGIN
files: POINTER TO ARRAY [0..NFiles] OF FdHandle = @filesArray;
dummiesThisPass: CARDINAL;
lastFile: FdHandle;
OFile: FdHandle;
runNo: CARDINAL;
runsThisPass: CARDINAL;
OFile ← files[NFiles];
lastFile ← files[NFiles - 1];
runsThisPass ← lastFile.totalRuns;
dummiesThisPass ← lastFile.dummyRuns;
FOR i IN[1..NFiles-2]
dummiesThisPass ← MIN[dummiesThisPass, files[1].dummyRuns];
OFile.totalRuns ← runsThisPass;
OFile.dummyRuns ← dummiesThisPass;
FOR i IN[1..NFiles-2]
files[1].totalRuns ← files[1].totalRuns - runsThisPass;
files[1].dummyRuns ← files[1].dummyRuns - dummiesThisPass;
FOR runNo IN [dummiesThisPass + 1..runsThisPass] DO
MergeRun[OFile]; ENDLOOP;
IF level > 1 THEN
BEGIN
fd: FdHandle;
i: CARDINAL;
FlushBuffer[OFile];
FOR i IN [NFiles - 1..NFiles] DO
f: FdHandle = files[i];
Streams.SetIndex[f.dh, 0];
f.head ← 0;
f.tail ← 0;
ENDLOOP;
fd ← files[NFiles];
FOR i DECREASING IN (1..NFiles] DO files[i] ← files[i - 1]; ENDLOOP;
files[1] ← fd;
END;
RETURN;
END;
MergeRun: PROCEDURE [OFile: FdHandle] =
BEGIN
Process a run. Fill up the applicable records.
files: POINTER TO ARRAY [0..NFiles] OF FdHandle = @filesArray;
i: CARDINAL;
SR: CARDINAL;
FOR i IN [1..NFiles - 1] DO
f: FdHandle = files[i];
IF f.dummyRuns = 0 THEN [] ← ReadRecord[f]
ELSE BEGIN f.dummyRuns ← f.dummyRuns - 1; f.endOfRun ← TRUE; END;
ENDLOOP;
DO
SR ← 0; -- selected record (which file is it from)
FOR i IN [1..NFiles - 1] DO
f: FdHandle = files[i];
IF (NOT f.endOfRun)
AND (SR = 0 OR compare[f.record, files[SR].record] < 0) THEN SR ← i;
ENDLOOP;
IF SR = 0 THEN EXIT; -- come back and fix this
IF level = 1 THEN put[files[SR].record, files[SR].len]
ELSE WriteRecord[OFile, files[SR].len, files[SR].record];
files[SR].record ← NIL;
[] ← ReadRecord[files[SR]];
ENDLOOP;
IF level > 1 THEN WriteRecord[OFile, EOR, NIL]; -- end-of-run marker
RETURN;
END;
SiftDown: PROCEDURE [L: CARDINAL, K: ItemHeaderHandle] =
BEGIN
heap: LONG POINTER TO SortHeap = heapArray;
J: CARDINALL;
I: CARDINAL;
DO
IJ;
JI*2;
IF J > heapSize THEN EXIT;
IF J < heapSize THEN
IF compare[@heap[J][0], @heap[J + 1][0]] > 0 THEN JJ + 1;
IF compare[@K[0], @heap[J][0]] <= 0 THEN EXIT;
heap[I] ← heap[J];
ENDLOOP;
heap[I] ← K;
RETURN;
END;
SiftUp: PROCEDURE =
BEGIN
heap: LONG POINTER TO SortHeap = heapArray;
i: CARDINAL;
j: CARDINAL ← heapSize;
k: ItemHeaderHandle ← heap[heapSize];
i ← j/2;
WHILE i > 0 DO
IF compare[@heap[i][0], @k[0]] <= 0 THEN EXIT;
heap[j] ← heap[i];
j ← i;
i ← j/2;
ENDLOOP;
heap[j] ← k;
RETURN;
END;
Initialize[expected: expectedItemSize, max: maxItemSize];
BEGIN
ENABLE UNWIND => FreeAllocatedStuff[];
heapArray ← sortZone.NEW[SortHeap [maxHeapSize]];
leftoverItem ← sortZone.NEW[Item [recordSize]];
occItemWords ← SIZE[SortHeap [maxHeapSize]] + SIZE[Item [recordSize]];
itemIsLeftOver ← FALSE;
inputFinished ← FALSE;
First, fill up the heap as much as possible and sort it.
BuildHeap[];
IF inputFinished THEN
THROUGH [1..heapSize] DO item ← GetHeap[]; put[@item[0], item.len]; ENDLOOP
ELSE
BEGIN
FOR i: CARDINAL IN [1..NFiles - 1] DO
f ← filesArray[i];
fid[lastChar] ← fid[lastChar] + 1;
f.dh ← Streams.NewStream[fid, Streams.AllAccess];
f.totalRuns ← 1;
f.dummyRuns ← 1;
ENDLOOP;
filesArray[NFiles].totalRuns ← 0;
filesArray[NFiles].dummyRuns ← 0;
BuildRuns[];
Put runs on input files 1..NFiles-1 so that they have Fibonacci relationship
sortZone.FREE[@leftoverItem];
sortZone.FREE[@heapArray];
IF level > 1 THEN
BEGIN
fid[lastChar] ← fid[lastChar] + 1;
filesArray[NFiles].dh ← Streams.NewStream[fid, Streams.AllAccess];
END;
FOR i: CARDINAL IN [1..NFiles] DO -- seems like not needed for N if level = 1
f ← filesArray[i];
f.buffer ← sortZone.NEW[Item [f.buffSize]];
f.head ← 0;
f.tail ← 0;
ENDLOOP;
Now carry out merge passes until the level has returned to zero.
UNTIL level = 0 DO
MergePass[];
also cycles the files afterward if level>1
level ← level - 1;
IF level = 1 THEN {
DeleteFile[filesArray[NFiles].dh]; filesArray[NFiles].dh ← NIL};
Output will go to the putItemParam routine
ENDLOOP;
FOR i: CARDINAL IN [1..NFiles - 1] DO
DeleteFile[filesArray[i].dh]; filesArray[i].dh ← NIL; ENDLOOP;
END;
END; -- enable block
FreeAllocatedStuff[];
RETURN;
END;
ReadRecord: PROCEDURE [f: FdHandle] RETURNS [BOOLEAN] =
BEGIN
itemLen: CARDINAL;
headIndex: CARDINAL;
IF f.head = f.tail THEN FillBuffer[f];
headIndex ← f.head;
itemLen ← f.buffer[headIndex];
f.head ← headIndex ← headIndex + 1;
IF itemLen = EOR THEN BEGIN f.endOfRun ← TRUE; RETURN[FALSE]; END;
IF headIndex + itemLen > f.tail THEN FillBuffer[f];
headIndex ← f.head;
f.record ← @f.buffer[headIndex];
f.head ← headIndex + itemLen;
f.len ← itemLen;
f.endOfRun ← FALSE;
RETURN[TRUE];
END;
FillBuffer: PROCEDURE [f: FdHandle] =
BEGIN
cnt: CARDINAL;
f.tail ← f.tail - f.head;
IF f.tail > 0 THEN
Inline.LongCOPY[from: @f.buffer[f.head], nwords: f.tail, to: f.buffer];
cnt ← Streams.GetBlock[f.dh, @f.buffer[f.tail], f.buffSize - f.tail];
f.head ← 0;
f.tail ← f.tail + cnt;
RETURN;
END;
WriteRecord: PROCEDURE [f: FdHandle, itemLen: CARDINAL, itemPtr: ItemHandle] =
BEGIN
buffer: ItemHandle ← f.buffer;
tailIndex: CARDINAL ← f.tail;
IF tailIndex + (IF itemLen = EOR THEN 1 ELSE itemLen + 1) > f.buffSize THEN
BEGIN FlushBuffer[f]; tailIndex ← f.tail; END;
buffer[tailIndex] ← itemLen;
tailIndex ← tailIndex + 1;
IF itemLen # EOR THEN
BEGIN
Inline.LongCOPY[itemPtr, itemLen, @buffer[tailIndex]];
tailIndex ← tailIndex + itemLen;
END;
f.tail ← tailIndex;
RETURN;
END;
FlushBuffer: PROCEDURE [f: FdHandle] =
BEGIN [] ← Streams.PutBlock[f.dh, f.buffer, f.tail]; f.tail ← 0; END;
DeleteFile: PROCEDURE [dh: Streams.Handle] =
BEGIN
fh: Segments.FHandle ← Streams.FileFromStream[dh];
Segments.LockFile[fh];
Streams.Destroy[dh];
Segments.UnlockFile[fh];
Segments.DestroyFile[fh];
END;
END...