Fd:
TYPE =
RECORD [
dh: Streams.Handle ← NIL,
endOfRun: BOOLEAN ← FALSE,
dummyRuns, totalRuns: CARDINAL ← NULL,
head, tail: CARDINAL ← NULL,
len: CARDINAL ← NULL,
buffSize: CARDINAL,
buffer: ItemHandle ← NIL,
record: ItemHandle ← NIL];
Sort:
PUBLIC
PROCEDURE [
get: GSort.GetProcType, put: GSort.PutProcType,
compare: GSort.CompareProcType, expectedItemSize: CARDINAL ← 30,
maxItemSize: CARDINAL ← 1000, pagesInHeap: CARDINAL ← 100] =
BEGIN
filesArray: ARRAY [0..NFiles] OF FdHandle;
firstFreeEnt: CARDINAL ← 1; -- 1 + end of unsorted part of heap vector
heapArray: LONG POINTER TO SortHeap;
heapSize: CARDINAL; -- end of heap-sorted part of heap vector
inputFinished: BOOLEAN ← FALSE;
itemIsLeftOver: BOOLEAN ← FALSE;
leftoverItem: ItemHandle ← NIL;
leftoverItemLen: CARDINAL;
level: CARDINAL;
maxHeapSize: CARDINAL;
maxItemWords: CARDINAL;
occItemWords: CARDINAL;
recordSize: CARDINAL;
f: FdHandle; -- declared out here to be sorted with locals (compiler shortcoming)
item: ItemHeaderHandle;
fid: STRING = "SORT.SCRATCH0$";
lastChar: CARDINAL ← fid.length - 2;
sortZone:
UNCOUNTED
ZONE ← Heap.Create[
initial: MAX[pagesInHeap, 10], swapUnit: 10, increment: 0];
Initialize:
PROCEDURE [expected, max:
CARDINAL] =
BEGIN
blockSize: CARDINAL = MAX[pagesInHeap, 10]*WordsInPage;
bufferSize: CARDINAL = (blockSize)/NFiles - 100;
recordSize ← IF bufferSize > max THEN max ELSE bufferSize;
maxHeapSize ← (blockSize - recordSize)/(expected + 3);
maxItemWords ← blockSize - maxHeapSize - recordSize;
FOR i:
CARDINAL
IN [1..NFiles]
DO
filesArray[i] ← systemZone.NEW[Fd ← [buffSize: bufferSize]]; ENDLOOP;
RETURN;
END;
FreeAllocatedStuff:
PROCEDURE =
BEGIN
i: CARDINAL;
FOR i
IN [1..NFiles]
DO
IF filesArray[i] = NIL THEN LOOP;
IF filesArray[i].buffer # NIL THEN sortZone.FREE[@filesArray[i].buffer];
IF filesArray[i].dh # NIL THEN DeleteFile[filesArray[i].dh];
systemZone.FREE[@filesArray[i]];
ENDLOOP;
IF heapArray # NIL THEN sortZone.FREE[@heapArray];
IF leftoverItem # NIL THEN sortZone.FREE[@leftoverItem];
Heap.Delete[sortZone];
RETURN;
END;
BuildHeap:
PROCEDURE =
BEGIN
L: CARDINAL;
heapSize ← 0;
MaintainHeap[];
heapSize ← firstFreeEnt - 1;
L ← (heapSize/2) + 1;
WHILE L > 1 DO L ← L - 1; SiftDown[L, heapArray[L]]; ENDLOOP;
RETURN;
END;
BuildRuns:
PROCEDURE =
BEGIN
Continue reading and sorting, alternating in Fibonacci sequence, until the input is exhausted.
A: CARDINAL;
files: POINTER TO ARRAY [0..NFiles] OF FdHandle = @filesArray;
item: ItemHeaderHandle;
i: CARDINAL;
j: CARDINAL ← 1;
LFile: FdHandle;
NT: CARDINAL;
level ← 1;
DO
f: FdHandle ← files[j];
IF level > 1 THEN Streams.PutWord[f.dh, EOR]; -- end-of-run marker
FOR item ← GetHeap[], GetHeap[]
UNTIL item =
NIL
DO
Streams.PutWord[f.dh, item.len];
[] ← Streams.PutBlock[f.dh, @item[0], item.len];
occItemWords ←
occItemWords - SIZE[ItemHeader [item.len]] - Heap.minimumNodeSize;
sortZone.FREE[@item];
ENDLOOP;
f.dummyRuns ← f.dummyRuns - 1;
IF inputFinished AND (firstFreeEnt = 1) THEN EXIT;
IF f.dummyRuns < files[j + 1].dummyRuns THEN {j ← j + 1; f ← files[j]}
ELSE
BEGIN
j ← 1;
f ← files[1];
IF f.dummyRuns = 0
THEN
BEGIN
level ← level + 1;
A ← files[1].totalRuns;
FOR i
IN [1..NFiles - 1]
DO
LFile ← files[i];
NT ← A + files[i + 1].totalRuns;
LFile.dummyRuns ← NT - LFile.totalRuns;
LFile.totalRuns ← NT;
ENDLOOP;
END;
END;
BuildHeap[];
ENDLOOP;
FOR i
IN [1..NFiles - 1]
DO
f: FdHandle = files[i];
Streams.PutWord[f.dh, EOR];
Streams.SetIndex[f.dh, 0];
ENDLOOP;
RETURN;
END;
GetHeap:
PROCEDURE
RETURNS [itemHP: ItemHeaderHandle] =
BEGIN
heap: LONG POINTER TO SortHeap = heapArray;
IF heapSize = 0 THEN RETURN[NIL];
MaintainHeap[];
itemHP ← heap[1];
SiftDown[1, heap[heapSize]];
firstFreeEnt ← firstFreeEnt - 1;
heap[heapSize] ← heap[firstFreeEnt];
heapSize ← heapSize - 1;
RETURN;
END;
MaintainHeap:
PROCEDURE =
BEGIN
Fill the heap as full as possible
itemHP: ItemHeaderHandle;
heap: LONG POINTER TO SortHeap = heapArray;
IF inputFinished THEN RETURN;
WHILE firstFreeEnt <= maxHeapSize
DO
Try adding another heap element
IF
NOT itemIsLeftOver
THEN
BEGIN
leftoverItemLen ← get[leftoverItem];
IF leftoverItemLen > recordSize THEN ERROR RecordTooLong;
IF leftoverItemLen = 0 THEN BEGIN inputFinished ← TRUE; EXIT; END;
END;
IF occItemWords >= maxItemWords
THEN
BEGIN itemIsLeftOver ← TRUE; EXIT; END;
itemHP ← sortZone.
NEW[
ItemHeader [leftoverItemLen] !
Heap.Error =>
IF type = insufficientSpace THEN
BEGIN
maxItemWords ← occItemWords - 100;
itemIsLeftOver ← TRUE;
EXIT;
END];
occItemWords ←
occItemWords +
SIZE[ItemHeader [leftoverItemLen]] +
Heap.minimumNodeSize;
Inline.LongCOPY[
from: leftoverItem, nwords: leftoverItemLen, to: @itemHP[0]];
heap[firstFreeEnt] ← heap[heapSize + 1];
firstFreeEnt ← firstFreeEnt + 1;
heap[heapSize + 1] ← itemHP;
itemIsLeftOver ← FALSE;
IF heapSize > 0
AND compare[@itemHP[0], @heap[1][0]] > 0
THEN
BEGIN heapSize ← heapSize + 1; SiftUp[]; END;
ENDLOOP;
RETURN;
END;
MergePass:
PROCEDURE =
BEGIN
files: POINTER TO ARRAY [0..NFiles] OF FdHandle = @filesArray;
dummiesThisPass: CARDINAL;
lastFile: FdHandle;
OFile: FdHandle;
runNo: CARDINAL;
runsThisPass: CARDINAL;
OFile ← files[NFiles];
lastFile ← files[NFiles - 1];
runsThisPass ← lastFile.totalRuns;
dummiesThisPass ← lastFile.dummyRuns;
FOR i IN[1..NFiles-2]
dummiesThisPass ← MIN[dummiesThisPass, files[1].dummyRuns];
OFile.totalRuns ← runsThisPass;
OFile.dummyRuns ← dummiesThisPass;
FOR i IN[1..NFiles-2]
files[1].totalRuns ← files[1].totalRuns - runsThisPass;
files[1].dummyRuns ← files[1].dummyRuns - dummiesThisPass;
FOR runNo
IN [dummiesThisPass + 1..runsThisPass]
DO
MergeRun[OFile]; ENDLOOP;
IF level > 1
THEN
BEGIN
fd: FdHandle;
i: CARDINAL;
FlushBuffer[OFile];
FOR i
IN [NFiles - 1..NFiles]
DO
f: FdHandle = files[i];
Streams.SetIndex[f.dh, 0];
f.head ← 0;
f.tail ← 0;
ENDLOOP;
fd ← files[NFiles];
FOR i DECREASING IN (1..NFiles] DO files[i] ← files[i - 1]; ENDLOOP;
files[1] ← fd;
END;
RETURN;
END;
MergeRun:
PROCEDURE [OFile: FdHandle] =
BEGIN
Process a run. Fill up the applicable records.
files: POINTER TO ARRAY [0..NFiles] OF FdHandle = @filesArray;
i: CARDINAL;
SR: CARDINAL;
FOR i
IN [1..NFiles - 1]
DO
f: FdHandle = files[i];
IF f.dummyRuns = 0 THEN [] ← ReadRecord[f]
ELSE BEGIN f.dummyRuns ← f.dummyRuns - 1; f.endOfRun ← TRUE; END;
ENDLOOP;
DO
SR ← 0; -- selected record (which file is it from)
FOR i
IN [1..NFiles - 1]
DO
f: FdHandle = files[i];
IF (
NOT f.endOfRun)
AND (SR = 0 OR compare[f.record, files[SR].record] < 0) THEN SR ← i;
ENDLOOP;
IF SR = 0 THEN EXIT; -- come back and fix this
IF level = 1 THEN put[files[SR].record, files[SR].len]
ELSE WriteRecord[OFile, files[SR].len, files[SR].record];
files[SR].record ← NIL;
[] ← ReadRecord[files[SR]];
ENDLOOP;
IF level > 1 THEN WriteRecord[OFile, EOR, NIL]; -- end-of-run marker
RETURN;
END;
SiftDown:
PROCEDURE [
L:
CARDINAL,
K: ItemHeaderHandle] =
BEGIN
heap: LONG POINTER TO SortHeap = heapArray;
J: CARDINAL ← L;
I: CARDINAL;
DO
I ← J;
J ← I*2;
IF J > heapSize THEN EXIT;
IF
J < heapSize
THEN
IF compare[@heap[J][0], @heap[J + 1][0]] > 0 THEN J ← J + 1;
IF compare[@K[0], @heap[J][0]] <= 0 THEN EXIT;
heap[I] ← heap[J];
ENDLOOP;
heap[I] ← K;
RETURN;
END;
SiftUp:
PROCEDURE =
BEGIN
heap: LONG POINTER TO SortHeap = heapArray;
i: CARDINAL;
j: CARDINAL ← heapSize;
k: ItemHeaderHandle ← heap[heapSize];
i ← j/2;
WHILE i > 0
DO
IF compare[@heap[i][0], @k[0]] <= 0 THEN EXIT;
heap[j] ← heap[i];
j ← i;
i ← j/2;
ENDLOOP;
heap[j] ← k;
RETURN;
END;
Initialize[expected: expectedItemSize, max: maxItemSize];
BEGIN
ENABLE UNWIND => FreeAllocatedStuff[];
heapArray ← sortZone.NEW[SortHeap [maxHeapSize]];
leftoverItem ← sortZone.NEW[Item [recordSize]];
occItemWords ← SIZE[SortHeap [maxHeapSize]] + SIZE[Item [recordSize]];
itemIsLeftOver ← FALSE;
inputFinished ← FALSE;
First, fill up the heap as much as possible and sort it.
BuildHeap[];
IF inputFinished
THEN
THROUGH [1..heapSize] DO item ← GetHeap[]; put[@item[0], item.len]; ENDLOOP
ELSE
BEGIN
FOR i:
CARDINAL
IN [1..NFiles - 1]
DO
f ← filesArray[i];
fid[lastChar] ← fid[lastChar] + 1;
f.dh ← Streams.NewStream[fid, Streams.AllAccess];
f.totalRuns ← 1;
f.dummyRuns ← 1;
ENDLOOP;
filesArray[NFiles].totalRuns ← 0;
filesArray[NFiles].dummyRuns ← 0;
BuildRuns[];
Put runs on input files 1..NFiles-1 so that they have Fibonacci relationship
sortZone.FREE[@leftoverItem];
sortZone.FREE[@heapArray];
IF level > 1
THEN
BEGIN
fid[lastChar] ← fid[lastChar] + 1;
filesArray[NFiles].dh ← Streams.NewStream[fid, Streams.AllAccess];
END;
FOR i:
CARDINAL
IN [1..NFiles]
DO
-- seems like not needed for N if level = 1
f ← filesArray[i];
f.buffer ← sortZone.NEW[Item [f.buffSize]];
f.head ← 0;
f.tail ← 0;
ENDLOOP;
Now carry out merge passes until the level has returned to zero.
UNTIL level = 0
DO
MergePass[];
also cycles the files afterward if level>1
level ← level - 1;
IF level = 1
THEN {
DeleteFile[filesArray[NFiles].dh]; filesArray[NFiles].dh ← NIL};
Output will go to the putItemParam routine
ENDLOOP;
FOR i:
CARDINAL
IN [1..NFiles - 1]
DO
DeleteFile[filesArray[i].dh]; filesArray[i].dh ← NIL; ENDLOOP;
END;
END; -- enable block
FreeAllocatedStuff[];
RETURN;
END;
WriteRecord:
PROCEDURE [f: FdHandle, itemLen:
CARDINAL, itemPtr: ItemHandle] =
BEGIN
buffer: ItemHandle ← f.buffer;
tailIndex: CARDINAL ← f.tail;
IF tailIndex + (
IF itemLen =
EOR
THEN 1
ELSE itemLen + 1) > f.buffSize
THEN
BEGIN FlushBuffer[f]; tailIndex ← f.tail; END;
buffer[tailIndex] ← itemLen;
tailIndex ← tailIndex + 1;
IF itemLen #
EOR
THEN
BEGIN
Inline.LongCOPY[itemPtr, itemLen, @buffer[tailIndex]];
tailIndex ← tailIndex + itemLen;
END;
f.tail ← tailIndex;
RETURN;
END;