<> <> <> <> <> DIRECTORY Heap, GSort, Inline USING [LongCOPY], Segments, Streams; GSortImpl: PROGRAM IMPORTS Heap, Inline, Segments, Streams EXPORTS GSort = BEGIN Fd: TYPE = RECORD [ dh: Streams.Handle _ NIL, endOfRun: BOOLEAN _ FALSE, dummyRuns, totalRuns: CARDINAL _ NULL, head, tail: CARDINAL _ NULL, len: CARDINAL _ NULL, buffSize: CARDINAL, buffer: ItemHandle _ NIL, record: ItemHandle _ NIL]; FdHandle: TYPE = POINTER TO Fd; WordsInPage: CARDINAL = 256; Item: TYPE = RECORD [SEQUENCE COMPUTED CARDINAL OF UNSPECIFIED]; -- the item being sorted ItemHandle: TYPE = LONG POINTER TO Item; ItemHeader: TYPE = RECORD [SEQUENCE len: CARDINAL OF UNSPECIFIED]; ItemHeaderHandle: TYPE = LONG POINTER TO ItemHeader; SortHeap: TYPE = RECORD [ SEQUENCE COMPUTED CARDINAL [0..LAST[CARDINAL]/SIZE[ItemHeaderHandle]) -- to avoid multiply OF ItemHeaderHandle]; EOR: CARDINAL = LAST[CARDINAL]; -- end of run NFiles: CARDINAL = 3; -- number of scratch files RecordTooLong: PUBLIC ERROR = CODE; systemZone: MDSZone = Heap.systemMDSZone; Sort: PUBLIC PROCEDURE [ get: GSort.GetProcType, put: GSort.PutProcType, compare: GSort.CompareProcType, expectedItemSize: CARDINAL _ 30, maxItemSize: CARDINAL _ 1000, pagesInHeap: CARDINAL _ 100] = BEGIN filesArray: ARRAY [0..NFiles] OF FdHandle; firstFreeEnt: CARDINAL _ 1; -- 1 + end of unsorted part of heap vector heapArray: LONG POINTER TO SortHeap; heapSize: CARDINAL; -- end of heap-sorted part of heap vector inputFinished: BOOLEAN _ FALSE; itemIsLeftOver: BOOLEAN _ FALSE; leftoverItem: ItemHandle _ NIL; leftoverItemLen: CARDINAL; level: CARDINAL; maxHeapSize: CARDINAL; maxItemWords: CARDINAL; occItemWords: CARDINAL; recordSize: CARDINAL; f: FdHandle; -- declared out here to be sorted with locals (compiler shortcoming) item: ItemHeaderHandle; fid: STRING = "SORT.SCRATCH0$"; lastChar: CARDINAL _ fid.length - 2; sortZone: UNCOUNTED ZONE _ Heap.Create[ initial: MAX[pagesInHeap, 10], swapUnit: 10, increment: 0]; Initialize: PROCEDURE [expected, max: CARDINAL] = BEGIN blockSize: CARDINAL = MAX[pagesInHeap, 10]*WordsInPage; bufferSize: CARDINAL = (blockSize)/NFiles - 100; recordSize _ IF bufferSize > max THEN max ELSE bufferSize; maxHeapSize _ (blockSize - recordSize)/(expected + 3); maxItemWords _ blockSize - maxHeapSize - recordSize; FOR i: CARDINAL IN [1..NFiles] DO filesArray[i] _ systemZone.NEW[Fd _ [buffSize: bufferSize]]; ENDLOOP; RETURN; END; FreeAllocatedStuff: PROCEDURE = BEGIN i: CARDINAL; FOR i IN [1..NFiles] DO IF filesArray[i] = NIL THEN LOOP; IF filesArray[i].buffer # NIL THEN sortZone.FREE[@filesArray[i].buffer]; IF filesArray[i].dh # NIL THEN DeleteFile[filesArray[i].dh]; systemZone.FREE[@filesArray[i]]; ENDLOOP; IF heapArray # NIL THEN sortZone.FREE[@heapArray]; IF leftoverItem # NIL THEN sortZone.FREE[@leftoverItem]; Heap.Delete[sortZone]; RETURN; END; BuildHeap: PROCEDURE = BEGIN L: CARDINAL; heapSize _ 0; MaintainHeap[]; heapSize _ firstFreeEnt - 1; L _ (heapSize/2) + 1; WHILE L > 1 DO L _ L - 1; SiftDown[L, heapArray[L]]; ENDLOOP; RETURN; END; BuildRuns: PROCEDURE = BEGIN <> A: CARDINAL; files: POINTER TO ARRAY [0..NFiles] OF FdHandle = @filesArray; item: ItemHeaderHandle; i: CARDINAL; j: CARDINAL _ 1; LFile: FdHandle; NT: CARDINAL; level _ 1; DO f: FdHandle _ files[j]; IF level > 1 THEN Streams.PutWord[f.dh, EOR]; -- end-of-run marker FOR item _ GetHeap[], GetHeap[] UNTIL item = NIL DO Streams.PutWord[f.dh, item.len]; [] _ Streams.PutBlock[f.dh, @item[0], item.len]; occItemWords _ occItemWords - SIZE[ItemHeader [item.len]] - Heap.minimumNodeSize; sortZone.FREE[@item]; ENDLOOP; f.dummyRuns _ f.dummyRuns - 1; IF inputFinished AND (firstFreeEnt = 1) THEN EXIT; IF f.dummyRuns < files[j + 1].dummyRuns THEN {j _ j + 1; f _ files[j]} ELSE BEGIN j _ 1; f _ files[1]; IF f.dummyRuns = 0 THEN BEGIN level _ level + 1; A _ files[1].totalRuns; FOR i IN [1..NFiles - 1] DO LFile _ files[i]; NT _ A + files[i + 1].totalRuns; LFile.dummyRuns _ NT - LFile.totalRuns; LFile.totalRuns _ NT; ENDLOOP; END; END; BuildHeap[]; ENDLOOP; FOR i IN [1..NFiles - 1] DO f: FdHandle = files[i]; Streams.PutWord[f.dh, EOR]; Streams.SetIndex[f.dh, 0]; ENDLOOP; RETURN; END; GetHeap: PROCEDURE RETURNS [itemHP: ItemHeaderHandle] = BEGIN heap: LONG POINTER TO SortHeap = heapArray; IF heapSize = 0 THEN RETURN[NIL]; MaintainHeap[]; itemHP _ heap[1]; SiftDown[1, heap[heapSize]]; firstFreeEnt _ firstFreeEnt - 1; heap[heapSize] _ heap[firstFreeEnt]; heapSize _ heapSize - 1; RETURN; END; MaintainHeap: PROCEDURE = BEGIN <> itemHP: ItemHeaderHandle; heap: LONG POINTER TO SortHeap = heapArray; IF inputFinished THEN RETURN; WHILE firstFreeEnt <= maxHeapSize DO <> IF NOT itemIsLeftOver THEN BEGIN leftoverItemLen _ get[leftoverItem]; IF leftoverItemLen > recordSize THEN ERROR RecordTooLong; IF leftoverItemLen = 0 THEN BEGIN inputFinished _ TRUE; EXIT; END; END; IF occItemWords >= maxItemWords THEN BEGIN itemIsLeftOver _ TRUE; EXIT; END; itemHP _ sortZone.NEW[ ItemHeader [leftoverItemLen] ! Heap.Error => <> BEGIN maxItemWords _ occItemWords - 100; itemIsLeftOver _ TRUE; EXIT; END]; occItemWords _ occItemWords + SIZE[ItemHeader [leftoverItemLen]] + Heap.minimumNodeSize; Inline.LongCOPY[ from: leftoverItem, nwords: leftoverItemLen, to: @itemHP[0]]; heap[firstFreeEnt] _ heap[heapSize + 1]; firstFreeEnt _ firstFreeEnt + 1; heap[heapSize + 1] _ itemHP; itemIsLeftOver _ FALSE; IF heapSize > 0 AND compare[@itemHP[0], @heap[1][0]] > 0 THEN BEGIN heapSize _ heapSize + 1; SiftUp[]; END; ENDLOOP; RETURN; END; MergePass: PROCEDURE = BEGIN files: POINTER TO ARRAY [0..NFiles] OF FdHandle = @filesArray; dummiesThisPass: CARDINAL; lastFile: FdHandle; OFile: FdHandle; runNo: CARDINAL; runsThisPass: CARDINAL; OFile _ files[NFiles]; lastFile _ files[NFiles - 1]; runsThisPass _ lastFile.totalRuns; dummiesThisPass _ lastFile.dummyRuns; <> dummiesThisPass _ MIN[dummiesThisPass, files[1].dummyRuns]; OFile.totalRuns _ runsThisPass; OFile.dummyRuns _ dummiesThisPass; <> files[1].totalRuns _ files[1].totalRuns - runsThisPass; files[1].dummyRuns _ files[1].dummyRuns - dummiesThisPass; FOR runNo IN [dummiesThisPass + 1..runsThisPass] DO MergeRun[OFile]; ENDLOOP; IF level > 1 THEN BEGIN fd: FdHandle; i: CARDINAL; FlushBuffer[OFile]; FOR i IN [NFiles - 1..NFiles] DO f: FdHandle = files[i]; Streams.SetIndex[f.dh, 0]; f.head _ 0; f.tail _ 0; ENDLOOP; fd _ files[NFiles]; FOR i DECREASING IN (1..NFiles] DO files[i] _ files[i - 1]; ENDLOOP; files[1] _ fd; END; RETURN; END; MergeRun: PROCEDURE [OFile: FdHandle] = BEGIN <> files: POINTER TO ARRAY [0..NFiles] OF FdHandle = @filesArray; i: CARDINAL; SR: CARDINAL; FOR i IN [1..NFiles - 1] DO f: FdHandle = files[i]; IF f.dummyRuns = 0 THEN [] _ ReadRecord[f] ELSE BEGIN f.dummyRuns _ f.dummyRuns - 1; f.endOfRun _ TRUE; END; ENDLOOP; DO SR _ 0; -- selected record (which file is it from) FOR i IN [1..NFiles - 1] DO f: FdHandle = files[i]; IF (NOT f.endOfRun) AND (SR = 0 OR compare[f.record, files[SR].record] < 0) THEN SR _ i; ENDLOOP; IF SR = 0 THEN EXIT; -- come back and fix this IF level = 1 THEN put[files[SR].record, files[SR].len] ELSE WriteRecord[OFile, files[SR].len, files[SR].record]; files[SR].record _ NIL; [] _ ReadRecord[files[SR]]; ENDLOOP; IF level > 1 THEN WriteRecord[OFile, EOR, NIL]; -- end-of-run marker RETURN; END; SiftDown: PROCEDURE [L: CARDINAL, K: ItemHeaderHandle] = BEGIN heap: LONG POINTER TO SortHeap = heapArray; J: CARDINAL _ L; I: CARDINAL; DO I _ J; J _ I*2; IF J > heapSize THEN EXIT; IF J < heapSize THEN IF compare[@heap[J][0], @heap[J + 1][0]] > 0 THEN J _ J + 1; IF compare[@K[0], @heap[J][0]] <= 0 THEN EXIT; heap[I] _ heap[J]; ENDLOOP; heap[I] _ K; RETURN; END; SiftUp: PROCEDURE = BEGIN heap: LONG POINTER TO SortHeap = heapArray; i: CARDINAL; j: CARDINAL _ heapSize; k: ItemHeaderHandle _ heap[heapSize]; i _ j/2; WHILE i > 0 DO IF compare[@heap[i][0], @k[0]] <= 0 THEN EXIT; heap[j] _ heap[i]; j _ i; i _ j/2; ENDLOOP; heap[j] _ k; RETURN; END; Initialize[expected: expectedItemSize, max: maxItemSize]; BEGIN ENABLE UNWIND => FreeAllocatedStuff[]; heapArray _ sortZone.NEW[SortHeap [maxHeapSize]]; leftoverItem _ sortZone.NEW[Item [recordSize]]; occItemWords _ SIZE[SortHeap [maxHeapSize]] + SIZE[Item [recordSize]]; itemIsLeftOver _ FALSE; inputFinished _ FALSE; <> BuildHeap[]; IF inputFinished THEN THROUGH [1..heapSize] DO item _ GetHeap[]; put[@item[0], item.len]; ENDLOOP ELSE BEGIN FOR i: CARDINAL IN [1..NFiles - 1] DO f _ filesArray[i]; fid[lastChar] _ fid[lastChar] + 1; f.dh _ Streams.NewStream[fid, Streams.AllAccess]; f.totalRuns _ 1; f.dummyRuns _ 1; ENDLOOP; filesArray[NFiles].totalRuns _ 0; filesArray[NFiles].dummyRuns _ 0; BuildRuns[]; <> sortZone.FREE[@leftoverItem]; sortZone.FREE[@heapArray]; IF level > 1 THEN BEGIN fid[lastChar] _ fid[lastChar] + 1; filesArray[NFiles].dh _ Streams.NewStream[fid, Streams.AllAccess]; END; FOR i: CARDINAL IN [1..NFiles] DO -- seems like not needed for N if level = 1 f _ filesArray[i]; f.buffer _ sortZone.NEW[Item [f.buffSize]]; f.head _ 0; f.tail _ 0; ENDLOOP; <> UNTIL level = 0 DO MergePass[]; <1>> level _ level - 1; IF level = 1 THEN { DeleteFile[filesArray[NFiles].dh]; filesArray[NFiles].dh _ NIL}; <> ENDLOOP; FOR i: CARDINAL IN [1..NFiles - 1] DO DeleteFile[filesArray[i].dh]; filesArray[i].dh _ NIL; ENDLOOP; END; END; -- enable block FreeAllocatedStuff[]; RETURN; END; ReadRecord: PROCEDURE [f: FdHandle] RETURNS [BOOLEAN] = BEGIN itemLen: CARDINAL; headIndex: CARDINAL; IF f.head = f.tail THEN FillBuffer[f]; headIndex _ f.head; itemLen _ f.buffer[headIndex]; f.head _ headIndex _ headIndex + 1; IF itemLen = EOR THEN BEGIN f.endOfRun _ TRUE; RETURN[FALSE]; END; IF headIndex + itemLen > f.tail THEN FillBuffer[f]; headIndex _ f.head; f.record _ @f.buffer[headIndex]; f.head _ headIndex + itemLen; f.len _ itemLen; f.endOfRun _ FALSE; RETURN[TRUE]; END; FillBuffer: PROCEDURE [f: FdHandle] = BEGIN cnt: CARDINAL; f.tail _ f.tail - f.head; IF f.tail > 0 THEN Inline.LongCOPY[from: @f.buffer[f.head], nwords: f.tail, to: f.buffer]; cnt _ Streams.GetBlock[f.dh, @f.buffer[f.tail], f.buffSize - f.tail]; f.head _ 0; f.tail _ f.tail + cnt; RETURN; END; WriteRecord: PROCEDURE [f: FdHandle, itemLen: CARDINAL, itemPtr: ItemHandle] = BEGIN buffer: ItemHandle _ f.buffer; tailIndex: CARDINAL _ f.tail; IF tailIndex + (IF itemLen = EOR THEN 1 ELSE itemLen + 1) > f.buffSize THEN BEGIN FlushBuffer[f]; tailIndex _ f.tail; END; buffer[tailIndex] _ itemLen; tailIndex _ tailIndex + 1; IF itemLen # EOR THEN BEGIN Inline.LongCOPY[itemPtr, itemLen, @buffer[tailIndex]]; tailIndex _ tailIndex + itemLen; END; f.tail _ tailIndex; RETURN; END; FlushBuffer: PROCEDURE [f: FdHandle] = BEGIN [] _ Streams.PutBlock[f.dh, f.buffer, f.tail]; f.tail _ 0; END; DeleteFile: PROCEDURE [dh: Streams.Handle] = BEGIN fh: Segments.FHandle _ Streams.FileFromStream[dh]; Segments.LockFile[fh]; Streams.Destroy[dh]; Segments.UnlockFile[fh]; Segments.DestroyFile[fh]; END; END...