-- BTreeInternal.mesa -- Last Edited by: Taft, June 3, 1983 4:18 pm -- Loose ends: -- 1. Consider storing OffsetTables at the end of BTreePages when they happen to fit. WritePage could easily compute an OffsetTable from the EntryTable it already has, and BinSearchPage could save the expense of constructing the OffsetTable when one is already stored in the page. DIRECTORY BTree, Environment USING [Comparison], Mopcodes USING [zMISC, zPOP]; BTreeInternal: DEFINITIONS = BEGIN OPEN BTree; -- Permanent data structures -- Permanent state of BTree, kept in statePage -- The default values are for a freshly-initialized BTree; the TRASH values are not constant but are computed and filled in during Open. TreeState: TYPE = MACHINE DEPENDENT RECORD [ -- Read-only fields which do not change during the life of the tree: seal (0): CARDINAL _ sealValue, pageSize (1): PageSize, -- in words -- Read-write fields, protected by the BTree internal lock: rootPage (2): PageNumber _ nilPage, -- BTree page containing the root greatestPage (3): PageNumber _ statePage, -- greatest page that has ever been part of the tree firstFreePage (4): PageNumber _ nilPage, -- head of list of free BTree pages entryCount (5): LONG CARDINAL _ 0, -- number of entries in the tree depth (7): PathStkIndex _ 0, -- number of levels in the tree updateInProgress (8): BOOLEAN _ FALSE]; sealValue: CARDINAL = 046273B; -- Representation of a BTree data page BTreePagePtr: TYPE = LONG BASE POINTER TO BTreePage; BTreePage: TYPE = MACHINE DEPENDENT RECORD [ freeWords (0): CARDINAL, -- number of free words at end of this page (freePageMarker indicates this page is in the free list) minPage (1): PageNumber, -- page containing least son of this page (or next page in free list) entries (2): --ARRAY [0..0) OF-- BTreeEntry]; PageOffset: TYPE = BTreePagePtr RELATIVE ORDERED POINTER [0..LAST[PageSize]] TO BTreeEntry; BTreeEntry: TYPE = MACHINE DEPENDENT RECORD [ grPage: PageNumber, -- page containing sons greater than this entry but less than the next entry: ClientEntry]; -- client data ClientEntry: TYPE = ARRAY [0..0) OF WORD; nilPage: PageNumber = statePage; -- The "nil" PageNumber is the same as statePage. This is ok because the PageNumber of the statePage is never stored anywhere. freePageMarker: CARDINAL = LAST[CARDINAL]; -- value of freeWords which marks page as being in free list -- Offsets of various fields in BTreePage and BTreeEntry pageOverhead: CARDINAL = 2; -- words of overhead in a page; remainder is available for BTreeEntries nilOffset: PageOffset = LOOPHOLE [0]; entry0Offset: PageOffset = LOOPHOLE [1]; -- offset of a ficticious entry whose grPage would overlay BTreePage.minPage entry1Offset: PageOffset = LOOPHOLE [2]; -- offset of first real entry entryOverhead: CARDINAL = 1; -- overhead in an entry; remainder is client data reservedWordsPerPage: [BTree.reservedWordsPerPage..BTree.reservedWordsPerPage] = pageOverhead+entryOverhead; -- just checking maxMaxRecordsPerPage: CARDINAL = (LAST[PageSize] - pageOverhead) / (entryOverhead + 1); -- worst case assuming largest possible page and smallest possible entry -- Volatile data structures -- Volatile state of open BTree. Tree: TYPE = REF TreeObject; TreeObject: TYPE = MONITORED RECORD [ -- Copy of permanent state in statePage. Access to this copy is protected by the BTree internal read/write lock. Access to the statePage itself is protected by PageStorage locking of the statePage if transactions are in use; in particular, the statePage is always read-locked. state: TreeState, -- Read-only fields which do not change during the life of the BTreeObject: repPrim: RepresentationPrimitives, minEntrySize: CARDINAL, -- in words maxRecordsPerPage: CARDINAL _ TRASH, -- function of pageSize and minEntrySize maxFreeWords: CARDINAL _ TRASH, -- non-overhead words per page awfullyFull: CARDINAL _ TRASH, -- words in 9/10 full page prettyFull: CARDINAL _ TRASH, -- words in 2/3 full page fairlyFull: CARDINAL _ TRASH, -- words in 1/2 full page breathingSpace: CARDINAL _ TRASH, -- words in 1/10 full page storage: PageStorage, storPrim: PageStoragePrimitives, maintainRecomputableState: BOOLEAN, id: CARDINAL _ TRASH, -- distinguishes this TreeObject instance from any other -- Read-write fields, protected by the BTreeObject monitor: version: LONG CARDINAL _ 0, -- number of updates to this tree since it was opened lockCount: INTEGER _ 0, -- + reader count, or -1 for (single) writer unlocked: CONDITION, -- notified when the lock is released -- The following fields hold onto preallocated temporary storage. This storage is handed out to clients for the duration of an operation; if multiple client calls are in progress simultaneously, extra temporary storage is allocated for the second and subsequent clients and dropped on the floor when they finish. offsetTable: OffsetTable _ NIL, -- offsets of entries on a page, indexed by entry ordinal defaultPathStk: PathStk _ NIL, -- used internally when client doesn't pass one entryTable: REF EntryTable _ NIL, -- cumulative lengths and heap offsets of entries in EntSeqRecords heap: REF Heap _ NIL]; -- heap of entries contending for father page -- Representation of a PathStk, the complete description of a search path down the tree PathStk: TYPE = REF PathStkObject; PathStkObject: TYPE = RECORD [ treeID: CARDINAL _ 0, -- instance of tree to which this PathStk belongs version: LONG CARDINAL _ 0, -- value of tree.version for which this PathStk is valid entryTable: REF EntryTable _ NIL, -- cumulative lengths and heap offsets of entries in EntSeqRecords heap: REF Heap _ NIL, -- heap of entries contending for father page top: PathStkIndex _ 0, -- the level corresponding to the BTree entry of interest path: ARRAY PathStkIndex OF PathStkEntry]; -- level 1 is the root; level 0 is dummy, and used only when the root page splits and a new root page must be created PathStkEntry: TYPE = RECORD [ pageNumber: PageNumber _ nilPage, -- also appears as grPage of BTreeEntry indexed by lastOffset in previous stack level offset: PageOffset _ nilOffset, -- offset of entry directly along path lastOffset: PageOffset _ nilOffset, -- entry preceding the one designated by offset nextToLastOffset: PageOffset _ nilOffset, -- entry preceding the one designated by lastOffset eslFront: REF EntSeqRecord _ NIL, -- head of list of entries that belong between lastOffset and offset. Do not assign directly to this field (see below). eslRear: REF EntSeqRecord _ NIL, -- tail of list. Do not assign directly to this field (see below). leastSon: PageNumber _ nilPage]; -- least son of entries belonging on this page -- *** DANGER *** DANGER *** DANGER *** -- Fields of a PathStkEntry are usually accessed via a LONG POINTER (e.g., pse.offset, where pse is a LONG POINTER TO PathStkEntry). LONG POINTERs to the interior of a reference-counted object are inherently unsafe. If the object is also reference-containing there is a special danger: ordinary assignment to a reference-containing field does not perform reference counting! Therefore, do not assign directly to eslFront or eslRear, but instead call AssignRefESR. maxLevelsInTree: CARDINAL = 7; -- maximum depth allowed by the implementation. To exceed this without first exhausting the PageNumber space would require an average branching factor of less than 5, which is totally unreasonable for a BTree. PathStkIndex: TYPE = [0..maxLevelsInTree]; -- Auxiliary data structures OffsetTable: TYPE = REF OffsetTableObject; OffsetTableObject: TYPE = RECORD [ offsets: SEQUENCE maxLength: EntryOrdinal OF PageOffset]; EntryOrdinal: TYPE = [0..3*maxMaxRecordsPerPage]; -- an OffsetTableObject needs to hold only one page's worth of entries, but an entryTable may need to hold as many as three pages' worth. EntSeqRecord: TYPE = RECORD [ fwdP: REF EntSeqRecord, -- next in list entSeqP: LONG POINTER TO BTreeEntry, -- pointer to a sequence of entries (within this EntSeqRecord, starting at or after entSeq) entSeqLen: CARDINAL, -- length of the sequence in words entSeq: --ARRAY [0..0) OF BTreeEntry-- SEQUENCE maxLength: [0..LAST[PageSize]] OF WORD]; EntryTable: TYPE = RECORD [ length: EntryOrdinal, -- indexed by entry ordinal in entry sequence list; entry 0 is a dummy and has a cumEntSize of zero. map: SEQUENCE maxLength: EntryOrdinal OF EntryTableRec]; EntryTableRec: TYPE = RECORD [ cumEntSize: CARDINAL, -- cumulative sizes of entries up to and including this one heapPos: HeapIndex]; -- offset of this entry in heap Heap: TYPE = RECORD [ length: HeapIndex, entries: SEQUENCE maxLength: HeapIndex OF EntryOrdinal]; HeapIndex: TYPE = [0..maxMaxRecordsPerPage]; -- There are at most one page's worth of entries in the heap at any given time -- Convenient access to the client-provided procedures -- These inline procedures have the effect of incorporating the client's procedures into the cluster for type Tree. Compare: PROCEDURE [tree: Tree, key: Key, entry: Entry] RETURNS [Environment.Comparison] = INLINE { RETURN [tree.repPrim.compare[key: key, entry: entry]] }; CompareEntries: PROCEDURE [tree: Tree, entry1, entry2: Entry] RETURNS [Environment.Comparison] = INLINE { RETURN [tree.repPrim.compareEntries[entry1: entry1, entry2: entry2]] }; EntrySize: PROCEDURE [tree: Tree, entry: Entry] RETURNS [words: EntSize] = INLINE { RETURN [tree.repPrim.entrySize[entry]] }; EntryFromRecord: PROCEDURE [tree: Tree, record: Record] RETURNS [entry: Entry] = INLINE { RETURN [tree.repPrim.entryFromRecord[record]] }; NewRecordFromEntry: PROCEDURE [tree: Tree, entry: Entry] RETURNS [record: Record] = INLINE { RETURN [tree.repPrim.newRecordFromEntry[entry]] }; ReferencePage: PROCEDURE [tree: Tree, number: PageNumber, type: ReferenceType _ read] RETURNS [ptr: PagePtr] = INLINE { RETURN [tree.storPrim.referencePage[storage: tree.storage, number: number, type: type]] }; ReleasePage: PROCEDURE [tree: Tree, number: PageNumber, update: UpdateState _ unchanged] = INLINE { tree.storPrim.releasePage[storage: tree.storage, number: number, update: update] }; -- Internal operations BTreeEntrySize: PROCEDURE [tree: Tree, bte: LONG POINTER TO BTreeEntry] RETURNS [words: EntSize] = INLINE -- The size of an entire BTreeEntry, including overhead. { RETURN [entryOverhead+tree.EntrySize[@bte.entry]] }; PathEntryLE: PROCEDURE [tree: Tree, key: Key, pathStk: PathStk, useExistingPath: BOOLEAN _ FALSE] RETURNS [depth: PathStkIndex]; -- Computes a PathStk for the supplied key such that at each level lastOffset refers to an entry less than or equal to key and offset refers to an entry strictly greater than key (if any such entries exist). Sets pathStk.top to the deepest level in which lastOffset refers to a real entry (i.e., is not entry0Offset). Returns the depth of the tree, i.e., the PathStkIndex for the leaf page (which is either the page containing the LE entry or its leftmost right descendant page). If key=NIL or is less than any entry in the tree, sets pathStk.top to zero, computes the path for the minimum key, and returns the depth of the tree. ReferenceStack: PROCEDURE [tree: Tree, pathStk: PathStk, type: ReferenceType _ read] RETURNS [pse: LONG POINTER TO PathStkEntry, ptr: PagePtr]; -- References the page described by the pathStk.top level of the PathStk, and returns the PagePtr along with the PathStkEntry pointer. The caller is responsible for releasing the reference. BackUpOneEntry: PROCEDURE [tree: Tree, pse: LONG POINTER TO PathStkEntry]; -- Adjusts the PathStkEntry so that it addresses the previous entry (there had better be one). Caution: if the ESL is non-empty, this destroys the invariant that the entries in the ESL lie logically between pse.lastOffset and pse.offset. RepairOffsets: PROCEDURE [tree: Tree, pse: LONG POINTER TO PathStkEntry]; -- Checks whether the offsets in the PathStkEntry are still valid, and rebuilds them if not. PathToMaxDescendant: PROCEDURE [tree: Tree, pathStk: PathStk, page: PageNumber]; -- Starting with the specified BTree page, extends the pathStk to the rightmost descendant entry. page is assumed to be a descendant of some entry at the pathStk.top level of the tree. Does not change the pathStk at that level, but builds new pathStk entries at deeper levels. Does nothing if page=nilPage. AdjustTreeState: PROCEDURE [tree: Tree, update: UpdateState, deltaEntryCount: INTEGER]; -- Notes that the entryCount is changing by deltaEntryCount, and rewrites tree.state into the statePage if appropriate. StatePage updates are ordinarily inhibited if update=unchanged or the tree's maintainRecomputableState flag is FALSE. The tree must be locked for update by the caller. WriteStatePage: PROCEDURE [tree: Tree, update: UpdateState _ unchanged]; -- Rewrites tree.state into the statePage. The tree must be locked for update by the caller. InsertRecords: PROCEDURE [tree: Tree, pathStk: PathStk]; -- Inserts all EntSeqRecords at the current (pathStk.top) level of the PathStk into the tree itself. This may leave behind some new EntSeqRecords at the next higher level. MakeEntSeqRecord: PROCEDURE [entSeq: LONG POINTER TO BTreeEntry, length: CARDINAL] RETURNS [esr: REF EntSeqRecord]; -- Takes a sequence of BTreeEntries (e.g., from a BTreePage) and constructs an EntSeqRecord containing it. Returns NIL if length=0. AppendEntSeqRecord: PROCEDURE [pse: LONG POINTER TO PathStkEntry, esr: REF EntSeqRecord]; -- Appends esr to the list of EntSeqRecords in the PathStkEntry. It is ok for esr to be NIL. PushEntSeqRecord: PROCEDURE [pse: LONG POINTER TO PathStkEntry, esr: REF EntSeqRecord]; -- Puts esr at the front of the list of EntSeqRecords in the PathStkEntry. It is ok for esr to be NIL. AssignRefESR: PROCEDURE [ptr: LONG POINTER TO REF EntSeqRecord, ref: REF EntSeqRecord] = INLINE { LOOPHOLE[ptr, REF REF EntSeqRecord]^ _ ref }; -- Performs ptr^ _ ref as a reference-counted assignment. This is necessary when assigning to the eslFront and eslRear fields of a PathStkEntry accessed via a LONG POINTER. RemoveEntry: PROCEDURE [tree: Tree, pse: LONG POINTER TO PathStkEntry, ignoreESL: BOOLEAN _ FALSE] RETURNS [esr: REF EntSeqRecord, grPage: PageNumber]; -- Removes the BTreeEntry that logically follows pse.lastOffset, i.e., the one at pse.offset if the ESL is empty, else the first entry in the ESL (except does not consider the ESL if ignoreESL=TRUE). Returns an EntSeqRecord containing the removed entry, with the grPage field set to the minPage of the page it used to point to (if any). Returns grPage = the former grPage field of the removed entry. BackUpAndRemoveEntry: PROCEDURE [tree: Tree, pse: LONG POINTER TO PathStkEntry] RETURNS [esr: REF EntSeqRecord, grPage: PageNumber]; -- Removes the BTreeEntry at pse.lastOffset and returns an ESR containing it. Reconstructs the offsets so that pse.offset refers to the same entry as before, and lastOffset and nextToLastOffset refer to the two predecessors of the removed entry. grPage is handled as in RemoveEntry. AllocatePage: PROCEDURE [tree: Tree] RETURNS [number: PageNumber]; -- Allocates a new page. Its initial contents are undefined. Raises Bug[pageNotFree] if the free list is fouled. FreePage: PROCEDURE [tree: Tree, number: PageNumber]; -- Frees a page that was formerly part of the BTree. Raises Bug[pageAlreadyFree] if it hasn't been allocated. LongZero: PROCEDURE [where: LONG POINTER, nWords: CARDINAL] = MACHINE CODE { Mopcodes.zMISC, 102B; Mopcodes.zPOP; Mopcodes.zPOP }; Lock: PROCEDURE [tree: Tree, mode: LockMode]; -- Locks the BTree object for access in the specified mode (read permits other readers but excludes updaters; update excludes all other access). LockMode: TYPE = {read, update}; Unlock: PROCEDURE [tree: Tree]; -- Unlocks the BTree object. Raises Bug[treeNotLocked] if it isn't locked. GetHeapAndTable: PROCEDURE [tree: Tree, pathStk: PathStk]; -- Obtains the Heap and EntryTable associated with the BTree and assigns them to the pathStk, if they are available; otherwise makes new ones. ReturnHeapAndTable: PROCEDURE [tree: Tree, pathStk: PathStk]; -- Gives back the Heap and EntryTable assigned to the PathStk. GetDefaultPathStk: PROCEDURE [tree: Tree] RETURNS [pathStk: PathStk]; -- Obtains the PathStk associated with the BTree, if it is available; otherwise makes a new one. ReturnDefaultPathStk: PROCEDURE [tree: Tree, pathStk: PathStk]; -- Gives back the PathStk. -- Interface variables staticZone: READONLY ZONE; -- used for relatively static objects such as TreeObjects, PathStks, and the like tempZone: READONLY ZONE; -- used for ephemeral objects, principally EntSeqRecords -- Exceptions Bug: ERROR [type: BugType]; BugType: TYPE = {depositESL, entriesLeftOver, mcCreightWasWrong, newRootOverflow, pageAlreadyFree, pageNotFree, tooFewEntries, tooManyEntriesInPage, treeNotLocked, twoBrothersNotEnough}; END.