DIRECTORY BTree; BTreeInternal: DEFINITIONS = BEGIN OPEN BTree; TreeState: TYPE = MACHINE DEPENDENT RECORD [ seal (0): CARDINAL _ sealValue, pageSize (1): PageSize _ TRASH, -- in words rootPage (2): PageNumber _ nilPage, -- BTree page containing the root greatestPage (3): PageNumber _ statePage, -- greatest page that has ever been part of the tree firstFreePage (4): PageNumber _ nilPage, -- head of list of free BTree pages entryCount (5): LONG CARDINAL _ 0, -- number of entries in the tree depth (7): PathStkIndex _ 0, -- number of levels in the tree updateInProgress (8): BOOL _ FALSE]; sealValue: CARDINAL = 046273B; BTreePagePtr: TYPE = LONG BASE POINTER TO BTreePage; BTreePage: TYPE = MACHINE DEPENDENT RECORD [ freeWords (0): CARDINAL, minPage (1): PageNumber, entries (2): --ARRAY [0..?) OF-- BTreeEntry]; PageOffset: TYPE = BTreePagePtr RELATIVE ORDERED POINTER [0..LAST[PageSize]] TO BTreeEntry; BTreeEntry: TYPE = MACHINE DEPENDENT RECORD [ grPage: PageNumber, -- page containing sons greater than this entry but less than the next entry: ClientEntry]; -- client data ClientEntry: TYPE = ARRAY [0..0) OF WORD; nilPage: PageNumber = statePage; -- The "nil" PageNumber is the same as statePage. This is ok because the PageNumber of the statePage is never stored anywhere. freePageMarker: CARDINAL = LAST[CARDINAL]; -- value of freeWords which marks page as being in free list pageOverhead: CARDINAL = 2; -- words of overhead in a page; remainder is available for BTreeEntries nilOffset: PageOffset = LOOPHOLE [0]; entry0Offset: PageOffset = LOOPHOLE [1]; -- offset of a ficticious entry whose grPage would overlay BTreePage.minPage entry1Offset: PageOffset = LOOPHOLE [2]; -- offset of first real entry entryOverhead: CARDINAL = 1; -- overhead in an entry; remainder is client data reservedWordsPerPage: [BTree.reservedWordsPerPage..BTree.reservedWordsPerPage] = pageOverhead+entryOverhead; -- just checking maxMaxRecordsPerPage: CARDINAL = (LAST[PageSize] - pageOverhead) / (entryOverhead + 1); -- worst case assuming largest possible page and smallest possible entry Tree: TYPE = REF TreeObject; TreeObject: TYPE = MONITORED RECORD [ state: TreeState, repPrim: RepresentationPrimitives, storPrim: PageStoragePrimitives, minEntrySize: CARDINAL, -- in words objectState: State _ closed, maxRecordsPerPage: CARDINAL _ TRASH, -- function of pageSize and minEntrySize maxFreeWords: CARDINAL _ TRASH, -- non-overhead words per page awfullyFull: CARDINAL _ TRASH, -- words in 9/10 full page prettyFull: CARDINAL _ TRASH, -- words in 2/3 full page fairlyFull: CARDINAL _ TRASH, -- words in 1/2 full page breathingSpace: CARDINAL _ TRASH, -- words in 1/10 full page storage: PageStorage _ NIL, maintainRecomputableState: BOOL _ TRUE, id: CARDINAL _ TRASH, -- distinguishes this TreeObject instance from any other version: LONG CARDINAL _ 0, -- number of updates to this tree since it was opened lockCount: INTEGER _ 0, -- + reader count, or -1 for (single) writer unlocked: CONDITION, -- notified when the lock is released longUpdate: BOOL _ FALSE, -- argument of most recent call to SetUpdateInProgress offsetTable: OffsetTable _ NIL, -- offsets of entries on a page, indexed by entry ordinal defaultPathStk: PathStk _ NIL, -- used internally when client doesn't pass one entryTable: REF EntryTable _ NIL, -- cumulative lengths and heap offsets of entries in EntSeqRecords heap: REF Heap _ NIL]; -- heap of entries contending for father page PathStk: TYPE = REF PathStkObject; PathStkObject: TYPE = RECORD [ treeID: CARDINAL _ 0, -- instance of tree to which this PathStk belongs version: LONG CARDINAL _ 0, -- value of tree.version for which this PathStk is valid entryTable: REF EntryTable _ NIL, -- cumulative lengths and heap offsets of entries in EntSeqRecords heap: REF Heap _ NIL, -- heap of entries contending for father page top: PathStkIndex _ 0, -- the level corresponding to the BTree entry of interest path: ARRAY PathStkIndex OF PathStkEntry]; -- level 1 is the root; level 0 is dummy, and used only when the root page splits and a new root page must be created PathStkEntry: TYPE = RECORD [ pageNumber: PageNumber _ nilPage, -- also appears as grPage of BTreeEntry indexed by lastOffset in previous stack level offset: PageOffset _ nilOffset, -- offset of entry directly along path lastOffset: PageOffset _ nilOffset, -- entry preceding the one designated by offset nextToLastOffset: PageOffset _ nilOffset, -- entry preceding the one designated by lastOffset eslFront: REF EntSeqRecord _ NIL, -- head of list of entries that belong between lastOffset and offset. Do not assign directly to this field (see below). eslRear: REF EntSeqRecord _ NIL, -- tail of list. Do not assign directly to this field (see below). leastSon: PageNumber _ nilPage]; -- least son of entries belonging on this page maxLevelsInTree: CARDINAL = 7; -- maximum depth allowed by the implementation. To exceed this without first exhausting the PageNumber space would require an average branching factor of less than 5, which is totally unreasonable for a BTree. PathStkIndex: TYPE = [0..maxLevelsInTree]; OffsetTable: TYPE = REF OffsetTableObject; OffsetTableObject: TYPE = RECORD [ offsets: SEQUENCE maxLength: EntryOrdinal OF PageOffset]; EntryOrdinal: TYPE = [0..3*maxMaxRecordsPerPage]; -- an OffsetTableObject needs to hold only one page's worth of entries, but an entryTable may need to hold as many as three pages' worth. EntSeqRecord: TYPE = RECORD [ fwdP: REF EntSeqRecord, -- next in list entSeqP: LONG POINTER TO BTreeEntry, -- pointer to a sequence of entries (within this EntSeqRecord, starting at or after entSeq) entSeqLen: CARDINAL, -- length of the sequence in words entSeq: --ARRAY [0..0) OF BTreeEntry-- SEQUENCE maxLength: [0..LAST[PageSize]] OF WORD]; EntryTable: TYPE = RECORD [ length: EntryOrdinal, -- indexed by entry ordinal in entry sequence list; entry 0 is a dummy and has a cumEntSize of zero. map: SEQUENCE maxLength: EntryOrdinal OF EntryTableRec]; EntryTableRec: TYPE = RECORD [ cumEntSize: CARDINAL, -- cumulative sizes of entries up to and including this one heapPos: HeapIndex]; -- offset of this entry in heap Heap: TYPE = RECORD [ length: HeapIndex, entries: SEQUENCE maxLength: HeapIndex OF EntryOrdinal]; HeapIndex: TYPE = [0..maxMaxRecordsPerPage]; -- There are at most one page's worth of entries in the heap at any given time Compare: PROC [tree: Tree, key: Key, entry: Entry] RETURNS [Comparison] = INLINE { RETURN [tree.repPrim.compare[key: key, entry: entry]] }; CompareEntries: PROC [tree: Tree, entry1, entry2: Entry] RETURNS [Comparison] = INLINE { RETURN [tree.repPrim.compareEntries[entry1: entry1, entry2: entry2]] }; EntrySize: PROC [tree: Tree, entry: Entry] RETURNS [words: EntSize] = INLINE { RETURN [tree.repPrim.entrySize[entry]] }; EntryFromRecord: PROC [tree: Tree, record: Record] RETURNS [entry: Entry] = INLINE { RETURN [tree.repPrim.entryFromRecord[record]] }; NewRecordFromEntry: PROC [tree: Tree, entry: Entry] RETURNS [record: Record] = INLINE { RETURN [tree.repPrim.newRecordFromEntry[entry]] }; ReferencePage: PROC [tree: Tree, number: PageNumber, type: ReferenceType _ read] RETURNS [ptr: PagePtr] = INLINE { RETURN [tree.storPrim.referencePage[storage: tree.storage, number: number, type: type]] }; ReleasePage: PROC [tree: Tree, number: PageNumber, update: UpdateState _ unchanged] = INLINE { tree.storPrim.releasePage[storage: tree.storage, number: number, update: update] }; BTreeEntrySize: PROC [tree: Tree, bte: LONG POINTER TO BTreeEntry] RETURNS [words: EntSize] = INLINE { RETURN [entryOverhead+tree.EntrySize[@bte.entry]] }; PathEntryLE: PROC [tree: Tree, key: Key, pathStk: PathStk, useExistingPath: BOOL _ FALSE] RETURNS [equal: BOOL, depth: PathStkIndex]; ReferenceStack: PROC [tree: Tree, pathStk: PathStk, type: ReferenceType _ read] RETURNS [pse: LONG POINTER TO PathStkEntry, ptr: PagePtr]; BackUpOneEntry: PROC [tree: Tree, pse: LONG POINTER TO PathStkEntry]; RepairOffsets: PROC [tree: Tree, pse: LONG POINTER TO PathStkEntry]; PathToMaxDescendant: PROC [tree: Tree, pathStk: PathStk, page: PageNumber]; AdjustTreeState: PROC [tree: Tree, update: UpdateState, deltaEntryCount: INTEGER]; WriteStatePage: PROC [tree: Tree, update: UpdateState _ unchanged]; InsertRecords: PROC [tree: Tree, pathStk: PathStk]; MakeEntSeqRecord: PROC [entSeq: LONG POINTER TO BTreeEntry, length: CARDINAL] RETURNS [esr: REF EntSeqRecord]; AppendEntSeqRecord: PROC [pse: LONG POINTER TO PathStkEntry, esr: REF EntSeqRecord]; PushEntSeqRecord: PROC [pse: LONG POINTER TO PathStkEntry, esr: REF EntSeqRecord]; AssignRefESR: PROC [ptr: LONG POINTER TO REF EntSeqRecord, ref: REF EntSeqRecord] = INLINE { LOOPHOLE[ptr, REF REF EntSeqRecord]^ _ ref }; RemoveEntry: PROC [tree: Tree, pse: LONG POINTER TO PathStkEntry, ignoreESL: BOOL _ FALSE] RETURNS [esr: REF EntSeqRecord, grPage: PageNumber]; BackUpAndRemoveEntry: PROC [tree: Tree, pse: LONG POINTER TO PathStkEntry] RETURNS [esr: REF EntSeqRecord, grPage: PageNumber]; AllocatePage: PROC [tree: Tree] RETURNS [number: PageNumber]; FreePage: PROC [tree: Tree, number: PageNumber]; LongMove: PROC [to, from: LONG POINTER, nWords: CARDINAL]; Lock: PROC [tree: Tree, mode: LockMode, checkState: BOOL _ TRUE]; LockMode: TYPE = {read, update}; Unlock: PROC [tree: Tree]; GetHeapAndTable: PROC [tree: Tree, pathStk: PathStk]; ReturnHeapAndTable: PROC [tree: Tree, pathStk: PathStk]; GetDefaultPathStk: PROC [tree: Tree] RETURNS [pathStk: PathStk]; ReturnDefaultPathStk: PROC [tree: Tree, pathStk: PathStk]; Bug: ERROR [type: BugType]; BugType: TYPE = {depositESL, entriesLeftOver, mcCreightWasWrong, newRootOverflow, pageAlreadyFree, pageNotFree, tooFewEntries, tooManyEntriesInPage, treeNotLocked, twoBrothersNotEnough}; END. ศBTreeInternal.mesa Copyright c 1985 by Xerox Corporation. All rights reserved. Taft, November 25, 1983 3:22 pm Russ Atkinson (RRA) February 19, 1985 6:46:26 pm PST Loose ends: 1. Consider storing OffsetTables at the end of BTreePages when they happen to fit. WritePage could easily compute an OffsetTable from the EntryTable it already has, and BinSearchPage could save the expense of constructing the OffsetTable when one is already stored in the page. Permanent data structures Permanent state of BTree, kept in statePage The default values are for a freshly-initialized BTree; the TRASH values are not constant but are computed and filled in during Open. Read-only fields which do not change during the life of the tree: Read-write fields, protected by the BTree internal lock: Representation of a BTree data page number of free words at end of this page (freePageMarker indicates this page is in the free list) page containing least son of this page (or next page in free list) Offsets of various fields in BTreePage and BTreeEntry Volatile data structures Volatile state of open BTree. Copy of permanent state in statePage. Access to this copy is protected by the BTree internal read/write lock. Access to the statePage itself is protected by PageStorage locking of the statePage if transactions are in use; in particular, the statePage is always read-locked. Read-only fields which do not change during the lifetime of the TreeObject: Read-only fields which do not change while a BTree is open, and which are protected by the BTree internal read/write lock when they do change: Read-write fields, protected by the BTreeObject monitor: The following fields hold onto preallocated temporary storage. This storage is handed out to clients for the duration of an operation; if multiple client calls are in progress simultaneously, extra temporary storage is allocated for the second and subsequent clients and dropped on the floor when they finish. Representation of a PathStk, the complete description of a search path down the tree *** DANGER *** DANGER *** DANGER *** Fields of a PathStkEntry are usually accessed via a LONG POINTER (e.g., pse.offset, where pse is a LONG POINTER TO PathStkEntry). LONG POINTERs to the interior of a reference-counted object are inherently unsafe. If the object is also reference-containing there is a special danger: ordinary assignment to a reference-containing field does not perform reference counting! Therefore, do not assign directly to eslFront or eslRear, but instead call AssignRefESR. Auxiliary data structures Convenient access to the client-provided procedures These inline procedures have the effect of incorporating the client's procedures into the cluster for type Tree. Internal operations The size of an entire BTreeEntry, including overhead. Computes a PathStk for the supplied key such that at each level lastOffset refers to an entry less than or equal to key and offset refers to an entry strictly greater than key (if any such entries exist). Sets pathStk.top to the deepest level in which lastOffset refers to a real entry (i.e., is not entry0Offset). Returns equal=TRUE if an entry equal to key was found, and depth = the depth of the tree, i.e., the PathStkIndex for the leaf page (which is either the page containing the LE entry or its leftmost right descendant page). If key=NIL or is less than any entry in the tree, sets pathStk.top to zero, computes the path for the minimum key, and returns the depth of the tree. References the page described by the pathStk.top level of the PathStk, and returns the PagePtr along with the PathStkEntry pointer. The caller is responsible for releasing the reference. Adjusts the PathStkEntry so that it addresses the previous entry (there had better be one). Caution: if the ESL is non-empty, this destroys the invariant that the entries in the ESL lie logically between pse.lastOffset and pse.offset. Checks whether the offsets in the PathStkEntry are still valid, and rebuilds them if not. Starting with the specified BTree page, extends the pathStk to the rightmost descendant entry. page is assumed to be a descendant of some entry at the pathStk.top level of the tree. Does not change the pathStk at that level, but builds new pathStk entries at deeper levels. Does nothing if page=nilPage. Notes that the entryCount is changing by deltaEntryCount, and rewrites tree.state into the statePage if appropriate. StatePage updates are ordinarily inhibited if update=unchanged or the tree's maintainRecomputableState flag is FALSE. The tree must be locked for update by the caller. Rewrites tree.state into the statePage. The tree must be locked for update by the caller. Inserts all EntSeqRecords at the current (pathStk.top) level of the PathStk into the tree itself. This may leave behind some new EntSeqRecords at the next higher level. Takes a sequence of BTreeEntries (e.g., from a BTreePage) and constructs an EntSeqRecord containing it. Returns NIL if length=0. Appends esr to the list of EntSeqRecords in the PathStkEntry. It is ok for esr to be NIL. Puts esr at the front of the list of EntSeqRecords in the PathStkEntry. It is ok for esr to be NIL. Performs ptr^ _ ref as a reference-counted assignment. This is necessary when assigning to the eslFront and eslRear fields of a PathStkEntry accessed via a LONG POINTER. Removes the BTreeEntry that logically follows pse.lastOffset, i.e., the one at pse.offset if the ESL is empty, else the first entry in the ESL (except does not consider the ESL if ignoreESL=TRUE). Returns an EntSeqRecord containing the removed entry, with the grPage field set to the minPage of the page it used to point to (if any). Returns grPage = the former grPage field of the removed entry. Removes the BTreeEntry at pse.lastOffset and returns an ESR containing it. Reconstructs the offsets so that pse.offset refers to the same entry as before, and lastOffset and nextToLastOffset refer to the two predecessors of the removed entry. grPage is handled as in RemoveEntry. Allocates a new page. Its initial contents are undefined. Raises Bug[pageNotFree] if the free list is fouled. Frees a page that was formerly part of the BTree. Raises Bug[pageAlreadyFree] if it hasn't been allocated. Like LongCOPY but works correctly even in the case of overlapping source and destination blocks. This will move to PrincOpsUtils someday. Locks the BTree object for access in the specified mode (read permits other readers but excludes updaters; update excludes all other access). If checkState=TRUE, checks the tree's state and raises Error[closed] if it is closed or waits if it is suspended; if checkState=FALSE, the tree's state is ignored. Unlocks the BTree object. Raises Bug[treeNotLocked] if it isn't locked. Obtains the Heap and EntryTable associated with the BTree and assigns them to the pathStk, if they are available; otherwise makes new ones. Gives back the Heap and EntryTable assigned to the PathStk. Obtains the PathStk associated with the BTree, if it is available; otherwise makes a new one. Gives back the PathStk. Exceptions ส ธ– "Cedar" style˜headšœ™Icodešœ ฯmœ1™Nšœ žœžœŸ˜9Nšœ žœžœŸ˜7Nšœ žœžœŸ˜7NšœžœžœŸ˜žœž˜pNšœžœT˜\—šก œžœEž˜\NšœU˜U——šœ™š กœžœžœžœžœ žœž˜dNšœ5™5Nšœžœ.˜6—š ก œžœ;žœžœžœ žœ˜…Nšœหžœำžœ‹™ฐ—š กœžœ<žœžœžœžœ˜ŠNšœป™ป—š กœžœžœžœžœ˜ENšœ๋™๋—š ก œžœžœžœžœ˜DNšœY™Y—šกœžœ2˜KNšœฏ™ฏ—šกœžœ4žœ˜RNšœๅžœ4™ž—šกœžœ/˜CNšœZ™Z—šก œžœ ˜3Nšœฉ™ฉ—šกœžœ žœžœžœžœžœžœ˜nNšœqžœ ™—š กœžœžœžœžœžœ˜TNšœVžœ™Z—š กœžœžœžœžœžœ˜RNšœ`žœ™d—šก œžœžœžœžœžœžœž˜ZNšœžœžœžœ˜/Nšœž œ™ช—šก œžœžœžœžœžœžœžœžœ#˜Nšœพžœฬ™Ž—šกœžœžœžœžœžœžœ#˜Nšœ™™™—šก œžœžœ˜=Nšœo™o—šกœžœ"˜0Nšœk™k—š กœžœ žœžœ žœ˜:N™Š—šกœžœ*žœžœ˜ANšœœžœnžœ™ฑ—Nšœ žœ˜ šกœžœ˜NšœH™H—šกœžœ ˜5Nšœ‹™‹—šกœžœ ˜8Nšœ;™;—šกœžœžœ˜@Nšœ]™]—šกœžœ ˜:Nšœ™——šœ ™ Nšœžœ˜Nšœ žœญ˜บNšžœ˜——…—&L–