<> <> <> <> <> <<>> <> <<>> <> DIRECTORY Rope USING[ ROPE ]; NewFile: CEDAR DEFINITIONS = BEGIN <> RC: TYPE = { -- extension of "Reason" for internal use. ok, wentOffline, -- volume is not accessible (some drive has been offline) inconsistent, -- the volume's (or file's) permanent data structures look inconsistent software, -- i.e. label-check. The page on disk is not the one the software expected. hardware, -- the disk hardware/microcode detected a hard disk error unknownFile, -- the file does not exist (possibly deleted since it was opened) unknownPage, -- the page would be beyond the end of the file volumeFull, -- the volume has no free pages (and one is needed!) fragmented, -- the file requires too many non-contiguous areas of disk ( > 84 today ) mixedDevices -- a bootable file is not entirely on one device, or not on correct device }; Reason: TYPE = RC[SUCC[ok]..LAST[RC]]; -- error reason Error: ERROR[why: Reason]; <> <> VolumeID: TYPE[5]; <> NullVolumeRep: PRIVATE TYPE = RECORD[a,b,c,d,e:CARDINAL]; nullVolumeID: VolumeID = LOOPHOLE[NullVolumeRep[a:0,b:0,c:0,d:0,e:0]]; <> Volume: TYPE = REF VolumeObject; VolumeObject: TYPE; <> VolumeFile: TYPE = MACHINE DEPENDENT { -- root files of a volume checkpoint(0), microcode(1), germ(2), bootFile(3), debugger(4), -- outload file -- debuggee(5), -- outload file -- VM(6), -- virtual memory backing file -- VAM(7), -- volume allocation map -- client(8), -- client directory system root file -- alpine(9), -- for use by Alpine file servers -- (15) -- spare root page slots -- }; FP: TYPE = MACHINE DEPENDENT RECORD[ <> id(0): FileID, da(2): DA ]; FileID: TYPE[2]; DA: TYPE[2]; NullFileIDRep: PRIVATE TYPE = RECORD[a,b:CARDINAL]; NullDARep: PRIVATE TYPE = RECORD[a,b:CARDINAL]; nullDA: PRIVATE DA = LOOPHOLE[NullDARep[0,0]]; nullFP: FP = [id: nullFileID, da: nullDA]; <> nullFileID: FileID = LOOPHOLE[NullFileIDRep[0,0]]; <> PageNumber: TYPE = RECORD[INT]; <> PageCount: TYPE = INT; <> Add: PROC[p: PageNumber, n: PageCount] RETURNS[PageNumber] = INLINE { RETURN[ [p+n] ] }; -- "n" may be negative -- wordsPerPage: INT = 256; <> PagesForWords: PROC[w: INT] RETURNS[PageCount] = INLINE { RETURN[ (w+wordsPerPage-1) / wordsPerPage ] }; Handle: TYPE = REF Object; Object: TYPE; <> propertyWords: INT = 256; -- the number of words in a file's property storage. PropertyStorage: TYPE = LONG POINTER TO ARRAY [0..propertyWords) OF WORD; <> NextVolume: PROC[volume: Volume, wait: BOOL _ FALSE] RETURNS[Volume]; <> GetVolumeID: PROC[volume: Volume] RETURNS[id: VolumeID]; <> <> GetVolumeName: PROC[volume: Volume] RETURNS[Rope.ROPE]; <> <> GetVolumePages: PROC[volume: Volume] RETURNS[size, free: INT]; <> <> FindVolumeFromID: PROC[id: VolumeID] RETURNS[Volume]; <> FindVolumeFromName: PROC[name: Rope.ROPE] RETURNS[Volume]; <> SystemVolume: PROC RETURNS[Volume]; <> SetRoot: PROC[root: VolumeFile, file: Handle, page: PageNumber _ [0]]; <> <> GetRoot: PROC[volume: Volume, root: VolumeFile] RETURNS[fp: FP, page: PageNumber]; <> <> EraseVolume: PROC[volume: Volume]; <> <> <> Open: PROC[volume: Volume, fp: FP] RETURNS[Handle]; <> <> Create: PROC[volume: Volume, size: PageCount] RETURNS[Handle]; <> <> Delete: PROC[file: Handle]; <> <> Info: PROC[file: Handle] RETURNS[volume: Volume, fp: FP, size: PageCount]; <> <> SetSize: PROC[file: Handle, size: PageCount]; <> <> Read: UNSAFE PROC[file: Handle, from: PageNumber, nPages: PageCount, to: LONG POINTER]; <> <> Write: PROC[file: Handle, to: PageNumber, nPages: PageCount, from: LONG POINTER]; <> <> GetProperties: PROC[file: Handle] RETURNS[PropertyStorage]; <> <> WriteProperties: PROC[file: Handle]; <> <> END. Implementation Notes Pages, Labels and Data Structures A volume consists of a set of pages. Each page has a label and data. The data is what is transferred during the Read and Write operations, and is where properties are stored, and is where the system's permanent data structures are stored. The truth about the pages of a volume is recorded solely in the labels of the pages. Two forms of permanent data structure are used as hints for accessing the volume: a single volume allocation map (VAM) and a run-table for each file. The VAM is itself a file, whose data pages contain a bitmap indicating which pages of the volume are (probably) free. The run-table represents the set of pages which (probably) constitute the pages of the file. This set of pages is, in sequence: the file properties, the run-table itself, and the data pages of the file. The FP of a file consists of a volume-relative UID (2 words) plus the disk address of the file's first page. The labels of the pages of a file contain the file's UID, plus an identification of the page number relative to the file. The disk allocator attempts to allocate contiguous pages as far as possible. The file's property pages and the first page of the run-table are contiguous on disk. The run-table of a file indicates the set of runs of contiguous pages constituting the file, giving for each run its disk address and length. In this discussion, all disk addresses are relative to a logical volume. Maintaining the Hints To maintain the status of the VAM as a hint, the implementation maintains the invariant that the VAM on disk describes a superset of the free pages. That is, all free pages are marked so in the VAM, but some pages marked free in the VAM may actually be in use. The hint is verified by checking the label of a page before actually allocating. When freeing pages, the appropriate region of the VAM is written back to disk before marking the pages' labels as free. When allocating pages, the pages' labels are rewritten as being in use before writing the appropriate region of the VAM. This has the property of never losing pages. Note that when allocating pages, we can delay rewriting the VAM at the possible cost of having some invalid hints if we crash before rewriting it. To maintain the status of a file's run-table as a hint, the implementation maintains the invariant that the run-table on disk describes a superset of the pages of the file. That is, all the pages of the file are recorded in the run-table, but some pages in the run-table may no longer be part of the file. The hint is verified by checking the label of the last page of the file. If the run-table in fact describes too many pages, the hint can be corrected because the run-table will include the correct last page of the file: this can be found by binary chop of the run-table. When extending (or creating) a file, its run-table is written to disk before writing the labels of the new pages (NB: tricky for the first page of the run-table itself!). When contracting the file, the pages are marked as free before rewriting the run-table. When contracting, we can delay rewriting the run-table, but this doesn't seem worth the effort (considering the rarity of file contractions). The VAM and the run-tables can be reconstructed completely by reading the label of every page of the volume. Operations The VAM of an open volume is kept in virtual memory, as is the run-table of an open file. The disk operations for opening a file are: - read the run-table. - verify the label of the last page of the file (delayable). To extend a file: - choose the pages from the VAM, verifying each with a check-label disk operation. - write the new run-table. - write the new labels of the pages - write the VAM (delayable). This amounts to (2N+2) disk page transfers for allocating N pages, hopefully 4 disk requests. To contract a file: - write the VAM indicating the freed pages - write the page labels marking them as free - write the run-table (delayable, but probably synchronous). This amounts to (N+2) disk page transfers for freeing N pages, hopefully 3 disk requests. To delete a file: - write the VAM indicating the freed pages - write the page labels indicating them as free (run-table first).