DIRECTORY
IO USING [GetByte, GetChar, GetIndex, GetLength, PutByte, PutChar, PutRope, RopeFromROS, ROS, SetIndex, STREAM],
Rope USING [ROPE],
TiogaFileIO USING [Parts];
Implementation
STREAM: TYPE ~ IO.STREAM;
fileIdSize: NAT ~ 2;
FileIdIndex: TYPE ~ [0..fileIdSize);
FileId:
TYPE ~
PACKED
ARRAY FileIdIndex
OF
BYTE;
commentHeaderId: FileId ~ [000B, 000B];
controlHeaderId: FileId ~ [235B, 312B];
controlTrailerId: FileId ~ [
205B,
227B];
commentHeaderSize: NAT ~ fileIdSize+trailerLenSize; -- <id> <commentLen>
controlHeaderSize: NAT ~ fileIdSize+trailerLenSize; -- <id> <controlLen>
controlTrailerSize:
NAT ~ fileIdSize+3*trailerLenSize;
-- <id> <propsLen> <dataLen> <fileLen>
GetFileId:
PROC [s:
STREAM, index:
INT]
RETURNS [id: FileId ¬
ALL[0]] ~ {
IO.SetIndex[s, index];
FOR i: FileIdIndex IN FileIdIndex DO id[i] ¬ IO.GetByte[s] ENDLOOP;
};
PutFileId:
PROC [s:
STREAM, id: FileId] ~ {
FOR i: FileIdIndex IN FileIdIndex DO IO.PutByte[s, id[i]] ENDLOOP;
};
Get16:
PROC [s:
STREAM]
RETURNS [
CARD16] ~ {
b0: BYTE ~ IO.GetByte[s];
b1: BYTE ~ IO.GetByte[s];
RETURN[b0*(2**8)+b1];
};
Put16:
PROC [s:
STREAM, val:
CARD16] ~ {
b0: BYTE ~ val / (2**8);
b1: BYTE ~ val MOD (2**8);
IO.PutByte[s, b0];
IO.PutByte[s, b1];
};
GetTrailerLen:
PROC [s:
STREAM]
RETURNS [
CARD32] ~ {
h1: CARD16 ~ Get16[s]; -- low order 16 bits first!
h0: CARD16 ~ Get16[s];
RETURN[h0*(2**16)+h1];
};
PutTrailerLen:
PROC [s:
STREAM, len:
CARD32] ~ {
h1: CARD16 ~ len MOD (2**16);
h0: CARD16 ~ len / (2**16);
Put16[s, h1]; -- low order 16 bits first!
Put16[s, h0];
};
GetParts:
PUBLIC
PROC [s:
STREAM, start:
INT, len:
INT]
RETURNS [TiogaFileIO.Parts] ~ {
start1: INT[0..INT.LAST] ~ start; -- bounds check
rem: INT[0..INT.LAST] ~ IO.GetLength[s]-start1;
fileEnd: INT ~ start1+MIN[rem, MAX[0, len]];
dataLen, commentLen, controlLen, propsLen, fileLen: CARD32;
end1, commentStart, start2, end2, controlStart, start3, propsStart, controlTrailerStart: INT;
{
-- GOTO Fail at the end of this block if not a valid Tioga file
IF controlTrailerSize>CARD[fileEnd-start1] THEN GOTO Fail
ELSE controlTrailerStart ¬ fileEnd-controlTrailerSize; -- avail: [start1..controlTrailerStart)
IF GetFileId[s, controlTrailerStart]#controlTrailerId THEN GOTO Fail
ELSE {
-- read controlTrailer
propsLen ¬ GetTrailerLen[s];
dataLen ¬ GetTrailerLen[s];
fileLen ¬ GetTrailerLen[s];
IF IO.GetIndex[s]#fileEnd THEN ERROR;
};
IF fileLen#CARD[fileEnd-start1] THEN GOTO Fail;
IF propsLen>CARD[controlTrailerStart-start1] THEN GOTO Fail
ELSE propsStart ¬ controlTrailerStart-propsLen; -- avail: [start1..propsStart)
IF dataLen>CARD[propsStart-start1] THEN GOTO Fail
ELSE end1 ¬ commentStart ¬ start1+dataLen; -- avail: [commentStart..propsStart)
IF commentHeaderSize>(propsStart-commentStart) THEN GOTO Fail
ELSE start2 ¬ commentStart+commentHeaderSize; -- avail: [start2..propsStart)
IF GetFileId[s, commentStart]#commentHeaderId THEN GOTO Fail
ELSE {
-- read commentHeader
commentLen ¬ GetTrailerLen[s];
IF IO.GetIndex[s]#start2 THEN ERROR;
};
IF commentLen<(start2-commentStart) THEN GOTO Fail
ELSE IF commentLen>(propsStart-commentStart) THEN GOTO Fail
ELSE end2 ¬ controlStart ¬ commentStart+commentLen; -- avail: [controlStart..propsStart)
IF controlHeaderSize>(propsStart-controlStart) THEN GOTO Fail
ELSE start3 ¬ controlStart+controlHeaderSize; -- avail: [start3..propsStart)
IF GetFileId[s, controlStart]#controlHeaderId THEN GOTO Fail
ELSE {
-- read controlHeader
controlLen ¬ GetTrailerLen[s];
IF IO.GetIndex[s]#start3 THEN ERROR;
};
IF controlLen#(fileEnd-controlStart) THEN GOTO Fail;
EXITS Fail => RETURN[[isTioga: FALSE, start1: start1, len1: fileEnd-start1]];
};
Remove possible padding at end of part1 and part2 ...
IF end1>start1 THEN { IO.SetIndex[s, end1-1]; IF IO.GetByte[s]=0 THEN end1 ¬ end1-1 };
IF end2>start2 THEN { IO.SetIndex[s, end2-1]; IF IO.GetByte[s]=0 THEN end2 ¬ end2-1 };
RETURN[[isTioga:
TRUE,
start1: start1, len1: end1-start1,
start2: start2, len2: end2-start2,
start3: start3, len3: propsStart-start3
]];
};
PutParts:
PUBLIC
PROC [s:
STREAM, put:
PROC [s1, s2, s3:
STREAM]]
RETURNS [
INT] ~ {
s2: STREAM ~ IO.ROS[];
s3: STREAM ~ IO.ROS[];
dataLen, commentLen, controlLen, fileLen, part2Len, part3Len: CARD32;
propsLen: CARD32 ~ 0; -- fileProps section is not used
dataStart, commentStart, controlStart: INT;
dataStart ¬ IO.GetIndex[s];
put[s, s2, s3];
DO
-- pad part1 to an even number of bytes
commentStart ¬ IO.GetIndex[s];
dataLen ¬ CARD[commentStart-dataStart];
IF (dataLen MOD 2)#0 THEN IO.PutByte[s, 0] ELSE EXIT;
ENDLOOP;
DO
-- pad part2 to an even number of bytes
part2Len ¬ IO.GetLength[s2];
commentLen ¬ commentHeaderSize+part2Len;
IF (commentLen MOD 2)#0 THEN IO.PutByte[s2, 0] ELSE EXIT;
ENDLOOP;
PutFileId[s, commentHeaderId];
PutTrailerLen[s, commentLen];
IO.PutRope[s, IO.RopeFromROS[s2]];
controlStart ¬ IO.GetIndex[s];
IF CARD[controlStart-commentStart]#commentLen THEN ERROR;
part3Len ¬ IO.GetLength[s3];
controlLen ¬ controlHeaderSize+part3Len+propsLen+controlTrailerSize;
PutFileId[s, controlHeaderId];
PutTrailerLen[s, controlLen];
IO.PutRope[s, IO.RopeFromROS[s3]];
-- fileProps would go here --
PutFileId[s, controlTrailerId];
PutTrailerLen[s, propsLen];
PutTrailerLen[s, dataLen];
PutTrailerLen[s, fileLen ¬ dataLen+commentLen+controlLen];
IF CARD[IO.GetIndex[s]-dataStart]#fileLen THEN ERROR;
RETURN[dataLen];
};
Documentation
The external representation of a Tioga document has three parts:
part1, the data part, contains the text of each (non-comment) node, separated by newlines
part2, the comment part, contains the text of each comment node, separated by newlines
part3, the control part, contains everything else: looks, properties, tree structure ...
The Tioga file format surrounds the content of these three parts with some header and trailer information that gives their positions and lengths. The headers and trailers also include some "password" bytes and redundant lengths to help determine whether an arbitrary file is in fact a Tioga file. The content of the headers and trailers is as follows. The Id's are two bytes, and the Len's are four bytes, in the order b2 b3 b0 b1, where b0 is most significant (in olden days, this was the byte order of a PrincOps LONG WORD).
commentHeader: <commentHeaderId> <commentLen>
controlHeader: <controlHeaderId> <controlLen>
controlTrailer: <controlTrailerId> <propsLen> <dataLen> <fileLen>
And the layout of the entire file is ...
part1: [0 .. commentStart)
commentHeader: [commentStart .. start2)
part2: [start2 .. controlStart)
controlHeader: [controlStart .. start3)
part3: [start3 .. propsStart)
fileProps: [propsStart .. controlTrailerStart) -- not used by current Tioga
controlTrailer: [controlTrailerStart .. size)
where the positions above can be determined as follows ...
controlTrailerStart ← <length of file> - controlTrailerSize
[propsLen, dataLen, fileLen] ← controlTrailer {verify fileLen = <length of file>}
propsStart ← controlTrailerStart - propsLen
commentStart ← dataLen
start2 ← commentStart + commentHeaderSize
[commentLen] ← commentHeader
controlStart ← commentStart + commentLen
start3 ← controlStart + controlHeaderSize
[controlLen] ← controlHeader {verify controlLen = fileLen - controlStart}
Note that the data part is at the beginning of the file, so that if you read just the first dataLen bytes, you will see just the plain text of all the non-comment nodes in the document. Cedar's file streams (see PFS.StreamOpen) normally reveal just the data part; because of this trick, the compiler, for example, can read Tioga source files without being confused by the formatting or the arbitrary content of comment nodes (like the one you're reading now).
Since the file stream implementation needs to be able to determine whether a file is a Tioga file, and if so where its data part ends, we define this skeletal file structure here in the IO package, below PFS. Further details of Tioga's external representation are defined in the Tioga package.