[Cyan]<CedarChest6.1>Waterlily>Waterlily.mesa!1

Waterlily.mesa

Swinehart, November 18, 1985 2:35:32 pm PST

Russ Atkinson (RRA) September 4, 1985 8:44:06 pm PDT

Part of this algorithm cribbed from Heckel, Paul. A Technique For Isolating Differences Between Files. Comm. ACM 21,4 (April 1978), 264-268. Pointer to which courtesy of Ed Taft.

DIRECTORY

BasicTime USING [Now],

Commander USING [CommandProc, Register],

CommandTool USING [ArgumentVector, Failed, Parse],

EditSpan USING [Copy],

FS USING [defaultStreamOptions, Error, FileInfo, StreamOpen, StreamOptions],

IO USING [Close, EndOfStream, GetCedarToken, GetCedarTokenRope, GetChar, GetIndex, GetLine, noWhereStream, PeekChar, Put, PutChar, PutF, PutRope, SetIndex, SkipWhitespace, STREAM, TokenKind],

NodeProps USING [PutProp],

Process USING [CheckForAbort],

PutGet USING [FromFile],

RefText USING [AppendChar, Equal, TrustTextAsRope],

Rope USING [Cat, Concat, Equal, Fetch, Find, Flatten, FromChar, FromRefText, Length, Match, ROPE, Substr],

SymTab USING [Create, EachPairAction, Fetch, FetchText, Insert, Pairs, Ref],

TextNode USING [LastWithin, Level, MakeNodeLoc, MakeNodeSpan, NodeRope, Ref, StepForward],

TiogaFileOps USING [AddLooks, CreateRoot, InsertAsLastChild, Ref, SetContents, Store];

Waterlily: CEDAR PROGRAM

IMPORTS BasicTime, Commander, CommandTool, EditSpan, FS, IO, NodeProps, Process, PutGet, RefText, Rope, SymTab, TextNode, TiogaFileOps = {

ROPE: TYPE = Rope.ROPE;

STREAM: TYPE = IO.STREAM;

SymbolTableEntry: TYPE = REF SymbolTableEntryRep;

SymbolTableEntryRep: TYPE = RECORD[

oldList: InstanceList, -- the old instances

newList: InstanceList, -- the new instances

rope: Rope.ROPE←NIL, -- the line (or token) itself

key: Rope.ROPE -- the key used for the lookup

];

InstanceList: TYPE = LIST OF InstanceRep;

InstanceRep: TYPE = RECORD [

line: INT, -- -1 => not yet assigned

position: INT, -- -1 => not yet assigned (node number if mergeFiles)

node: TextNode.Ref←NIL, -- the node, if mergeFiles, instead of line

peek: CHAR -- non-blank character after this token

];

FileArray: TYPE = REF FileArrayRep;

FileArrayRep: TYPE = RECORD [entry: SEQUENCE index: NAT OF FileArrayEntry];

FileArrayEntry: TYPE = REF FileArrayEntryRep;

FileArrayEntryRep: TYPE = RECORD [

symTableKey: SymbolTableEntry,

posInThisFile: INT,

lineNumInOtherFile: INT,

node: TextNode.Ref←NIL,

typeOfPntr: {symTable, lineNum}];

Ref: TYPE = TextNode.Ref;

Node: TYPE = REF NodeBody;

NodeBody: TYPE = RECORD [root: Ref←NIL, node: Ref←NIL ];

LongHelpMsgW: ROPE =

"Waterlily compares two source files. The command format is one of:
Waterlily file1 file2
Waterlily difFile ← file1 file2
where switches are syntactically before any argument. Differences found are written on the difFile, with default extension '.dif'. The available switches are:
t Tioga format files (comments & formatting not seen)
b Bravo format files (formatting not seen)
u Unformatted files
i Ignore blank lines (default: TRUE)
x Output file is a merger of input files (ran out of mnemonics already!)
Input files must be Tioga-format files.
#m # of matching lines (default: 3)
#c # of trailing context lines (default: 1)
";

LongHelpMsgC: ROPE =

"CedarLily compares two Cedar-program source files. Comparison is made at the source-language token level, so that only lexical differences will be found (formatting changes and modified comments will not affect the comparison). The command format is one of:
CedarLily file1 file2
CedarLily difFile ← file1 file2
where switches are syntactically before any argument. Differences found are written on the difFile, with default extension '.dif'. The available switches are:
u Unformatted files
i Ignore blank lines (default: TRUE)
#m # of matching lines (default: 3)
#c # of trailing context lines (default: 1)
";

LongHelpMsgM: ROPE =

"Tigerlily produces a merged version of two Tioga-format files. Nodes unique to file1 are included and indicated by overstriking. Nodes unique to file2 are included and indicated by underlining. Nodes that are common to both files are unchanged. The command format is one of:
Tigerlily file1 file2
Tigerlily mergedFile ← file1 file2
where switches are syntactically before any argument. The merged file is written on the mergedFile, with default extension '.merger'. The available switches are:
#m # of matching lines (default: 3)
#c # of trailing context lines (default: 1)
";

repeats: NAT ← 1;

Make this greater to experiment with multiple phases of match finding

debugging: BOOL ← FALSE;

Turn this on to get different characters for different phases of the process.

WaterlilyProc: Commander.CommandProc = {

PROC [cmd: Handle] RETURNS [result: REF ← NIL, msg: ROPE ← NIL];

Alarm: ERROR = CODE;

oldArray: FileArray;

newArray: FileArray;

out: STREAM ← cmd.out;

pData: REF ← cmd.procData.clientData;

totalLinesInNewFile: INT ← 0;

totalLinesInOldFile: INT ← 0;

posInNewFile: INT ← 0;

posInOldFile: INT ← 0;

nodeCount: INT ← 0;

newFile: REF; -- IO.STREAM or Node;

oldFile: REF; -- IO.STREAM or Node;

difFile: STREAM;

difFileNode: Node←NIL;

newFileName: ROPE ← NIL;

oldFileName: ROPE ← NIL;

difFileName: ROPE ← NIL;

Asterisks: ROPE = "**************************************";

filler: ROPE = "->->->->->->";

these switches are global or local.

FileType: TYPE = {tioga, bravo, unform};

switchNewFileType, switchOldFileType: FileType ← tioga;

switchIgnoreEmptyLinesNewFile, switchIgnoreEmptyLinesOldFile: BOOL ← TRUE;

switchTokens: BOOL ← FALSE;

switchMergeFiles: BOOL←FALSE;

these switches are global only.

switchLinesForMatch: INTEGER ← 3;

switchLinesForContext: INTEGER ← 1;

anyDifferencesSeen: BOOL ← FALSE;

indexN: INT;

indexO: INT;

LineType: TYPE = { new, old, both };

Vintage: TYPE = LineType[new..old];

SetSymbolTableEntry: PROC [key, line: ROPE, node: TextNode.Ref, newOrOld: Vintage, lineNum: INT, pos: INT, peek: CHAR] = {

symTabEntry: SymbolTableEntry ← NIL;

list: InstanceList ← LIST[[lineNum, pos, node, peek]];

WITH SymTab.Fetch[symbolTable, key].val SELECT FROM

x: SymbolTableEntry => symTabEntry ← x;

ENDCASE => {

symTabEntry ← NEW[SymbolTableEntryRep ← [

oldList: NIL, newList: NIL, rope: line, key: key]];

[] ← SymTab.Insert[symbolTable, key, symTabEntry];

};

SELECT newOrOld FROM

new => {

list.rest ← symTabEntry.newList;

symTabEntry.newList ← list;

};

old => {

list.rest ← symTabEntry.oldList;

symTabEntry.oldList ← list;

};

ENDCASE => ERROR;

};

PruneList: PROC [list: InstanceList, pos: INT] RETURNS [pruned: InstanceList] = {

IF list = NIL THEN RETURN [NIL];

IF list.first.position = pos THEN RETURN [list.rest];

pruned ← list;

lag: InstanceList ← list;

list ← list.rest;

IF list = NIL THEN RETURN;

IF list.first.position = pos THEN {lag.rest ← list.rest; RETURN};

ENDLOOP;

};

ConnectEntries: PROC [symTabEntry: SymbolTableEntry, indexO, indexN: INT, entryO, entryN: FileArrayEntry] = {

entryN.typeOfPntr ← entryO.typeOfPntr ← lineNum;

entryN.lineNumInOtherFile ← indexO;

entryO.lineNumInOtherFile ← indexN;

symTabEntry.newList ← PruneList[symTabEntry.newList, entryN.posInThisFile];

symTabEntry.oldList ← PruneList[symTabEntry.oldList, entryO.posInThisFile];

};

EndOfFile: ERROR = CODE;

buffer: REF TEXT ← NEW[TEXT[128]];

ReadLineFromFile: PROC [source: REF, localSwitchFileType: FileType, localSwitchIgnoreEmpties: BOOL] RETURNS [key, line: ROPE←NIL, node: TextNode.Ref←NIL, peek: CHAR, pos: INT] = {

token: REF TEXT ← NIL;

in: STREAM;

inNode: Node;

WITH source SELECT FROM

str: STREAM => in ← str;

ref: Node => inNode ← ref;

ENDCASE => ERROR;

peek ← 0C;

index: INT;

Process.CheckForAbort[];

SELECT pData FROM

$Cedar => {

kind: IO.TokenKind;

char: CHAR;

buffer.length ← 0;

line ← NIL;

[tokenKind: kind, token: token, charsSkipped: ] ← IO.GetCedarToken[in, buffer

! IO.EndOfStream => ERROR EndOfFile];

[] ← IO.SkipWhitespace[stream: in, flushComments: TRUE

! IO.EndOfStream => CONTINUE];

peek ← IO.PeekChar[in

! IO.EndOfStream => CONTINUE];

IF kind = tokenEOF THEN ERROR EndOfFile;

pos ← in.GetIndex[] - token.length;

Now do some token canonicalization to avoid reporting bogus

char ← token[0];

IF token.length = 1

THEN {

SELECT char FROM

'; => LOOP;

Semicolons are quite common, but not very important, and are often optional. So we can safely ignore them.

', => line ← ",";

': => line ← ":";

'. => line ← ".";

'← => line ← "←";

'! => line ← "!";

'= => line ← "=";

'# => line ← "#";

'< => line ← "<";

'> => line ← ">";

'[ => line ← "[";

'] => line ← "]";

'{ => line ← "{";

'} => line ← "}";

'- => line ← "-";

'+ => line ← "+";

'$ => line ← "$";

'^ => line ← "^";

'0 => line ← "0";

'1 => line ← "1";

'2 => line ← "2";

'3 => line ← "3";

ENDCASE;

}

ELSE

SELECT char FROM

'B => SELECT TRUE FROM

RefText.Equal[token, "BEGIN"] => line ← "{";

RefText.Equal[token, "BOOLEAN"], RefText.Equal[token, "BOOL"] =>

line ← "BOOL";

ENDCASE;

'C => SELECT TRUE FROM

RefText.Equal[token, "CHARACTER"], RefText.Equal[token, "CHAR"] =>

line ← "CHAR";

ENDCASE;

'E => SELECT TRUE FROM

RefText.Equal[token, "END"] => {line ← "}"; char ← '} };

RefText.Equal[token, "ELSE"] => line ← "ELSE";

ENDCASE;

'I => SELECT TRUE FROM

RefText.Equal[token, "IF"] => line ← "IF";

ENDCASE;

'L => SELECT TRUE FROM

RefText.Equal[token, "LONG"] => {

line ← IO.GetCedarTokenRope[in].token;

SELECT TRUE FROM

Rope.Equal[line, "CARDINAL"] => line ← "CARD";

Rope.Equal[line, "INTEGER"] => line ← "INT";

ENDCASE => line ← Rope.Concat["LONG ", line];

};

ENDCASE;

'N => SELECT TRUE FROM

RefText.Equal[token, "NIL"] => line ← "NIL";

RefText.Equal[token, "NOT"] => line ← "NOT";

RefText.Equal[token, "NEW"] => line ← "NEW";

ENDCASE;

'P => SELECT TRUE FROM

RefText.Equal[token, "PROCEDURE"], RefText.Equal[token, "PROC"] =>

line ← "PROC";

ENDCASE;

'T => SELECT TRUE FROM

RefText.Equal[token, "THEN"] => line ← "THEN";

ENDCASE;

IF line = NIL THEN {

Lookup the token in the symbol table just to canonicalize the rope (using computation time to avoid a fair amount of allocation).

WITH SymTab.FetchText[symbolTable, token].val SELECT FROM

symTabEntry: SymbolTableEntry =>

line ← symTabEntry.rope;

ENDCASE =>

line ← Rope.FromRefText[token];

};

key ← line;

SELECT peek FROM

':, '← => SELECT char FROM

IN ['A..'Z], IN ['a..'z] => {

This is an interesting little hack to make matching work better, since definitions introduce more "uniqueness", which will be reflected in better matching of lines downstream.

token ← RefText.AppendChar[token, peek];

WITH SymTab.FetchText[symbolTable, token].val SELECT FROM

symTabEntry: SymbolTableEntry =>

key ← symTabEntry.key;

ENDCASE =>

key ← Rope.FromRefText[token];

};

ENDCASE;

'. => IF char = '} THEN ERROR EndOfFile;

ENDCASE;

EXIT;

};

ENDCASE;

We only get here when we are using full lines as tokens.

IF switchMergeFiles THEN {

pos ← nodeCount;

nodeCount ← nodeCount+1;

node ← inNode.node ← inNode.node.StepForward[];

IF node = NIL THEN ERROR EndOfFile;

line ← node.NodeRope[];

}

ELSE {

pos ← IO.GetIndex[in];

token ← IO.GetLine[in, buffer ! IO.EndOfStream => ERROR EndOfFile];

WITH SymTab.FetchText[symbolTable, token].val SELECT FROM

symTabEntry: SymbolTableEntry => line ← symTabEntry.rope;

ENDCASE => line ← Rope.FromRefText[token];

};

key ← line;

IF localSwitchFileType = bravo
AND (NOT((index ← Rope.Find[line, Rope.FromChar['\032], 0, ]) = -1)) THEN

This line has some bogus old Bravo formatting that we should ignore.

line ← Rope.Substr[line, 0, index];

IF NOT AdvanceOverBlanks[line, localSwitchIgnoreEmpties] THEN EXIT;

ENDLOOP;

};

FinishUp: PROC [difMsg: ROPE] = {

IO.PutRope[out, difMsg];

IO.PutRope[out, "\n"];

IF switchMergeFiles THEN RETURN;

IF difFile = NIL THEN OpenDifFileAndWriteHeader[];

IO.PutRope[difFile, difMsg];

IO.PutRope[difFile, "\n"];

IO.Close[difFile];

};

WorkingMsg: PROC [char: CHAR] = {

Process.CheckForAbort[];

IO.PutChar[out, IF debugging THEN char ELSE '.];

};

OpenDifFileAndWriteHeader: PROC = {

IF anyDifferencesSeen THEN RETURN;

anyDifferencesSeen ← TRUE;

difFileName ← DefaultExtension[difFileName, IF switchMergeFiles THEN ".merger" ELSE ".dif"];

IF switchMergeFiles THEN {

difFile ← IO.noWhereStream;

difFileNode ← NewNode[];

RETURN;

};

difFile ← FS.StreamOpen[difFileName, $create];

IO.PutRope[difFile, IF pData = $Cedar THEN "\nCedarlily" ELSE "\nWaterlily"];

IO.PutF[difFile, "\n run on %g\n File 1: %g\n File 2: %g\n\n",

[time[BasicTime.Now[]]], [rope[oldFileName]], [rope[newFileName]]];

};

OpenFile: PROC [tempFileName: ROPE, switchFileType: FileType] RETURNS [fileName: ROPE, file: REF] = {

streamOptions: FS.StreamOptions ← FS.defaultStreamOptions;

streamOptions[tiogaRead] ← (switchFileType = tioga);

tempFileName ← DefaultExtension[tempFileName, ".mesa"];

fileName ← FS.FileInfo[tempFileName].fullFName;

IF switchMergeFiles THEN file ← NodeFromFile[fileName]

ELSE file ← FS.StreamOpen[fileName, $read, streamOptions];

};

start here.

symbolTable: SymTab.Ref ← SymTab.Create[mod: 1023, case: TRUE];

fillerNode: Node ← NewNode[];

fillerNode.node ← TR[TiogaFileOps.InsertAsLastChild[FR[fillerNode.root]]];

TiogaFileOps.SetContents[FR[fillerNode.node], filler];

TiogaFileOps.AddLooks[FR[fillerNode.node], 0, filler.Length[], 'b, FR[fillerNode.root]];

NodeProps.PutProp[fillerNode.node, $Comment, NEW[BOOL←TRUE]];

result ← NIL;

msg ← NIL;

Process arguments

{

args: CommandTool.ArgumentVector ← CommandTool.Parse[cmd

! CommandTool.Failed => {msg ← "Syntax error."; GO TO fatalError}

];

fileCount: NAT ← 0;

defaultFileType: FileType ← tioga;

ignoreEmptyLines: BOOL ← TRUE;

IF pData=$Merge THEN { switchMergeFiles ← TRUE; ignoreEmptyLines ← FALSE; };

FOR i: NAT IN [1..args.argc) DO

arg: ROPE = args[i];

sense: BOOL ← TRUE;

IF Rope.Match["-*", arg] THEN {

Switches specified here

len: NAT ← Rope.Length[arg];

number: INT ← 0;

FOR i: NAT IN [1..len) DO

c: CHAR ← Rope.Fetch[arg, i];

SELECT c FROM

'~ => sense ← NOT sense;

'b, 'B => defaultFileType ← bravo;

't, 'T => defaultFileType ← tioga;

'u, 'U => defaultFileType ← unform;

'i, 'I => ignoreEmptyLines ← sense;

'x, 'X => {

switchMergeFiles ← sense;

IF sense THEN ignoreEmptyLines ← FALSE;

};

IN ['0..'9] => {number ← number*10 + (c-'0); LOOP};

'm, 'M => IF number >= 1 THEN switchLinesForMatch ← number;

'c, 'C => IF number >= 1 THEN switchLinesForContext ← number;

ENDCASE;

number ← 0;

ENDLOOP;

LOOP;

};

IF Rope.Match["←", arg] THEN {

The old file name is really the dif file name

IF fileCount # 1 THEN GO TO usageError;

fileCount ← 0;

difFileName ← oldFileName;

LOOP;

};

SELECT fileCount FROM

0 => {

fileCount ← 1;

switchOldFileType ← defaultFileType;

switchIgnoreEmptyLinesOldFile ← ignoreEmptyLines;

oldFileName ← arg;

IF difFileName = NIL THEN difFileName ← ShortName[arg];

};

1 => {

fileCount ← 2;

switchNewFileType ← defaultFileType;

switchIgnoreEmptyLinesNewFile ← ignoreEmptyLines;

newFileName ← arg;

};

ENDCASE => {GO TO usageError};

ENDLOOP;

IF fileCount # 2 THEN GO TO usageError;

out.PutRope["Comparing "];

{

ENABLE FS.Error => {

IF error.group # bug THEN msg ← error.explanation;

GO TO fatalError;

};

[oldFileName, oldFile] ← OpenFile[oldFileName, switchOldFileType];

[newFileName, newFile] ← OpenFile[newFileName, switchNewFileType];

};

WorkingMsg['-];

};

Pass 1 & 2

{

Pass 1

In Pass 1 we scan the new file (the second one mentioned in the command line) for lines (or tokens), and enter them into the symbol table.

nextLine: ROPE ← NIL;

nextNode: TextNode.Ref ← NIL;

peek: CHAR;

key: ROPE;

[key, nextLine, nextNode, peek, posInNewFile] ← ReadLineFromFile[newFile, switchNewFileType, switchIgnoreEmptyLinesNewFile

! EndOfFile => GOTO donewithfile];

SetSymbolTableEntry[key, nextLine, nextNode, new, totalLinesInNewFile, posInNewFile, peek];

totalLinesInNewFile ← totalLinesInNewFile + 1;

REPEAT donewithfile => {

IF totalLinesInNewFile = 0 THEN GOTO emptyfile;

};

ENDLOOP;

newArray ← NEW[FileArrayRep[totalLinesInNewFile]];

WorkingMsg['!];

Pass 2

Pass 2 is like Pass1, except that we are scanning the old file (the first one mentioned in the command line).

key: ROPE;

[key, nextLine, nextNode, peek, posInOldFile] ← ReadLineFromFile[oldFile, switchOldFileType, switchIgnoreEmptyLinesOldFile

! EndOfFile => GOTO donewithfile];

SetSymbolTableEntry[key, nextLine, nextNode, old, totalLinesInOldFile, posInOldFile, peek];

totalLinesInOldFile ← totalLinesInOldFile + 1;

REPEAT donewithfile => {

IF totalLinesInOldFile = 0 THEN GOTO emptyfile;

};

ENDLOOP;

oldArray ← NEW[FileArrayRep[totalLinesInOldFile]];

WorkingMsg['@];

Pass 3

In Pass 3 we transform all of the instances into file array entries.

{

Test: SymTab.EachPairAction = {

[key: SymTab.Key, val: SymTab.Val] RETURNS [quit: BOOL]

symTabEntry: SymbolTableEntry ← NARROW[val];

newList: InstanceList ← symTabEntry.newList;

oldList: InstanceList ← symTabEntry.oldList;

FOR each: InstanceList ← newList, each.rest WHILE each # NIL DO

newArray[each.first.line] ← NEW[FileArrayEntryRep ← [

symTableKey: symTabEntry,

posInThisFile: each.first.position,

lineNumInOtherFile: -1,

node: each.first.node,

typeOfPntr: symTable]];

ENDLOOP;

FOR each: InstanceList ← oldList, each.rest WHILE each # NIL DO

oldArray[each.first.line] ← NEW[FileArrayEntryRep ← [

symTableKey: symTabEntry,

posInThisFile: each.first.position,

lineNumInOtherFile: -1,

node: each.first.node,

typeOfPntr: symTable]];

ENDLOOP;

RETURN[FALSE];

};

[] ← SymTab.Pairs[symbolTable, Test];

WorkingMsg['#];

};

EXITS emptyfile => FinishUp["At least one of these files is effectively empty."];

};

THROUGH [0..repeats) DO

Pass 4

In Pass 4 we run through the symbol table to connect all lines (or tokens) that appear only once in both files (OR appear once with a given peek char). Those lines (or tokens) are the places where the files are known to coincide.

{

Test: SymTab.EachPairAction = {

[key: SymTab.Key, val: SymTab.Val] RETURNS [quit: BOOL]

symTabEntry: SymbolTableEntry ← NARROW[val];

newList: InstanceList ← symTabEntry.newList;

oldList: InstanceList ← symTabEntry.oldList;

Process.CheckForAbort[];

SELECT TRUE FROM

newList = NIL, oldList = NIL => {};

newList.rest # NIL, oldList.rest # NIL => {

};

ENDCASE => {

indexO ← oldList.first.line;

indexN ← newList.first.line;

ConnectEntries[symTabEntry, indexO, indexN, oldArray[indexO], newArray[indexN]];

};

RETURN[FALSE];

};

[] ← SymTab.Pairs[symbolTable, Test];

WorkingMsg['$];

};

Pass 5

In Pass 5 we extend matching entries outwards by scanning forwards.

{

indexN ← 0;

WHILE indexN < totalLinesInNewFile DO

entry: FileArrayEntry = newArray[indexN];

Process.CheckForAbort[];

indexN ← indexN + 1;

IF entry.typeOfPntr = lineNum THEN {

indexO ← entry.lineNumInOtherFile + 1;

WHILE indexN < totalLinesInNewFile AND indexO < totalLinesInOldFile DO

entryN: FileArrayEntry = newArray[indexN];

entryO: FileArrayEntry = oldArray[indexO];

symTabEntry: SymbolTableEntry ← entryN.symTableKey;

SELECT TRUE FROM

entryN.typeOfPntr = symTable AND entryO.typeOfPntr = symTable AND symTabEntry = entryO.symTableKey =>

ConnectEntries[symTabEntry, indexO, indexN, entryO, entryN];

entryN.typeOfPntr # lineNum OR entryO.typeOfPntr # lineNum OR entryN.lineNumInOtherFile # indexO OR entryO.lineNumInOtherFile # indexN => EXIT;

ENDCASE;

indexN ← indexN + 1;

indexO ← indexO + 1;

ENDLOOP;

};

ENDLOOP;

WorkingMsg['%];

};

Pass 6

In Pass 6 we extend matching entries outwards by scanning backwards.

{

indexN ← totalLinesInNewFile - 1;

WHILE indexN >= 0 DO

entry: FileArrayEntry = newArray[indexN];

Process.CheckForAbort[];

indexN ← indexN - 1;

IF entry.typeOfPntr = lineNum THEN {

indexO ← entry.lineNumInOtherFile - 1;

WHILE indexN >= 0 AND indexO >= 0 DO

entryN: FileArrayEntry = newArray[indexN];

entryO: FileArrayEntry = oldArray[indexO];

symTabEntry: SymbolTableEntry ← entryN.symTableKey;

Process.CheckForAbort[];

SELECT TRUE FROM

entryN.typeOfPntr = symTable AND entryO.typeOfPntr = symTable AND symTabEntry = entryO.symTableKey => {

ConnectEntries[symTabEntry, indexO, indexN, entryO, entryN];

};

entryN.typeOfPntr # lineNum OR entryO.typeOfPntr # lineNum OR entryN.lineNumInOtherFile # indexO OR entryO.lineNumInOtherFile # indexN =>

EXIT;

ENDCASE;

indexN ← indexN - 1;

indexO ← indexO - 1;

ENDLOOP;

};

ENDLOOP;

WorkingMsg['~];

};

ENDLOOP;

Passes 7 and 8

{

CancelMatch: PROC [array1, array2: FileArray, totalNumOfLinesFile1, index: INT] = {

entry1: FileArrayEntry ← array1[index];

IF entry1.typeOfPntr = symTable THEN EXIT;

entry1.typeOfPntr ← symTable;

array2[entry1.lineNumInOtherFile].typeOfPntr ← symTable;

index ← index + 1;

IF ((index >= totalNumOfLinesFile1)
OR (entry1.lineNumInOtherFile # array1[index - 1].lineNumInOtherFile + 1)) THEN EXIT;

ENDLOOP;

};

Pass 7

In Pass 7 we cancel matches that don't extend far enough. This is to prevent excessive context switching in reporting differences.

IF switchLinesForMatch > 1 THEN {

indexN ← 0;

oldIndexN: INT ← indexN;

indexN ← indexN + 1;

Process.CheckForAbort[];

IF indexN >= totalLinesInNewFile THEN EXIT;

IF newArray[indexN].typeOfPntr = lineNum THEN {

WHILE ((indexN < totalLinesInNewFile) AND (newArray[indexN].lineNumInOtherFile = newArray[indexN - 1].lineNumInOtherFile + 1)) DO

indexN ← indexN + 1;

ENDLOOP;

IF (indexN - oldIndexN) < switchLinesForMatch AND indexN < totalLinesInNewFile THEN

CancelMatch[newArray, oldArray, totalLinesInNewFile, oldIndexN];

};

ENDLOOP;

};

WorkingMsg['&];

Pass 8

In Pass 8 we actually print the differences, skipping over the matches.

{

LeadingNumber: PROC [char: CHAR, number: INT] = {

must be a better way to do this.

leadingZeroes: CARDINAL ← columns - 1;

tempIndex: INT ← number;

IF switchMergeFiles THEN RETURN;

Process.CheckForAbort[];

tempIndex ← tempIndex/10;

IF tempIndex = 0 THEN EXIT;

leadingZeroes ← leadingZeroes - 1;

ENDLOOP;

difFile.PutF["%g/", [character[char]]];

THROUGH [0..leadingZeroes) DO difFile.PutChar['0]; ENDLOOP;

difFile.Put[[integer[number]]];

difFile.PutRope[") "];

};

PrintCedarDelta: PROC [file: STREAM, index, start, total: INT, array: FileArray] RETURNS [INT] = {

Dump out the difference indicated, extending the difference through the start of the line containing the first token, and through the end of the line containing the last token. Return the index after the last token on the last line printed.

IF start < total THEN {

leadChar: CHAR ← IF file = oldFile THEN '1 ELSE '2;

lastChar: CHAR ← 0C;

lastIndex: CARDINAL ← MIN[index+1, total] - 1;

lastEntry: FileArrayEntry = array[lastIndex];

startPos: INT ← array[start].posInThisFile;

endPos: INT ← lastEntry.posInThisFile + Rope.Length[lastEntry.symTableKey.rope];

WHILE startPos > 0 DO

IO.SetIndex[file, startPos - 1];

lastChar ← IO.GetChar[file];

IF lastChar = '\n THEN EXIT;

startPos ← startPos - 1;

ENDLOOP;

LeadingNumber[leadChar, startPos];

FOR p: INT IN [startPos..endPos) DO

IO.PutChar[difFile, lastChar ← IO.GetChar[file ! IO.EndOfStream => EXIT]];

IF lastChar = '\n AND p+1 # endPos THEN

LeadingNumber[leadChar, IO.GetIndex[file]];

ENDLOOP;

WHILE lastChar # '\n DO

IO.PutChar[difFile, lastChar ← IO.GetChar[file ! IO.EndOfStream => EXIT]];

ENDLOOP;

Now skip over tokens we inadvertently covered during our printing to the end of line. The index returned is the one after the one we printed.

endPos ← IO.GetIndex[file];

WHILE index < total DO

IF array[index].posInThisFile >= endPos THEN EXIT;

index ← index + 1;

ENDLOOP;

};

RETURN [index];

};

DumpOutDiffAndMoveAhead: PROC = {

index: INT;

IF NOT anyDifferencesSeen THEN {

OpenDifFileAndWriteHeader[];

difFile.PutF["%g%g\n", [rope[Asterisks]], [rope[Asterisks]]];

};

SELECT TRUE FROM

(indexN >= totalLinesInNewFile) OR (indexO >= totalLinesInOldFile) => {

indexN ← totalLinesInNewFile;

indexO ← totalLinesInOldFile;

};

newArray[indexN].typeOfPntr = lineNum =>

indexO ← newArray[indexN].lineNumInOtherFile

ENDCASE =>

indexN ← oldArray[indexO].lineNumInOtherFile;

SELECT pData FROM

$Cedar => {

indexO ← PrintCedarDelta[NARROW[oldFile], indexO, startDifO, totalLinesInOldFile, oldArray];

difFile.PutF["%g\n", [rope[Asterisks]]];

indexN ← PrintCedarDelta[NARROW[newFile], indexN, startDifN, totalLinesInNewFile, newArray];

};

ENDCASE => {

IF switchMergeFiles THEN CopySpan[oldArray, startDifO, indexO-1, oldFile, old, totalLinesInOldFile]

ELSE FOR index IN [startDifO..indexO) DO

entry: FileArrayEntry = oldArray[index];

LeadingNumber['1, entry.posInThisFile];

difFile.PutF["%g\n", [rope[entry.symTableKey.rope] ]];

ENDLOOP;

IF ~switchMergeFiles THEN

FOR index IN [indexO..indexO + switchLinesForContext) WHILE (index < totalLinesInOldFile) DO

entry: FileArrayEntry = oldArray[index];

LeadingNumber['1, entry.posInThisFile];

difFile.PutF["%g\n", [rope[entry.symTableKey.rope] ]];

ENDLOOP;

indexO ← indexO + switchLinesForContext;

difFile.PutF["%g\n", [rope[Asterisks]]];

IF switchMergeFiles THEN CopySpan[newArray, startDifN, indexN-1, newFile, new, totalLinesInNewFile]

ELSE FOR index IN [startDifN..indexN) DO

entry: FileArrayEntry = newArray[index];

LeadingNumber['2, entry.posInThisFile];

difFile.PutF["%g\n", [rope[entry.symTableKey.rope] ]];

ENDLOOP;

IF ~switchMergeFiles THEN

FOR index IN [indexN..indexN + switchLinesForContext) WHILE (index < totalLinesInNewFile) DO

entry: FileArrayEntry = newArray[index];

LeadingNumber['2, entry.posInThisFile];

difFile.PutF["%g\n", [rope[entry.symTableKey.rope] ]];

ENDLOOP;

indexN ← indexN + switchLinesForContext;

};

difFile.PutF["\n%g%g\n", [rope[Asterisks]], [rope[Asterisks]]];

If merging, print the context lines here.

IF switchMergeFiles THEN CopySpan[newArray, indexN-switchLinesForContext, indexN-1, newFile, both, totalLinesInNewFile];

Move ahead to get beyond the last line from the old file.

startDifN ← indexN;

WHILE indexN < totalLinesInNewFile AND newArray[indexN].typeOfPntr # symTable DO

IF newArray[indexN].lineNumInOtherFile # newArray[indexN - 1].lineNumInOtherFile + 1 THEN EXIT;

indexN ← indexN + 1;

indexO ← indexO + 1;

ENDLOOP;

IF switchMergeFiles AND indexN>startDifN THEN

CopySpan[newArray, startDifN, indexN-1, newFile, both, totalLinesInNewFile];

startDifN ← indexN;

startDifO ← indexO;

};

TryToResolveConflicts: PROC [array1, array2: FileArray, index1, index2: INT] RETURNS [okToDumpDiff: BOOL] = {

entry1: FileArrayEntry ← array1[index1];

lastRange1: INT ← index1 + entry1.lineNumInOtherFile - index2;

FOR tempIndex: INT IN [index2..entry1.lineNumInOtherFile) DO

entry2: FileArrayEntry ← array2[tempIndex];

IF entry2.typeOfPntr = lineNum THEN {

IF entry2.lineNumInOtherFile > lastRange1

THEN CancelMatch[array2, array1, totalLinesInOldFile, tempIndex]

ELSE {

CancelMatch[array1, array2, totalLinesInNewFile, index1];

RETURN[FALSE];

};

ENDLOOP;

RETURN[TRUE];

};

CopySpan: PROC [array: FileArray, index1, index2: INT, in: REF, lineType: LineType ← both, limit: INT←-1] = {

inNode: Node = NARROW[in];

nesting: INT;

IF limit>=0 THEN { IF index1 >= limit THEN RETURN; index2 ← MIN[index2, limit-1]; };

IF index1>index2 THEN RETURN;

nesting ← TextNode.Level[array[index1].node] - TextNode.Level[difFileNode.node];

WHILE nesting > 1 DO

[]�itSpan.Copy[difFileNode.root, fillerNode.root, TextNode.MakeNodeLoc[difFileNode.node], TextNode.MakeNodeSpan[fillerNode.node, fillerNode.node], after, 1];

nesting ← nesting - 1;

ENDLOOP;

[]�itSpan.Copy[difFileNode.root, inNode.root, TextNode.MakeNodeLoc[difFileNode.node], TextNode.MakeNodeSpan[array[index1].node, array[index2].node], after, nesting];

IF lineType=both THEN difFileNode.node ← TextNode.LastWithin[difFileNode.root]

ELSE {

node: TextNode.Ref;

WHILE (node←TextNode.StepForward[difFileNode.node])#NIL DO

len: INT = Rope.Length[TextNode.NodeRope[node]];

difFileNode.node ← node;

SELECT lineType FROM

old => TiogaFileOps.AddLooks[FR[node], 0, len, 'y];

new => TiogaFileOps.AddLooks[FR[node], 0, len, 'z];

ENDCASE;

ENDLOOP;

};

startDifN: INT;

startDifO: INT;

dumpDiff: BOOL;

columns: [0..255] ← 1;

max: INT ← MAX[posInNewFile, posInOldFile];

max ← max/10;

IF max = 0 THEN EXIT;

columns ← columns + 1;

ENDLOOP;

indexN ← 0;

indexO ← 0;

IF switchMergeFiles THEN OpenDifFileAndWriteHeader[];

IF indexN >= totalLinesInNewFile OR indexO >= totalLinesInOldFile

THEN EXIT

ELSE {

entryN: FileArrayEntry ← newArray[indexN];

entryO: FileArrayEntry ← oldArray[indexO];

IF entryN.typeOfPntr # lineNum THEN EXIT;

IF entryO.typeOfPntr # lineNum THEN EXIT;

IF entryN.lineNumInOtherFile # indexO THEN EXIT;

IF entryO.lineNumInOtherFile # indexN THEN EXIT;

indexN ← indexN + 1;

indexO ← indexO + 1;

};

ENDLOOP;

IF switchMergeFiles AND indexN>0 THEN

CopySpan[newArray, 0, indexN-1, newFile, both];

startDifN ← indexN;

startDifO ← indexO;

dumpDiff ← FALSE;

WHILE ((indexN < totalLinesInNewFile) AND (indexO < totalLinesInOldFile)) DO

entryN: FileArrayEntry = newArray[indexN];

entryO: FileArrayEntry = oldArray[indexO];

IF entryN.typeOfPntr = lineNum OR entryO.typeOfPntr = lineNum THEN {

IF entryN.typeOfPntr = lineNum AND entryO.typeOfPntr = lineNum THEN {

IF entryN.lineNumInOtherFile = indexO AND entryO.lineNumInOtherFile = indexN THEN GOTO dumpoutthedifference;

IF (entryN.lineNumInOtherFile - indexO) > (entryO.lineNumInOtherFile - indexN)

THEN CancelMatch[newArray, oldArray, totalLinesInNewFile, indexN]

ELSE CancelMatch[oldArray, newArray, totalLinesInOldFile, indexO];

};

IF entryN.typeOfPntr = lineNum

THEN dumpDiff ← TryToResolveConflicts[newArray, oldArray, indexN, indexO]

ELSE dumpDiff ← TryToResolveConflicts[oldArray, newArray, indexO, indexN];

EXITS dumpoutthedifference => dumpDiff ← TRUE;

};

IF dumpDiff

THEN {

DumpOutDiffAndMoveAhead[];

dumpDiff ← FALSE;

}

ELSE {

indexN ← indexN + 1;

indexO ← indexO + 1;

};

ENDLOOP;

IF startDifN < totalLinesInNewFile OR startDifO < totalLinesInOldFile THEN

DumpOutDiffAndMoveAhead[];

WorkingMsg['*];

IF switchMergeFiles THEN TiogaFileOps.Store[FR[difFileNode.root], difFileName];

};

IF anyDifferencesSeen

THEN FinishUp[Rope.Cat[IF switchMergeFiles THEN "merged version written on file " ELSE " differences written on file ", difFileName, ".\n"]]

ELSE FinishUp[" no differences encountered.\n"];

IF ~switchMergeFiles THEN {

IO.Close[NARROW[oldFile]];

IO.Close[NARROW[newFile]];

}

};

EXITS

usageError => {msg ← "Usage error: Waterlily file1 file2\n"; result ← $Failed};

fatalError => {result ← $Failed};

};

AdvanceOverBlanks: PROC [line: ROPE, localSwitchIgnoreEmpties: BOOL] RETURNS [BOOL] = {

index: CARDINAL ← 0;

length: CARDINAL = Rope.Length[line];

WHILE index < length DO

char: CHAR ← Rope.Fetch[line, index];

SELECT char FROM

' , '\t => index ← index + 1;

ENDCASE => RETURN [FALSE];

ENDLOOP;

here on an "empty" line.

RETURN [localSwitchIgnoreEmpties];

};

ShortName: PROC [name: ROPE] RETURNS [ROPE] = {

bang: INT ← Rope.Length[name];

pos: INT ← bang;

WHILE (pos ← pos - 1) > 0 DO

SELECT Rope.Fetch[name, pos] FROM

'>, '/, '] => {pos ← pos + 1; EXIT};

'!, '. => bang ← pos;

ENDCASE;

ENDLOOP;

RETURN [Rope.Flatten[name, pos, bang - pos]];

};

DefaultExtension: PROC [name: ROPE, ext: ROPE] RETURNS [ROPE] = {

len: INT ← Rope.Length[name];

pos: INT ← len;

WHILE (pos ← pos - 1) > 0 DO

SELECT Rope.Fetch[name, pos] FROM

'>, '/, '] => EXIT;

'., '! => RETURN [name];

ENDCASE;

ENDLOOP;

RETURN [Rope.Concat[name, ext]];

};

Node Management

TR: PROC[ref: REF] RETURNS [TextNode.Ref] = TRUSTED INLINE {

RETURN[LOOPHOLE[ref]]; };

FR: PROC[ref: REF] RETURNS [TiogaFileOps.Ref] = TRUSTED INLINE {

RETURN[LOOPHOLE[ref]]; };

NewNode: PROC RETURNS [node: Node] = {

node ← NEW[NodeBody ← [TR[TiogaFileOps.CreateRoot[]]]];

node.node ← node.root;

};

NodeFromFile: PROC[fileName: ROPE] RETURNS [node: Node] = {

node ← NEW[NodeBody ← []];

node.root ← PutGet.FromFile[fileName];

node.node ← node.root;

};

Commander.Register[key: "Waterlily", proc: WaterlilyProc, doc: LongHelpMsgW, clientData: NIL];

Commander.Register[key: "Cedarlily", proc: WaterlilyProc, doc: LongHelpMsgC, clientData: $Cedar];

Commander.Register[key: "Tigerlily",

proc: WaterlilyProc, doc: LongHelpMsgM, clientData: $Merge];

Commander.Register[key: "MergeLily",

proc: WaterlilyProc, doc: LongHelpMsgM, clientData: $Merge]; -- The old name