-- This program implements the file spelling corrector.
-- Last Edited by: Teitelman, March 12, 1983 1:28 pm
DIRECTORY
ConvertUnsafe USING [AppendRope, ToRope],
Directory USING [GetNext, Lookup, Error, ignore],
IO USING [char, GetOutputStreamRope, GetToken, NUL, IDProc, Put, PutChar, PutRope, rope, RIS, ROS, STREAM],
MessageWindow USING [Append, Clear],
Process USING [Detach],
Rope USING [Cat, Concat, Equal, IsEmpty, Fetch, Find, Flatten, Length, Lower, ROPE, Substr, Text, ToRefText, Upper],
RopeInline USING [InlineFlatten],
Spell USING [AbortProc, ConfirmProc, defaultModes, extensionList, GeneratorFromProcs, GetMatchingList, GetTheOne, InformProc, IsAPattern, Modes, ROPE, SpellingGenerator, SpellingList],
SpellExtras USING [],
UnsafeStorage USING [GetSystemUZone],
UserProfile USING [CallWhenProfileChanges, ProfileChangedProc, Boolean],
WindowManager USING [WaitCursor, UnWaitCursor]
;
FileSpellImpl: CEDAR MONITOR
IMPORTS ConvertUnsafe, Directory, IO, MessageWindow, Process, Rope, RopeInline, Spell, UnsafeStorage, UserProfile, WindowManager
EXPORTS Spell, SpellExtras
SHARES Rope -- to be able to say text.length
=
BEGIN OPEN Spell;
-- file correction
-- File extensions known to the corrector. This is the default spelling list of GetFileExtension; AddExtension pushes further entries onto the front.
extensionList: PUBLIC Spell.SpellingList ← LIST[
  "mesa", "cm", "config", "commands", "profile", "df", "doc",
  "log", "style", "abbreviations", "press", "bcd", "tioga", "mail"
  ];
-- Adds ext to extensionList unless an entry equal to it is already present (Rope.Equal is called with FALSE as its third argument).
-- Runs under the module monitor lock; the UNWIND catch phrase lets the lock be released if a signal unwinds through here.
AddExtension: PUBLIC ENTRY PROC [ext: ROPE] = {
  ENABLE UNWIND => NULL;
  FOR known: Spell.SpellingList ← extensionList, known.rest UNTIL known = NIL DO
    IF Rope.Equal[known.first, ext, FALSE] THEN RETURN; -- already on the list, nothing to do.
    ENDLOOP;
  -- fell off the end of the list without a match: push the new extension on the front.
  extensionList ← CONS[ext, extensionList];
  };
-- Copy of the user profile entry Spell.assumeFirstCharCorrect. When TRUE, GetTheFile does not retry a failed correction over the entire directory.
assumeFirstCharCorrect: BOOL;

-- Profile-changed callback: re-reads the profile entry, passing FALSE as the second argument of UserProfile.Boolean.
SetUp: UserProfile.ProfileChangedProc = {
  fromProfile: BOOL = UserProfile.Boolean["Spell.assumeFirstCharCorrect", FALSE];
  assumeFirstCharCorrect ← fromProfile;
  };
-- Attempts to correct the spelling of the file name unknown and returns the corrected name, or NIL if no correction was found or accepted.
--   unknown: the name as typed; it may carry an extension and may be a pattern.
--   defaultExt: extension to assume when unknown has none.
--   abort, confirm, inform, modes: passed through to the Spell machinery; NIL modes means Spell.defaultModes.
-- The shared file generator is acquired by SetUpGenerator and must be released exactly once by this call: either by Inform/Confirm (as soon as the search has produced something to report) or at the end / on UNWIND.
GetTheFile: PUBLIC PROC [
  unknown: ROPE,
  defaultExt: ROPE ← NIL,
  abort: AbortProc ← NIL,
  confirm: ConfirmProc ← NIL,
  inform: InformProc ← NIL,
  modes: Modes ← NIL
  ]
  RETURNS [correct: ROPE] = {
  ENABLE UNWIND => NULL;
  root, ext, msg: ROPE;
  unwaitCursor: BOOLEAN ← FALSE; -- TRUE while a WaitCursor issued by this call is outstanding.
  releaseGenerator: BOOLEAN ← FALSE; -- TRUE while this call owns the file generator.
  IF Rope.IsEmpty[unknown] THEN RETURN[NIL];
  IF modes = NIL THEN modes ← Spell.defaultModes;
  [root, ext, correct] ← SetUpGenerator[unknown: unknown, defaultExt: defaultExt, abort: abort, confirm: confirm, inform: inform, modes: modes];
  IF correct # NIL THEN RETURN[correct]; -- only mistake was extension. SetUpGenerator returns before acquiring the generator in this case, so there is nothing to release; releasing here could free a generator that another correction is still using.
  releaseGenerator ← TRUE; -- the generator belongs to this call. it must be released.
  {
    ENABLE UNWIND => {
      MessageWindow.Clear[];
      IF unwaitCursor THEN WindowManager.UnWaitCursor[];
      IF releaseGenerator THEN ReleaseGenerator[];
      };

    -- Wrapper around the client's inform: releases the generator first, then reports, appending the extension where appropriate.
    Inform: Spell.InformProc -- [msg: ROPE] -- = {
      IF releaseGenerator THEN {ReleaseGenerator[]; releaseGenerator ← FALSE}; -- see comment in Confirm below.
      IF inform = NIL THEN RETURN;
      IF ext # NIL AND Rope.Find[s1: unknown, s2: "."] # -1 THEN msg ← Rope.Cat[msg, ".", ext]; -- only output extension if extension in original unknown, as opposed to being obtained from defaultExt.
      inform[msg]
      };

    -- Wrapper around the client's confirm: releases the generator, restores the cursor, and asks only when there is a single candidate.
    Confirm: Spell.ConfirmProc -- [msg: ROPE, timeout: INT, defaultConfirm: BOOL] RETURNS[yes: BOOLEAN] -- = {
      i: INT = Rope.Find[msg, " -> "];
      IF releaseGenerator THEN {ReleaseGenerator[]; releaseGenerator ← FALSE}; -- in case user simply ignores this and does something else, spelling corrector is not hung. Need to set releaseGenerator to FALSE so that if he comes back to this later and confirms, when another correction has started, won't mistakenly release the generator again, thereby allowing a third correction to disrupt the second one still in progress.
      IF i = -1 THEN ERROR;
      IF Rope.Find[s1: msg, s2: " ", pos1: i + 4] # -1 THEN RETURN[FALSE]; -- more than one candidate. don't bother confirming. CHECK THIS
      IF confirm = NIL THEN RETURN[FALSE];
      IF unwaitCursor THEN {unwaitCursor ← FALSE; WindowManager.UnWaitCursor[]}; -- undo the wait cursor at most once, and only if it was actually set.
      IF ext # NIL AND Rope.Find[s1: unknown, s2: "."] # -1 THEN msg ← Rope.Cat[msg, ".", ext]; -- only output extension if extension in original unknown, as opposed to being obtained from defaultExt.
      RETURN[confirm[msg, timeout, defaultConfirm]];
      }; -- end of confirm

    -- main body
    -- The progress message and wait cursor are set up inside this block so that the UNWIND handler above also covers them.
    msg ← Rope.Concat["Trying for spelling correction on ", root];
    IF ext # NIL THEN msg ← Rope.Cat[msg, ".", ext];
    WindowManager.WaitCursor[];
    unwaitCursor ← TRUE;
    MessageWindow.Append[message: msg, clearFirst: TRUE];
    IF IsAPattern[unknown] THEN {
      l: LIST OF ROPE = Spell.GetMatchingList[pattern: root, generator: fileGenerator, abort: abort, confirm: Confirm, inform: Inform, modes: modes];
      IF l # NIL AND l.rest = NIL THEN correct ← l.first ELSE correct ← NIL; -- a pattern is accepted only when it matches exactly one file.
      }
    ELSE {
      correct ← GetTheOne[unknown: root, generator: fileGenerator, abort: abort, confirm: IF confirm = NIL THEN NIL ELSE Confirm, inform: IF inform = NIL THEN NIL ELSE Inform, modes: modes];
      IF correct = NIL AND fileGenState.stopAfter # LAST[CHARACTER] AND NOT assumeFirstCharCorrect AND releaseGenerator THEN -- no candidates found in corresponding bucket, or else, one was found and user rejected confirmation. now search entire space.
        TRUSTED {
          fileGenState.stopAfter ← LAST[CHARACTER];
          fileGenState.nextName.length ← 0;
          fileGenState.currentName.length ← 0;
          correct ← GetTheOne[unknown: root, generator: fileGenerator, abort: abort, confirm: IF confirm = NIL THEN NIL ELSE Confirm, inform: IF inform = NIL THEN NIL ELSE Inform, modes: modes];
          };
      };
    }; -- end of inner block containing definitions of inform, confirm, etc.
  IF correct # NIL AND ext # NIL THEN
    correct ← Rope.Cat[correct, ".", ext]; -- the generator yields roots only; put the extension back.
  MessageWindow.Clear[];
  IF unwaitCursor THEN WindowManager.UnWaitCursor[];
  IF releaseGenerator THEN ReleaseGenerator[];
  };
-- Corrects the spelling of a file extension against spellingList.
-- Trailing '$ characters are set aside before matching and re-attached to a corrected result.
-- Returns unknown itself when it already matches an entry (Rope.Equal called with FALSE as its third argument), the corrected extension when GetTheOne finds one, and NIL otherwise.
-- Also returns NIL when nothing remains after removing the '$ characters (empty unknown, or unknown made up solely of '$).
-- inform is accepted for interface compatibility but deliberately not used: see the comment on the GetTheOne call.
GetFileExtension: PUBLIC PROC [
  unknown: Rope.ROPE,
  spellingList: Spell.SpellingList ← Spell.extensionList,
  abort: AbortProc ← NIL,
  confirm: ConfirmProc ← NIL,
  inform: InformProc ← NIL,
  modes: Modes ← NIL
  ] RETURNS [correct: Rope.ROPE] = {
  len: INT = Rope.Length[unknown];
  len1: INT ← len; -- length of unknown without its trailing $'s
  ext1: ROPE;
  -- The bound is tested on every iteration: the previous form tested it only once and fetched position -1 when unknown consisted solely of $'s.
  WHILE len1 > 0 AND Rope.Fetch[unknown, len1 - 1] = '$ DO
    len1 ← len1 - 1;
    ENDLOOP;
  IF len1 = 0 THEN RETURN[NIL]; -- no extension characters to correct.
  IF len1 # len THEN ext1 ← Rope.Substr[base: unknown, len: len1] ELSE ext1 ← unknown;
  FOR l: SpellingList ← spellingList, l.rest UNTIL l = NIL DO
    IF Rope.Equal[l.first, ext1, FALSE] THEN RETURN[unknown]; -- already a known extension.
    ENDLOOP;
  correct ← GetTheOne[unknown: ext1, spellingList: spellingList, abort: abort, confirm: confirm, modes: modes]; -- dont print message. want to print it once for entire correction (which may fail).
  IF correct # NIL THEN {
    IF len # len1 THEN correct ← Rope.Concat[correct, Rope.Substr[base: unknown, start: len1]]; -- put $'s back
    };
  };
-- Returns the list of file names matching unknown, or NIL if there are none.
-- When unknown is a pattern the list comes from Spell.GetMatchingList; otherwise it holds the single result of GetTheOne, if any.
--   defaultExt: extension to assume when unknown has none.
--   abort, confirm, inform, modes: passed through to the Spell machinery; NIL modes means Spell.defaultModes.
-- The progress message and wait cursor are shown only when inform # NIL.
GetMatchingFileList: PUBLIC PROCEDURE [
  unknown: ROPE,
  defaultExt: ROPE ← NIL,
  abort: AbortProc ← NIL,
  confirm: ConfirmProc ← NIL,
  inform: InformProc ← NIL,
  modes: Modes ← NIL]
  RETURNS [files: LIST OF ROPE] =
  {
  ENABLE UNWIND => NULL;
  root, ext, correct, msg: ROPE;
  unwaitCursor: BOOLEAN ← FALSE; -- TRUE while a WaitCursor issued by this call is outstanding. Must start FALSE because it is set only when inform # NIL but is tested unconditionally at the end.
  releaseGenerator: BOOLEAN ← FALSE; -- TRUE while this call owns the file generator.
  IF Rope.IsEmpty[unknown] THEN RETURN[NIL];
  IF modes = NIL THEN modes ← Spell.defaultModes;
  [root, ext, correct] ← SetUpGenerator[unknown: unknown, defaultExt: defaultExt, abort: abort, confirm: confirm, inform: inform, modes: modes];
  IF correct # NIL THEN RETURN[LIST[correct]]; -- only mistake was extension. SetUpGenerator returns before acquiring the generator in this case, so there is nothing to release; releasing here could free a generator that another correction is still using.
  releaseGenerator ← TRUE; -- the generator belongs to this call. it must be released.
  {
    ENABLE UNWIND => {
      MessageWindow.Clear[];
      IF unwaitCursor THEN WindowManager.UnWaitCursor[];
      IF releaseGenerator THEN ReleaseGenerator[];
      };

    -- Wrapper around the client's inform. The generator is released first, and the flag cleared, so that it is never released twice by this call.
    Inform: Spell.InformProc -- [msg: ROPE] -- = {
      IF releaseGenerator THEN {ReleaseGenerator[]; releaseGenerator ← FALSE};
      IF inform = NIL THEN RETURN;
      IF ext # NIL AND Rope.Find[s1: unknown, s2: "."] # -1 THEN msg ← AddExt[msg]; -- only output extension if extension in original unknown, as opposed to being obtained from defaultExt.
      inform[msg]
      };

    -- Wrapper around the client's confirm. Releases the generator before asking, so that a user who ignores the question does not hang other corrections.
    Confirm: Spell.ConfirmProc -- [msg: ROPE, timeout: INT, defaultConfirm: BOOL] RETURNS[yes: BOOLEAN] -- = {
      IF releaseGenerator THEN {ReleaseGenerator[]; releaseGenerator ← FALSE};
      IF confirm = NIL THEN RETURN[FALSE];
      IF unwaitCursor THEN {unwaitCursor ← FALSE; WindowManager.UnWaitCursor[]}; -- undo the wait cursor at most once, and only if it was actually set.
      IF ext # NIL AND Rope.Find[s1: unknown, s2: "."] # -1 THEN msg ← AddExt[msg]; -- only output extension if extension in original unknown, as opposed to being obtained from defaultExt.
      RETURN[confirm[msg, timeout, defaultConfirm]];
      };

    -- Rebuilds r token by token, appending "." and ext to every token other than "->" and following each token with a space.
    AddExt: PROC [r: ROPE] RETURNS [ROPE] = {
      OPEN IO;
      stream1, stream2: STREAM;
      token: ROPE;
      stream1 ← RIS[r];
      stream2 ← ROS[];
      UNTIL Rope.Length[token ← stream1.GetToken[IO.IDProc]] = 0 DO
        stream2.PutRope[token];
        IF NOT Rope.Equal[token, "->"] THEN stream2.Put[char['.], rope[ext]];
        stream2.PutChar[' ];
        ENDLOOP;
      RETURN[stream2.GetOutputStreamRope[]];
      };

    -- main body
    -- The progress message and wait cursor are set up inside this block so that the UNWIND handler above also covers them.
    IF inform # NIL THEN {
      msg ← Rope.Concat["Trying for pattern completion on ", root];
      IF ext # NIL THEN msg ← Rope.Cat[msg, ".", ext];
      MessageWindow.Append[message: msg, clearFirst: TRUE];
      WindowManager.WaitCursor[];
      unwaitCursor ← TRUE;
      };
    IF NOT IsAPattern[unknown] THEN {
      x: ROPE = GetTheOne[unknown: root, generator: fileGenerator, abort: abort, confirm: Confirm, inform: Inform, modes: modes];
      IF x # NIL THEN files ← LIST[x] ELSE files ← NIL;
      }
    ELSE files ← Spell.GetMatchingList[pattern: root, generator: fileGenerator, abort: abort, confirm: Confirm, inform: Inform, modes: modes];
    };
  IF files # NIL AND ext # NIL THEN
    FOR lst: LIST OF ROPE ← files, lst.rest UNTIL lst = NIL DO
      lst.first ← Rope.Cat[lst.first, ".", ext]; -- the generator yields roots only; put the extension back.
      ENDLOOP;
  MessageWindow.Clear[];
  IF unwaitCursor THEN WindowManager.UnWaitCursor[];
  IF releaseGenerator THEN ReleaseGenerator[];
  }; -- of GetMatchingFileList
-- In the case that the extension is a known extension, the file generator is set up to generate only files with that extension, and the root is used as the unknown in the correction.
-- Splits unknown into root and extension, corrects the extension if necessary, and then acquires and primes the shared file generator.
-- Returns:
--   root: the part of the name to be corrected by the caller (the whole name when the name has to be treated as a unit).
--   ext: the extension candidates must carry; NIL means consider all files and treat each name as a unit.
--   correct: non-NIL only when the extension was the sole mistake and the repaired name exists.
-- NOTE: when correct # NIL this procedure returns BEFORE AcquireGenerator is called, so the caller does not own the generator in that case. In every other case the caller owns it on return and must call ReleaseGenerator.
SetUpGenerator: PROC [
  unknown: ROPE,
  defaultExt: ROPE,
  abort: AbortProc,
  confirm: ConfirmProc,
  inform: InformProc,
  modes: Modes
  ]
  RETURNS[root, ext, correct: ROPE] =
  {
  noExtension: BOOLEAN ← FALSE;
  i: INT ← Rope.Find[s1: unknown, s2: "."];
  firstChar: CHAR;

  -- Creates the generator and its state on first use; otherwise waits until the generator is free. Then marks it in use and makes sure the file cache exists.
  AcquireGenerator: ENTRY PROC = {
    ENABLE UNWIND => NULL; -- as in AddExtension: lets the monitor lock be released if a signal (e.g. out of the WAIT) unwinds through here.
    IF fileGenerator = NIL THEN -- first time.
      TRUSTED {
        s: LONG STRING = UnsafeStorage.GetSystemUZone[].NEW[StringBody[128]];
        pathName: ROPE;
        i, j: INT ← 0;
        [] ← Directory.GetNext[pathName: "WorkDir>", currentName: "", nextName: s];
        pathName ← ConvertUnsafe.ToRope[s];
        -- advance i to just past the last ">" so that s can be cut back to the directory prefix.
        UNTIL (j ← Rope.Find[s1: pathName, s2: ">", pos1: i]) = -1 DO
          i ← j + 1;
          ENDLOOP;
        s.length ← i;
        fileGenerator ← GeneratorFromProcs[
          generate: FileGenerate,
          clientData: fileGenState ← NEW[FileGenStateRecord ← [
            pathName: s,
            scratchText: NEW[TEXT[128]],
            currentName: UnsafeStorage.GetSystemUZone[].NEW[StringBody[128]],
            nextName: UnsafeStorage.GetSystemUZone[].NEW[StringBody[128]]
            ]]
          ]
        }
    ELSE WHILE fileGenState.inUse DO WAIT fileGenState.nowFree ENDLOOP;
    fileGenState.inUse ← TRUE;
    IF fileCache = NIL THEN fileCache ← NEW[ARRAY CHARACTER['A..'Z] OF ROPE ← ALL[NIL]];
    };

  -- main body
  root ← unknown;
  ext ← defaultExt;
  IF i = -1 THEN { -- unknown does not have an extension
    IF defaultExt = NIL THEN {
      IF NOT IsAPattern[unknown] THEN noExtension ← TRUE} -- means only consider those without extension. The IsAPattern check is because Foo* means match everything.
    ELSE IF IsAPattern[defaultExt] THEN { -- e.g. "*press", meaning match .press and .mesa.press. only way to handle this is to treat the whole thing as a unit.
      root ← Rope.Cat[root, ".", defaultExt];
      ext ← NIL;
      }
    }
  ELSE IF i = Rope.Length[unknown] - 1 -- i.e. of form foo.
    THEN {root ← Rope.Substr[base: unknown, len: i]; ext ← ""}
  ELSE IF IsAPattern[ext ← Rope.Substr[base: unknown, start: i + 1]] THEN -- treat whole name as unit
    {ext ← NIL; root ← unknown}
  ELSE
    {
    ext1: ROPE;
    root ← Rope.Substr[base: unknown, len: i];
    ext1 ← GetFileExtension[unknown: ext, abort: abort, confirm: confirm, modes: modes];
    IF ext1 = NIL THEN {root ← unknown; ext ← NIL} -- the extension is not recognized, so treat whole name as a unit.
    ELSE IF ext1 # ext THEN { -- was corrected
      r: ROPE = Rope.Cat[root, ".", ext1];
      IF CheckForFile[r] THEN { -- extension was the only thing misspelled. return now correct name.
        IF inform # NIL THEN inform[Rope.Cat[unknown, " -> ", r]];
        correct ← r;
        RETURN; -- the generator has not been acquired at this point.
        }
      ELSE ext ← ext1;-- ext is known. spelling correct on root only.
      };
    };
  AcquireGenerator[];
  -- reset the generator state for this correction.
  fileGenState.ext ← RopeInline.InlineFlatten[ext];
  fileGenState.noExtension ← noExtension;
  fileGenState.stopAfter ← LAST[CHARACTER];
  TRUSTED {fileGenState.nextName.length ← 0; fileGenState.currentName.length ← 0};
  fileGenState.scratchText.length ← 0;
  IF (firstChar ← Rope.Upper[Rope.Fetch[unknown, 0]]) IN ['A..'Z] THEN {
    -- restrict the enumeration to names up to the same first letter, and start it from the nearest cached name that still exists.
    fileGenState.stopAfter ← firstChar;
    FOR c: CHAR DECREASING IN ['A..firstChar] DO
      IF useFileCache AND NOT Rope.IsEmpty[fileCache[c]] AND CheckForFile[fileCache[c]] THEN
        TRUSTED {
          ConvertUnsafe.AppendRope[to: fileGenState.nextName, from: fileCache[c]];
          EXIT;
          };
      ENDLOOP;
    };
  }; -- of SetUpGenerator
-- Marks the shared file generator as free and notifies the nowFree condition, on which AcquireGenerator waits.
ReleaseGenerator: ENTRY PROC = {
  fileGenState.inUse ← FALSE;
  NOTIFY fileGenState.nowFree;
  };
-- file generator declarations
-- State of the single, shared file generator. Created on first use by SetUpGenerator and handed to the generator as its client data.
FileGenState: TYPE = REF FileGenStateRecord ← NIL;
FileGenStateRecord: TYPE = RECORD[
  pathName, currentName, nextName: LONG STRING, -- directory prefix, and the two name buffers that FileGenerate swaps on each step of the enumeration.
  scratchText: REF TEXT, -- buffer in which FileGenerate returns each candidate.
  ext: Rope.Text, -- extension candidates must carry; NIL means look at all extensions.
  noExtension: BOOLEAN ← FALSE, -- TRUE means only names without an extension are candidates.
  inUse: BOOL ← FALSE, -- TRUE while some correction owns the generator.
  stopAfter: CHARACTER ← IO.NUL, -- enumeration stops once the first character of a name is greater than this.
  nowFree: CONDITION -- notified by ReleaseGenerator.
  ];
fileGenerator: Spell.SpellingGenerator ← NIL;
fileGenState: FileGenState ← NIL;
fileCache: REF ARRAY CHARACTER['A..'Z] OF ROPE ← NIL; -- contains the last file just previous (in alphabetic order) to the indicated character. e.g. for B might be AtomsPrivate.BCD
useFileCache: BOOL ← TRUE; -- when TRUE, SetUpGenerator starts the enumeration from a cached name.
buildFileCache: BOOL ← TRUE; -- when TRUE, FileGenerate records cache entries as it enumerates.
-- Generator procedure for file names. Each call advances the directory enumeration and returns the next candidate in state.scratchText, or NIL when the enumeration is exhausted or has passed state.stopAfter.
-- The candidate is the name without the directory prefix and, when a specific extension is required, without that extension.
-- Names are skipped when they have an extension and noExtension is set, or when a required extension is missing or different.
-- The returned text is the shared scratch buffer and is overwritten by the next call.
FileGenerate: PROC [self: SpellingGenerator] RETURNS [REF TEXT] = TRUSTED {
  state: FileGenState ← NARROW[self.clientData];
  ext: Rope.Text = state.ext;
  noExtension: BOOL = state.noExtension;
  pathLen: NAT = state.pathName.length;
  currentLen: NAT;
  len: NAT;
  s: LONG STRING;
  DO
    extDidntMatch: BOOL ← FALSE;
    -- swap the two buffers: the name produced last time becomes the starting point for GetNext.
    s ← state.currentName;
    state.currentName ← state.nextName;
    state.nextName ← s;
    [] ← Directory.GetNext[pathName: state.pathName, currentName: state.currentName, nextName: state.nextName];
    IF state.nextName.length = 0 THEN RETURN[NIL];
    IF state.currentName.length = 0 OR Rope.Upper[state.nextName[pathLen]] > Rope.Upper[state.currentName[pathLen]] THEN -- new letter (assumes enumeration in alphabetic order)
      {
      cache: ROPE;
      len: INT;
      currentChar: CHAR = Rope.Upper[state.nextName[pathLen]];
      IF currentChar > state.stopAfter THEN RETURN[NIL]; -- says that if you are doing a pattern match and first character is not a *, ok to stop as soon as you have passed that character.
      -- see if this entry is already in the cache (avoid allocations).
      IF currentChar IN ['A..'Z] THEN {
        cache ← fileCache[currentChar];
        len ← Rope.Length[cache];
        FOR i: INT IN [0..state.currentName.length) DO
          IF i >= len OR state.currentName[i] # Rope.Fetch[cache, i] THEN {
            IF buildFileCache THEN fileCache[currentChar] ← ConvertUnsafe.ToRope[state.currentName]; -- insert in cache.
            EXIT;
            };
          ENDLOOP;
        }
      };
    currentLen ← state.nextName.length;
    len ← currentLen - pathLen;
    -- copy characters from long string into Rope.Text, skipping over pathName characters
    FOR i: NAT IN [pathLen..currentLen) DO
      char: CHARACTER ← state.nextName[i];
      IF char # '. THEN NULL
      ELSE IF noExtension THEN GOTO Fail
      ELSE IF ext # NIL THEN -- NIL means look at all extensions, i.e. treat the entire name as a unit.
        { -- compare extension with ext. If not equal, do not consider this candidate.
        i1: NAT ← i + 1;
        FOR j: NAT IN [0..ext.length) DO
          IF i1 = currentLen THEN GOTO Fail; -- unaccounted for characters in target extension
          IF Rope.Lower[ext[j]] # Rope.Lower[state.nextName[i1]] THEN GOTO MaybeFail; -- target = .press and ext (at this point) is mesa.press
          i1 ← i1 + 1;
          REPEAT
            FINISHED => IF i1 < currentLen THEN GOTO MaybeFail; -- unaccounted for character in unknown extension, e.g. .press.press where target is .press
          ENDLOOP;
        len ← i - pathLen; -- extension matched: the candidate ends just before this dot.
        extDidntMatch ← FALSE;
        EXIT;
        EXITS
          MaybeFail => extDidntMatch ← TRUE; -- a later dot in the same name may still introduce a matching extension.
        };
      state.scratchText[i - pathLen] ← char;
      REPEAT
        Fail => LOOP;
        FINISHED => IF (ext # NIL AND NOT noExtension) OR extDidntMatch THEN LOOP; -- to get here, either this file doesnt have an extension, or else ext = NIL, i.e. consider all files. This test says that if an extension is required, then it must be the case that this file does not have one, so reject it.
      ENDLOOP;
    state.scratchText.length ← len;
    RETURN[state.scratchText];
    ENDLOOP;
  };
-- Runs the file generator to exhaustion purely for its side effect of filling fileCache (see FileGenerate), then gives the generator back.
-- "zzz" is used as the unknown so that SetUpGenerator sets stopAfter to 'Z.
LoadCache: PROC = {
  [] ← SetUpGenerator[unknown: "zzz", defaultExt: NIL, abort: NIL, confirm: NIL, inform: NIL, modes: NIL];
  WHILE FileGenerate[fileGenerator] # NIL DO
    NULL; -- the candidates themselves are not wanted.
    ENDLOOP;
  ReleaseGenerator[]
  };
-- Cloned from UserExec so that Spell is totally independent of it.
-- Returns TRUE if Directory.Lookup succeeds on file, and FALSE if file is empty or the lookup raises Directory.Error.
CheckForFile: PROC [file: ROPE] RETURNS [found: BOOLEAN] = TRUSTED {
  fName: LONG STRING;
  -- Test for an empty name first, so that the LOOPHOLE conversion below is never applied to an empty rope.
  IF Rope.Length[file] = 0 THEN RETURN[FALSE];
  fName ← LOOPHOLE[Rope.ToRefText[file]]; -- reinterprets the REF TEXT as a LONG STRING for the Directory interface.
  found ← TRUE;
  [] ← Directory.Lookup[fileName: fName, permissions: Directory.ignore
    ! Directory.Error =>
      {found ← FALSE; CONTINUE}
    ];
  };
-- Module start code.
-- Fork LoadCache as a detached process so that the file cache is filled without holding up the start code.
TRUSTED {Process.Detach[FORK LoadCache[]]};
-- Register SetUp so that assumeFirstCharCorrect is re-read whenever the user profile changes.
UserProfile.CallWhenProfileChanges[SetUp];
END.
-- June 21, 1982 1:09 pm: made patternMatch one of the enumerated type of spelling classes.