-- This program implements the file spelling corrector.
-- Last Edited by: Teitelman, March 12, 1983 1:28 pm
DIRECTORY
ConvertUnsafe USING [AppendRope, ToRope],
Directory USING [GetNext, Lookup, Error, ignore],
IO USING [char, GetOutputStreamRope, GetToken, NUL, IDProc, Put, PutChar, PutRope, rope, RIS, ROS, STREAM],
MessageWindow USING [Append, Clear],
Process USING [Detach],
Rope USING [Cat, Concat, Equal, IsEmpty, Fetch, Find, Flatten, Length, Lower, ROPE, Substr, Text, ToRefText, Upper],
RopeInline USING [InlineFlatten],
Spell USING [AbortProc, ConfirmProc, defaultModes, extensionList, GeneratorFromProcs, GetMatchingList, GetTheOne, InformProc, IsAPattern, Modes, ROPE, SpellingGenerator, SpellingList],
SpellExtras USING [],
UnsafeStorage USING [GetSystemUZone],
UserProfile USING [CallWhenProfileChanges, ProfileChangedProc, Boolean],
WindowManager USING [WaitCursor, UnWaitCursor]
;
FileSpellImpl: CEDAR MONITOR
IMPORTS ConvertUnsafe, Directory, IO, MessageWindow, Process, Rope, RopeInline, Spell, UnsafeStorage, UserProfile, WindowManager
EXPORTS Spell, SpellExtras
SHARES Rope
-- to be able to say text.length
=
BEGIN OPEN Spell;
-- file correction
-- Known file extensions. AddExtension pushes new entries on the front; this list is
-- what an extension is corrected against.
extensionList: PUBLIC Spell.SpellingList ← LIST[
    "mesa", "cm", "config", "commands", "profile", "df", "doc",
    "log", "style", "abbreviations", "press", "bcd", "tioga", "mail"
    ];
AddExtension: PUBLIC ENTRY PROC [ext: ROPE] = {
    -- Adds ext to the front of extensionList unless an entry that matches it
    -- (compared with case = FALSE) is already present.
    ENABLE UNWIND => NULL;
    FOR tail: Spell.SpellingList ← extensionList, tail.rest UNTIL tail = NIL DO
        IF Rope.Equal[tail.first, ext, FALSE] THEN RETURN; -- already known, nothing to do.
        ENDLOOP;
    extensionList ← CONS[ext, extensionList];
    };
-- When TRUE, GetTheFile does not make its second, whole-directory pass after the
-- search restricted by first letter fails. Initialized to the same default that
-- SetUp uses, so that it has a defined value even before SetUp has run.
assumeFirstCharCorrect: BOOL ← FALSE;
SetUp: UserProfile.ProfileChangedProc = {
    -- Re-reads the "Spell.assumeFirstCharCorrect" profile entry (default FALSE).
    assumeFirstCharCorrect ← UserProfile.Boolean["Spell.assumeFirstCharCorrect", FALSE];
    };
GetTheFile: PUBLIC PROC [
    unknown: ROPE,
    defaultExt: ROPE ← NIL,
    abort: AbortProc ← NIL,
    confirm: ConfirmProc ← NIL,
    inform: InformProc ← NIL,
    modes: Modes ← NIL
    ]
    RETURNS [correct: ROPE] = {
    -- Attempts to correct the file name unknown. Returns the corrected name (with the
    -- extension re-attached when one is in effect), or NIL if unknown is empty or no
    -- single correction was obtained. modes = NIL means Spell.defaultModes.
    ENABLE UNWIND => NULL;
    root, ext, msg: ROPE;
    unwaitCursor: BOOLEAN ← FALSE; -- TRUE while a WaitCursor of ours is outstanding.
    releaseGenerator: BOOLEAN ← FALSE; -- TRUE while this call owns the file generator.
    IF Rope.IsEmpty[unknown] THEN RETURN[NIL];
    IF modes = NIL THEN modes ← Spell.defaultModes;
    [root, ext, correct] ← SetUpGenerator[unknown: unknown, defaultExt: defaultExt, abort: abort, confirm: confirm, inform: inform, modes: modes];
    -- only mistake was extension. SetUpGenerator returns on this path before it acquires the
    -- file generator, so there is nothing to release here; releasing would free a generator
    -- that may be in use by some other correction.
    IF correct # NIL THEN RETURN[correct];
    releaseGenerator ← TRUE; -- the generator belongs to this call. it must be released.
    {
        ENABLE
            UNWIND => {
                MessageWindow.Clear[];
                IF unwaitCursor THEN WindowManager.UnWaitCursor[];
                IF releaseGenerator THEN ReleaseGenerator[];
                };
        Inform: Spell.InformProc -- [msg: ROPE] -- = {
            IF releaseGenerator THEN {ReleaseGenerator[]; releaseGenerator ← FALSE}; -- see comment in Confirm below.
            IF inform = NIL THEN RETURN;
            IF ext # NIL AND Rope.Find[s1: unknown, s2: "."] # -1 THEN msg ← Rope.Cat[msg, ".", ext]; -- only output extension if extension in original unknown, as opposed to being obtained from defaultExt.
            inform[msg]
            };
        Confirm: Spell.ConfirmProc -- [msg: ROPE, timeout: INT, defaultConfirm: BOOL] RETURNS[yes: BOOLEAN] -- = {
            i: INT = Rope.Find[msg, " -> "];
            IF releaseGenerator THEN {ReleaseGenerator[]; releaseGenerator ← FALSE}; -- in case user simply ignores this and does something else, spelling corrector is not hung. Need to set releaseGenerator to FALSE so that if he comes back to this later and confirms, when another correction has started, won't mistakenly release the generator again, thereby allowing a third correction to disrupt the second one still in progress.
            IF i = -1 THEN ERROR;
            IF Rope.Find[s1: msg, s2: " ", pos1: i + 4] # -1 THEN RETURN[FALSE]; -- more than one candidate. don't bother confirming. CHECK THIS
            IF confirm = NIL THEN RETURN[FALSE];
            -- undo our WaitCursor at most once, however many times we are asked to confirm.
            IF unwaitCursor THEN {unwaitCursor ← FALSE; WindowManager.UnWaitCursor[]};
            IF ext # NIL AND Rope.Find[s1: unknown, s2: "."] # -1 THEN msg ← Rope.Cat[msg, ".", ext]; -- only output extension if extension in original unknown, as opposed to being obtained from defaultExt.
            RETURN[confirm[msg, timeout, defaultConfirm]];
            }; -- end of confirm
        -- main body
        -- the cursor and message window are set up inside this block so that the UNWIND handler above restores them (and releases the generator) if anything below fails.
        WindowManager.WaitCursor[];
        unwaitCursor ← TRUE;
        msg ← Rope.Concat["Trying for spelling correction on ", root];
        IF ext # NIL THEN msg ← Rope.Cat[msg, ".", ext];
        MessageWindow.Append[message: msg, clearFirst: TRUE];
        IF IsAPattern[unknown]
            THEN {
                l: LIST OF ROPE = Spell.GetMatchingList[pattern: root, generator: fileGenerator, abort: abort, confirm: Confirm, inform: Inform, modes: modes];
                IF l # NIL AND l.rest = NIL THEN correct ← l.first ELSE correct ← NIL; -- a pattern only counts if it matched exactly one file.
                }
            ELSE {
                correct ← GetTheOne[unknown: root, generator: fileGenerator, abort: abort, confirm: IF confirm = NIL THEN NIL ELSE Confirm, inform: IF inform = NIL THEN NIL ELSE Inform, modes: modes];
                -- releaseGenerator is tested first so that the generator state is only examined while this call still owns it.
                IF correct = NIL
                    AND releaseGenerator
                    AND NOT assumeFirstCharCorrect
                    AND fileGenState.stopAfter # LAST[CHARACTER]
                    THEN
                        -- no candidates found in corresponding bucket, or else, one was found and user rejected confirmation. now search entire space.
                        TRUSTED {
                            fileGenState.stopAfter ← LAST[CHARACTER];
                            fileGenState.nextName.length ← 0;
                            fileGenState.currentName.length ← 0;
                            correct ← GetTheOne[unknown: root, generator: fileGenerator, abort: abort, confirm: IF confirm = NIL THEN NIL ELSE Confirm, inform: IF inform = NIL THEN NIL ELSE Inform, modes: modes];
                            };
                };
        }; -- end of inner block containing definitions of inform, confirm, etc.
    IF correct # NIL AND ext # NIL THEN correct ← Rope.Cat[correct, ".", ext];
    MessageWindow.Clear[];
    IF unwaitCursor THEN WindowManager.UnWaitCursor[];
    IF releaseGenerator THEN ReleaseGenerator[];
    };
GetFileExtension: PUBLIC PROC [
    unknown: Rope.ROPE,
    spellingList: Spell.SpellingList ← Spell.extensionList,
    abort: AbortProc ← NIL,
    confirm: ConfirmProc ← NIL,
    inform: InformProc ← NIL,
    modes: Modes ← NIL
    ]
    RETURNS [correct: Rope.ROPE] = {
    -- Corrects the extension unknown against spellingList. Trailing '$ characters are
    -- set aside for the comparison and put back on the result. Returns unknown itself
    -- (the very same rope) if it is already on the list, the corrected extension if
    -- GetTheOne finds one, and NIL otherwise. inform is accepted but not passed on.
    len: INT = Rope.Length[unknown];
    len1: INT ← len; -- length of unknown without its trailing $'s.
    ext1: ROPE;
    -- the len1 > 0 test is made on every iteration: an unknown consisting only of $'s would otherwise run off the front of the rope (Fetch at index -1).
    WHILE len1 > 0 AND Rope.Fetch[unknown, len1 - 1] = '$ DO
        len1 ← len1 - 1;
        ENDLOOP;
    IF len1 # len THEN ext1 ← Rope.Substr[base: unknown, len: len1] ELSE ext1 ← unknown;
    FOR l: SpellingList ← spellingList, l.rest UNTIL l = NIL DO
        IF Rope.Equal[l.first, ext1, FALSE] THEN RETURN[unknown];
        ENDLOOP;
    correct ← GetTheOne[unknown: ext1, spellingList: spellingList, abort: abort, confirm: confirm, modes: modes]; -- dont print message. want to print it once for entire correction (which may fail).
    IF correct # NIL AND len # len1 THEN correct ← Rope.Concat[correct, Rope.Substr[base: unknown, start: len1]]; -- put $'s back
    };
GetMatchingFileList: PUBLIC PROCEDURE [
    unknown: ROPE,
    defaultExt: ROPE ← NIL,
    abort: AbortProc ← NIL,
    confirm: ConfirmProc ← NIL,
    inform: InformProc ← NIL,
    modes: Modes ← NIL
    ]
    RETURNS [files: LIST OF ROPE] = {
    -- Returns the files that unknown stands for: the matches of a pattern, or the single
    -- spelling correction of an ordinary name. Each name has the extension re-attached
    -- when one is in effect. Returns NIL if unknown is empty or nothing was found.
    -- modes = NIL means Spell.defaultModes.
    ENABLE UNWIND => NULL;
    root, ext, correct, msg: ROPE;
    unwaitCursor: BOOLEAN ← FALSE; -- TRUE while a WaitCursor of ours is outstanding. Must start FALSE: WaitCursor is only called when inform # NIL.
    releaseGenerator: BOOLEAN ← FALSE; -- TRUE while this call owns the file generator.
    IF Rope.IsEmpty[unknown] THEN RETURN[NIL];
    IF modes = NIL THEN modes ← Spell.defaultModes;
    [root, ext, correct] ← SetUpGenerator[unknown: unknown, defaultExt: defaultExt, abort: abort, confirm: confirm, inform: inform, modes: modes];
    -- only mistake was extension. SetUpGenerator returns on this path before it acquires the
    -- file generator, so there is nothing to release here; releasing would free a generator
    -- that may be in use by some other correction.
    IF correct # NIL THEN RETURN[LIST[correct]];
    releaseGenerator ← TRUE; -- the generator belongs to this call. it must be released.
    {
        ENABLE
            UNWIND => {
                MessageWindow.Clear[];
                IF unwaitCursor THEN WindowManager.UnWaitCursor[];
                IF releaseGenerator THEN ReleaseGenerator[];
                };
        Inform: Spell.InformProc -- [msg: ROPE] -- = {
            IF releaseGenerator THEN {ReleaseGenerator[]; releaseGenerator ← FALSE}; -- see comments in GetTheFile.
            IF inform = NIL THEN RETURN;
            IF ext # NIL AND Rope.Find[s1: unknown, s2: "."] # -1 THEN msg ← AddExt[msg]; -- only output extension if extension in original unknown, as opposed to being obtained from defaultExt.
            inform[msg]
            };
        Confirm: Spell.ConfirmProc -- [msg: ROPE, timeout: INT, defaultConfirm: BOOL] RETURNS[yes: BOOLEAN] -- = {
            IF releaseGenerator THEN {ReleaseGenerator[]; releaseGenerator ← FALSE}; -- see comments in GetTheFile.
            IF confirm = NIL THEN RETURN[FALSE];
            -- only undo a WaitCursor that was actually done (none is done when inform = NIL), and only once.
            IF unwaitCursor THEN {unwaitCursor ← FALSE; WindowManager.UnWaitCursor[]};
            IF ext # NIL AND Rope.Find[s1: unknown, s2: "."] # -1 THEN msg ← AddExt[msg]; -- only output extension if extension in original unknown, as opposed to being obtained from defaultExt.
            RETURN[confirm[msg, timeout, defaultConfirm]];
            };
        AddExt: PROC [r: ROPE] RETURNS [ROPE] = {
            -- Copies r token by token, writing "." and ext after every token other than "->", and a space after each token.
            OPEN IO;
            stream1, stream2: STREAM;
            token: ROPE;
            stream1 ← RIS[r];
            stream2 ← ROS[];
            UNTIL Rope.Length[token ← stream1.GetToken[IO.IDProc]] = 0 DO
                stream2.PutRope[token];
                IF NOT Rope.Equal[token, "->"] THEN stream2.Put[char['.], rope[ext]];
                stream2.PutChar[' ];
                ENDLOOP;
            RETURN[stream2.GetOutputStreamRope[]];
            };
        -- main body
        -- the cursor and message window are set up inside this block so that the UNWIND handler above restores them (and releases the generator) if anything below fails.
        IF inform # NIL THEN {
            WindowManager.WaitCursor[];
            unwaitCursor ← TRUE;
            msg ← Rope.Concat["Trying for pattern completion on ", root];
            IF ext # NIL THEN msg ← Rope.Cat[msg, ".", ext];
            MessageWindow.Append[message: msg, clearFirst: TRUE];
            };
        IF NOT IsAPattern[unknown]
            THEN {
                x: ROPE = GetTheOne[unknown: root, generator: fileGenerator, abort: abort, confirm: Confirm, inform: Inform, modes: modes];
                IF x # NIL THEN files ← LIST[x] ELSE files ← NIL;
                }
            ELSE files ← Spell.GetMatchingList[pattern: root, generator: fileGenerator, abort: abort, confirm: Confirm, inform: Inform, modes: modes];
        };
    IF files # NIL AND ext # NIL THEN
        FOR lst: LIST OF ROPE ← files, lst.rest UNTIL lst = NIL DO
            lst.first ← Rope.Cat[lst.first, ".", ext];
            ENDLOOP;
    MessageWindow.Clear[];
    IF unwaitCursor THEN WindowManager.UnWaitCursor[];
    IF releaseGenerator THEN ReleaseGenerator[];
    }; -- of GetMatchingFileList
-- In the case that the extension is a known extension, the file generator is set up to generate only files of that extension, and the root is used as the unknown in the correction.
SetUpGenerator: PROC [
    unknown: ROPE,
    defaultExt: ROPE,
    abort: AbortProc,
    confirm: ConfirmProc,
    inform: InformProc,
    modes: Modes
    ]
    RETURNS[root, ext, correct: ROPE] = {
    -- Splits unknown into root and ext, then acquires the file generator and primes it.
    -- Two outcomes:
    -- correct # NIL: only the extension was misspelled and the repaired name exists.
    --   The procedure returns early and the generator has NOT been acquired.
    -- correct = NIL: the generator has been acquired (and must later be released with
    --   ReleaseGenerator). root is what is to be corrected; ext is the extension that
    --   candidates must have, or NIL if the whole name is being treated as a unit.
    -- unknown must not be empty: its first character is fetched below.
    noExtension: BOOLEAN ← FALSE;
    i: INT ← Rope.Find[s1: unknown, s2: "."];
    firstChar: CHAR;
    AcquireGenerator: ENTRY PROC = {
        -- Creates the generator on first use; otherwise waits until it is free. Then marks it in use.
        IF fileGenerator = NIL
            THEN
                -- first time.
                TRUSTED {
                    s: LONG STRING = UnsafeStorage.GetSystemUZone[].NEW[StringBody[128]];
                    pathName: ROPE;
                    i, j: INT ← 0;
                    [] ← Directory.GetNext[pathName: "WorkDir>", currentName: "", nextName: s];
                    pathName ← ConvertUnsafe.ToRope[s];
                    -- leave i just past the last ">" in the name that came back.
                    UNTIL (j ← Rope.Find[s1: pathName, s2: ">", pos1: i]) = -1 DO
                        i ← j + 1;
                        ENDLOOP;
                    s.length ← i; -- truncate s to the part up to and including that ">".
                    fileGenerator ← GeneratorFromProcs[
                        generate: FileGenerate,
                        clientData: fileGenState ←
                            NEW[FileGenStateRecord ← [
                                pathName: s,
                                scratchText: NEW[TEXT[128]],
                                currentName: UnsafeStorage.GetSystemUZone[].NEW[StringBody[128]],
                                nextName: UnsafeStorage.GetSystemUZone[].NEW[StringBody[128]]
                                ]]
                        ]
                    }
            ELSE WHILE fileGenState.inUse DO WAIT fileGenState.nowFree ENDLOOP;
        fileGenState.inUse ← TRUE;
        IF fileCache = NIL THEN fileCache ← NEW[ARRAY CHARACTER['A..'Z] OF ROPE ← ALL[NIL]];
        };
    -- main body
    root ← unknown;
    ext ← defaultExt;
    IF i = -1
        THEN {
            -- unknown does not have an extension
            IF defaultExt = NIL
                THEN {
                    IF NOT IsAPattern[unknown] THEN noExtension ← TRUE} -- means only consider those without extension. The IsAPattern check is because Foo* means match everything.
                ELSE
                    IF IsAPattern[defaultExt]
                        THEN {
                            -- e.g. "*press", meaning match .press and .mesa.press. only way to handle this is to treat the whole thing as a unit.
                            root ← Rope.Cat[root, ".", defaultExt];
                            ext ← NIL;
                            }
            }
        ELSE
            IF i = Rope.Length[unknown] - 1
                -- i.e. of form foo.
                THEN {root ← Rope.Substr[base: unknown, len: i]; ext ← ""}
                ELSE
                    IF IsAPattern[ext ← Rope.Substr[base: unknown, start: i + 1]]
                        THEN
                            -- treat whole name as unit
                            {ext ← NIL; root ← unknown}
                        ELSE
                            {
                            ext1: ROPE;
                            root ← Rope.Substr[base: unknown, len: i];
                            ext1 ← GetFileExtension[unknown: ext, abort: abort, confirm: confirm, modes: modes];
                            IF ext1 = NIL THEN {root ← unknown; ext ← NIL} -- the extension is not recognized, so treat whole name as a unit.
                            ELSE
                                -- GetFileExtension hands back its argument itself when the extension is already known, so this reference comparison detects that a correction was made.
                                IF ext1 # ext
                                    THEN {
                                        -- was corrected
                                        r: ROPE = Rope.Cat[root, ".", ext1];
                                        IF CheckForFile[r]
                                            THEN {
                                                -- extension was the only thing misspelled. return now correct name.
                                                IF inform # NIL THEN inform[Rope.Cat[unknown, " -> ", r]];
                                                correct ← r;
                                                RETURN; -- NB: returns without having acquired the generator.
                                                }
                                            ELSE ext ← ext1;-- ext is known. spelling correct on root only.
                                        };
                            };
    AcquireGenerator[];
    fileGenState.ext ← RopeInline.InlineFlatten[ext];
    fileGenState.noExtension ← noExtension;
    fileGenState.stopAfter ← LAST[CHARACTER];
    TRUSTED {fileGenState.nextName.length ← 0; fileGenState.currentName.length ← 0};
    fileGenState.scratchText.length ← 0;
    IF (firstChar ← Rope.Upper[Rope.Fetch[unknown, 0]]) IN ['A..'Z]
        THEN {
            -- restrict the enumeration to names whose first letter is not beyond firstChar, and start it from the nearest cached file name that still exists.
            fileGenState.stopAfter ← firstChar;
            FOR c: CHAR DECREASING IN ['A..firstChar] DO
                IF useFileCache
                    AND NOT Rope.IsEmpty[fileCache[c]]
                    AND CheckForFile[fileCache[c]]
                    THEN
                        TRUSTED {
                            ConvertUnsafe.AppendRope[to: fileGenState.nextName, from: fileCache[c]];
                            EXIT;
                            };
                ENDLOOP;
            };
    }; -- of SetUpGenerator
ReleaseGenerator: ENTRY PROC = {
    -- Marks the file generator as free and wakes one process waiting for it in AcquireGenerator.
    -- Does nothing if the generator state has not been created yet (avoids a NIL dereference).
    IF fileGenState = NIL THEN RETURN;
    fileGenState.inUse ← FALSE;
    NOTIFY fileGenState.nowFree
    };
-- file generator declarations
FileGenState: TYPE = REF FileGenStateRecord ← NIL;
FileGenStateRecord: TYPE = RECORD[
    pathName: LONG STRING, -- passed as pathName to Directory.GetNext; its length is how many leading characters FileGenerate skips.
    currentName: LONG STRING, -- name handed to Directory.GetNext as the place to continue from.
    nextName: LONG STRING, -- name Directory.GetNext fills in. FileGenerate swaps it with currentName on every step.
    scratchText: REF TEXT, -- buffer in which FileGenerate builds, and returns, each candidate.
    ext: Rope.Text, -- extension candidates must have. NIL means consider all files.
    noExtension: BOOLEAN ← FALSE, -- TRUE means reject any name containing a '. character.
    inUse: BOOL ← FALSE, -- TRUE while some correction owns the generator.
    stopAfter: CHARACTER ← IO.NUL, -- FileGenerate stops once the first letter of the names passes this character.
    nowFree: CONDITION -- notified by ReleaseGenerator; waited on in AcquireGenerator.
    ];
fileGenerator: Spell.SpellingGenerator ← NIL;
fileGenState: FileGenState ← NIL;
-- fileCache contains the last file just previous (in alphabetic order) to the indicated character. e.g. for B might be AtomsPrivate.BCD
fileCache: REF ARRAY CHARACTER['A..'Z] OF ROPE ← NIL;
useFileCache: BOOL ← TRUE; -- SetUpGenerator consults fileCache only when this is TRUE.
buildFileCache: BOOL ← TRUE; -- FileGenerate updates fileCache only when this is TRUE.
FileGenerate: PROC [self: SpellingGenerator] RETURNS [REF TEXT] = TRUSTED {
    -- Generate procedure of the file generator. Each call steps the directory enumeration
    -- until it reaches a name acceptable under the state's ext / noExtension settings, and
    -- returns that name in the state's scratchText with the pathName prefix removed (and
    -- with the extension removed when it matched ext). Returns NIL when the enumeration is
    -- exhausted or has passed stopAfter. The same scratchText is reused on every call.
    state: FileGenState ← NARROW[self.clientData];
    ext: Rope.Text = state.ext;
    noExtension: BOOL = state.noExtension;
    pathLen: NAT = state.pathName.length;
    currentLen: NAT;
    len: NAT;
    s: LONG STRING;
    DO
        extDidntMatch: BOOL ← FALSE;
        -- the name found last time becomes the place to continue from.
        s ← state.currentName;
        state.currentName ← state.nextName;
        state.nextName ← s;
        [] ← Directory.GetNext[pathName: state.pathName, currentName: state.currentName, nextName: state.nextName];
        IF state.nextName.length = 0 THEN RETURN[NIL];
        IF state.currentName.length = 0
            OR Rope.Upper[state.nextName[pathLen]] > Rope.Upper[state.currentName[pathLen]]
            THEN
                -- new letter (assumes enumeration in alphabetic order)
                {
                cache: ROPE;
                len: INT;
                currentChar: CHAR = Rope.Upper[state.nextName[pathLen]];
                IF currentChar > state.stopAfter THEN RETURN[NIL]; -- says that if you are doing a pattern match and first character is not a *, ok to stop as soon as you have passed that character.
                -- see if this entry is already in the cache (avoid allocations).
                IF currentChar IN ['A..'Z]
                    THEN {
                        cache ← fileCache[currentChar];
                        len ← Rope.Length[cache];
                        FOR i: INT IN [0..state.currentName.length) DO
                            IF i >= len
                                OR state.currentName[i] # Rope.Fetch[cache, i]
                                THEN {
                                    IF buildFileCache THEN fileCache[currentChar] ← ConvertUnsafe.ToRope[state.currentName]; -- insert in cache.
                                    EXIT;
                                    };
                            ENDLOOP;
                        }
                };
        currentLen ← state.nextName.length;
        len ← currentLen - pathLen;
        -- copy characters from long string into Rope.Text, skipping over pathName characters
        FOR i: NAT IN [pathLen..currentLen) DO
            char: CHARACTER ← state.nextName[i];
            IF char # '. THEN NULL
            ELSE IF noExtension THEN GOTO Fail
            ELSE
                IF ext # NIL
                    THEN
                        -- NIL means look at all extensions, i.e. treat the entire name as a unit.
                        {
                        -- compare extension with ext. If not equal, do not consider this candidate.
                        i1: NAT ← i + 1;
                        FOR j: NAT IN [0..ext.length) DO
                            IF i1 = currentLen THEN GOTO Fail; -- unaccounted for characters in target extension
                            IF Rope.Lower[ext[j]] # Rope.Lower[state.nextName[i1]] THEN GOTO MaybeFail; -- target = .press and ext (at this point) is mesa.press
                            i1 ← i1 + 1;
                            REPEAT
                                FINISHED => IF i1 < currentLen THEN GOTO MaybeFail; -- unaccounted for character in unknown extension, e.g. .press.press where target is .press
                            ENDLOOP;
                        -- extension matched: the candidate is the part of the name before this period.
                        len ← i - pathLen;
                        extDidntMatch ← FALSE;
                        EXIT;
                        EXITS
                            MaybeFail => extDidntMatch ← TRUE;
                        };
            state.scratchText[i - pathLen] ← char;
            REPEAT
                Fail => LOOP;
                FINISHED => IF (ext # NIL AND NOT noExtension) OR extDidntMatch THEN LOOP; -- to get here, either this file doesnt have an extension, or else ext = NIL, i.e. consider all files. This test says that if an extension is required, then it must be the case that this file does not have one, so reject it.
            ENDLOOP;
        state.scratchText.length ← len;
        RETURN[state.scratchText];
        ENDLOOP;
    };
LoadCache: PROC = {
    -- Runs the file generator to exhaustion with "zzz" as the unknown, discarding the
    -- candidates, and then releases the generator. FileGenerate fills in fileCache as it goes.
    [] ← SetUpGenerator[unknown: "zzz", defaultExt: NIL, abort: NIL, confirm: NIL, inform: NIL, modes: NIL];
    UNTIL FileGenerate[fileGenerator] = NIL DO
        NULL; -- the candidates themselves are of no interest.
        ENDLOOP;
    ReleaseGenerator[]
    };
-- cloned from userexec to make spell totally independent.
CheckForFile: PROC [file: ROPE] RETURNS [found: BOOLEAN] = TRUSTED {
    -- Returns TRUE if Directory.Lookup on file completes without raising Directory.Error.
    -- An empty name is reported as not found without consulting the directory.
    fName: LONG STRING;
    IF Rope.Length[file] = 0 THEN RETURN[FALSE]; -- tested first, so that no text is built for an empty name.
    fName ← LOOPHOLE[Rope.ToRefText[file]];
    found ← TRUE;
    [] ← Directory.Lookup[fileName: fName, permissions: Directory.ignore
        ! Directory.Error =>
            {found ← FALSE; CONTINUE}
        ];
    };
-- Module start code.
-- Fork LoadCache as a detached process, so that the file cache is filled in without holding up the start of this module.
TRUSTED {Process.Detach[FORK LoadCache[]]};
-- Register SetUp so that assumeFirstCharCorrect follows the user profile.
-- NOTE(review): presumably SetUp is also called once at registration time -- confirm against UserProfile.
UserProfile.CallWhenProfileChanges[SetUp];
END.
-- June 21, 1982 1:09 pm: made patternMatch one of the enumerated type of spelling classes.