<> <> <> DIRECTORY Generator USING [CreateGenerator, ReStart, Terminate, Generate, Handle], List USING [DReverse], Rope USING [Find, Fetch, Match, Equal, IsEmpty, Length, Cat, Compare, Flatten, FromRefText, Letter, ROPE, Text], RopeInline USING [InlineFlatten], SafeStorage USING [NarrowRefFault], ShowTime USING [Microseconds, GetMark, SinceMark], Spell USING [ROPE, Modes, ModesRecord, SpellingList, SpellingGenerator, AbortProc, InformProc, ConfirmProc, Filter, CorrectionClass, SpellingGeneratorRecord], UserProfile USING [CallWhenProfileChanges, ProfileChangedProc, Token, Number, Boolean] ; SpellImpl: CEDAR MONITOR IMPORTS List, Rope, RopeInline, ShowTime, UserProfile, Generator, SafeStorage EXPORTS Spell SHARES Rope -- to be able to say text.length = BEGIN OPEN Spell; <> fastypeflg: BOOLEAN = TRUE; Percentage: TYPE = REAL; howClose: REAL = .7; <> defaultModes: PUBLIC Modes _ NEW[ModesRecord _ [inform: NULL, confirm: NULL, disabled: NULL, timeout: NULL, defaultConfirm: NULL ]]; SetModes: UserProfile.ProfileChangedProc = CHECKED { GetCorrectionClass: SAFE PROC [key: ROPE, default: ROPE, rope: ROPE _ NIL] RETURNS[CorrectionClass] = { r: ROPE = IF rope # NIL THEN rope ELSE UserProfile.Token[key, default]; FOR l: LIST OF RECORD[rope: ROPE, class: CorrectionClass] _ LIST[["never", never], ["someMistakes", someMistakes], ["allAccountedFor", allAccountedFor], ["caseError", caseError], ["patternMatch", patternMatch], ["always", always]], l.rest UNTIL l = NIL DO IF Rope.Equal[r, l.first.rope, FALSE] THEN RETURN[l.first.class]; ENDLOOP; IF rope = NIL THEN RETURN[GetCorrectionClass[key, default, default]]; -- user has somethig else in the profile, return the default. PRINT A MESSAGE? ERROR; -- shouldnt happen }; modes: REF ModesRecord; i: INT; TRUSTED {modes _ LOOPHOLE[defaultModes]}; -- defaultmodes is REF READONLY modes.inform _ GetCorrectionClass["Spell.inform", "allAccountedFor"]; modes.confirm _ GetCorrectionClass["Spell.confirm", "allAccountedFor"]; modes.disabled _ GetCorrectionClass["Spell.disabled", "never"]; modes.timeout _ UserProfile.Number["Spell.Timeout", -1]; modes.defaultConfirm _ UserProfile.Boolean["Spell.defaultConfirm", FALSE]; i _ UserProfile.Number["Spell.giveUpAfter" , -1]; IF i > 0 THEN giveUpAfter _ (i * 1000) ELSE giveUpAfter _ 0; -- giveUpAfter is in milliseconds in profile, but need it in microseconds to compare using ShowTime.SinceMark. }; <> <> IsAPattern: PUBLIC PROC [unknown: ROPE] RETURNS[BOOL] = { i: INT _ 0; WHILE (i _ Rope.Find[s1: unknown, s2: "*", pos1: i]) # -1 DO IF i = 0 OR Rope.Fetch[unknown, i - 1] # '' THEN RETURN[TRUE]; i _ i + 1; ENDLOOP; RETURN[FALSE]; }; -- of IsAPattern Match: PROC [pattern, object: ROPE] RETURNS[BOOL] = { RETURN[Rope.Match[ pattern: pattern, object: object, case: FALSE]]; }; CaseEqual: PROCEDURE [x, y: ROPE] RETURNS[BOOLEAN] = INLINE { RETURN[Rope.Compare[x, y, FALSE] = equal]; }; -- of CaseEqual <> GetTheOne: PUBLIC PROCEDURE [ unknown: ROPE, spellingList: SpellingList _ NIL, generator: SpellingGenerator _ NIL, abort: AbortProc _ NIL,-- abort = NIL => never abort. confirm: ConfirmProc _ NIL, -- confirm = NIL => not confirmed. inform: InformProc _ NIL,-- inform = NIL => no output. filter: Filter _ NIL, modes: Modes _ NIL ] RETURNS [ROPE] = { correct, r: ROPE; correctionClass: CorrectionClass; IF Rope.IsEmpty[unknown] THEN RETURN[NIL]; IF modes = NIL THEN modes _ defaultModes; IF IsAPattern[unknown] THEN {l: LIST OF ROPE; IF modes.disabled >= patternMatch THEN RETURN[NIL]; l _ GetMatches[unknown: unknown, spellingList: spellingList, generator: generator, abort: abort, filter: filter]; IF l # NIL AND l.rest = NIL THEN RETURN[l.first] ELSE RETURN[NIL] }; IF Rope.Length[unknown] = 0 OR modes.disabled = always THEN RETURN[NIL]; [correct, correctionClass] _ Choose[unknown: unknown, spellingList: spellingList, generator: generator, filter: filter, abort: abort]; IF correct = NIL OR correctionClass <= modes.disabled OR Rope.Equal[correct, unknown] THEN RETURN[NIL]; r _ Rope.Cat[unknown, " -> ", correct]; IF correctionClass <= modes.confirm THEN RETURN[IF confirm # NIL AND confirm[msg: r, timeout: modes.timeout, defaultConfirm: modes.defaultConfirm] THEN correct ELSE NIL]; IF correctionClass <= modes.inform AND inform # NIL THEN inform[r]; RETURN[correct]; }; GetMatchingList: PUBLIC PROCEDURE [ pattern: ROPE, spellingList: SpellingList _ NIL, generator: SpellingGenerator _ NIL, abort: AbortProc _ NIL, confirm: ConfirmProc _ NIL, inform: InformProc _ NIL, filter: Filter _ NIL, modes: Modes _ NIL ] RETURNS [LIST OF ROPE] = { values: LIST OF ROPE; IF Rope.IsEmpty[pattern] THEN RETURN[NIL]; IF modes = NIL THEN modes _ defaultModes; IF ~IsAPattern[pattern] THEN {x: ROPE = GetTheOne[unknown: pattern, spellingList: spellingList, generator: generator, abort: abort, confirm: confirm, inform: inform, filter: filter, modes: modes]; IF x # NIL THEN RETURN[LIST[x]] ELSE RETURN[NIL]; }; IF modes.disabled >= patternMatch THEN RETURN[NIL]; values _ GetMatches[unknown: pattern, spellingList: spellingList, generator: generator, abort: abort, filter: filter]; IF values = NIL THEN RETURN[NIL]; IF modes.confirm >= patternMatch THEN {r: ROPE; FOR lst: LIST OF ROPE _ values, lst.rest UNTIL lst = NIL DO r _ Rope.Cat[r, lst.first, " "]; ENDLOOP; IF confirm # NIL AND confirm[msg: Rope.Cat[pattern, " -> ", r], timeout: modes.timeout, defaultConfirm: modes.defaultConfirm] THEN RETURN[values] ELSE RETURN[NIL]; }; IF inform # NIL AND modes.inform >= patternMatch THEN {r: ROPE; FOR lst: LIST OF ROPE _ values, lst.rest UNTIL lst = NIL DO r _ Rope.Cat[r, lst.first, " "]; ENDLOOP; inform[msg: Rope.Cat[pattern, " -> ", r]] ; }; RETURN[values]; }; <> giveUpAfter: ShowTime.Microseconds; -- microSeconds startedAt: ShowTime.Microseconds; Choose: PROCEDURE[ unknown: ROPE, spellingList: SpellingList, generator: SpellingGenerator, filter: Filter _ NIL, abort: AbortProc _ NIL ] RETURNS [val: ROPE, correctionClass: CorrectionClass] = { private: REF SpellingGeneratorPrivateRecord _ IF generator = NIL THEN NIL ELSE generator.private; Choose0: PROC = { lst: LIST OF ROPE _ NIL; candidate: REF ANY; tword: Rope.Text; nctword, ncxword, sc: NAT; close: BOOLEAN; howClose1, tem: Percentage _ howClose; MakeRope: PROC [candidate: REF ANY] RETURNS [text: Rope.ROPE] = { WITH candidate SELECT FROM t: REF TEXT => RETURN[Rope.FromRefText[t]]; r: Rope.ROPE => RETURN[r]; ENDCASE => ERROR; -- should already have determined that it is either a REF TEXT or ROPE }; IF giveUpAfter > 0 THEN startedAt _ ShowTime.GetMark[]; DO IF abort # NIL THEN abort[]; IF giveUpAfter > 0 AND ShowTime.SinceMark[from: startedAt] > giveUpAfter THEN EXIT; IF generator # NIL THEN TRUSTED { candidate _ private.generate[generator]; IF candidate = NIL THEN EXIT; WITH candidate SELECT FROM t: REF TEXT => tword _ LOOPHOLE[t, Rope.Text]; -- will be copied if it is going to be retained. r: Rope.Text => tword _ r; r: Rope.ROPE => tword _ RopeInline.InlineFlatten[r]; ENDCASE => ERROR SafeStorage.NarrowRefFault[ref: candidate, targetType: CODE[Rope.ROPE]]; } ELSE IF spellingList = NIL THEN EXIT ELSE { tword _ RopeInline.InlineFlatten[spellingList.first]; candidate _ tword; spellingList _ spellingList.rest; }; nctword _ tword.length; ncxword _ xword.length; IF (IF nctword > ncxword THEN (ncxword + 0.0) / nctword < howClose1 ELSE (nctword + 0.0) / (ncxword - ndbls) < howClose1) THEN LOOP; -- Checks to see if test word and unknown word differ sufficiently in number of characters so as to make it unnecessary to even call score. First case is where test word is longer than xword. If number of characters in xword, divided by number of characters in twest word is less than REL, then dont bother. Second case where xword is longer than test word, must allow for possibility of dobuled characters. [close, sc, ncxword] _ Score[xword, tword]; SELECT TRUE FROM ~ close => LOOP; filter # NIL AND ~filter[candidateRope: tword, unknown: unknown] => LOOP; sc = 0 => { val _ MakeRope[candidate]; IF CaseEqual[val, unknown] THEN correctionClass _ caseError ELSE correctionClass _ allAccountedFor; RETURN; }; (tem _ Choose1[xword.length, tword.length, sc]) > howClose1 => { lst _ CONS[MakeRope[candidate], NIL]; howClose1 _ tem }; sc = howClose1 => lst _ CONS[MakeRope[candidate], lst]; -- might be something better later on ENDCASE; ENDLOOP; -- end of main loop IF lst # NIL AND lst.rest = NIL THEN { val _ lst.first; correctionClass _ someMistakes; }; }; -- of Choose0 isAPattern: BOOL = IsAPattern[unknown]; xword: Rope.Text ; c: CHARACTER; ndbls: NAT _ 0; xword _ RopeInline.InlineFlatten[unknown]; IF unknown = NIL OR (spellingList = NIL AND generator = NIL) THEN RETURN [NIL, never]; c _ xword[0]; FOR i: NAT IN [1 ..xword.length) DO IF c = xword[i] THEN ndbls _ ndbls + 1; c _ xword[i]; -- computes number of double characters in string. ENDLOOP; IF generator # NIL THEN {IF private.initialize # NIL THEN private.initialize[generator]}; Choose0[ ! ABORTED => IF generator # NIL AND private.terminate # NIL THEN private.terminate[generator]; UNWIND => IF generator # NIL AND private.terminate # NIL THEN private.terminate[generator]; ]; IF generator # NIL AND private.terminate # NIL THEN private.terminate[generator]; }; -- of Choose Choose1: PROCEDURE[a, b, sc: NAT] RETURNS [REAL] = INLINE { <> <> <> <> RETURN[(a + b - 2 * sc + 0.0) / (a + b)]; }; -- of Choose1 Score: PROCEDURE[xword, tword: Rope.Text] RETURNS[close: BOOLEAN, mistakes: NAT, ncxword: NAT] = { xpos, tpos: NAT _ 0; -- current character position in xword,tword xc, tc: CHARACTER; -- current character in xword, tword, equal to xword[xpos],tword[tpos] but used to improve readability xclast: CHARACTER; -- last character looked at in xword. used for double character detection. empty: NAT = LAST[NAT]; --indicates a buffer is empty x1, x2, t1, t2: NAT _ empty; -- position of characters that did not match in xword, tword respectively. These are effectively single character buffers. n, ntrans: NAT _ 0; -- number of mistakes, number of transpositions CheckCharBuffer: PROCEDURE[currchar: CHARACTER, word: Rope.Text, buffer, currpos: NAT] RETURNS[BOOLEAN] = INLINE { IF buffer # empty AND (currchar = word[buffer] OR currchar = word[buffer] + 32 AND currchar >= 'a OR word[buffer] = currchar + 32 AND word[buffer] >= 'a) THEN {IF currpos -buffer <= 3 THEN ntrans _ ntrans + 1 -- count it as a transposition ELSE n _ n + 1; -- shifted more than two characters, count it as an error RETURN[TRUE] }; RETURN[FALSE]; }; -- of CheckCharBuffer NotAlpha: PROC[word: Rope.Text, buffer: NAT] RETURNS[BOOLEAN] = INLINE {RETURN[NOT Rope.Letter[word[buffer]]]; }; ncxword _ xword.length; DO -- main body of score is one big loop SELECT TRUE FROM xpos = xword.length => IF tpos = tword.length THEN EXIT ELSE tc _ tword[tpos]; --otherwise drop thru and check single character buffers tpos = tword.length => xc _ xword[xpos]; -- drop through to check xc against tbuffers (xc _ xword[xpos]) = (tc _ tword[tpos]) OR (SELECT tc FROM IN['!..'/] => xc = tc + 16, --!../ to corresponding unshifted characters IN['1..'?] => xc = tc - 16, -- 1..? to corresponding shifted characters ENDCASE => FALSE) OR xc = tc + 32 AND xc >= 'a -- xc is lower case equivalent of tc OR tc = xc + 32 AND tc >= 'a -- tc is lower case equivalent of xc OR tc = '1 AND (xc = 'L OR xc = 'l) -- misreading of manual => {xpos _ xpos + 1; tpos _ tpos + 1; xclast _ xc; LOOP}; ENDCASE; <> SELECT TRUE FROM xpos = xword.length => NULL; CheckCharBuffer[buffer: t2, currchar: xc, word: tword, currpos: tpos] => -- character encountered in tword before xword, e.g. the P in IPRNT vs PRINT. The case of RPINT vs PRINT is handled specially without ever going to the buffers {t2 _ empty; xpos _ xpos + 1; xclast _ xc; LOOP}; CheckCharBuffer[buffer: t1, currchar: xc, word: tword, currpos: tpos] => {IF t2 # empty THEN {t1 _ t2; t2 _ empty} ELSE t1 _ empty; xpos _ xpos + 1; xclast _ xc; LOOP; }; ENDCASE; <> SELECT TRUE FROM tpos = tword.length => NULL; CheckCharBuffer[buffer: x2, currchar: tc, word: xword, currpos: xpos] => -- character encountered in xword before tword, e.g. the I in IPRNT vs PRINT. {x2 _ empty; tpos _ tpos + 1; LOOP}; CheckCharBuffer[buffer: x1, currchar: tc, word: xword, currpos: xpos] => {IF x2 # empty THEN {x1 _ x2; x2 _ empty} ELSE x1 _ empty; tpos _ tpos + 1; <> LOOP; }; xpos + 1 < xword.length AND tpos + 1 < tword.length AND xc = tword[tpos + 1] AND tc = xword[xpos + 1] AND (tpos + 2 = tword.length OR tc # tword[tpos + 2]) => <> {xclast _ xword[xpos + 1]; xpos _ xpos + 2; tpos _ tpos + 2; ntrans _ ntrans + 1; LOOP; }; tword.length - tpos > xword.length - xpos => -- more characters left in tword than xword, remove character from tword SELECT TRUE FROM t1 = empty => {t1 _ tpos; -- save character in t1 tpos _ tpos + 1; LOOP}; t2 = empty => {t2 _ tpos; tpos _ tpos + 1; LOOP}; ENDCASE => GOTO Failed; -- too many errors. abort ENDCASE; IF xc = xclast OR (xpos + 1 < xword.length AND xc = xword[xpos + 1]) THEN -- double characters {xpos _ xpos + 1; ncxword _ ncxword - 1; } -- When computing value of score, want to divide number of mistakes by actual lenghth of word i.e. length minus number of doubled characters. Otehrwise, making a word longer by adding extra characters will make it closer, e.g. ZZZZZZ would correct to PP. ELSE {IF x1 = empty THEN x1 _ xpos ELSE IF x2 = empty THEN x2 _ xpos ELSE GOTO Failed; xpos _ xpos + 1; }; ENDLOOP; <> IF x2 # empty THEN { IF NotAlpha[xword, x2] THEN GOTO Failed; -- e.g. if unknown contains a $, and thats not accounted for, then shouldnt correct. n _ n +2; } ELSE IF x1 # empty THEN {IF NotAlpha[xword, x1] THEN GOTO Failed; n _ n +1; }; IF t2 # empty THEN {IF NotAlpha[tword, t2] THEN GOTO Failed; n _ n +2; } ELSE IF t1 # empty THEN {IF NotAlpha[tword, t1] THEN GOTO Failed; n _ n +1}; IF xpos = xword.length AND tpos = tword.length AND t1 # empty AND x1 # empty THEN {IF t1 = x1 OR t1 = x2 THEN n _ n-1; IF t2 # empty AND (t2 = x1 OR t2 = x2) THEN n _ n-1; }; -- check for substitution errors. subtracts one so net effect is only counted as one. RETURN[TRUE, (IF n <= 1 THEN n ELSE n + ntrans), ncxword] ; -- count transpositions as errors if more than one other mistake. (maybe should go to fractional errors?) EXITS Failed => RETURN[FALSE, 0, 0]; }; -- of Score <> GetMatches: PROCEDURE[ unknown: ROPE, spellingList: SpellingList _ NIL, generator: SpellingGenerator _ NIL, filter: Filter _ NIL, abort: AbortProc _ NIL ] RETURNS [lst: LIST OF ROPE] = { private: REF SpellingGeneratorPrivateRecord _ IF generator = NIL THEN NIL ELSE generator.private; GetMatches0: PROC = { candidate: REF ANY; tword: Rope.Text; MakeRope: PROC [candidate: REF ANY] RETURNS [text: Rope.ROPE] = { WITH candidate SELECT FROM t: REF TEXT => RETURN[Rope.FromRefText[t]]; r: Rope.ROPE => RETURN[r]; ENDCASE => ERROR; -- should already have determined that it is either a REF TEXT or ROPE }; <
> DO IF abort # NIL THEN abort[]; IF generator # NIL THEN { candidate _ private.generate[generator]; IF candidate = NIL THEN EXIT; WITH candidate SELECT FROM t: REF TEXT => TRUSTED {tword _ LOOPHOLE[t, Rope.Text]}; -- will be copied if it is going to be retained. r: Rope.Text => tword _ r; r: Rope.ROPE => tword _ Rope.Flatten[r]; ENDCASE => ERROR SafeStorage.NarrowRefFault[ref: candidate, targetType: CODE[Rope.ROPE]]; } ELSE IF spellingList = NIL THEN EXIT ELSE {candidate _ tword _ RopeInline.InlineFlatten[spellingList.first]; spellingList _ spellingList.rest}; IF Match[xword, tword] THEN {IF filter # NIL AND ~filter[candidateRope: tword, unknown: unknown] THEN LOOP; lst _ CONS[MakeRope[candidate], lst]; }; ENDLOOP; TRUSTED { IF lst # NIL AND lst.rest # NIL THEN lst _ LOOPHOLE[List.DReverse[LOOPHOLE[lst, LIST OF REF ANY]], LIST OF ROPE]; }; }; -- of GetMatches0 xword: Rope.Text; xword _ RopeInline.InlineFlatten[unknown]; IF unknown = NIL OR (spellingList = NIL AND generator = NIL) THEN RETURN [NIL]; IF generator # NIL THEN {IF private.initialize # NIL THEN private.initialize[generator]}; GetMatches0[]; IF generator # NIL THEN {IF private.terminate # NIL THEN private.terminate[generator]}; }; -- of GetMatches <> <> SpellingGeneratorPrivateRecord: PUBLIC TYPE = RECORD[ initialize: PROCEDURE[self: SpellingGenerator] _ NIL, generate: PROCEDURE [self: SpellingGenerator] RETURNS [candidate: REF ANY], terminate: PROCEDURE[self: SpellingGenerator] _ NIL -- to be called when finished. ]; GeneratorFromProcs: PUBLIC PROC [ initialize: PROCEDURE[self: SpellingGenerator] _ NIL, generate: PROCEDURE [self: SpellingGenerator] RETURNS [candidate: REF ANY], terminate: PROCEDURE[self: SpellingGenerator] _ NIL, -- to be called when finished. clientData: REF ANY _ NIL ] RETURNS [SpellingGenerator] = { RETURN[NEW[Spell.SpellingGeneratorRecord _ [ clientData: clientData, private: NEW[SpellingGeneratorPrivateRecord _ [ initialize: initialize, generate: generate, terminate: terminate ] ] ] ]]; }; GeneratorFromEnumerator: PUBLIC PROC [enumerator: PROC[self: Generator.Handle], clientData: REF ANY] RETURNS [SpellingGenerator] = { RETURN[NEW[Spell.SpellingGeneratorRecord _ [ clientData: NEW[EnumeratorRecord _ [enumerator: enumerator, origClientData: clientData]], private: NEW[SpellingGeneratorPrivateRecord _ [ initialize: Initialize, generate: Generate, terminate: Terminate ] ] ] ]] }; EnumeratorRecord: TYPE = RECORD [ enumerator: PROC [self: Generator.Handle], origClientData: REF ANY, -- the client data that was the argument to GeneratorFromEnumerator. Gets put into the clientData field of the generator. generator: Generator.Handle _ NIL -- not created until generator is actually used the first time. ]; Initialize: PROC [self: SpellingGenerator] = { state: REF EnumeratorRecord _ NARROW[self.clientData]; IF state.generator = NIL THEN state.generator _ Generator.CreateGenerator[state.enumerator, state.origClientData] ELSE Generator.ReStart[state.generator]; }; -- of Initialize Generate: PROC [self: SpellingGenerator] RETURNS[ROPE] = { state: REF EnumeratorRecord _ NARROW[self.clientData]; val: REF ANY = Generator.Generate[state.generator]; RETURN[IF val = state.generator THEN NIL ELSE NARROW[val, ROPE]]; }; Terminate: PROC [self: SpellingGenerator] = { state: REF EnumeratorRecord _ NARROW[self.clientData]; Generator.Terminate[state.generator]; }; <> UserProfile.CallWhenProfileChanges[SetModes]; END.