-- File: KeyNoteCommandToolImpl.mesa
-- Copyright © 1985, 1987 by Xerox Corporation. All rights reserved.
-- Jack Kent February 24, 1988 10:59:34 am PST
-- Contents: Implementation of the KeyNote Commander commands (KeyBD, KeyWM, KeyStop, KeyRegister, KeyList, KeyUnregister)
DIRECTORY
Ascii USING [Letter],
Basics,
Commander USING [CommandProc, Register, Handle],
CommandTool USING [ArgumentVector, Parse, ParseToList],
Convert,
List,
FS,
IO,
Real,
RefText,
Rope,
KeyNote,
KeyNotePseudoServer,
SymTab;
KeyNoteCommandToolImpl: CEDAR PROGRAM
IMPORTS Ascii, Commander, CommandTool, Convert, FS, List, IO, Real, RefText, Rope, SymTab, KeyNote, KeyNotePseudoServer = {
-- Local abbreviation for Rope.ROPE, used throughout this module.
ROPE: TYPE = Rope.ROPE;
-- KeyBD TokenDatabaseName pattern stopListFileName
-- Reads the stop-list file into a symbol table, then opens the named KeyNote database with
-- that table as client data for the word verifier, and finally closes the database again.
-- Returns $Failure with a usage message when fewer than three arguments are supplied, and
-- $Failure with an explanatory msg when KeyNote.OpenDatabase raises KeyNote.Error.
KeyBD: Commander.CommandProc = {
  argv: CommandTool.ArgumentVector ← CommandTool.Parse[cmd];
  databaseName: ROPE;
  fileNamesToMatch: ROPE;
  stopListFileName: ROPE;
  stream: IO.STREAM;
  block: REF TEXT;
  handle: KeyNote.Handle;

  -- Word verifier: returns FALSE for a word found in the stop-list table (clientData),
  -- otherwise returns TRUE only when every character of the word is a letter.
  wvp: KeyNote.WordVerifierProc = {
    EachCharInWord: Rope.ActionType = {
      -- ask Rope.Map to stop at the first character that is not a letter
      quit ← ~Ascii.Letter[ch: c];
    };
    IF SymTab.Fetch[x: NARROW[clientData], key: word].found THEN RETURN[FALSE];
    RETURN[~Rope.Map[base: word, action: EachCharInWord]];
  };

  -- argv[0] is the command name, so three arguments require argc >= 4
  IF argv.argc < 4 THEN GO TO Usage;
  databaseName ← argv[1];
  fileNamesToMatch ← argv[2];
  stopListFileName ← argv[3];

  -- read the whole stop list into memory
  stream ← FS.StreamOpen[fileName: stopListFileName];
  block ← RefText.New[FS.FileInfo[stopListFileName].bytes];
  [] ← IO.GetBlock[self: stream, block: block];
  -- the stream is no longer needed once the block is filled; closing it here keeps it
  -- from being left open when OpenDatabase fails below
  IO.Close[self: stream];

  -- now get a handle on things
  handle ← KeyNote.OpenDatabase[databaseName: databaseName, fileNamesToMatch: NEW[KeyNote.FileNamesToMatchObject ← [pattern: fileNamesToMatch]], wordVerifierProc: wvp, tokenRelevanceThreshhold: 2, clientDataForVerifierProc: RopeToSymTab[RefText.TrustTextAsRope[text: block]] ! KeyNote.Error => { msg ← Rope.Cat[ "Problems during database building", " ",Convert.RopeFromAtom[ec], ":", explanation]; GO TO Die}
    ];
  KeyNote.CloseDatabase[handle];
  EXITS
    Die => result ← $Failure;
    Usage => RETURN[$Failure, "Usage: KeyBD TokenDatabaseName pattern stopListFileName"];
  };
-- KeyWM {switch} TokenDatabaseName (token)+
-- Looks the given tokens up in a KeyNote database and prints the matching files.
-- Switches (each argument starting with '-' is scanned character by character):
--   w suppresses the overall weight column, t suppresses the per-token columns,
--   q takes the following argument as the display threshold (a REAL in [0..1]).
-- The database name is the first argument that starts with '/' or '['; every argument
-- after it is treated as a token. Returns $Failure with a usage message when no database
-- name is given, and $Failure with msg set for a bad threshold or a KeyNote.Error.
KeyWM: Commander.CommandProc = {
  databaseName: ROPE;
  handle: KeyNote.Handle;
  resultList: KeyNote.ResultList;
  displayOverAllWeight: BOOLEAN ← TRUE;
  displayTokensInEachFile: BOOLEAN ← TRUE;
  displayThreshhold: REAL ← 0.10;
  argv: CommandTool.ArgumentVector ← CommandTool.Parse[cmd: cmd];

  -- Returns the items of relevantAndIrrelevantFiles whose overAllWeight is not less than threshHold.
  EliminateIrrelevantFiles: PROC [relevantAndIrrelevantFiles: KeyNote.ResultList, threshHold: REAL] RETURNS [ relevantFiles: KeyNote.ResultList] = {
    RemoveIrrelevantItem: PROC [ item: REF ANY, list: List.LORA] = {
      IF Real.CompareREAL[NARROW[item, REF KeyNote.ResultObject].overAllWeight, threshHold] # less THEN
        relevantFiles ← List.Nconc1[list: relevantFiles, ref: item];
    };
    List.Map[relevantAndIrrelevantFiles, RemoveIrrelevantItem];
  };

  -- Applies every switch letter found in arg; argNext supplies the value for the q switch.
  -- Characters that are not switch letters (including the leading '-') are ignored.
  ProcessSwitches: PROC [arg: ROPE, argNext: ROPE] = {
    FOR index: INT IN [0..Rope.Length[arg]) DO
      SELECT Rope.Fetch[arg, index] FROM
        'w, 'W => {displayOverAllWeight ← FALSE };
        't, 'T => {displayTokensInEachFile ← FALSE };
        'q, 'Q=> {
          displayThreshhold ← Convert.RealFromRope[r:argNext];
          };
        ENDCASE;
      ENDLOOP;
  };

  FOR i: NAT IN [1..argv.argc) DO
    arg: ROPE = argv[i];
    argNext: ROPE = IF (argv.argc-1)=i THEN NIL ELSE argv[i+1];
    -- an empty argument has no first character to dispatch on
    IF Rope.Length[arg] = 0 THEN LOOP;
    SELECT Rope.Fetch[arg, 0] FROM
      '- => ProcessSwitches[arg, argNext! Convert.Error => { msg ← "illegal threshold"; GO TO Die} ];
      '/, '[ => {
        -- recognize server by [mumble or /mumble
        databaseName ← arg;
        -- everything after the database name is a token to look up
        FOR j: NAT IN [i+1..argv.argc) DO
          resultList ← CONS[argv[j], resultList];
          ENDLOOP;
        EXIT;
        };
      ENDCASE =>NULL;
    ENDLOOP;

  -- without a database name there is nothing to open
  IF databaseName = NIL THEN GO TO Usage;

  IF Real.CompareREAL[displayThreshhold,1.0] = greater OR
    Real.CompareREAL[displayThreshhold,0.0] = less
    THEN {
      msg ← "threshold must be between 0 and 1"; GO TO Die };

  -- now get a handle on things
  handle ← KeyNote.OpenDatabase[databaseName: databaseName ! KeyNote.Error => { msg ← "Problems With KeyNote Database"; GO TO Die} ];
  resultList ← KeyNote.FindDocumentsFromWords[db: handle, ropeList: resultList ];
  KeyNote.CloseDatabase[handle];

  IF resultList#NIL THEN {
    -- chop off appropriate part of result list: keep files whose weight is at least
    -- displayThreshhold times the weight of the first result
    resultList ← EliminateIrrelevantFiles[resultList, displayThreshhold * NARROW[resultList.first, REF KeyNote.ResultObject].overAllWeight];
    OutputNames[cmd, resultList, displayOverAllWeight, displayTokensInEachFile];
    };
  EXITS
    Die => result ← $Failure;
    Usage => RETURN[$Failure, "Usage: KeyWM {switch} TokenDatabaseName "];
  };
-- KeyStop stopListCutOff pattern stopListFileName
-- Asks KeyNote.BuildStopList for the stop words of the files matching pattern and writes
-- them, separated by spaces, to a newly created file named stopListFileName.
-- Returns $Failure with a usage message when fewer than three arguments are supplied or
-- when stopListCutOff is not an integer.
KeyStop: Commander.CommandProc = {
  argv: CommandTool.ArgumentVector ← CommandTool.Parse[cmd];
  stopListCutOff: INT;
  fileNamesToMatch: ROPE;
  stopListFileName: ROPE;
  wordList: KeyNote.ResultList;
  stream: IO.STREAM;

  -- argv[0] is the command name, so three arguments require argc >= 4
  IF argv.argc < 4 THEN GO TO Usage;
  stopListCutOff ← Convert.IntFromRope[argv[1] ! Convert.Error => GO TO Usage];
  fileNamesToMatch ← argv[2];
  stopListFileName ← argv[3];

  wordList ← KeyNote.BuildStopList[fileNamesToMatch: NEW[KeyNote.FileNamesToMatchObject ← [pattern: fileNamesToMatch]], stopListCutOff: stopListCutOff];
  stream ← FS.StreamOpen[fileName: stopListFileName, accessOptions: create ];
  -- PutRope writes the rope directly; no LOOPHOLE to REF TEXT is needed and an empty
  -- word list (NIL rope) is handled
  IO.PutRope[stream, ListToRope[wordList]];
  IO.Close[self: stream];
  EXITS
    Usage => RETURN[$Failure, "Usage: KeyStop stopListCutOff pattern stopListFileName"];
  };
-- Name of the text file holding the registered KeyNote databases: KeyRegister appends to
-- it, KeyList prints it, and KeyUnregister rewrites it.
keyNoteRegistry: ROPE ← "keyNoteRegistry.txt";
-- Commands to register, list, and unregister KeyNote databases
-- KeyRegister keyNoteDatabase {Description of keyNoteDatabase}
-- Appends one line to the registry file: the command arguments, each preceded by a space
-- (as produced by ListToRope), followed by a newline.
-- Returns $Failure with a usage message when no arguments are supplied.
-- TRUSTED because the argument list is LOOPHOLEd to List.LORA.
KeyRegister: Commander.CommandProc = TRUSTED {
  args: List.LORA = LOOPHOLE[CommandTool.ParseToList[cmd].list, List.LORA];
  dataBaseInfo: ROPE;
  stream: IO.STREAM;

  -- with no arguments there is nothing to register; do not append a blank line
  IF args = NIL THEN GO TO Usage;

  -- maybe make sure that comment has no carriage returns
  dataBaseInfo ← Rope.Concat[ListToRope[args],"\n"];
  stream ← FS.StreamOpen[fileName: keyNoteRegistry, accessOptions: append ];
  IO.PutRope[stream, dataBaseInfo];
  IO.Flush[self: stream];
  IO.Close[self: stream];
  EXITS
    Usage => RETURN[$Failure, "KeyRegister keyNoteDatabase {Description of keyNoteDatabase}"];
  };
-- KeyList
-- Copies the contents of the registry file to the command's output stream.
-- Returns $Failure, with the FS explanation in msg, when the registry file cannot be
-- examined or opened (for example when nothing has been registered yet).
KeyList: Commander.CommandProc = {
  ENABLE FS.Error => {msg ← error.explanation; GO TO Die};
  block: REF TEXT;
  stream: IO.STREAM;

  -- size the buffer from the file length so one GetBlock reads the whole registry
  block ← RefText.New[FS.FileInfo[keyNoteRegistry].bytes];
  stream ← FS.StreamOpen[fileName: keyNoteRegistry, accessOptions: read];
  [] ← IO.GetBlock[self: stream, block: block];
  IO.Close[self: stream];
  IO.PutBlock[self: cmd.out, block: block];
  EXITS
    Die => result ← $Failure;
  };
-- KeyUnregister TokenDatabaseName
-- Removes from the registry file the line that contains the first occurrence of the given
-- database name, then rewrites and truncates the file.
-- Returns $Failure with a usage message when no name is supplied, and $Failure with msg
-- set when the name does not occur in the registry.
-- NOTE(review): the name is located with a plain substring search, so it also matches
-- inside a longer database name or inside a description; confirm whether that is acceptable.
KeyUnregister: Commander.CommandProc = {
  argv: CommandTool.ArgumentVector ← CommandTool.Parse[cmd];
  databaseToBeUnregistered: ROPE;
  block: REF TEXT;
  stream: IO.STREAM;
  fileNames: ROPE;
  startPos: INT;
  lineStart: INT;
  lineEnd: INT;

  -- argv[0] is the command name, so one argument requires argc >= 2
  IF argv.argc < 2 THEN GO TO Usage;
  databaseToBeUnregistered ← argv[1];

  block ← RefText.New[FS.FileInfo[keyNoteRegistry].bytes];
  stream ← FS.StreamOpen[fileName: keyNoteRegistry, accessOptions: write];
  [] ← IO.GetBlock[self: stream, block: block];
  fileNames ← RefText.TrustTextAsRope[block];

  startPos ← Rope.Find[fileNames, databaseToBeUnregistered];
  IF startPos = -1 THEN {
    -- do not leave the registry open for writing on the failure path
    IO.Close[self: stream];
    msg ← "Database Not Registered";
    GO TO Die};

  -- back up to the first character of the line containing the match
  lineStart ← startPos;
  WHILE lineStart > 0 AND Rope.Fetch[fileNames, lineStart-1] # '\n DO
    lineStart ← lineStart - 1;
    ENDLOOP;
  -- look for carriage return; the removed span includes it, or runs to the end of the
  -- text when the last line has no terminator
  lineEnd ← Rope.Find[fileNames, "\n", startPos];
  lineEnd ← IF lineEnd = -1 THEN Rope.Length[fileNames] ELSE lineEnd + 1;

  fileNames ← Rope.Concat[
    Rope.Substr[base: fileNames, start: 0, len: lineStart],
    Rope.Substr[base: fileNames, start: lineEnd]
    ];

  -- overwrite from the start, then cut the file down to the new (shorter) length
  IO.SetIndex[stream, 0];
  IO.PutRope[stream, fileNames];
  IO.SetLength[stream, Rope.Length[fileNames]];
  IO.Flush[self: stream];
  IO.Close[self: stream];
  EXITS
    Die => result ← $Failure;
    Usage => RETURN[$Failure, "Usage: KeyUnregister TokenDatabaseName"];
  };
-- Private procedures
-- Concatenates the ropes in listOfRope, in list order, into one rope.
-- When spaceBetweenTokens is TRUE a single space is placed BEFORE every item, including
-- the first, so the result of a non-empty list starts with a space.
-- Every list element must be a ROPE (NARROW fails otherwise). An empty list yields NIL.
ListToRope: PRIVATE PROC [listOfRope: List.LORA, spaceBetweenTokens: BOOLEAN ← TRUE] RETURNS [bigRope: ROPE] = {
  AppendItem: PROC [item: REF ANY, list: List.LORA] = {
    token: ROPE = NARROW[item];
    bigRope ← IF spaceBetweenTokens
      THEN Rope.Cat[bigRope, " ", token]
      ELSE Rope.Concat[bigRope, token];
  };
  List.Map[list: listOfRope, proc: AppendItem];
  };
-- Builds a symbol table whose keys are the tokens of bigRope.
-- Tokens are obtained with IO.GetTokenRope (default token rules) from a rope input stream;
-- the table is created with case: FALSE and each key is stored with a NIL value.
RopeToSymTab: PRIVATE PROC [bigRope: ROPE] RETURNS [table: SymTab.Ref] = {
  stream: IO.STREAM ← IO.RIS[rope: bigRope];
  table ← SymTab.Create[case: FALSE];
  DO
    -- IO.EndOfStream marks the end of the rope and terminates the loop
    token: ROPE ← IO.GetTokenRope[stream: stream ! IO.EndOfStream => EXIT].token;
    [] ← SymTab.Store[x: table, key: token, val: NIL];
    ENDLOOP;
  };
-- Formats the result list and writes it to cmd.out, one line per file.
-- Each line holds the file name (after KeyNotePseudoServer prefix substitution), then the
-- overall weight when displayOverAllWeight is TRUE, then for every token of the file its
-- text, frequency and weight when displayTokensInEachFile is TRUE.
-- Every element of results must be a REF KeyNote.ResultObject.
-- The whole report is accumulated in one rope and written with a single PutRope.
OutputNames: PRIVATE PROC [cmd: Commander.Handle, results: List.LORA, displayOverAllWeight: BOOLEAN ← TRUE, displayTokensInEachFile: BOOLEAN ← TRUE] = {
  weightPrecision: Convert.RealPrecision = 4;
  fileInfo: ROPE; -- accumulated report text

  -- Appends the report line for one file to fileInfo.
  ExtractAppropriateDataFromItem: PROC [file : REF ANY, fileList:List.LORA] = {
    basicFileInfo: REF KeyNote.ResultObject ← NARROW[file];

    -- Appends "token frequency weight " for one token of the current file.
    ExtractAppropriateDataForEachTokenFromItem: PROC [token : REF ANY, tokenList:List.LORA] = {
      tokenPerFileInfo: REF KeyNote.TokenInfoObject ← NARROW[token];
      fileInfo ← Rope.Concat[ fileInfo, Rope.Cat[tokenPerFileInfo.token, " ", Convert.RopeFromInt[from: tokenPerFileInfo.frequency]," ", Convert.RopeFromReal[from: tokenPerFileInfo.weight, precision: weightPrecision]]];
      fileInfo ← Rope.Cat[fileInfo, " "];
    };

    fileInfo ← Rope.Cat[fileInfo, KeyNotePseudoServer.SubstituteLongestPrefixWithPseudoServer[basicFileInfo.fileName], " "];
    IF displayOverAllWeight THEN fileInfo ← Rope.Cat[fileInfo, Convert.RopeFromReal[from: basicFileInfo.overAllWeight, precision: weightPrecision], " "];
    IF displayTokensInEachFile THEN List.Map[list: basicFileInfo.tokenInfoList, proc: ExtractAppropriateDataForEachTokenFromItem];
    fileInfo ← Rope.Concat[fileInfo, "\n"];
  };

  List.Map[list: results, proc: ExtractAppropriateDataFromItem];
  IO.PutRope[cmd.out, fileInfo];
  };
-- Module start code: make each KeyNote command known to the Commander.
-- Building and querying a token database
Commander.Register[key: "KeyBD", proc: KeyBD, doc: "KeyBD TokenDatabaseName pattern StopListName"];
Commander.Register[key: "KeyWM", proc: KeyWM, doc: "KeyWM {switch} TokenDatabaseName (token)+
switch = -t: suppress token display, -w: suppress aggregate weight display, -q threshold: display only files with weight greater than the best match * threshold "];
-- Stop-list generation
Commander.Register[key: "KeyStop", proc: KeyStop, doc: "KeyStop stopListCutOff pattern stopListFileName"];
-- Registry maintenance
Commander.Register[key: "KeyRegister", proc: KeyRegister, doc: "KeyRegister keyNoteDatabase {Description of keyNoteDatabase}"];
Commander.Register[key: "KeyUnregister", proc: KeyUnregister, doc: "KeyUnregister keyNoteDatabase"];
Commander.Register[key: "KeyList", proc: KeyList, doc: "KeyList"];
}.