KeyNoteCommandToolImpl:
CEDAR
PROGRAM
IMPORTS Ascii, Commander, CommandTool, Convert, FS, List, IO, Real, RefText, Rope, SymTab, KeyNote, KeyNotePseudoServer = {
ROPE: TYPE = Rope.ROPE;
KeyBD: Commander.CommandProc = {
  -- Command: KeyBD TokenDatabaseName pattern stopListFileName
  -- Reads the stop list file, turns it into a symbol table, and opens (builds) the
  -- KeyNote database for the files matching pattern, using a word verifier that
  -- rejects every word found in the stop list.
  -- Returns $Failure with a usage message when fewer than three arguments are given,
  -- and $Failure with an explanatory msg when KeyNote.Error is raised.
  argv: CommandTool.ArgumentVector ← CommandTool.Parse[cmd];
  -- argv[0] is the command name, so three arguments means argc >= 4.
  -- Check before indexing so a short command line yields the usage message.
  IF argv.argc < 4 THEN GO TO Usage;
  {
    databaseName: ROPE ← argv[1];
    fileNamesToMatch: ROPE ← argv[2];
    stopListFileName: ROPE ← argv[3];
    wvp: KeyNote.WordVerifierProc = {
      -- clientData is the stop list symbol table produced by RopeToSymTab below.
      EachCharInWord: Rope.ActionType = {
        -- NOTE(review): quit is only ever assigned FALSE here, so Rope.Map is never
        -- asked to stop early. Presumably a non-letter was meant to set quit to TRUE.
        -- Behaviour is left unchanged; confirm the intent.
        IF Ascii.Letter[ch: c]
          THEN quit ← FALSE
        };
      IF SymTab.Fetch[x: NARROW[clientData], key: word].found THEN RETURN[FALSE];
      RETURN[~Rope.Map[base: word, action: EachCharInWord]];
      };
    -- now get a handle on things
    stream: IO.STREAM ← FS.StreamOpen[fileName: stopListFileName];
    block: REF TEXT ← RefText.New[FS.FileInfo[stopListFileName].bytes];
    handle: KeyNote.Handle;
    [] ← IO.GetBlock[self: stream, block: block];
    -- The stop list is now in block, so close the stream here. This way it is not
    -- left open when OpenDatabase fails and control leaves through Die.
    IO.Close[self: stream];
    handle ← KeyNote.OpenDatabase[
      databaseName: databaseName,
      fileNamesToMatch: NEW[KeyNote.FileNamesToMatchObject ← [pattern: fileNamesToMatch]],
      wordVerifierProc: wvp,
      tokenRelevanceThreshhold: 2,
      clientDataForVerifierProc: RopeToSymTab[RefText.TrustTextAsRope[text: block]]
      ! KeyNote.Error => {
        msg ← Rope.Cat[ "Problems during database building", " ",Convert.RopeFromAtom[ec], ":", explanation];
        GO TO Die}
      ];
    KeyNote.CloseDatabase[handle];
  };
  EXITS
    Die => result ← $Failure;
    Usage => RETURN[$Failure, "Usage: KeyBD TokenDatabaseName pattern stopListFileName"];
  };
KeyWM: Commander.CommandProc = {
  -- Command: KeyWM {switch} TokenDatabaseName word ...
  -- Looks the given words up in a KeyNote database and prints the matching files.
  -- Switches (letters following a leading '-):
  --   w or W : do not display the overall weight of each file
  --   t or T : do not display the per token information of each file
  --   q or Q : take the next argument as the display threshold (0 to 1)
  -- The database name is the first argument starting with '/ or '[ ; every argument
  -- after it is treated as a word to look up.
  -- Returns $Failure with a usage message when no database name is given, and
  -- $Failure with msg set for a bad threshold or a database error.
  databaseName: ROPE;
  handle: KeyNote.Handle;
  resultList: KeyNote.ResultList;
  displayOverAllWeight: BOOLEAN ← TRUE;
  displayTokensInEachFile: BOOLEAN ← TRUE;
  displayThreshhold: REAL ← 0.10;
  argv: CommandTool.ArgumentVector ← CommandTool.Parse[cmd: cmd];

  EliminateIrrelevantFiles: PROC [relevantAndIrrelevantFiles: KeyNote.ResultList, threshHold: REAL]
    RETURNS [relevantFiles: KeyNote.ResultList] = {
    -- Keeps, in order, every result whose overAllWeight is not less than threshHold.
    RemoveIrrelevantItem: PROC [item: REF ANY, list: List.LORA] = {
      IF Real.CompareREAL[NARROW[item, REF KeyNote.ResultObject].overAllWeight, threshHold] # less
        THEN relevantFiles ← List.Nconc1[list: relevantFiles, ref: item];
      };
    List.Map[relevantAndIrrelevantFiles, RemoveIrrelevantItem];
    };

  ProcessSwitches: PROC [arg: ROPE, argNext: ROPE] = {
    -- Scans every character of arg; characters that are not switches are ignored.
    -- argNext supplies the value for the q switch and may be NIL, in which case
    -- Convert.RealFromRope is expected to raise Convert.Error (handled by the caller).
    FOR index: INT IN [0..Rope.Length[arg]) DO
      SELECT Rope.Fetch[arg, index] FROM
        'w, 'W => {displayOverAllWeight ← FALSE };
        't, 'T => {displayTokensInEachFile ← FALSE };
        'q, 'Q => {
          displayThreshhold ← Convert.RealFromRope[r: argNext];
          };
        ENDCASE;
      ENDLOOP;
    };

  FOR i: NAT IN [1..argv.argc) DO
    arg: ROPE = argv[i];
    argNext: ROPE = IF (argv.argc-1)=i THEN NIL ELSE argv[i+1];
    -- An empty argument has no first character to dispatch on; skip it instead of
    -- faulting in Rope.Fetch.
    IF Rope.Length[arg] = 0 THEN LOOP;
    SELECT Rope.Fetch[arg, 0] FROM
      '- => ProcessSwitches[arg, argNext ! Convert.Error => { msg ← "illegal threshold"; GO TO Die} ];
      '/, '[ => {
        -- recognize server by [mumble or /mumble
        databaseName ← NARROW[argv[i]];
        -- Everything after the database name is a word to search for.
        FOR j: NAT IN [i+1..argv.argc) DO
          resultList ← CONS[argv[j], resultList];
          ENDLOOP;
        EXIT;
        };
      ENDCASE => NULL;
    ENDLOOP;

  -- Without a database name there is nothing to open; report usage rather than
  -- handing a NIL name to KeyNote.OpenDatabase.
  IF databaseName = NIL THEN GO TO Usage;

  IF Real.CompareREAL[displayThreshhold,1.0] = greater
    OR Real.CompareREAL[displayThreshhold,0.0] = less
    THEN {
      msg ← "threshold must be between 0 and 1"; GO TO Die };

  handle ← KeyNote.OpenDatabase[databaseName: databaseName ! KeyNote.Error => { msg ← "Problems With KeyNote Database"; GO TO Die} ];
  -- now get a handle on things
  resultList ← KeyNote.FindDocumentsFromWords[db: handle, ropeList: resultList ];
  KeyNote.CloseDatabase[handle];
  IF resultList # NIL THEN {
    -- The cut off is relative to the weight of the first result.
    -- NOTE(review): presumably the results come back ordered by decreasing weight; confirm.
    resultList ← EliminateIrrelevantFiles[resultList, displayThreshhold * NARROW[resultList.first, REF KeyNote.ResultObject].overAllWeight];
    -- chop off appropriate part of result List
    OutputNames[cmd, resultList, displayOverAllWeight, displayTokensInEachFile];
    };
  EXITS
    Die => result ← $Failure;
    Usage => RETURN[$Failure, "Usage: KeyWM {switch} TokenDatabaseName "];
  };
KeyStop: Commander.CommandProc = {
  -- Command: KeyStop stopListCutOff pattern stopListFileName
  -- Asks KeyNote to build a stop list for the files matching pattern, using the
  -- integer stopListCutOff, and writes the resulting words to stopListFileName.
  -- The words are written as produced by ListToRope (each preceded by a space).
  -- Returns $Failure with a usage message when fewer than three arguments are given,
  -- and $Failure with msg set when stopListCutOff is not an integer.
  argv: CommandTool.ArgumentVector ← CommandTool.Parse[cmd];
  -- argv[0] is the command name, so three arguments means argc >= 4.
  IF argv.argc < 4 THEN GO TO Usage;
  {
    stopListFileName: ROPE ← argv[3];
    fileNamesToMatch: ROPE ← argv[2];
    stopListCutOff: INT;
    wordList: KeyNote.ResultList;
    stream: IO.STREAM;
    -- Convert in the statement part so a malformed number is reported through Die
    -- instead of escaping as an uncaught Convert.Error.
    stopListCutOff ← Convert.IntFromRope[argv[1] ! Convert.Error => { msg ← "illegal stopListCutOff"; GO TO Die} ];
    wordList ← KeyNote.BuildStopList[
      fileNamesToMatch: NEW[KeyNote.FileNamesToMatchObject ← [pattern: fileNamesToMatch]],
      stopListCutOff: stopListCutOff];
    -- Open the output file only after the stop list has been built.
    stream ← FS.StreamOpen[fileName: stopListFileName, accessOptions: create ];
    -- IO.PutRope writes the rope directly; no LOOPHOLE to REF TEXT is required.
    IO.PutRope[stream, ListToRope[wordList]];
    IO.Close[self: stream];
  };
  EXITS
    Die => result ← $Failure;
    Usage => RETURN[$Failure, "Usage: KeyStop stopListCutOff pattern stopListFileName"];
  };
-- Name of the text file that KeyRegister appends to, KeyList prints and
-- KeyUnregister rewrites.
keyNoteRegistry: ROPE ← "keyNoteRegistry.txt";
-- register, list and unregister keyNote databases
KeyRegister: Commander.CommandProc =
  TRUSTED {
  -- Command: KeyRegister keyNoteDatabase {Description of keyNoteDatabase}
  -- Appends one line to the registry file: the command line tokens joined by
  -- ListToRope (each preceded by a space) and terminated by "\n".
  -- Returns $Failure with a usage message when the command line has no tokens, so
  -- that an empty line is never appended to the registry.
  -- TRUSTED is needed only for the LOOPHOLE from LIST OF ROPE to List.LORA.
  args: LIST OF ROPE ← CommandTool.ParseToList[cmd].list;
  IF args = NIL THEN GO TO Usage;
  {
    -- maybe make sure that comment has no carriage returns
    dataBaseInfo: ROPE ← Rope.Concat[ListToRope[LOOPHOLE[args, List.LORA]],"\n"];
    stream: IO.STREAM ← FS.StreamOpen[fileName: keyNoteRegistry, accessOptions: append ];
    -- IO.PutRope writes the rope directly; no LOOPHOLE to REF TEXT is required.
    IO.PutRope[stream, dataBaseInfo];
    IO.Flush[self: stream];
    IO.Close[self: stream];
  };
  EXITS
    Die => result ← $Failure;
    Usage => RETURN[$Failure, "Usage: KeyRegister keyNoteDatabase {Description of keyNoteDatabase}"];
  };
KeyList: Commander.CommandProc = {
  -- Command: KeyList
  -- Copies the whole registry file to the command output stream.
  -- The buffer is sized from the byte count reported by FS.FileInfo.
  contents: REF TEXT ← RefText.New[FS.FileInfo[keyNoteRegistry].bytes];
  registry: IO.STREAM ← FS.StreamOpen[fileName: keyNoteRegistry, accessOptions: read];
  -- Read the file into the buffer, then echo the buffer to the user.
  [] ← IO.GetBlock[self: registry, block: contents];
  IO.PutBlock[self: cmd.out, block: contents];
  IO.Close[self: registry];
  };
KeyUnregister: Commander.CommandProc = {
  -- Command: KeyUnregister TokenDatabaseName
  -- Removes from the registry file the text running from the first occurrence of
  -- TokenDatabaseName through the end of that line, then rewrites the file.
  -- Returns $Failure with a usage message when no name is given, and $Failure with
  -- msg "Database Not Registered" when the name does not occur in the registry.
  argv: CommandTool.ArgumentVector ← CommandTool.Parse[cmd];
  -- argv[0] is the command name; check before indexing argv[1].
  IF argv.argc < 2 THEN GO TO Usage;
  {
    databaseToBeUnregistered: ROPE ← argv[1];
    block: REF TEXT ← RefText.New[FS.FileInfo[keyNoteRegistry].bytes];
    stream: IO.STREAM ← FS.StreamOpen[fileName: keyNoteRegistry, accessOptions: write];
    fileNames: ROPE;
    startPos: INT;
    [] ← IO.GetBlock[self: stream, block: block];
    fileNames ← RefText.TrustTextAsRope[block];
    startPos ← Rope.Find[fileNames, databaseToBeUnregistered];
    IF startPos = -1 THEN {
      -- Close the registry before giving up so the write stream is not left open.
      IO.Close[self: stream];
      msg ← "Database Not Registered";
      GO TO Die};
    {
      -- look for carriage return
      size: INT = Rope.Length[fileNames];
      endPos: INT = Rope.Find[fileNames, "\n", startPos];
      -- Each entry is written by KeyRegister with a leading space, so resuming at
      -- endPos+2 skips the line terminator and the next entry's leading space,
      -- while the removed entry's own leading space (just before startPos) remains.
      -- Clamp to the rope size: the removed entry may be the last line, or the
      -- terminator may be missing altogether (endPos = -1).
      resumePos: INT = IF endPos = -1 THEN size ELSE MIN[endPos+2, size];
      fileNames ← Rope.Concat[
        Rope.Substr[base: fileNames, start: 0, len: startPos],
        Rope.Substr[base: fileNames, start: resumePos]
        ];
    };
    -- Rewrite the file from the beginning and truncate it to the new length.
    IO.SetIndex[stream, 0];
    -- IO.PutRope writes the rope directly; no LOOPHOLE to REF TEXT is required.
    IO.PutRope[stream, fileNames];
    IO.SetLength[stream, Rope.InlineSize[fileNames]];
    IO.Flush[self: stream];
    IO.Close[self: stream];
  };
  EXITS
    Die => result ← $Failure;
    Usage => RETURN[$Failure, "Usage: KeyUnregister TokenDatabaseName"];
  };
-- private procedures
ListToRope: PRIVATE PROC [listOfRope: List.LORA, spaceBetweenTokens: BOOLEAN ← TRUE]
  RETURNS [bigRope: ROPE] = {
  -- Concatenates the ropes of listOfRope, in list order, into bigRope.
  -- When spaceBetweenTokens is TRUE a single space is placed in front of every
  -- element, including the first, so the result starts with a space.
  -- Every element must be a ROPE; the NARROW fails otherwise.
  FOR tail: List.LORA ← listOfRope, tail.rest UNTIL tail = NIL DO
    piece: ROPE = NARROW[tail.first];
    IF spaceBetweenTokens
      THEN bigRope ← Rope.Cat[bigRope, " ", piece]
      ELSE bigRope ← Rope.Concat[bigRope, piece];
    ENDLOOP;
  };
RopeToSymTab: PRIVATE PROC [bigRope: ROPE]
  RETURNS [table: SymTab.Ref] = {
  -- Splits bigRope into tokens with IO.GetTokenRope and stores each token as a key
  -- (with a NIL value) in a newly created symbol table.
  -- The table is created with case: FALSE.
  -- NOTE(review): presumably this makes key lookup case insensitive; confirm against SymTab.
  stream: IO.STREAM ← IO.RIS[rope: bigRope];
  table ← SymTab.Create[case: FALSE];
  DO
    -- IO.EndOfStream marks the end of the rope and terminates the loop.
    token: ROPE ← IO.GetTokenRope[stream: stream ! IO.EndOfStream => EXIT].token;
    [] ← SymTab.Store[x: table, key: token, val: NIL];
    ENDLOOP;
  };
OutputNames: PRIVATE PROC [cmd: Commander.Handle, results: List.LORA,
  displayOverAllWeight: BOOLEAN ← TRUE, displayTokensInEachFile: BOOLEAN ← TRUE] = {
  -- Writes one line per result to cmd.out. Each line holds the file name (with its
  -- longest prefix replaced by a pseudo server name), optionally the overall
  -- weight, and optionally for each token its text, frequency and weight.
  -- Fields are separated by single spaces and every line ends with "\n".
  -- The complete report is accumulated in one rope and written with a single PutRope.
  weightPrecision: Convert.RealPrecision = 4;
  report: ROPE;
  FOR files: List.LORA ← results, files.rest UNTIL files = NIL DO
    entry: REF KeyNote.ResultObject = NARROW[files.first];
    report ← Rope.Cat[report, KeyNotePseudoServer.SubstituteLongestPrefixWithPseudoServer[entry.fileName], " "];
    IF displayOverAllWeight THEN
      report ← Rope.Cat[report, Convert.RopeFromReal[from: entry.overAllWeight, precision: weightPrecision], " "];
    IF displayTokensInEachFile THEN
      FOR tokens: List.LORA ← entry.tokenInfoList, tokens.rest UNTIL tokens = NIL DO
        info: REF KeyNote.TokenInfoObject = NARROW[tokens.first];
        -- token, frequency and weight, separated by spaces
        tokenText: ROPE = Rope.Cat[
          info.token, " ",
          Convert.RopeFromInt[from: info.frequency], " ",
          Convert.RopeFromReal[from: info.weight, precision: weightPrecision]];
        report ← Rope.Cat[report, tokenText, " "];
        ENDLOOP;
    report ← Rope.Concat[report, "\n"];
    ENDLOOP;
  IO.PutRope[cmd.out, report];
  };