-- KeyNoteCommandToolImpl
-- Commander commands for KeyNote token databases:
--   KeyBD          build a token database from files matching a pattern
--   KeyWM          look up words in a token database and list matching files
--   KeyStop        write a stop list file
--   KeyRegister    append a database description to the registry file
--   KeyUnregister  remove a database entry from the registry file
--   KeyList        print the registry file

DIRECTORY
  Ascii USING [Letter],
  Basics,
  Commander USING [CommandProc, Register, Handle],
  CommandTool USING [ArgumentVector, Parse, ParseToList],
  Convert,
  List,
  FS,
  IO,
  Real,
  RefText,
  Rope,
  KeyNote,
  KeyNotePseudoServer,
  SymTab;

KeyNoteCommandToolImpl: CEDAR PROGRAM
  IMPORTS Ascii, Commander, CommandTool, Convert, FS, List, IO, Real, RefText, Rope, SymTab, KeyNote, KeyNotePseudoServer
  = {

  ROPE: TYPE = Rope.ROPE;

  -- KeyBD TokenDatabaseName pattern stopListFileName
  -- Reads the stop list file into a symbol table and opens the named database with a
  -- word verifier that rejects stop words and words containing non-letters.
  -- Returns $Failure with a message on bad usage, file system errors or KeyNote errors.
  KeyBD: Commander.CommandProc = {
    argv: CommandTool.ArgumentVector _ CommandTool.Parse[cmd];
    databaseName: ROPE;
    fileNamesToMatch: ROPE;
    stopListFileName: ROPE;
    -- A word is accepted only if it is not in the stop table (clientData) and every
    -- character of it is a letter.
    wvp: KeyNote.WordVerifierProc = {
      EachCharInWord: Rope.ActionType = {
        -- quit (TRUE) as soon as a non-letter is seen
        quit _ ~Ascii.Letter[ch: c];
        };
      IF SymTab.Fetch[x: NARROW[clientData], key: word].found THEN RETURN[FALSE];
      RETURN[~Rope.Map[base: word, action: EachCharInWord]];
      };
    stream: IO.STREAM;
    block: REF TEXT;
    handle: KeyNote.Handle;
    -- argv[0] is the command name, so three arguments means argc >= 4
    IF argv.argc < 4 THEN GO TO Usage;
    databaseName _ argv[1];
    fileNamesToMatch _ argv[2];
    stopListFileName _ argv[3];
    -- read the whole stop list file into a text block
    block _ RefText.New[
      FS.FileInfo[stopListFileName ! FS.Error => {msg _ error.explanation; GO TO Die}].bytes];
    stream _ FS.StreamOpen[fileName: stopListFileName
      ! FS.Error => {msg _ error.explanation; GO TO Die}];
    [] _ IO.GetBlock[self: stream, block: block];
    -- the text is fully in memory now; close before the database work so that an
    -- error below cannot leave the stream open
    IO.Close[self: stream];
    handle _ KeyNote.OpenDatabase[
      databaseName: databaseName,
      fileNamesToMatch: NEW[KeyNote.FileNamesToMatchObject _ [pattern: fileNamesToMatch]],
      wordVerifierProc: wvp,
      tokenRelevanceThreshhold: 2,
      clientDataForVerifierProc: RopeToSymTab[RefText.TrustTextAsRope[text: block]]
      ! KeyNote.Error => {
        msg _ Rope.Cat[
          "Problems during database building", " ", Convert.RopeFromAtom[ec], ":", explanation];
        GO TO Die}
      ];
    KeyNote.CloseDatabase[handle];
    EXITS
      Die => result _ $Failure;
      Usage => RETURN[$Failure, "Usage: KeyBD TokenDatabaseName pattern stopListFileName"];
    };

  -- KeyWM {switch} TokenDatabaseName (token)+
  -- Switches: -w suppresses the overall weight, -t suppresses the per-file token
  -- display, -q takes the following argument as the display threshold (0 to 1).
  -- The first argument starting with '/ or '[ is the database name; every argument
  -- after it is a word to look up.
  KeyWM: Commander.CommandProc = {
    databaseName: ROPE;
    handle: KeyNote.Handle;
    resultList: KeyNote.ResultList;
    displayOverAllWeight: BOOLEAN _ TRUE;
    displayTokensInEachFile: BOOLEAN _ TRUE;
    displayThreshhold: REAL _ 0.10;
    argv: CommandTool.ArgumentVector _ CommandTool.Parse[cmd: cmd];

    -- Returns the items whose overAllWeight is not less than threshHold, in order.
    EliminateIrrelevantFiles: PROC [relevantAndIrrelevantFiles: KeyNote.ResultList, threshHold: REAL]
      RETURNS [relevantFiles: KeyNote.ResultList] = {
      RemoveIrrelevantItem: PROC [item: REF ANY, list: List.LORA] = {
        IF Real.CompareREAL[NARROW[item, REF KeyNote.ResultObject].overAllWeight, threshHold] # less
          THEN relevantFiles _ List.Nconc1[list: relevantFiles, ref: item];
        };
      List.Map[relevantAndIrrelevantFiles, RemoveIrrelevantItem];
      };

    -- Applies every switch character in arg; argNext supplies the value for q.
    -- Convert.Error from a malformed threshold propagates to the caller.
    ProcessSwitches: PROC [arg: ROPE, argNext: ROPE] = {
      FOR index: INT IN [0..Rope.Length[arg]) DO
        SELECT Rope.Fetch[arg, index] FROM
          'w, 'W => {displayOverAllWeight _ FALSE};
          't, 'T => {displayTokensInEachFile _ FALSE};
          'q, 'Q => {displayThreshhold _ Convert.RealFromRope[r: argNext]};
          ENDCASE;
        ENDLOOP;
      };

    FOR i: NAT IN [1..argv.argc) DO
      arg: ROPE = argv[i];
      argNext: ROPE = IF (argv.argc-1)=i THEN NIL ELSE argv[i+1];
      -- an empty argument has no first character to dispatch on
      IF Rope.Length[arg] = 0 THEN LOOP;
      SELECT Rope.Fetch[arg, 0] FROM
        '- => ProcessSwitches[arg, argNext
          ! Convert.Error => {msg _ "illegal threshold"; GO TO Die}];
        '/, '[ => {
          -- database name found; the remaining arguments are the words to look up
          -- (consing leaves them in reverse order)
          databaseName _ arg;
          FOR j: NAT IN [i+1..argv.argc) DO
            resultList _ CONS[argv[j], resultList];
            ENDLOOP;
          EXIT;
          };
        ENDCASE => NULL;
      ENDLOOP;
    -- without a database name there is nothing to open
    IF databaseName = NIL THEN GO TO Usage;
    IF Real.CompareREAL[displayThreshhold, 1.0] = greater
      OR Real.CompareREAL[displayThreshhold, 0.0] = less THEN {
      msg _ "threshold must be between 0 and 1";
      GO TO Die
      };
    handle _ KeyNote.OpenDatabase[databaseName: databaseName
      ! KeyNote.Error => {msg _ "Problems With KeyNote Database"; GO TO Die}];
    -- resultList goes in as the list of words and comes back as the list of results
    resultList _ KeyNote.FindDocumentsFromWords[db: handle, ropeList: resultList];
    KeyNote.CloseDatabase[handle];
    IF resultList # NIL THEN {
      -- the cut-off is relative to the weight of the first result
      -- NOTE(review): presumably the first result is the best match; confirm in KeyNote
      resultList _ EliminateIrrelevantFiles[
        resultList,
        displayThreshhold * NARROW[resultList.first, REF KeyNote.ResultObject].overAllWeight];
      OutputNames[cmd, resultList, displayOverAllWeight, displayTokensInEachFile];
      };
    EXITS
      Die => result _ $Failure;
      Usage => RETURN[$Failure, "Usage: KeyWM {switch} TokenDatabaseName (token)+"];
    };

  -- KeyStop stopListCutOff pattern stopListFileName
  -- Builds a stop list for the files matching pattern and writes it to stopListFileName.
  KeyStop: Commander.CommandProc = {
    argv: CommandTool.ArgumentVector _ CommandTool.Parse[cmd];
    stopListCutOff: INT;
    stopListFileName: ROPE;
    fileNamesToMatch: ROPE;
    wordList: KeyNote.ResultList;
    stream: IO.STREAM;
    IF argv.argc < 4 THEN GO TO Usage;
    stopListCutOff _ Convert.IntFromRope[argv[1]
      ! Convert.Error => {msg _ "illegal stopListCutOff"; GO TO Die}];
    fileNamesToMatch _ argv[2];
    stopListFileName _ argv[3];
    wordList _ KeyNote.BuildStopList[
      fileNamesToMatch: NEW[KeyNote.FileNamesToMatchObject _ [pattern: fileNamesToMatch]],
      stopListCutOff: stopListCutOff];
    stream _ FS.StreamOpen[fileName: stopListFileName, accessOptions: create];
    TRUSTED {
      -- the flattened rope is passed to PutBlock as if it were a REF TEXT
      IO.PutBlock[self: stream,
        block: LOOPHOLE[Rope.InlineFlatten[ListToRope[wordList]], REF TEXT]];
      IO.Close[self: stream];
      };
    EXITS
      Die => result _ $Failure;
      Usage => RETURN[$Failure, "Usage: KeyStop stopListCutOff pattern stopListFileName"];
    };

  -- name of the file that holds one line per registered database
  keyNoteRegistry: ROPE _ "keyNoteRegistry.txt";

  -- KeyRegister keyNoteDatabase {Description of keyNoteDatabase}
  -- Appends the command arguments, separated by spaces and followed by a newline,
  -- to the registry file.
  KeyRegister: Commander.CommandProc = TRUSTED {
    args: List.LORA = LOOPHOLE[CommandTool.ParseToList[cmd].list, List.LORA];
    dataBaseInfo: ROPE;
    stream: IO.STREAM;
    -- nothing to register: report usage instead of appending an empty record
    IF args = NIL THEN GO TO Usage;
    dataBaseInfo _ Rope.Concat[ListToRope[args], "\n"];
    stream _ FS.StreamOpen[fileName: keyNoteRegistry, accessOptions: append];
    IO.PutBlock[self: stream, block: LOOPHOLE[Rope.InlineFlatten[dataBaseInfo], REF TEXT]];
    IO.Flush[self: stream];
    IO.Close[self: stream];
    EXITS
      Die => result _ $Failure;
      Usage => RETURN[$Failure, "KeyRegister keyNoteDatabase {Description of keyNoteDatabase}"];
    };

  -- KeyList
  -- Copies the registry file to the command output stream.
  KeyList: Commander.CommandProc = {
    block: REF TEXT;
    stream: IO.STREAM;
    block _ RefText.New[
      FS.FileInfo[keyNoteRegistry ! FS.Error => {msg _ error.explanation; GO TO Die}].bytes];
    stream _ FS.StreamOpen[fileName: keyNoteRegistry, accessOptions: read
      ! FS.Error => {msg _ error.explanation; GO TO Die}];
    [] _ IO.GetBlock[self: stream, block: block];
    IO.PutBlock[self: cmd.out, block: block];
    IO.Close[self: stream];
    EXITS
      Die => result _ $Failure;
    };

  -- KeyUnregister TokenDatabaseName
  -- Removes from the registry file the line that contains the first occurrence of the
  -- given name, then rewrites and truncates the file.
  KeyUnregister: Commander.CommandProc = {
    argv: CommandTool.ArgumentVector _ CommandTool.Parse[cmd];
    databaseToBeUnregistered: ROPE;
    block: REF TEXT;
    stream: IO.STREAM;
    fileNames: ROPE;
    startPos: INT;
    IF argv.argc < 2 THEN GO TO Usage;
    databaseToBeUnregistered _ argv[1];
    block _ RefText.New[
      FS.FileInfo[keyNoteRegistry ! FS.Error => {msg _ error.explanation; GO TO Die}].bytes];
    stream _ FS.StreamOpen[fileName: keyNoteRegistry, accessOptions: write
      ! FS.Error => {msg _ error.explanation; GO TO Die}];
    [] _ IO.GetBlock[self: stream, block: block];
    fileNames _ RefText.TrustTextAsRope[block];
    startPos _ Rope.Find[fileNames, databaseToBeUnregistered];
    IF startPos = -1 THEN {
      -- do not leave the registry stream open on the failure path
      IO.Close[self: stream];
      msg _ "Database Not Registered";
      GO TO Die};
    {
      -- Delete the whole line holding the match. The end is clamped to the rope length
      -- so that removing the last entry, or an entry with no trailing newline, does
      -- not index past the end of the rope.
      lineStart: INT _ startPos;
      endPos: INT = Rope.Find[fileNames, "\n", startPos];
      restStart: INT = IF endPos = -1 THEN Rope.Length[fileNames] ELSE endPos+1;
      WHILE lineStart > 0 AND Rope.Fetch[fileNames, lineStart-1] # '\n DO
        lineStart _ lineStart-1;
        ENDLOOP;
      fileNames _ Rope.Concat[
        Rope.Substr[base: fileNames, start: 0, len: lineStart],
        Rope.Substr[base: fileNames, start: restStart]];
      };
    IO.SetIndex[stream, 0];
    TRUSTED {
      IO.PutBlock[self: stream, block: LOOPHOLE[Rope.InlineFlatten[fileNames], REF TEXT]];
      };
    -- drop whatever is left of the old, longer contents
    IO.SetLength[stream, Rope.InlineSize[fileNames]];
    IO.Flush[self: stream];
    IO.Close[self: stream];
    EXITS
      Die => result _ $Failure;
      Usage => RETURN[$Failure, "Usage: KeyUnregister TokenDatabaseName"];
    };

  -- Utility procedures

  -- Concatenates the ropes in listOfRope in list order. When spaceBetweenTokens is
  -- TRUE each item is preceded by a space, so the result starts with a space.
  ListToRope: PRIVATE PROC [listOfRope: List.LORA, spaceBetweenTokens: BOOLEAN _ TRUE]
    RETURNS [bigRope: ROPE] = {
    ConvertListToRope: PROC [item: REF ANY, list: List.LORA] = {
      bigRope _ IF spaceBetweenTokens
        THEN Rope.Cat[bigRope, " ", NARROW[item]]
        ELSE Rope.Concat[bigRope, NARROW[item]];
      };
    List.Map[list: listOfRope, proc: ConvertListToRope];
    };

  -- Returns a case-insensitive symbol table with one entry (value NIL) for every
  -- token that IO.GetTokenRope finds in bigRope.
  RopeToSymTab: PRIVATE PROC [bigRope: ROPE] RETURNS [table: SymTab.Ref] = {
    stream: IO.STREAM _ IO.RIS[rope: bigRope];
    table _ SymTab.Create[case: FALSE];
    DO
      token: ROPE _ IO.GetTokenRope[stream: stream ! IO.EndOfStream => EXIT].token;
      [] _ SymTab.Store[x: table, key: token, val: NIL];
      ENDLOOP;
    };

  -- Writes one line per result to cmd.out: the file name (with its longest prefix
  -- replaced by a pseudo server name), optionally the overall weight, and optionally
  -- token, frequency and weight for each token recorded for the file.
  OutputNames: PRIVATE PROC [cmd: Commander.Handle, results: List.LORA,
    displayOverAllWeight: BOOLEAN _ TRUE, displayTokensInEachFile: BOOLEAN _ TRUE] = {
    weightPrecision: Convert.RealPrecision = 4;
    fileInfo: ROPE;  -- accumulates the whole report before it is written
    ExtractAppropriateDataFromItem: PROC [file: REF ANY, fileList: List.LORA] = {
      basicFileInfo: REF KeyNote.ResultObject _ NARROW[file];
      ExtractAppropriateDataForEachTokenFromItem: PROC [token: REF ANY, tokenList: List.LORA] = {
        tokenPerFileInfo: REF KeyNote.TokenInfoObject _ NARROW[token];
        fileInfo _ Rope.Concat[
          fileInfo,
          Rope.Cat[
            tokenPerFileInfo.token, " ",
            Convert.RopeFromInt[from: tokenPerFileInfo.frequency], " ",
            Convert.RopeFromReal[from: tokenPerFileInfo.weight, precision: weightPrecision]]];
        fileInfo _ Rope.Cat[fileInfo, " "];
        };
      fileInfo _ Rope.Cat[
        fileInfo,
        KeyNotePseudoServer.SubstituteLongestPrefixWithPseudoServer[basicFileInfo.fileName],
        " "];
      IF displayOverAllWeight THEN
        fileInfo _ Rope.Cat[
          fileInfo,
          Convert.RopeFromReal[from: basicFileInfo.overAllWeight, precision: weightPrecision],
          " "];
      IF displayTokensInEachFile THEN
        List.Map[list: basicFileInfo.tokenInfoList, proc: ExtractAppropriateDataForEachTokenFromItem];
      fileInfo _ Rope.Concat[fileInfo, "\n"];
      };
    List.Map[list: results, proc: ExtractAppropriateDataFromItem];
    IO.PutRope[cmd.out, fileInfo];
    };

  -- Command registration (runs when the module is started)

  Commander.Register[
    key: "KeyBD", proc: KeyBD,
    doc: "KeyBD TokenDatabaseName pattern StopListName"];
  Commander.Register[
    key: "KeyWM", proc: KeyWM,
    doc: "KeyWM {switch} TokenDatabaseName (token)+ switch = -t: suppress token display, -w: suppress aggregate weight display, -q threshold: display only files with weight greater than the best match * threshold "];
  Commander.Register[
    key: "KeyStop", proc: KeyStop,
    doc: "KeyStop stopListCutOff pattern stopListFileName"];
  Commander.Register[
    key: "KeyRegister", proc: KeyRegister,
    doc: "KeyRegister keyNoteDatabase {Description of keyNoteDatabase}"];
  Commander.Register[
    key: "KeyUnregister", proc: KeyUnregister,
    doc: "KeyUnregister keyNoteDatabase"];
  Commander.Register[
    key: "KeyList", proc: KeyList,
    doc: "KeyList"];

  }.