-- KeyNoteWeightedMatchingImpl
-- Implements KeyNoteWeightedMatching.WeightedMatch: ranks the files that contain any of a
-- list of query tokens by a summed, frequency-based weight.

DIRECTORY
  List,
  SymTab,
  Rope,
  Real,
  KeyNote,
  KeyNoteDatabase,
  KeyNoteWeightedMatching;

KeyNoteWeightedMatchingImpl: CEDAR PROGRAM
  IMPORTS SymTab, List, Real, KeyNoteDatabase
  EXPORTS KeyNoteWeightedMatching = {

  ROPE: TYPE = Rope.ROPE;

  -- Per-file accumulator kept in the result hash table (keyed by file name).
  ResultHashTableData: TYPE = REF ResultHashTableDataObject;
  ResultHashTableDataObject: TYPE = RECORD [
    detailedInfo: List.LORA,	-- one KeyNote.TokenInfoObject per query token found in the file
    totalWeight: REAL _ 0	-- sum of the weights of those token entries
    ];

  WeightedMatch: PUBLIC PROC [db: KeyNote.Handle, ropeList: KeyNote.ResultList] RETURNS [resultList: KeyNote.ResultList] = {
    -- For every token in ropeList whose universe frequency is non-zero:
    --   weightOfToken = numberOfTokensInUniverse / (frequency of the token in the universe)
    -- and for every file containing that token:
    --   contribution = weightOfToken * (frequency in file) / (file size).
    -- Contributions are summed per file name. The result is a list of KeyNote.ResultObject,
    -- one per file, sorted by decreasing overAllWeight.

    numberOfTokensInUniverse: REAL = Real.Float[KeyNoteDatabase.GetNumberOfTokensInUniverse[db]];
    resultHashTable: SymTab.Ref _ SymTab.Create[];

    CallForEachKeyWord: PROC [token: REF ANY, list: List.LORA] = {
      -- Invoked by List.Map once per query token; `list` is required by the Map signature and unused.

      CallForEachFileName: PROC [item: REF ANY, list: List.LORA] = {
        -- Invoked by List.Map once per file containing the current token.
        WITH item SELECT FROM
          i: REF KeyNoteDatabase.FilesContainingTokenObject => {
            contribution: REAL;
            info: REF KeyNote.TokenInfoObject;
            found: BOOLEAN;
            val: SymTab.Val;
            -- A zero fileSize would make the division below undefined; skip such records.
            IF i.fileSize = 0 THEN RETURN;
            contribution _ weightOfToken * Real.Float[i.frequency] / Real.Float[i.fileSize];
            info _ NEW[KeyNote.TokenInfoObject _ [token: NARROW[token], weight: contribution, frequency: i.frequency]];
            [found: found, val: val] _ SymTab.Fetch[x: resultHashTable, key: i.fileName];
            IF found
              THEN {
                WITH val SELECT FROM
                  v: ResultHashTableData => {
                    -- v is the very REF stored in the table, so updating it in place is
                    -- sufficient; no SymTab.Replace is needed.
                    v.detailedInfo _ CONS[info, v.detailedInfo];
                    v.totalWeight _ v.totalWeight + contribution;
                    };
                  ENDCASE => NULL;
                }
              ELSE {
                [] _ SymTab.Store[x: resultHashTable, key: i.fileName,
                  val: NEW[ResultHashTableDataObject _ [detailedInfo: LIST[info], totalWeight: contribution]]];
                };
            };
          ENDCASE => NULL;
        };

      wordFreqInUniverse: INT = KeyNoteDatabase.FindFrequencyWordInUniverse[db, NARROW[token]];
      weightOfToken: REAL;	-- read by CallForEachFileName; assigned before List.Map runs it

      -- Tokens with zero universe frequency contribute nothing (and would divide by zero).
      IF wordFreqInUniverse # 0 THEN {
        listOfFilesWithToken: KeyNoteDatabase.ListOfFilesContainingToken;
        weightOfToken _ numberOfTokensInUniverse/Real.Float[wordFreqInUniverse];
        listOfFilesWithToken _ KeyNoteDatabase.GetListOfFilesContainingToken[db, NARROW[token]];
        List.Map[list: listOfFilesWithToken, proc: CallForEachFileName];
        };
      };

    CompareFrequencies: List.CompareProc = {
      -- Arguments are passed to Real.CompareREAL in reverse (ref2 first) so that
      -- List.Sort yields decreasing overAllWeight.
      first: REF KeyNote.ResultObject = NARROW[ref1];
      second: REF KeyNote.ResultObject = NARROW[ref2];
      RETURN[Real.CompareREAL[second.overAllWeight, first.overAllWeight]]
      };

    DumpResultEntryToList: SymTab.EachPairAction = {
      -- Converts one hash table entry into a KeyNote.ResultObject and adds it to resultList.
      data: ResultHashTableData = NARROW[val];
      entry: KeyNote.Result _ NEW[KeyNote.ResultObject _ [
        fileName: NARROW[key],
        overAllWeight: data.totalWeight,
        tokenInfoList: data.detailedInfo]];
      -- Prepend in constant time instead of List.Nconc1 (which walks to the tail on every
      -- call); the list is sorted below, so insertion order does not determine the ranking.
      resultList _ CONS[entry, resultList];
      };

    -- Accumulate per-file weights over all query tokens.
    List.Map[list: ropeList, proc: CallForEachKeyWord];
    -- Flatten the hash table into resultList, then order it by decreasing weight.
    [] _ SymTab.Pairs[x: resultHashTable, action: DumpResultEntryToList];
    resultList _ List.Sort[list: resultList, compareProc: CompareFrequencies];
    };

  }.