-- File: KeyNoteWeightedMatchingImpl.mesa
-- Copyright © 1985, 1987 by Xerox Corporation. All rights reserved.
-- Jack Kent, February 21, 1988 4:24:31 pm PST
-- Contents: Implementation of KeyNoteWeightedMatching
DIRECTORY
List,
SymTab,
Rope,
Real,
KeyNote,
KeyNoteDatabase,
KeyNoteWeightedMatching;
KeyNoteWeightedMatchingImpl: CEDAR PROGRAM
IMPORTS SymTab, List, Real, KeyNoteDatabase
EXPORTS KeyNoteWeightedMatching = {
-- Local shorthand for Rope.ROPE.
ROPE: TYPE = Rope.ROPE;
-- Value type stored in the result hash table built by WeightedMatch; the table
-- is keyed by file name and holds one of these records per file.
ResultHashTableData: TYPE = REF ResultHashTableDataObject;
ResultHashTableDataObject: TYPE = RECORD [
-- List of REF KeyNote.TokenInfoObject, one entry for each query token that
-- contributed weight to this file (newest entry first).
detailedInfo: List.LORA,
-- Running sum of the weight contributions recorded in detailedInfo.
totalWeight: REAL ← 0];
-- WeightedMatch ranks the files in database db against a list of query tokens.
--   db:         handle of the KeyNote database to search.
--   ropeList:   list of query tokens; each element is NARROWed to the token
--               type expected by the KeyNoteDatabase calls.
--   resultList: list of KeyNote.Result, one per file that contains at least
--               one query token, sorted with the largest overAllWeight first.
-- For every token that occurs in the universe:
--   weightOfToken = numberOfTokensInUniverse / (frequency of token in universe)
-- and every file containing that token receives the contribution
--   weightOfToken * (frequency of token in file) / (file size).
-- Contributions are accumulated per file name in a SymTab, which is then
-- dumped into a list and sorted.
WeightedMatch: PUBLIC PROC [db: KeyNote.Handle, ropeList: KeyNote.ResultList] RETURNS [resultList: KeyNote.ResultList] = {

  numberOfTokensInUniverse: REAL = Real.Float[KeyNoteDatabase.GetNumberOfTokensInUniverse[db]];
  -- file name -> ResultHashTableData
  resultHashTable: SymTab.Ref ← SymTab.Create[];

  -- Called once per query token: computes the token's weight and credits
  -- every file that contains the token.
  CallForEachKeyWord: PROC [token: REF ANY, list: List.LORA] = {

    -- Called once per file containing the current token: adds this token's
    -- contribution to the file's entry in resultHashTable, creating the
    -- entry on first sight of the file.
    CallForEachFileName: PROC [item: REF ANY, list: List.LORA] = {
      WITH item SELECT FROM
        i: REF KeyNoteDatabase.FilesContainingTokenObject => {
          -- NOTE(review): assumes i.fileSize # 0 for any file that contains a token -- confirm against KeyNoteDatabase.
          contributionOfGivenWordToGivenFile: REAL = weightOfToken * Real.Float[i.frequency] / Real.Float[i.fileSize];
          tokenInfo: REF KeyNote.TokenInfoObject = NEW[KeyNote.TokenInfoObject ← [token: NARROW[token], weight: contributionOfGivenWordToGivenFile, frequency: i.frequency]];
          found: BOOLEAN;
          val: SymTab.Val;
          [found: found, val: val] ← SymTab.Fetch[x: resultHashTable, key: i.fileName];
          IF found THEN
            WITH val SELECT FROM
              v: ResultHashTableData => {
                -- v is the REF already held by the table, so updating it in
                -- place is sufficient; no SymTab.Replace is required.
                v.detailedInfo ← CONS[tokenInfo, v.detailedInfo];
                v.totalWeight ← v.totalWeight + contributionOfGivenWordToGivenFile;
                };
              ENDCASE => NULL
          ELSE {
            [] ← SymTab.Store[x: resultHashTable, key: i.fileName, val: NEW[ResultHashTableDataObject ← [totalWeight: contributionOfGivenWordToGivenFile, detailedInfo: LIST[tokenInfo]]]];
            };
          };
        ENDCASE => NULL;
      };

    wordFreqInUniverse: INT ← KeyNoteDatabase.FindFrequencyWordInUniverse[db, NARROW[token]];
    weightOfToken: REAL;
    listOfFilesWithToken: KeyNoteDatabase.ListOfFilesContainingToken;
    -- A token that never occurs in the universe contributes nothing.
    IF wordFreqInUniverse = 0 THEN RETURN;
    -- Rarer tokens get a larger weight.
    weightOfToken ← numberOfTokensInUniverse/Real.Float[wordFreqInUniverse];
    listOfFilesWithToken ← KeyNoteDatabase.GetListOfFilesContainingToken[db, NARROW[token]];
    List.Map[list: listOfFilesWithToken, proc: CallForEachFileName];
    };

  -- Orders KeyNote.Result entries by overAllWeight. The arguments are passed
  -- to Real.CompareREAL as (ref2, ref1), i.e. reversed, so that List.Sort
  -- places the larger weight first.
  CompareFrequencies: List.CompareProc = {
    RETURN[Real.CompareREAL[NARROW[ref2, REF KeyNote.ResultObject].overAllWeight, NARROW[ref1, REF KeyNote.ResultObject].overAllWeight]]
    };

  -- dump value from hash table to list
  DumpResultEntryToList: SymTab.EachPairAction = {
    data: ResultHashTableData = NARROW[val, ResultHashTableData];
    entry: KeyNote.Result ← NEW[KeyNote.ResultObject ← [fileName: NARROW[key], overAllWeight: data.totalWeight, tokenInfoList: data.detailedInfo]];
    resultList ← List.Nconc1[list: resultList, ref: entry];
    };

  List.Map[list: ropeList, proc: CallForEachKeyWord];
  -- OK...hash table contains all the info...now dump back into List and sort it.
  [] ← SymTab.Pairs[x: resultHashTable, action: DumpResultEntryToList];
  resultList ← List.Sort[list: resultList, compareProc: CompareFrequencies];
  };
}.