-- File: KeyNoteWeightedMatchingImpl.mesa
-- Copyright (C) 1985, 1987 by Xerox Corporation.  All rights reserved.
-- Jack Kent February 21, 1988 4:24:31 pm PST
-- Contents: Implementation of KeyNoteWeightedMatching

DIRECTORY
  List,
  SymTab,
  Rope,
  Real,
  KeyNote,
  KeyNoteDatabase,
  KeyNoteWeightedMatching;

KeyNoteWeightedMatchingImpl: CEDAR PROGRAM
  IMPORTS SymTab, List, Real, KeyNoteDatabase
  EXPORTS KeyNoteWeightedMatching
  = {

  ROPE: TYPE = Rope.ROPE;

  -- Per-file accumulator kept in the result hash table, keyed by file name.
  --   detailedInfo: one KeyNote.TokenInfoObject per keyword that matched the file.
  --   totalWeight: sum of the contributions of those keywords.
  ResultHashTableData: TYPE = REF ResultHashTableDataObject;
  ResultHashTableDataObject: TYPE = RECORD [
    detailedInfo: List.LORA,
    totalWeight: REAL _ 0];

  WeightedMatch: PUBLIC PROC [db: KeyNote.Handle, ropeList: KeyNote.ResultList] RETURNS [resultList: KeyNote.ResultList] = {
    -- Scores files against the keywords in ropeList and returns one KeyNote.ResultObject
    -- per matching file, sorted by decreasing overAllWeight.
    --
    -- For each keyword whose frequency in the universe is nonzero:
    --   weightOfToken = numberOfTokensInUniverse / (frequency of the keyword in the universe)
    -- and each file containing the keyword receives
    --   weightOfToken * (frequency of the keyword in the file) / (fileSize of the file).
    -- Keywords whose universe frequency is zero contribute nothing.
    --
    -- Arguments:
    --   db: database handle passed through to the KeyNoteDatabase queries.
    --   ropeList: list of keywords; each element is NARROWed to the token type expected
    --     by KeyNoteDatabase (presumably ROPE; confirm against KeyNoteDatabase.mesa).
    -- Result:
    --   resultList: list of KeyNote.Result; NIL when no keyword matches any file.
    -- The relative order of entries with equal weight is unspecified, since entries are
    -- gathered in hash table enumeration order before sorting.

    numberOfTokensInUniverse: REAL = Real.Float[KeyNoteDatabase.GetNumberOfTokensInUniverse[db]];
    resultHashTable: SymTab.Ref _ SymTab.Create[];

    CallForEachKeyWord: PROC [token: REF ANY, list: List.LORA] = {
      -- List.Map callback: folds the contribution of one keyword into resultHashTable.

      weightOfToken: REAL;  -- assigned below, before CallForEachFileName can run

      CallForEachFileName: PROC [item: REF ANY, list: List.LORA] = {
        -- List.Map callback: adds the contribution of the current keyword to one file.
        -- Items that are not REF KeyNoteDatabase.FilesContainingTokenObject are ignored.
        WITH item SELECT FROM
          i: REF KeyNoteDatabase.FilesContainingTokenObject => {
            -- NOTE(review): divides by i.fileSize; assumes the database never reports a
            -- file size of zero for a file that contains a token. Confirm.
            contributionOfGivenWordToGivenFile: REAL = weightOfToken * Real.Float[i.frequency] / Real.Float[i.fileSize];
            -- Built once and shared by both branches below.
            tokenInfo: REF KeyNote.TokenInfoObject = NEW[KeyNote.TokenInfoObject _ [token: NARROW[token], weight: contributionOfGivenWordToGivenFile, frequency: i.frequency]];
            val: SymTab.Val;
            found: BOOLEAN;
            [found: found, val: val] _ SymTab.Fetch[x: resultHashTable, key: i.fileName];
            IF found
              THEN WITH val SELECT FROM
                v: ResultHashTableData => {
                  -- v is the REF already held by the table, so updating it in place is
                  -- sufficient; no SymTab.Replace is needed.
                  v.detailedInfo _ CONS[tokenInfo, v.detailedInfo];
                  v.totalWeight _ v.totalWeight + contributionOfGivenWordToGivenFile;
                  };
                ENDCASE => NULL
              ELSE {
                -- First keyword seen for this file: create its accumulator.
                [] _ SymTab.Store[x: resultHashTable, key: i.fileName, val: NEW[ResultHashTableDataObject _ [totalWeight: contributionOfGivenWordToGivenFile, detailedInfo: LIST[tokenInfo]]]];
                };
            };
          ENDCASE => NULL;
        };

      wordFreqInUniverse: INT = KeyNoteDatabase.FindFrequencyWordInUniverse[db, NARROW[token]];

      -- A keyword that never occurs in the universe is skipped, which also avoids
      -- dividing by zero when computing weightOfToken.
      IF wordFreqInUniverse # 0 THEN {
        listOfFilesWithToken: KeyNoteDatabase.ListOfFilesContainingToken;
        weightOfToken _ numberOfTokensInUniverse / Real.Float[wordFreqInUniverse];
        listOfFilesWithToken _ KeyNoteDatabase.GetListOfFilesContainingToken[db, NARROW[token]];
        List.Map[list: listOfFilesWithToken, proc: CallForEachFileName];
        };
      };

    CompareFrequencies: List.CompareProc = {
      -- Arguments are passed to Real.CompareREAL in reverse order, so that List.Sort
      -- places the largest overAllWeight first.
      first: REF KeyNote.ResultObject = NARROW[ref1];
      second: REF KeyNote.ResultObject = NARROW[ref2];
      RETURN[Real.CompareREAL[second.overAllWeight, first.overAllWeight]];
      };

    DumpResultEntryToList: SymTab.EachPairAction = {
      -- dump value from hash table to list
      data: ResultHashTableData = NARROW[val];
      entry: KeyNote.Result = NEW[KeyNote.ResultObject _ [fileName: NARROW[key], overAllWeight: data.totalWeight, tokenInfoList: data.detailedInfo]];
      -- Prepend rather than append: the list is sorted afterwards, so walking to the
      -- tail for every entry would be wasted work.
      resultList _ CONS[entry, resultList];
      };

    List.Map[list: ropeList, proc: CallForEachKeyWord];

    -- OK...hash table contains all the info...now dump back into List and sort it.
    [] _ SymTab.Pairs[x: resultHashTable, action: DumpResultEntryToList];
    resultList _ List.Sort[list: resultList, compareProc: CompareFrequencies];
    };

  }.