--DictionaryStatsImpl.mesa
--Last edit by : S.Umehara for OIS to CES conversion, 12-Jun-84  0:44:57 PDT
--Calculates statistics on the master dictionary

DIRECTORY
  DictionaryStatsDefs USING [],
  Environment USING [Block],
  Format USING [NumberFormat],
  Inline USING [LongDivMod],
  JDBMSDefs USING [heap],
  CESDictDataDefs USING [DictBytesPerEntry, Entry],
  Put USING [CR, Line, LongDecimal, LongNumber, Number, Text],
  Stream USING [CompletionCode, GetBlock, Handle, SetPosition],
  Window USING [Handle];

DictionaryStatsImpl: PROGRAM
  IMPORTS Inline, JDBMSDefs, Put, Stream
  EXPORTS DictionaryStatsDefs =
  BEGIN

  --Constants
  --MaxKana and MaxKanji bound the kana and kanji scans in MakeStats; a scan
  --that finds no terminator stops there and is counted at that length.
  MaxKana: CARDINAL = 14;
  MaxKanji: CARDINAL = 7;
  --Value that ends a kana or kanji scan when found in the entry.
  onesbyte: CARDINAL = 377B;
  MaxFreq: CARDINAL = 10;  --[0..9]
  MaxCat: CARDINAL = 26B;  -- old Data is 23B changed by M.Ando
  MaxPos: CARDINAL = 377B;
  MaxLogicalDict: CARDINAL = 17B;

  --MakeStats
  --Reads masterDictStrH from position 0 in blocks of
  --CESDictDataDefs.DictBytesPerEntry bytes until Stream.GetBlock reports
  --endOfStream, tallying for each entry its kana length, kanji length, pos,
  --cat, freq and logicalDict.  The six distributions are then written to
  --logSW, one OutLog row per non-zero count.
  --  masterDictStrH: stream holding the dictionary entries
  --  logSW: window that receives the report
  MakeStats: PUBLIC PROCEDURE [masterDictStrH: Stream.Handle, logSW: Window.Handle] =
    BEGIN
    why: Stream.CompletionCode;
    nBytes: CARDINAL;
    dataBlock: Environment.Block;
    entry: LONG POINTER TO CESDictDataDefs.Entry _
      JDBMSDefs.heap.NEW[CESDictDataDefs.Entry];
    totalCt: LONG CARDINAL _ 0;
    kanaLengthCt: ARRAY [0..MaxKana] OF LONG CARDINAL _ ALL[0];
    kanjiLengthCt: ARRAY [0..MaxKanji] OF LONG CARDINAL _ ALL[0];
    posCt: ARRAY [0..MaxPos] OF LONG CARDINAL _ ALL[0];
    catCt: ARRAY [0..MaxCat] OF LONG CARDINAL _ ALL[0];
    freqCt: ARRAY [0..MaxFreq] OF LONG CARDINAL _ ALL[0];
    logicalCt: ARRAY [0..MaxLogicalDict] OF LONG CARDINAL _ ALL[0];
    i: CARDINAL _ 0;

    --Every GetBlock call fills the same heap-allocated entry record.
    dataBlock.startIndex _ 0;
    dataBlock.stopIndexPlusOne _ CESDictDataDefs.DictBytesPerEntry;
    dataBlock.blockPointer _ LOOPHOLE[entry];
    Stream.SetPosition[masterDictStrH, LONG[0]];

    DO
      --NOTE(review): nBytes is never examined, so a short final block is not
      --detected here; confirm the stream always holds whole entries.
      [nBytes,why,] _ Stream.GetBlock[masterDictStrH,dataBlock];
      IF why = endOfStream THEN EXIT;
      totalCt _ totalCt+1;

      --Kana length: index of the first onesbyte, or MaxKana if none is found.
      --The index must restart at 0 for every entry.  Previously it kept the
      --kanji length of the preceding entry, so the scan began part way into
      --the kana string and the lengths were miscounted.
      i _ 0;
      WHILE i < MaxKana AND entry.kana[i] # onesbyte DO i _ i+1 ENDLOOP;
      kanaLengthCt[i] _ kanaLengthCt[i]+1;

      --Kanji length: same scan over the code field of each kanji element.
      i _ 0;
      WHILE i < MaxKanji AND entry.kanji[i].code # onesbyte DO i _ i+1 ENDLOOP;
      kanjiLengthCt[i] _ kanjiLengthCt[i]+1;

      --NOTE(review): these fields index the tables directly; confirm that
      --their declared ranges fit the table bounds.
      posCt[entry.pos] _ posCt[entry.pos]+1;
      catCt[entry.cat] _ catCt[entry.cat]+1;
      freqCt[entry.freq] _ freqCt[entry.freq]+1;
      logicalCt[entry.logicalDict] _ logicalCt[entry.logicalDict]+1;
      ENDLOOP;

    --Print the statistics
    --In each table below a zero count prints nothing, except that a blank
    --line is emitted when the following count is non-zero.
    Put.CR[logSW]; Put.CR[logSW]; Put.CR[logSW];
    Put.Text[logSW, "Total Number of Entries:  "L];
    Put.LongDecimal[logSW, totalCt];

    Put.CR[logSW]; Put.CR[logSW]; Put.CR[logSW];
    Put.Line[logSW, "Distribution of Kana-String Lengths:"L];
    Put.CR[logSW];
    FOR i IN [0..MaxKana] DO
      IF kanaLengthCt[i] # 0 THEN OutLog[totalCt, kanaLengthCt[i], i, logSW]
      ELSE IF kanaLengthCt[MIN[i+1,MaxKana]] # 0 THEN Put.CR[logSW];
      ENDLOOP;

    Put.CR[logSW]; Put.CR[logSW]; Put.CR[logSW];
    Put.Line[logSW, "Distribution of Kanji-String Lengths:"L];
    Put.CR[logSW];
    FOR i IN [0..MaxKanji] DO
      IF kanjiLengthCt[i] # 0 THEN OutLog[totalCt, kanjiLengthCt[i], i, logSW]
      ELSE IF kanjiLengthCt[MIN[i+1,MaxKanji]] # 0 THEN Put.CR[logSW];
      ENDLOOP;

    Put.CR[logSW]; Put.CR[logSW]; Put.CR[logSW];
    Put.Line[logSW, "Distribution of Parts of Speech:"L];
    Put.CR[logSW];
    FOR i IN [0..MaxPos] DO
      IF posCt[i] # 0 THEN OutLog[totalCt, posCt[i], i, logSW]
      ELSE IF posCt[MIN[i+1,MaxPos]] # 0 THEN Put.CR[logSW];
      ENDLOOP;

    Put.CR[logSW]; Put.CR[logSW]; Put.CR[logSW];
    Put.Line[logSW, "Distribution of Categories:"L];
    Put.CR[logSW];
    FOR i IN [0..MaxCat] DO
      IF catCt[i] # 0 THEN OutLog[totalCt, catCt[i], i, logSW]
      ELSE IF catCt[MIN[i+1,MaxCat]] # 0 THEN Put.CR[logSW];
      ENDLOOP;

    Put.CR[logSW]; Put.CR[logSW]; Put.CR[logSW];
    Put.Line[logSW, "Distribution of Frequencies:"L];
    Put.CR[logSW];
    FOR i IN [0..MaxFreq] DO
      IF freqCt[i] # 0 THEN OutLog[totalCt, freqCt[i], i, logSW]
      ELSE IF freqCt[MIN[i+1,MaxFreq]] # 0 THEN Put.CR[logSW];
      ENDLOOP;

    Put.CR[logSW]; Put.CR[logSW]; Put.CR[logSW];
    Put.Line[logSW, "Distribution of Logical Dictionaries:"L];
    Put.CR[logSW];
    FOR i IN [0..MaxLogicalDict] DO
      IF logicalCt[i] # 0 THEN OutLog[totalCt, logicalCt[i], i, logSW]
      ELSE IF logicalCt[MIN[i+1,MaxLogicalDict]] # 0 THEN Put.CR[logSW];
      ENDLOOP;

    Put.CR[logSW]; Put.CR[logSW]; Put.CR[logSW];
    JDBMSDefs.heap.FREE[@entry];
    END;  --MakeStats

  --OutLog
  --Writes one table row to logSW: the index i, the count smTotal, and
  --smTotal as a percentage of total with one decimal digit (truncated, not
  --rounded), followed by a carriage return.
  --MakeStats calls this only for non-zero counts, so total is at least 1 and
  --the division cannot be by zero.
  OutLog: PROCEDURE [total, smTotal: LONG CARDINAL, i: CARDINAL, logSW: Window.Handle] =
    BEGIN
    temp: LONG CARDINAL _ 0;
    ones,dec: LONG CARDINAL _ 0;
    rowFormat: Format.NumberFormat _
      [base: 10, zerofill: FALSE, unsigned: TRUE, columns: 10];
    Put.Number[logSW, i, rowFormat];
    Put.LongNumber[logSW, smTotal, rowFormat];
    --Tenths of a percent.  This equals the former (smTotal*10000/total)/10
    --under truncating division, but the smaller multiplier leaves ten times
    --more headroom before the LONG CARDINAL product overflows.
    temp _ smTotal*1000/total;
    [ones, dec] _ Inline.LongDivMod[temp, 10];
    Put.LongNumber[logSW, ones, rowFormat];
    Put.Text[logSW, "."L];
    Put.LongDecimal[logSW, dec];
    Put.Text[logSW, "%"L];
    Put.CR[logSW];
    END;  --of OutLog

  END.  --of DictionaryStats

-- LOG
-- M.Ando 26-Jul-83 14:16:12  Converted to Trinity