--DictionaryStatsImpl.mesa
--Last edit by : S.Umehara for OIS to CES conversion, 12-Jun-84 0:44:57 PDT
--Calculates statistics on the master dictionary
DIRECTORY
DictionaryStatsDefs USING [],
Environment USING [Block],
Format USING [NumberFormat],
Inline USING [LongDivMod],
JDBMSDefs USING [heap],
CESDictDataDefs USING [DictBytesPerEntry, Entry],
Put USING [CR, Line, LongDecimal, LongNumber, Number, Text],
Stream USING [CompletionCode, GetBlock, Handle, SetPosition],
Window USING [Handle];
DictionaryStatsImpl: PROGRAM
IMPORTS Inline, JDBMSDefs, Put, Stream
EXPORTS DictionaryStatsDefs =
BEGIN
--Constants
--Upper bounds for the histogram arrays declared in MakeStats, plus the
--sentinel value MakeStats compares against while scanning kana/kanji strings.
MaxKana: CARDINAL = 14; -- kana scan stops when the index reaches this; top index of kanaLengthCt
MaxKanji: CARDINAL = 7; -- kanji scan stops when the index reaches this; top index of kanjiLengthCt
onesbyte: CARDINAL = 377B; -- octal 377 (all eight bits set); a kana/kanji code equal to this ends the scan
MaxFreq: CARDINAL = 10; --[0..9]
MaxCat: CARDINAL = 26B; -- top index of catCt (octal); old Data is 23B changed by M.Ando
MaxPos: CARDINAL = 377B; -- top index of posCt (octal)
MaxLogicalDict: CARDINAL = 17B; -- top index of logicalCt (octal)
-- MakeStats
--   Reads the master dictionary stream from position 0, one fixed-size entry
--   (CESDictDataDefs.DictBytesPerEntry bytes) at a time, and tallies six
--   histograms: kana-string length, kanji-string length, part of speech,
--   category, frequency and logical dictionary.  The histograms are then
--   written to the log window, one OutLog row per non-empty bucket.
--
--   masterDictStrH : stream holding the dictionary entries; it is repositioned
--                    to 0 before reading.
--   logSW          : window that receives the report.
--
--   A string's length is the index of the first element whose code equals
--   onesbyte, or MaxKana / MaxKanji when no such element is found before
--   the scan limit.
MakeStats: PUBLIC PROCEDURE
  [masterDictStrH: Stream.Handle, logSW: Window.Handle] =
  BEGIN
  why: Stream.CompletionCode;
  nBytes: CARDINAL;
  dataBlock: Environment.Block;
  entry: LONG POINTER TO CESDictDataDefs.Entry ←
    JDBMSDefs.heap.NEW[CESDictDataDefs.Entry];
  totalCt: LONG CARDINAL ← 0;
  kanaLengthCt: ARRAY [0..MaxKana] OF LONG CARDINAL ← ALL[0];
  kanjiLengthCt: ARRAY [0..MaxKanji] OF LONG CARDINAL ← ALL[0];
  posCt: ARRAY [0..MaxPos] OF LONG CARDINAL ← ALL[0];
  catCt: ARRAY [0..MaxCat] OF LONG CARDINAL ← ALL[0];
  freqCt: ARRAY [0..MaxFreq] OF LONG CARDINAL ← ALL[0];
  logicalCt: ARRAY [0..MaxLogicalDict] OF LONG CARDINAL ← ALL[0];
  i: CARDINAL ← 0;

  -- Every GetBlock call overwrites the same heap-allocated entry record.
  dataBlock.startIndex ← 0;
  dataBlock.stopIndexPlusOne ← CESDictDataDefs.DictBytesPerEntry;
  dataBlock.blockPointer ← LOOPHOLE[entry];
  Stream.SetPosition[masterDictStrH, LONG[0]];

  DO
    [nBytes,why,] ← Stream.GetBlock[masterDictStrH,dataBlock];
    -- NOTE(review): an entry delivered by the same call that reports
    -- endOfStream is not counted; confirm GetBlock never does that for a
    -- complete final entry.
    IF why = endOfStream THEN EXIT;
    totalCt ← totalCt+1;

    -- Kana length.  The index must restart at 0 for every entry: previously
    -- it was left holding the kanji length of the preceding entry, so every
    -- kana scan after the first one started part way into the string.
    i ← 0;
    DO
      IF entry.kana[i] = onesbyte THEN EXIT;
      i ← i+1;
      IF i = MaxKana THEN EXIT;
      ENDLOOP;
    kanaLengthCt[i] ← kanaLengthCt[i]+1;

    -- Kanji length, same scheme.
    i ← 0;
    DO
      IF entry.kanji[i].code = onesbyte THEN EXIT;
      i ← i+1;
      IF i = MaxKanji THEN EXIT;
      ENDLOOP;
    kanjiLengthCt[i] ← kanjiLengthCt[i]+1;

    posCt[entry.pos] ← posCt[entry.pos]+1;
    catCt[entry.cat] ← catCt[entry.cat]+1;
    freqCt[entry.freq] ← freqCt[entry.freq]+1;
    logicalCt[entry.logicalDict] ← logicalCt[entry.logicalDict]+1;
    ENDLOOP;

  --Print the statistics
  -- In each section below an empty bucket prints nothing, except that an
  -- empty bucket immediately followed by a non-empty one emits a blank line.
  Put.CR[logSW]; Put.CR[logSW]; Put.CR[logSW];
  Put.Text[logSW, "Total Number of Entries: "L];
  Put.LongDecimal[logSW, totalCt];

  Put.CR[logSW]; Put.CR[logSW]; Put.CR[logSW];
  Put.Line[logSW, "Distribution of Kana-String Lengths:"L];
  Put.CR[logSW];
  FOR i IN [0..MaxKana] DO
    IF kanaLengthCt[i] # 0 THEN
      OutLog[totalCt, kanaLengthCt[i], i, logSW]
    ELSE
      IF kanaLengthCt[MIN[i+1,MaxKana]] # 0 THEN Put.CR[logSW];
    ENDLOOP;

  Put.CR[logSW]; Put.CR[logSW]; Put.CR[logSW];
  Put.Line[logSW, "Distribution of Kanji-String Lengths:"L];
  Put.CR[logSW];
  FOR i IN [0..MaxKanji] DO
    IF kanjiLengthCt[i] # 0 THEN
      OutLog[totalCt, kanjiLengthCt[i], i, logSW]
    ELSE
      IF kanjiLengthCt[MIN[i+1,MaxKanji]] # 0 THEN Put.CR[logSW];
    ENDLOOP;

  Put.CR[logSW]; Put.CR[logSW]; Put.CR[logSW];
  Put.Line[logSW, "Distribution of Parts of Speech:"L];
  Put.CR[logSW];
  FOR i IN [0..MaxPos] DO
    IF posCt[i] # 0 THEN
      OutLog[totalCt, posCt[i], i, logSW]
    ELSE
      IF posCt[MIN[i+1,MaxPos]] # 0 THEN Put.CR[logSW];
    ENDLOOP;

  Put.CR[logSW]; Put.CR[logSW]; Put.CR[logSW];
  Put.Line[logSW, "Distribution of Categories:"L];
  Put.CR[logSW];
  FOR i IN [0..MaxCat] DO
    IF catCt[i] # 0 THEN
      OutLog[totalCt, catCt[i], i, logSW]
    ELSE
      IF catCt[MIN[i+1,MaxCat]] # 0 THEN Put.CR[logSW];
    ENDLOOP;

  Put.CR[logSW]; Put.CR[logSW]; Put.CR[logSW];
  Put.Line[logSW, "Distribution of Frequencies:"L];
  Put.CR[logSW];
  FOR i IN [0..MaxFreq] DO
    IF freqCt[i] # 0 THEN
      OutLog[totalCt, freqCt[i], i, logSW]
    ELSE
      IF freqCt[MIN[i+1,MaxFreq]] # 0 THEN Put.CR[logSW];
    ENDLOOP;

  Put.CR[logSW]; Put.CR[logSW]; Put.CR[logSW];
  Put.Line[logSW, "Distribution of Logical Dictionaries:"L];
  Put.CR[logSW];
  FOR i IN [0..MaxLogicalDict] DO
    IF logicalCt[i] # 0 THEN
      OutLog[totalCt, logicalCt[i], i, logSW]
    ELSE
      IF logicalCt[MIN[i+1,MaxLogicalDict]] # 0 THEN Put.CR[logSW];
    ENDLOOP;

  Put.CR[logSW]; Put.CR[logSW]; Put.CR[logSW];
  -- Release the entry buffer allocated at the top of the procedure.
  JDBMSDefs.heap.FREE[@entry];
  END; --MakeStats
-- OutLog
--   Writes one report row to logSW: the bucket index i, the bucket count
--   smTotal, and smTotal as a percentage of total with one decimal digit
--   (truncated, not rounded), followed by a carriage return.
--
--   total   : overall entry count; must be non-zero (the callers in this
--             module only call OutLog for a non-empty bucket, so total >= 1).
--   smTotal : count for this bucket.
--   i       : bucket index printed in the first column.
--   logSW   : window that receives the row.
OutLog: PROCEDURE
  [total, smTotal: LONG CARDINAL, i: CARDINAL, logSW: Window.Handle] =
  BEGIN
  temp: LONG CARDINAL ← 0;
  ones,dec: LONG CARDINAL ← 0;
  rowFormat: Format.NumberFormat ←
    [base: 10, zerofill: FALSE, unsigned: TRUE, columns: 10];
  Put.Number[logSW, i, rowFormat];
  Put.LongNumber[logSW, smTotal, rowFormat];
  -- temp is the share in tenths of a percent.  The earlier form
  -- (smTotal*10000/total)/10 truncates to exactly the same value, but its
  -- intermediate product wraps in a LONG CARDINAL ten times sooner;
  -- multiplying by 1000 directly keeps the result and widens the safe range.
  temp ← (smTotal*1000)/total;
  -- Split into the whole-percent part and the single decimal digit.
  [ones, dec] ← Inline.LongDivMod[temp, 10];
  Put.LongNumber[logSW, ones, rowFormat];
  Put.Text[logSW, "."L];
  Put.LongDecimal[logSW, dec];
  Put.Text[logSW, "%"L];
  Put.CR[logSW];
  END; --of OutLog
END. --of DictionaryStats
LOG
M.Ando 26-Jul-83 14:16:12 Converted to Trinity