--DictionaryStatsImpl.mesa
--Last edit by : S.Umehara for OIS to CES conversion, 12-Jun-84  0:44:57 PDT

--Calculates statistics on the master dictionary

DIRECTORY
  DictionaryStatsDefs USING [],
  Environment USING [Block],
  Format USING [NumberFormat],
  Inline USING [LongDivMod],
  JDBMSDefs USING [heap],
  CESDictDataDefs USING [DictBytesPerEntry, Entry],
  Put USING [CR, Line, LongDecimal, LongNumber, Number, Text],
  Stream USING [CompletionCode, GetBlock, Handle, SetPosition],
  Window USING [Handle];

DictionaryStatsImpl: PROGRAM 
  IMPORTS Inline, JDBMSDefs, Put, Stream 
  EXPORTS DictionaryStatsDefs =

BEGIN

--Constants
  -- Bounds for the counting arrays declared in MakeStats, plus the pad byte
  -- that terminates a kana or kanji string.  A trailing B marks an octal literal.

  -- Scan limit for the kana string and top index of kanaLengthCt.
  MaxKana: CARDINAL = 14;
  -- Scan limit for the kanji string and top index of kanjiLengthCt.
  MaxKanji: CARDINAL = 7;
  -- 377B is 255 decimal; MakeStats stops a length scan at the first kana byte
  -- or kanji code equal to this value.
  onesbyte: CARDINAL = 377B;
  -- Top index of freqCt.
  -- NOTE(review): the comment below says [0..9], yet freqCt is declared
  -- [0..MaxFreq], which is 11 buckets; confirm which one is intended.
  MaxFreq: CARDINAL = 10;  --[0..9]
  -- Top index of catCt; 26B is 22 decimal.
  MaxCat: CARDINAL = 26B; -- old Data is 23B changed by M.Ando
  -- Top index of posCt; 377B is 255 decimal.
  MaxPos: CARDINAL = 377B;
  -- Top index of logicalCt; 17B is 15 decimal.
  MaxLogicalDict: CARDINAL = 17B;

  -- MakeStats reads the master dictionary stream from its beginning, one
  -- fixed-size entry (CESDictDataDefs.DictBytesPerEntry bytes) at a time, and
  -- tallies each entry by kana length, kanji length, part of speech, category,
  -- frequency and logical dictionary.  The distributions are then written to
  -- the log window.
  --   masterDictStrH: stream holding the dictionary entries; it is repositioned
  --                   to byte 0 before reading.
  --   logSW:          window that receives the report.
  MakeStats: PUBLIC PROCEDURE
      [masterDictStrH: Stream.Handle, logSW: Window.Handle] =
    BEGIN

    why: Stream.CompletionCode;
    nBytes: CARDINAL;
    dataBlock: Environment.Block;
    -- Scratch record that every GetBlock call overwrites; freed before return.
    entry: LONG POINTER TO CESDictDataDefs.Entry ← 
      JDBMSDefs.heap.NEW[CESDictDataDefs.Entry];
    totalCt: LONG CARDINAL ← 0;
    kanaLengthCt:  ARRAY [0..MaxKana] OF LONG CARDINAL ← ALL[0];
    kanjiLengthCt: ARRAY [0..MaxKanji] OF LONG CARDINAL ← ALL[0];
    posCt: ARRAY [0..MaxPos] OF LONG CARDINAL ← ALL[0];
    catCt: ARRAY [0..MaxCat] OF LONG CARDINAL ← ALL[0];
    freqCt: ARRAY [0..MaxFreq] OF LONG CARDINAL ← ALL[0];
    logicalCt: ARRAY [0..MaxLogicalDict] OF LONG CARDINAL ← ALL[0];
    i: CARDINAL ← 0;

    -- Describe the scratch record as a byte block so the stream can fill it.
    dataBlock.startIndex ← 0;
    dataBlock.stopIndexPlusOne ← CESDictDataDefs.DictBytesPerEntry;
    dataBlock.blockPointer ← LOOPHOLE[entry];

    Stream.SetPosition[masterDictStrH, LONG[0]];
    DO
      -- NOTE(review): nBytes is never examined, so a short read that does not
      -- report endOfStream would be tallied as a full entry; confirm whether
      -- that can happen for this stream.
      [nBytes,why,] ← Stream.GetBlock[masterDictStrH,dataBlock];
      IF why = endOfStream THEN EXIT;
      totalCt ← totalCt+1;

      -- Kana length: index of the first pad byte, capped at MaxKana.
      -- The index must restart at 0 for every entry; without this reset the
      -- scan would begin at the kanji length left over from the previous
      -- entry and the kana distribution would be wrong.
      i ← 0;
      DO
        IF entry.kana[i] = onesbyte THEN EXIT
        ELSE
          {i ← i+1;
          IF i = MaxKana THEN EXIT ELSE LOOP;};
      ENDLOOP;
      kanaLengthCt[i] ← kanaLengthCt[i]+1;

      -- Kanji length: index of the first pad code, capped at MaxKanji.
      i ← 0;
      DO
        IF entry.kanji[i].code = onesbyte THEN EXIT
        ELSE
          {i ← i+1;
          IF i = MaxKanji THEN EXIT ELSE LOOP;};
      ENDLOOP;
      kanjiLengthCt[i] ← kanjiLengthCt[i]+1;

      posCt[entry.pos] ← posCt[entry.pos]+1;
      catCt[entry.cat] ← catCt[entry.cat]+1;
      freqCt[entry.freq] ← freqCt[entry.freq]+1;
      logicalCt[entry.logicalDict] ← logicalCt[entry.logicalDict]+1;

    ENDLOOP;

    --Print the statistics
    -- Every section below follows one pattern: a non-empty bucket is written
    -- as a row by OutLog; an empty bucket writes a single blank line only when
    -- the next bucket is non-empty, so a run of empty buckets collapses to one
    -- separator.  MIN clamps the look-ahead at the last bucket.

    Put.CR[logSW]; Put.CR[logSW]; Put.CR[logSW];
    Put.Text[logSW, "Total Number of Entries:  "L];
    Put.LongDecimal[logSW, totalCt];
    Put.CR[logSW]; Put.CR[logSW]; Put.CR[logSW];

    Put.Line[logSW, "Distribution of Kana-String Lengths:"L];
    Put.CR[logSW];
    FOR i IN [0..MaxKana] DO
      IF kanaLengthCt[i] # 0 THEN
        OutLog[totalCt, kanaLengthCt[i], i, logSW]
      ELSE
        IF kanaLengthCt[MIN[i+1,MaxKana]] # 0 THEN Put.CR[logSW];
    ENDLOOP;
    Put.CR[logSW]; Put.CR[logSW]; Put.CR[logSW];

    Put.Line[logSW, "Distribution of Kanji-String Lengths:"L];
    Put.CR[logSW];
    FOR i IN[0..MaxKanji] DO
      IF kanjiLengthCt[i] # 0 THEN
        OutLog[totalCt, kanjiLengthCt[i], i, logSW]
      ELSE
        IF kanjiLengthCt[MIN[i+1,MaxKanji]] # 0 THEN Put.CR[logSW];
    ENDLOOP;
    Put.CR[logSW]; Put.CR[logSW]; Put.CR[logSW]; 

    Put.Line[logSW, "Distribution of Parts of Speech:"L];
    Put.CR[logSW];
    FOR i IN [0..MaxPos] DO
      IF posCt[i] # 0 THEN
        OutLog[totalCt, posCt[i], i, logSW]
      ELSE
        IF posCt[MIN[i+1,MaxPos]] # 0 THEN Put.CR[logSW];
    ENDLOOP;
    Put.CR[logSW]; Put.CR[logSW]; Put.CR[logSW];

    Put.Line[logSW, "Distribution of Categories:"L];
    Put.CR[logSW];
    FOR i IN [0..MaxCat] DO
      IF catCt[i] # 0 THEN
        OutLog[totalCt, catCt[i], i, logSW]
      ELSE
        IF catCt[MIN[i+1,MaxCat]] # 0 THEN Put.CR[logSW];
    ENDLOOP;
    Put.CR[logSW]; Put.CR[logSW]; Put.CR[logSW];

    Put.Line[logSW, "Distribution of Frequencies:"L];
    Put.CR[logSW];
    FOR i IN [0..MaxFreq] DO
      IF freqCt[i] # 0 THEN
        OutLog[totalCt, freqCt[i], i, logSW]
      ELSE
        IF freqCt[MIN[i+1,MaxFreq]] # 0 THEN Put.CR[logSW];
    ENDLOOP;
    Put.CR[logSW]; Put.CR[logSW]; Put.CR[logSW];

    Put.Line[logSW, "Distribution of Logical Dictionaries:"L];
    Put.CR[logSW];
    FOR i IN [0..MaxLogicalDict] DO
      IF logicalCt[i] # 0 THEN
        OutLog[totalCt, logicalCt[i], i, logSW]
      ELSE
        IF logicalCt[MIN[i+1,MaxLogicalDict]] # 0 THEN Put.CR[logSW];
    ENDLOOP;
    Put.CR[logSW]; Put.CR[logSW]; Put.CR[logSW];

    -- Release the scratch record allocated at the top of the procedure.
    JDBMSDefs.heap.FREE[@entry];   
    END; --MakeStats


    -- OutLog writes one report row to logSW: the bucket index i, the bucket
    -- count smTotal, and smTotal as a percentage of total with one decimal
    -- digit (truncated, not rounded), followed by a carriage return.
    --   total:   grand total the percentage is taken against.
    --   smTotal: count for this bucket.
    --   i:       bucket index printed in the first column.
    --   logSW:   window that receives the row.
    OutLog: PROCEDURE
        [total, smTotal: LONG CARDINAL, i: CARDINAL, logSW: Window.Handle] =
      BEGIN
      temp: LONG CARDINAL ← 0;
      ones,dec: LONG CARDINAL ← 0;
      rowFormat: Format.NumberFormat ←
        [base: 10, zerofill: FALSE, unsigned: TRUE, columns: 10]; 

      Put.Number[logSW, i, rowFormat];
      Put.LongNumber[logSW, smTotal, rowFormat];
      -- temp is the share in tenths of a percent.  For unsigned integer
      -- division smTotal*1000/total equals (smTotal*10000/total)/10, but the
      -- smaller multiplier leaves ten times more headroom before the product
      -- overflows a LONG CARDINAL.  A zero total is reported as 0.0% instead
      -- of dividing by zero.
      IF total # 0 THEN temp ← smTotal*1000/total;
      -- Split into the whole-percent part and the single tenths digit.
      [ones, dec] ← Inline.LongDivMod[temp, 10];
      Put.LongNumber[logSW, ones, rowFormat];
      Put.Text[logSW, "."L];
      Put.LongDecimal[logSW, dec];
      Put.Text[logSW, "%"L];
      Put.CR[logSW];

      END; --of OutLog


 END.   --of DictionaryStats   

LOG
  M.Ando 26-Jul-83 14:16:12  Converted to Trinity