-- CreateLookupImpl2.mesa
-- ryu 27-Jun-84 19:48:39
--
-- Builds a lookup dictionary and its kana index from a master dictionary stream.
--   Pass 1 reads master entries, groups consecutive entries with equal kana
--          (a "homoset"), writes each homoset to the lookup stream and grows a
--          three level index tree keyed on the first three kana codes.
--   Pass 2 collapses small index subtrees into single external nodes.
--   Pass 3 writes the index tree to the index stream.

DIRECTORY
  CESDictDataDefs USING [DictBytesPerEntry, Entry],
  JLispLookupFile USING [MaxKanaPerEntry, MaxKanjiPerEntry, LEntry, DictPtr],
  CharDefs USING [Char, Code],
  PhonicDefs USING [Phonics],
  CreateLookupDictDefs,
  Heap USING [Create, Delete],
  Put USING [Text, Line, Decimal],
  Stream USING [Block, CompletionCode, Handle, PutByte, PutWord, GetBlock, SetPosition],
  Space USING [PageCount],
  Window USING [Handle];

CreateLookupImpl2: PROGRAM
  IMPORTS Stream, Heap, Put
  EXPORTS CreateLookupDictDefs =
  BEGIN

  -- Types

  -- One node of the index tree.  Internal nodes refer to a chain of child nodes
  -- through lower; external nodes carry a pointer into the lookup dictionary.
  IndexRecord: TYPE = RECORD [
    kana: CharDefs.Code,
    flag: {internal, external} _ internal,
    count: CARDINAL _ 0,                      -- entries counted under this node
    next: LONG POINTER TO IndexRecord _ NIL,  -- next sibling on the same level
    lower: LONG POINTER TO IndexRecord _ NIL, -- first node of the next level
    firstI: CARDINAL _ 0,                     -- index number of the first child written
    lastI: CARDINAL _ 0,                      -- index number of the last child written
    dictPtr: JLispLookupFile.DictPtr _ [0, , 0]];

  -- A run of entries in pLEntry that share one pos value.
  Posset: TYPE = RECORD [
    pos: CARDINAL[0..377B] _ 0,
    firstE: CARDINAL _ 0,
    lastE: CARDINAL _ 0 ];

  -- Constants

  Onesbyte: CARDINAL = 377B;
  nullPhonic: CharDefs.Code = PhonicDefs.Phonics[nullPhonic].ORD;
  nullPtr: JLispLookupFile.DictPtr = [0, , 0];
  maxEntries : CARDINAL = 134;
  maxIndexCount: CARDINAL = 30;
  maxPossets : CARDINAL = 22;

  -- Variables

  pRootIndex, pI0, pI1, pI2: LONG POINTER TO IndexRecord _ NIL;
  currentIndex: CARDINAL _ 0;
  kana0, kana1, kana2: CharDefs.Code _ nullPhonic;

  nKanaCodes : CARDINAL _ 0;
  KanaCode : PACKED ARRAY [0..JLispLookupFile.MaxKanaPerEntry) OF CharDefs.Code;
  kanjiArray: ARRAY [0..JLispLookupFile.MaxKanjiPerEntry) OF CharDefs.Char;
  nKanjis: CARDINAL _ 0;

  mEntry: CESDictDataDefs.Entry;
  pMEntry: LONG POINTER TO CESDictDataDefs.Entry _ @mEntry;
  inBlock: Stream.Block _ [LOOPHOLE [LONG[@mEntry]], 0, CESDictDataDefs.DictBytesPerEntry];
  nBytes: CARDINAL _ 0;

  currentDictPtr, oldDictPtr: JLispLookupFile.DictPtr;

  nLEntries : CARDINAL _ 0;
  pLEntry : ARRAY [0 .. maxEntries] OF LONG POINTER TO JLispLookupFile.LEntry;
  nPossets : CARDINAL _ 0;
  posset : ARRAY [0 .. maxPossets] OF Posset _ ALL [ [0, 0, 0] ];

  myZone: UNCOUNTED ZONE;
  zoneSize: Space.PageCount = 200;

  firstTime: BOOLEAN;
  why: Stream.CompletionCode;

  -- Create is the Main Line of this module
  --   masterStrH  master dictionary, read one fixed size entry at a time
  --   lookupStrH  receives the homoset records
  --   indexStrH   receives the index header and index nodes
  --   msgSW       not referenced in this module
  --   logSW       receives progress and error text
  -- Returns nEntry (see the note at the main line) and ctSkipped, which is
  -- always returned as 0.
  CreateLookupDict: PUBLIC PROCEDURE [masterStrH: Stream.Handle, lookupStrH: Stream.Handle,
    indexStrH: Stream.Handle, msgSW, logSW: Window.Handle]
    RETURNS [nEntry, ctSkipped: LONG CARDINAL] =
    BEGIN

    -- Initialize
    -- Creates the working zone, resets all per run module state and writes the
    -- leading zero word of the lookup stream (hence relAddr starts at 1).
    Initialize: PROCEDURE[] =
      BEGIN
      myZone _ Heap.Create[initial: zoneSize, increment: 10, threshold: 16, checking: TRUE];
      KanaCode _ ALL [nullPhonic];
      -- The module variables outlive a call, and the zone they point into is
      -- deleted at the end of each call, so every run must start from scratch.
      kana0 _ nullPhonic; kana1 _ nullPhonic; kana2 _ nullPhonic;
      pRootIndex _ NIL; pI0 _ NIL; pI1 _ NIL; pI2 _ NIL;
      nKanaCodes _ 0; nKanjis _ 0; nPossets _ 0; currentIndex _ 0;
      nLEntries _ 0;
      pLEntry _ ALL [NIL];
      currentDictPtr.dictPageNo _ 0;
      currentDictPtr.relAddr _ 1;
      oldDictPtr _ currentDictPtr;
      Stream.SetPosition[indexStrH, LONG[0] ];
      Stream.PutWord[lookupStrH, 0];
      firstTime _ TRUE;
      END;

    -- Pass1
    -- Reads master entries until end of stream and adds every entry that
    -- passes the pos and frequency filter.
    Pass1: PROCEDURE[] =
      BEGIN
      DO
        [nBytes, why] _ Stream.GetBlock[masterStrH, inBlock];
        IF why = endOfStream THEN { LastEntry[pMEntry]; EXIT };
        -- earlier filter: (pMEntry.freq >= 8 OR pMEntry.kanji[1].code.ORD = Onesbyte AND pMEntry.freq >= 5)
        IF pMEntry.pos >= 1 AND pMEntry.pos <= 60B AND
          (pMEntry.kanji[1].code.ORD = Onesbyte OR pMEntry.freq >= 4)
          THEN AddEntry[pMEntry];
        ENDLOOP;
      Put.Line[logSW, "End of Pass 1"L];
      END;

    -- AddEntry
    -- Routes an entry to the current homoset or starts a new one.
    AddEntry: PROCEDURE [pMEntry: LONG POINTER TO CESDictDataDefs.Entry] =
      BEGIN
      ConvertNullPhonic[pMEntry];
      -- The very first entry always opens a homoset, even if its kana happens
      -- to equal the initial all null KanaCode; otherwise pI0, pI1 and pI2
      -- would still be NIL when SameHomoset updates their counts.
      IF ~firstTime AND EqKana[KanaCode, pMEntry.kana]
        THEN SameHomoset[pMEntry]
        ELSE DifferentHomoset[pMEntry];
      END;

    -- ConvertNullPhonic
    -- Rewrites every Onesbyte kana code of the entry to nullPhonic, in place.
    ConvertNullPhonic: PROCEDURE [pMEntry: LONG POINTER TO CESDictDataDefs.Entry] =
      BEGIN
      FOR i: CARDINAL IN [0..JLispLookupFile.MaxKanaPerEntry) DO
        IF pMEntry.kana[i] = Onesbyte THEN pMEntry.kana[i] _ nullPhonic;
        ENDLOOP;
      END;

    -- EqKana
    -- TRUE when both kana strings agree up to and including the first
    -- nullPhonic, or over their whole length if there is none.
    EqKana: PROCEDURE[a, b: PACKED ARRAY [0..JLispLookupFile.MaxKanaPerEntry) OF CharDefs.Code]
      RETURNS[BOOLEAN] =
      BEGIN
      FOR i: CARDINAL IN [0..JLispLookupFile.MaxKanaPerEntry) DO
        IF a[i] # b[i] THEN RETURN[FALSE];
        IF a[i] = nullPhonic THEN RETURN[TRUE];
        ENDLOOP;
      RETURN[TRUE]
      END;

    -- SameHomoset
    -- Appends the entry to the current homoset, or logs the kana codes of the
    -- homoset and drops the entry when the entry table is full.
    -- NOTE(review): dropped entries are not reflected in ctSkipped; confirm
    -- whether they should be.
    SameHomoset: PROCEDURE [pMEntry: LONG POINTER TO CESDictDataDefs.Entry] =
      BEGIN
      IF nLEntries >= maxEntries THEN {
        Put.Text[logSW, "Entry table overflow: "L];
        -- Separator first, so that no nKanaCodes-1 is evaluated when
        -- nKanaCodes is 0; the text produced is otherwise unchanged.
        FOR i: CARDINAL IN [0 .. nKanaCodes) DO
          IF i # 0 THEN Put.Text[logSW, ", "];
          Put.Decimal[logSW, KanaCode[i]];
          ENDLOOP;
        Put.Line[logSW, " "L] }
      ELSE {
        pLEntry[nLEntries] _ ConvertEntry[pMEntry];
        nLEntries _ nLEntries + 1;
        pI0.count _ pI0.count + 1;
        pI1.count _ pI1.count + 1;
        pI2.count _ pI2.count + 1;
        nEntry _ nEntry + 1 };
      END;

    -- ConvertEntry
    -- Allocates a lookup entry in myZone and copies pos, freq and the kanji
    -- of the master entry into it.  nKanjis becomes the index of the last
    -- kanji whose code is not Onesbyte, plus one.
    ConvertEntry: PROCEDURE [pMEntry: LONG POINTER TO CESDictDataDefs.Entry]
      RETURNS [pNew: LONG POINTER TO JLispLookupFile.LEntry] =
      BEGIN
      nKanjis _ 0;  -- module level scratch; must not carry over from the previous entry
      FOR i: CARDINAL IN [0..JLispLookupFile.MaxKanjiPerEntry) DO
        kanjiArray[i] _ pMEntry.kanji[i];
        IF kanjiArray[i].code # Onesbyte THEN nKanjis _ i + 1;
        ENDLOOP;
      pNew _ myZone.NEW[JLispLookupFile.LEntry[nKanjis]];
      pNew.pos _ pMEntry.pos;
      pNew.pre _ 1;
      pNew.freq _ pMEntry.freq;
      pNew.nKanjis _ nKanjis;
      FOR i: CARDINAL IN [0..nKanjis) DO
        pNew.kanji[i] _ pMEntry.kanji[i]
        ENDLOOP;
      RETURN [pNew];
      END;

    -- DifferentHomoset
    -- Flushes the previous homoset (if any), then starts a new one with this
    -- entry.  The flush must precede SetupKanaCodes because it still needs the
    -- previous KanaCode and nKanaCodes.
    DifferentHomoset: PROCEDURE [pMEntry: LONG POINTER TO CESDictDataDefs.Entry] =
      BEGIN
      IF ~firstTime THEN OutputPrevHomoset[];
      SetupKanaCodes[pMEntry];
      ClearAllLEntries[];
      pLEntry[0] _ ConvertEntry[pMEntry];
      nLEntries _ 1;
      pI0.count _ pI0.count + 1;
      pI1.count _ pI1.count + 1;
      pI2.count _ pI2.count + 1;
      nEntry _ nEntry + 1;
      END;

    -- OutputPrevHomoset
    -- Writes one homoset record to the lookup stream:
    --   recsize, nKanaCodes, the kana codes (padded with nullPhonic to an even
    --   byte count), nPossets, then each posset.
    -- Finally advances currentDictPtr by recsize.
    OutputPrevHomoset: PROCEDURE =
      BEGIN
      recsize: CARDINAL;
      SetupPossets[];
      recsize _ TotalRecsize[];
      Stream.PutWord[lookupStrH, recsize];
      Stream.PutWord[lookupStrH, nKanaCodes];
      FOR i: CARDINAL IN [0..nKanaCodes) DO
        Stream.PutByte[lookupStrH, KanaCode[i]]
        ENDLOOP;
      IF nKanaCodes MOD 2 # 0 THEN Stream.PutByte[lookupStrH, nullPhonic];
      Stream.PutWord[lookupStrH, nPossets];
      FOR i: CARDINAL IN [0.. nPossets) DO
        OutputPosset[ posset[i] ];
        ENDLOOP;
      AdvancePtr[recsize];
      END;

    -- SetupPossets
    -- Sorts the current entries by pos and records one Posset per run of
    -- equal pos values.  Requires nLEntries >= 1.
    -- NOTE(review): posset has maxPossets+1 slots and nothing here checks
    -- nPossets against that bound; confirm that a homoset can never hold more
    -- distinct pos values than that.
    SetupPossets: PROCEDURE[] =
      BEGIN
      SortPos[0, nLEntries-1];
      nPossets _ 1;
      posset[0].pos _ pLEntry[0].pos;
      posset[0].firstE _ 0;
      FOR i: CARDINAL IN [1..nLEntries) DO
        IF pLEntry[i].pos ~= posset[nPossets-1].pos THEN {
          posset[nPossets-1].lastE _ i-1;
          posset[nPossets].pos _ pLEntry[i].pos;
          posset[nPossets].firstE _ i;
          nPossets _ nPossets + 1;
          }
        ENDLOOP;
      posset[nPossets-1].lastE _ nLEntries-1;
      END;

    -- TotalRecsize
    -- Size of the current homoset record in words.
    TotalRecsize: PROCEDURE[] RETURNS [recsize: CARDINAL] =
      BEGIN
      -- 3 for recsize, nKanaCodes, and nPossets;
      -- one word per posset for pos and nEntries
      recsize _ CeilQuotient[nKanaCodes, 2] + 3 + nPossets;
      FOR i: CARDINAL IN [0..nLEntries) DO
        -- 2 for nKanjis, and pre & freq
        recsize _ recsize + pLEntry[i].nKanjis + 2
        ENDLOOP;
      RETURN[recsize];
      END;

    -- SortPos
    -- Sorts pLEntry[l..r] into ascending pos order by repeated adjacent
    -- exchanges.  Entries with equal pos keep their relative order because the
    -- comparison is strict.
    SortPos: PROCEDURE[l, r: CARDINAL] =
      BEGIN
      temp: LONG POINTER TO JLispLookupFile.LEntry;
      FOR i: CARDINAL IN (l..r] DO
        FOR j: CARDINAL DECREASING IN [i..r] DO
          IF pLEntry[j].pos < pLEntry[j-1].pos THEN {
            temp _ pLEntry[j];
            pLEntry[j] _ pLEntry[j-1];
            pLEntry[j-1] _ temp;}
          ENDLOOP
        ENDLOOP
      END;

    -- Earlier recursive version of SortPos, kept for reference:
    -- SortPos: PROCEDURE[l, r: CARDINAL] =
    --   BEGIN
    --   i, j: CARDINAL;
    --   midpos: [0..377B];
    --   temp: LONG POINTER TO JLispLookupFile.LEntry;
    --
    --   IF l>=r THEN RETURN;
    --   i _ l; j _ r;
    --   midpos _ pLEntry[ (l+r)/2 ].pos;
    --   UNTIL i>j DO
    --     WHILE pLEntry[i].pos < midpos DO i _ i + 1 ENDLOOP;
    --     WHILE pLEntry[j].pos > midpos DO j _ j - 1 ENDLOOP;
    --     IF i<=j THEN {
    --       temp _ pLEntry[i]; pLEntry[i] _ pLEntry[j]; pLEntry[j] _ temp;
    --       i _ i + 1; j _ j - 1; }
    --     ENDLOOP;
    --   SortPos[l, j];
    --   SortPos[i, r];
    --   END;

    -- OutputPosset
    -- Writes the pos byte, the entry count byte and then every entry of the run.
    OutputPosset: PROCEDURE[ps: Posset] =
      BEGIN
      nEntries: CARDINAL[0..377B];
      nEntries _ ps.lastE - ps.firstE + 1;
      Stream.PutByte[lookupStrH, ps.pos];
      Stream.PutByte[lookupStrH, nEntries];
      FOR i: CARDINAL IN [ps.firstE..ps.lastE] DO
        OutputEntry[ pLEntry[i] ];
        ENDLOOP;
      END;

    -- OutputEntry
    -- Writes nKanjis as a word, two bytes (chset, code) per kanji, then the
    -- pre and freq bytes.
    OutputEntry: PROCEDURE[pE: LONG POINTER TO JLispLookupFile.LEntry] =
      BEGIN
      Stream.PutWord[lookupStrH, pE.nKanjis];
      FOR i: CARDINAL IN [0..pE.nKanjis) DO
        Stream.PutByte[lookupStrH, pE.kanji[i].chset];
        Stream.PutByte[lookupStrH, pE.kanji[i].code]
        ENDLOOP;
      Stream.PutByte[lookupStrH, pE.pre];
      Stream.PutByte[lookupStrH, pE.freq];
      END;

    -- OutputDictPtr
    -- Writes a dictionary pointer as a page number word followed by the
    -- padding and relAddr bytes.
    OutputDictPtr: PROCEDURE [strH: Stream.Handle, aDictPtr: JLispLookupFile.DictPtr] =
      BEGIN
      Stream.PutWord[strH, aDictPtr.dictPageNo];
      Stream.PutByte[strH, aDictPtr.padding];
      Stream.PutByte[strH, aDictPtr.relAddr];
      END;

    -- AdvancePtr
    -- Moves currentDictPtr forward by adv, carrying into dictPageNo every 256.
    AdvancePtr: PROCEDURE[adv: CARDINAL] =
      BEGIN
      total: CARDINAL _ CARDINAL[currentDictPtr.relAddr] + adv;
      currentDictPtr.dictPageNo _ currentDictPtr.dictPageNo + total/256;
      currentDictPtr.relAddr _ total MOD 256;
      END;

    -- ClearAllLEntries
    -- Returns the entries of the current homoset to myZone.
    ClearAllLEntries: PROCEDURE =
      BEGIN
      FOR i: CARDINAL IN [0.. nLEntries) DO
        myZone.FREE[ @pLEntry[i] ];
        ENDLOOP;
      END;

    -- SetupKanaCodes
    -- Copies the kana of the entry into KanaCode, recomputes nKanaCodes and
    -- opens new index nodes at the highest level whose kana code changed.
    SetupKanaCodes: PROCEDURE [pMEntry: LONG POINTER TO CESDictDataDefs.Entry] =
      BEGIN
      nKanaCodes _ 0;  -- must not carry over when the new kana is shorter or empty
      FOR i: CARDINAL IN [0..JLispLookupFile.MaxKanaPerEntry) DO
        KanaCode[i] _ pMEntry.kana[i];
        IF KanaCode[i] # nullPhonic THEN nKanaCodes _ i + 1
        ENDLOOP;
      -- firstTime forces creation of the root chain even when KanaCode[0]
      -- equals the initial kana0.
      IF firstTime OR KanaCode[0] # kana0 THEN NewKana0[]
      ELSE IF KanaCode[1] # kana1 THEN NewKana1[]
      ELSE IF KanaCode[2] # kana2 THEN NewKana2[];
      END;

    -- NewKana0
    -- Starts a new level 0 node together with fresh level 1 and level 2 nodes
    -- below it.  The first level 0 node ever created becomes pRootIndex.
    NewKana0: PROCEDURE [] =
      BEGIN
      new: LONG POINTER TO IndexRecord;
      kana0 _ KanaCode[0];
      kana1 _ KanaCode[1];
      kana2 _ KanaCode[2];
      pI2 _ myZone.NEW[IndexRecord];
      pI2^ _ IndexRecord[kana: KanaCode[2], next: NIL, count: 0, flag: external,
        lower: NIL, dictPtr: currentDictPtr];
      pI1 _ myZone.NEW[IndexRecord];
      pI1^ _ IndexRecord[kana: KanaCode[1], next: NIL, count: 0, lower: pI2,
        flag: internal, dictPtr: nullPtr];
      new _ myZone.NEW[IndexRecord];
      IF firstTime THEN { firstTime _ FALSE; pRootIndex _ new}
      ELSE pI0.next _ new;
      pI0 _ new;
      pI0^ _ IndexRecord[kana: KanaCode[0], next: NIL, count: 0, lower: pI1,
        flag: internal, dictPtr: nullPtr];
      END;

    -- NewKana1
    -- Appends a new level 1 node, with a fresh level 2 node below it, to the
    -- current level 1 chain.
    NewKana1: PROCEDURE [] =
      BEGIN
      new: LONG POINTER TO IndexRecord;
      kana1 _ KanaCode[1];
      kana2 _ KanaCode[2];
      pI2 _ myZone.NEW[IndexRecord];
      pI2^ _ IndexRecord[kana: kana2, next: NIL, count: 0, flag: external,
        dictPtr: currentDictPtr];
      new _ myZone.NEW[IndexRecord];
      pI1.next _ new;
      pI1 _ new;
      pI1^ _ IndexRecord[kana: kana1, next: NIL, count: 0, flag: internal,
        lower: pI2, dictPtr: nullPtr];
      END;

    -- NewKana2
    -- Appends a new level 2 node to the current level 2 chain.
    NewKana2: PROCEDURE [] =
      BEGIN
      new: LONG POINTER TO IndexRecord;
      kana2 _ KanaCode[2];
      new _ myZone.NEW[IndexRecord];
      pI2.next _ new;
      pI2 _ new;
      pI2^ _ IndexRecord[kana: kana2, next: NIL, count: 0, flag: external,
        dictPtr: currentDictPtr];
      END;

    -- LastEntry
    -- Flushes the final homoset at end of stream.  The parameter is unused.
    LastEntry: PROCEDURE [pMEntry: LONG POINTER TO CESDictDataDefs.Entry] =
      BEGIN
      -- Nothing was collected (empty master stream, or every entry filtered
      -- out): SetupPossets would evaluate nLEntries-1 and read pLEntry[0].
      IF nLEntries = 0 THEN RETURN;
      OutputPrevHomoset[];
      ClearAllLEntries[];
      END;

    -- CeilQuotient
    -- x/y rounded up.
    CeilQuotient: PROCEDURE [x, y: CARDINAL] RETURNS[CARDINAL] =
      BEGIN
      q: CARDINAL _ x/y;
      IF x MOD y # 0 THEN q _ q + 1;
      RETURN[q]
      END;

    -- Pass2
    -- Collapses the index tree.  MergeIndex walks the whole sibling chain
    -- itself, so one call on the root covers every level 0 node.
    Pass2: PROCEDURE [] =
      BEGIN
      MergeIndex[pRootIndex];
      Put.Line[logSW, "End of Pass 2"L];
      END;

    -- MergeIndex
    -- For every internal node on the chain starting at pI: merges its children
    -- first, then turns the node into an external one pointing at its first
    -- child's dictionary position when either
    --   its count is at most maxIndexCount (all children are then external), or
    --   it has a single child and that child is external.
    -- A single child that is still internal has no dictionary pointer of its
    -- own, so such a parent must stay internal.
    MergeIndex: PROCEDURE [pI: LONG POINTER TO IndexRecord] =
      BEGIN
      IF pI = NIL OR pI.flag = external THEN RETURN;
      FOR pi: LONG POINTER TO IndexRecord _ pI, pi.next UNTIL pi = NIL DO
        IF pi.flag = internal THEN {
          MergeIndex[pi.lower];
          IF pi.count <= maxIndexCount
            OR (pi.lower.next = NIL AND pi.lower.flag = external) THEN {
            pi.flag _ external;
            pi.dictPtr _ pi.lower.dictPtr;
            -- NOTE(review): the lower chain is dropped without FreeIndex; it
            -- is only reclaimed when myZone is deleted.
            pi.lower _ NIL;
            } }
        ENDLOOP;
      END;

    -- FreeIndex
    -- Frees every node on the sibling chain starting at pI.  Not called from
    -- within this module.
    FreeIndex: PROCEDURE [pI: LONG POINTER TO IndexRecord] =
      BEGIN
      this, next: LONG POINTER TO IndexRecord;
      IF pI = NIL THEN RETURN;
      this _ pI;
      next _ this.next;
      WHILE next ~= NIL DO
        myZone.FREE[ @this ];
        this _ next;
        next _ this.next;
        ENDLOOP;
      myZone.FREE[ @this ];
      END;

    -- Pass3
    -- Writes the index header and all index nodes, then checks the level 0
    -- range predicted for the header against the range actually written.
    Pass3: PROCEDURE [] =
      BEGIN
      level0Count, level0F, level0L: CARDINAL;
      first, last: CARDINAL;
      level0L _ TotalCount[pRootIndex];
      level0Count _ OneLevelCount[pRootIndex];
      -- level 0 nodes are written last, so they occupy the final level0Count numbers
      level0F _ level0L - level0Count + 1;
      OutputIndexHead[level0L, level0F, level0L];
      [first, last] _ OutputIndices[pRootIndex];
      IF first ~= level0F THEN {
        Put.Text[logSW, "level0F calculation error ... Calculated = "L];
        Put.Decimal[logSW, level0F];
        Put.Text[logSW, ", but real value = "L];
        Put.Decimal[logSW, first];
        Put.Line[logSW, " "L] };
      IF last ~= level0L THEN {
        Put.Text[logSW, "level0L calculation error ... Calculated = "L];
        Put.Decimal[logSW, level0L];
        Put.Text[logSW, ", but real value = "L];
        Put.Decimal[logSW, last];
        Put.Line[logSW, " "L] };
      Put.Line[logSW, "End of Pass 3"L];
      END;

    -- OneLevelCount
    -- Number of nodes on the sibling chain starting at pI.
    OneLevelCount: PROCEDURE [pI: LONG POINTER TO IndexRecord] RETURNS[count: CARDINAL] =
      BEGIN
      count _ 0;
      FOR pi: LONG POINTER TO IndexRecord _ pI, pi.next UNTIL pi = NIL DO
        count _ count + 1;
        ENDLOOP;
      RETURN[count];
      END;

    -- TotalCount
    -- Number of nodes on the chain starting at pI plus all nodes below its
    -- internal members.
    TotalCount: PROCEDURE [pI: LONG POINTER TO IndexRecord] RETURNS[count: CARDINAL] =
      BEGIN
      count _ 0;
      FOR pi: LONG POINTER TO IndexRecord _ pI, pi.next UNTIL pi = NIL DO
        count _ count + 1;
        IF pi.flag = internal THEN count _ count + TotalCount[pi.lower];
        ENDLOOP;
      RETURN [count];
      END;

    -- OutputIndexHead
    -- Writes the three header words of the index stream and sets the number
    -- of the next index node to 1.
    OutputIndexHead: PROCEDURE [size, level0F, level0L: CARDINAL] =
      BEGIN
      Stream.PutWord[indexStrH, size];
      Stream.PutWord[indexStrH, level0F];
      Stream.PutWord[indexStrH, level0L];
      currentIndex _ 1;
      END;

    -- OutputIndices
    -- Writes the subtrees below every internal node of the chain first,
    -- recording each child range in firstI and lastI, and then the chain
    -- itself.  Returns the index numbers of the first and last node of the chain.
    OutputIndices: PROCEDURE [pI: LONG POINTER TO IndexRecord] RETURNS[first,last: CARDINAL] =
      BEGIN
      FOR pi: LONG POINTER TO IndexRecord _ pI, pi.next UNTIL pi = NIL DO
        IF pi.flag = internal THEN
          [pi.firstI, pi.lastI] _ OutputIndices[pi.lower];
        ENDLOOP;
      first _ currentIndex;
      FOR pi: LONG POINTER TO IndexRecord _ pI, pi.next UNTIL pi = NIL DO
        OutputIndex[pi];
        ENDLOOP;
      last _ currentIndex - 1;
      RETURN [first, last];
      END;

    -- OutputIndex
    -- Writes one index node: kana byte, flag byte, then either the child
    -- range (internal) or the dictionary pointer (external).
    OutputIndex: PROCEDURE[pI: LONG POINTER TO IndexRecord] =
      BEGIN
      Stream.PutByte[indexStrH, pI.kana];
      Stream.PutByte[indexStrH, pI.flag.ORD];
      IF pI.flag = internal THEN {
        Stream.PutWord[indexStrH, pI.firstI];
        Stream.PutWord[indexStrH, pI.lastI] }
      ELSE OutputDictPtr[indexStrH, pI.dictPtr];
      currentIndex _ currentIndex + 1;
      END;

    -- main line of Create
    Initialize[];
    -- NOTE(review): nEntry starts at 1 and is incremented once per stored
    -- entry, so the value returned is one more than the number stored;
    -- confirm this is what callers expect.
    nEntry _ 1;
    Pass1[];
    Pass2[];
    Pass3[];
    Heap.Delete[myZone,TRUE];
    RETURN[nEntry, 0];
    END; -- of Create

  END.