DIRECTORY Handwriting, IO, PFS, Rope, Xl USING [Point]; HandwritingImpl: CEDAR MONITOR IMPORTS PFS, IO EXPORTS Handwriting ~ BEGIN OPEN Handwriting; IsTrainingDB: PUBLIC PROC [x: REF] RETURNS [BOOL] = { RETURN [ISTYPE[x, LIST OF NormalizedStrokes]]; }; NarrowTrainingDB: PUBLIC PROC [x: REF] RETURNS [TrainingDB] = { RETURN [NARROW[x, LIST OF NormalizedStrokes]]; }; IsNormalized: PUBLIC PROC [x: REF] RETURNS [BOOL] = { RETURN [ISTYPE[x, NormalizedStrokes]]; }; NarrowNormalized: PUBLIC PROC [x: REF] RETURNS [Normalized] = { RETURN [NARROW[x, NormalizedStrokes]]; }; MCopyStroke: PROC [s: Stroke, dx: INT, dy: INT, f: INT] RETURNS [copy: Stroke ¬ NIL] = { IF s#NIL THEN { lag: Stroke ¬ copy ¬ LIST[[(s.first.x+dx)*f, (s.first.y+dy)*f]]; FOR seg: Stroke ¬ s.rest, seg.rest WHILE seg#NIL DO lag.rest ¬ LIST[[(seg.first.x+dx)*f, (seg.first.y+dy)*f]]; lag ¬ lag.rest ENDLOOP } }; MCopyStrokes: PROC [strokes: Strokes, dx: INT, dy: INT, f: INT] RETURNS [copy: Strokes ¬ NIL] = { IF strokes#NIL THEN { lag: Strokes ¬ copy ¬ LIST[MCopyStroke[strokes.first, dx, dy, f]]; FOR sl: Strokes ¬ strokes.rest, sl.rest WHILE sl#NIL DO lag.rest ¬ LIST[MCopyStroke[sl.first, dx, dy, f]]; lag ¬ lag.rest ENDLOOP } }; normalPoints: NAT = 16; EachPoint: TYPE = RECORD [x, y: INT]; NormalizedSingleStroke: TYPE = ARRAY [0..normalPoints) OF EachPoint; NormalizedStrokes: TYPE = REF NormalizedStrokesRec; NormalizedStrokesRec: TYPE = RECORD [ key: INT ¬ 0, strokes: Strokes ¬ NIL, dx, dy, f: INT, included: BOOL ¬ FALSE, data: SEQUENCE strokeCnt: NAT OF NormalizedSingleStroke ]; idxForLeng: NAT = 1; normalizedLengthConst: INT = 010000H; CopyNormalizedStrokes: PROC [ns: NormalizedStrokes] RETURNS [copy: NormalizedStrokes] = { copy ¬ NEW[NormalizedStrokesRec[ns.strokeCnt]]; copy.key ¬ ns.key; copy.strokes ¬ ns.strokes; --pointer copy because is readonly copy.dx ¬ ns.dx; copy.dy ¬ ns.dy; copy.f ¬ ns.f; copy.included ¬ FALSE; FOR i: NAT IN [0..ns.strokeCnt) DO copy.data[i] ¬ ns.data[i]; ENDLOOP; }; CountStrokes: PROC [strokes: Strokes] RETURNS [cnt: INT ¬ 0] = { FOR ss: Strokes ¬ strokes, ss.rest WHILE ss#NIL DO cnt ¬ cnt+1; ENDLOOP; }; NormalizeStrokes: PROC [strokes: Strokes] RETURNS [ns: NormalizedStrokes] = { factor: INT; totalLeng: INT ¬ 0; strokeCnt, strokeIdx: INT ¬ 0; min: Xl.Point ¬ [INT.LAST, INT.LAST]; max: Xl.Point ¬ [INT.FIRST, INT.FIRST]; av: Xl.Point ¬ [0, 0]; strokeCnt ¬ CountStrokes[strokes]; IF strokeCnt=0 THEN ERROR; ns ¬ NEW[NormalizedStrokesRec[strokeCnt]]; strokeIdx ¬ 0; FOR sl: Strokes ¬ strokes, sl.rest WHILE sl#NIL DO strokeLeng: INT ¬ 0; lastP: Xl.Point ¬ sl.first.first; FOR seg: Stroke ¬ sl.first, seg.rest WHILE seg#NIL DO nextP: Xl.Point ~ seg.first; min.x ¬ MIN[min.x, nextP.x]; min.y ¬ MIN[min.y, nextP.y]; max.x ¬ MAX[max.x, nextP.x]; max.y ¬ MAX[max.y, nextP.y]; strokeLeng ¬ strokeLeng + ABS[lastP.x-nextP.x] + ABS[lastP.y-nextP.y]; lastP ¬ nextP; ENDLOOP; ns.data[strokeIdx][idxForLeng].x ¬ strokeLeng; totalLeng ¬ totalLeng+strokeLeng; strokeIdx ¬ strokeIdx+1; ENDLOOP; ns.dx ¬ -(max.x+min.x)/2; ns.dy ¬-(max.y+min.y)/2; IF (max.x-min.x)<=3 AND (max.y-min.y)<=3 THEN { ns.f ¬ 1; ns.strokes ¬ MCopyStrokes[strokes: strokes, dx: ns.dx, dy: ns.dy, f: 1]; FOR si: NAT IN [1..strokeCnt) DO ns.data[si] ¬ ALL[[0, 0]]; ENDLOOP; RETURN; }; factor ¬ ns.f ¬ MAX[(normalizedLengthConst / MAX[totalLeng, 1]), 1]; strokes ¬ ns.strokes ¬ MCopyStrokes[strokes: strokes, dx: ns.dx, dy: ns.dy, f: factor]; strokeIdx ¬ 0; FOR ss: Strokes ¬ strokes, ss.rest WHILE ss#NIL DO seg: Stroke ¬ ss.first; next: Stroke; actualStrokeLeng: INT ¬ ns.data[strokeIdx][idxForLeng].x*factor; desiredPieceLeng: INT ¬ actualStrokeLeng/(normalPoints-1); sofarLeng: INT ¬ 0; goalLeng: INT ¬ 0; lastP: Xl.Point ¬ seg.first; ns.data[strokeIdx][0].x ¬ lastP.x; ns.data[strokeIdx][0].y ¬ lastP.y; FOR i: NAT IN [1..normalPoints) DO dx, dy, dl: INT ¬ 0; goalLeng ¬ sofarLeng+desiredPieceLeng; DO --eventually skip some segments next ¬ seg.rest; IF next=NIL THEN { dl ¬ 0; EXIT; }; dx ¬ next.first.x-lastP.x; dy ¬ next.first.y-lastP.y; dl ¬ ABS[dx] + ABS[dy]; IF sofarLeng+dl>=goalLeng THEN EXIT --found the right segment ELSE { sofarLeng ¬ sofarLeng+dl; seg ¬ next; lastP ¬ next.first; }; ENDLOOP; IF sofarLeng+dl<=goalLeng OR dl<=0 THEN { IF next#NIL THEN lastP ¬ next.first; } ELSE { desiredSegLeng: INT ¬ (goalLeng-sofarLeng); lastP.x ¬ lastP.x+(desiredSegLeng*dx)/dl; lastP.y ¬ lastP.y+(desiredSegLeng*dy)/dl; }; sofarLeng ¬ goalLeng; ns.data[strokeIdx][i].x ¬ lastP.x; ns.data[strokeIdx][i].y ¬ lastP.y; ENDLOOP; strokeIdx ¬ strokeIdx+1; ENDLOOP; IF strokeCnt>0 THEN { tx, ty: INT ¬ 0; FOR si: INT IN [0..strokeCnt) DO FOR pi: INT IN [0..normalPoints) DO tx ¬ tx + ns.data[si][pi].x; ty ¬ ty + ns.data[si][pi].y; ENDLOOP; ENDLOOP; tx ¬ tx/(normalPoints*strokeCnt); ty ¬ ty/(normalPoints*strokeCnt); FOR si: INT IN [0..strokeCnt) DO FOR pi: INT IN [0..normalPoints) DO ns.data[si][pi].x ¬ ns.data[si][pi].x - tx; ns.data[si][pi].y ¬ ns.data[si][pi].y - ty; ENDLOOP; ENDLOOP; }; }; Normalize: PUBLIC PROC [strokes: Strokes, ch: INT ¬ -1] RETURNS [REF] = { ns: NormalizedStrokes ¬ NormalizeStrokes[strokes]; ns.key ¬ ch; RETURN [ns]; }; ToStrokes: PROC [ns: NormalizedStrokes, sizemax: INT ¬ 40] RETURNS [strokes: Strokes ¬ NIL] = { normalize: INT; min: Xl.Point ¬ [INT.LAST, INT.LAST]; max: Xl.Point ¬ [INT.FIRST, INT.FIRST]; FOR si: INT DECREASING IN [0..ns.strokeCnt) DO FOR pi: INT DECREASING IN [0..normalPoints) DO p: Xl.Point ¬ [ ns.data[si][pi].x, ns.data[si][pi].y ]; min.x ¬ MIN[min.x, p.x]; min.y ¬ MIN[min.y, p.y]; max.x ¬ MAX[max.x, p.x]; max.y ¬ MAX[max.y, p.y]; ENDLOOP; ENDLOOP; normalize ¬ MAX[ABS[max.x], ABS[max.y], ABS[min.x], ABS[min.y], 1]; FOR si: INT DECREASING IN [0..ns.strokeCnt) DO s: Stroke ¬ NIL; FOR pi: INT DECREASING IN [0..normalPoints) DO p: Xl.Point ¬ [ ns.data[si][pi].x, ns.data[si][pi].y ]; p.x ¬ (p.x*sizemax)/normalize; p.y ¬ (p.y*sizemax)/normalize; s ¬ CONS[p, s]; ENDLOOP; strokes ¬ CONS[s, strokes]; ENDLOOP; }; UnNormalize: PUBLIC PROC [x: REF, sizemax: INT ¬ 40] RETURNS [strokes: Strokes] = { ns: NormalizedStrokes ¬ NARROW[x]; strokes ¬ ToStrokes[ns, sizemax]; }; CompareFine: PROC [ns1, ns2: NormalizedStrokes, limit: INT ¬ LAST[INT]] RETURNS [b: INT ¬ 0] = { CompI: PROC [n1, n2: NormalizedStrokes, sidx: NAT, pidx: [0..normalPoints)] RETURNS [INT] = INLINE { RETURN [ABS[n1.data[sidx][pidx].x-n2.data[sidx][pidx].x] + ABS[n1.data[sidx][pidx].y-n2.data[sidx][pidx].y]] }; IF ns1.strokeCnt#ns2.strokeCnt THEN RETURN [LAST[INT]]; FOR si: INT IN [0..ns1.strokeCnt) DO FOR pi: NAT IN [0..normalPoints) DO d: INT ~ CompI[ns1, ns2, si, pi]; b ¬ b + d * (1 + d / 256); IF b>limit THEN RETURN [b]; ENDLOOP; ENDLOOP; }; Evaluation: TYPE = RECORD [ b: INT ¬ INT.LAST, ns: NormalizedStrokes ¬ NIL ]; Search: PROC [ns: NormalizedStrokes, list: LIST OF NormalizedStrokes] RETURNS [best, secondBest: Evaluation] = { considerCount: NAT = 2; cache: ARRAY [0..considerCount) OF Evaluation; next: INT ¬ 0; Insert1: PROC [e: Evaluation] = INLINE { FOR i: NAT IN [0..considerCount) DO IF e.b0 THEN { PutInt[stream, nsStartKey]; PutNormalizedStrokes[stream, l.first]; }; ENDLOOP; PutInt[stream, dbFinishKey]; }; GetDB: PROC [stream: IO.STREAM] RETURNS [list: LIST OF NormalizedStrokes ¬ NIL] = { lag: LIST OF NormalizedStrokes ¬ NIL; i: INT ¬ GetInt[stream]; IF i#dbStartKey THEN ERROR; i ¬ GetInt[stream]; WHILE i=nsStartKey DO ns: NormalizedStrokes ¬ GetNormalizedStrokes[stream]; IF ns#NIL THEN list ¬ CONS[ns, list]; i ¬ GetInt[stream]; ENDLOOP; }; ReadDB: PUBLIC PROC [name: Rope.ROPE] RETURNS [TrainingDB] = { stream: IO.STREAM ¬ PFS.StreamOpen[PFS.PathFromRope[name]]; list: LIST OF NormalizedStrokes ¬ GetDB[stream]; IO.Close[stream]; RETURN [list] }; WriteDB: PUBLIC PROC [name: Rope.ROPE, db: TrainingDB] = { list: LIST OF NormalizedStrokes ~ NARROW[db]; stream: IO.STREAM ¬ PFS.StreamOpen[PFS.PathFromRope[name], create]; PutDB[stream, list]; IO.Close[stream]; }; MergeDB: PUBLIC PROC [into: TrainingDB ¬ NIL, db: TrainingDB] RETURNS [TrainingDB] = { inDB: LIST OF NormalizedStrokes ¬ NARROW[into]; list: LIST OF NormalizedStrokes ~ NARROW[db]; FOR l: LIST OF NormalizedStrokes ¬ list, l.rest WHILE l#NIL DO ns: NormalizedStrokes ~ CopyNormalizedStrokes[l.first]; inDB ¬ AddToDB[inDB, ns]; ENDLOOP; RETURN [inDB]; }; END. H HandwritingImpl.mesa Copyright Σ 1992 by Xerox Corporation. All rights reserved. Christian Jacobi, September 8, 1992 4:05 pm PDT Christian Jacobi, May 6, 1993 8:55 pm PDT Handwriting recognition engine Generic utilities Normalization --Count strokes and allocate memory --Find total bounding box and length --Copy with normalized size and position of original segments --Special case for a dot --Interpolate points --Find each interpolated point --reset offsets to move the origin to the mass center Recognition Training --Monitor protects the structure, but not the contents Input output File format Ascii because I can edit it (good only while no good data base software available) Also shorter then 4 byte integers Use original points so internal format can be changed Design rationale for copying db: prevents circular db's Κˆ•NewlineDelimiter ™code™Kšœ Οeœ1™K˜K˜K˜Kšœžœ˜šžœžœžœž˜"K˜Kšžœ˜—K˜—K˜šŸ œžœžœžœ ˜@šžœ žœžœž˜2J˜ Jšžœ˜—J˜K˜—šŸœžœžœ˜MJšœžœ˜ Jšœ žœ˜Jšœžœ˜Jš œžœžœžœžœ˜%Jš œžœžœžœžœ˜'J˜K™#K˜"Jšžœ žœžœ˜Jšœžœ"˜*K™$J˜šžœ žœžœž˜2Jšœ žœ˜J˜!šžœ"žœžœž˜5J˜Jšœžœžœ˜9Jšœžœžœ˜9Kšœžœžœ˜FJ˜Jšžœ˜—J˜.J˜!J˜Jšžœ˜—J™=K˜K˜šžœžœžœ˜/J™J˜ J˜Hšžœžœžœž˜ Jšœžœ ˜Jšžœ˜—Jšžœ˜J˜—Kšœžœžœ˜DK˜WJ™J˜šžœ žœžœž˜2J˜J˜ Kšœžœ+˜@Kšœžœ%˜:Jšœ žœ˜Jšœ žœ˜J˜J˜"J˜"J™šžœžœžœž˜"Jšœ žœ˜K˜'šžœ ˜"J˜šžœžœžœ˜J˜Jšžœ˜J˜—K˜K˜Kšœžœžœ˜šžœ˜Jšžœžœ ˜#šžœ˜J˜J˜ J˜J˜——Jšžœ˜—šžœžœ˜#šžœ˜Jšžœžœžœ˜$J˜—šžœ˜Jšœžœ˜+K˜)K˜)J˜——J˜J˜"J˜"Jšžœ˜—J˜Jšžœ˜—J™5šžœ žœ˜Jšœžœ˜šžœžœžœž˜ šžœžœžœž˜#J˜:Jšžœ˜ —Jšžœ˜ —J˜!J˜!šžœžœžœž˜ šžœžœžœž˜#J˜-J˜+Jšžœ˜ —Jšžœ˜ —J˜—J˜—J˜š Ÿ œžœžœžœžœžœ˜IK˜2K˜ Kšžœ˜ K˜—J˜š Ÿ œžœ"žœžœžœ˜_Jšœ žœ˜Jš œžœžœžœžœ˜%Jš œžœžœžœžœ˜'š žœžœž œžœž˜.š žœžœž œžœž˜.J˜7Jšœžœ ˜Jšœžœ ˜Jšœžœ ˜Jšœžœ ˜Jšžœ˜—Jšžœ˜—Jš œ žœžœ žœ žœ žœ ˜Cš žœžœž œžœž˜.Jšœ žœ˜š žœžœž œžœž˜.J˜7J˜J˜Jšœžœ˜Jšžœ˜—Jšœ žœ ˜Jšžœ˜—J˜J˜—š Ÿ œžœžœžœ žœžœ˜SKšœžœ˜"K˜!K˜K˜——™ šŸ œžœ&žœžœžœžœžœ ˜`š Ÿœžœ#žœžœžœžœ˜dKšžœžœ0žœ.˜lJ˜—Kš žœžœžœžœžœ˜7šžœžœžœž˜$šžœžœžœž˜#Kšœžœ˜!J˜Jšžœ žœžœ˜Jšžœ˜—Jšžœ˜—J˜—J˜šœ žœžœ˜Kšœžœžœžœ˜Kšœž˜K˜K˜—š Ÿœžœžœžœžœ#˜pJšœžœ˜Jšœžœžœ ˜.Jšœžœ˜šŸœžœžœ˜(šžœžœžœž˜#šžœžœ˜Jšœžœ˜"š žœžœž œžœ ž˜%J˜Jšžœ˜—J˜ Jšžœ˜J˜—Jšž˜—J˜—š žœžœžœ#žœžœž˜AJšœžœ˜#J˜Jšžœ˜—J˜J˜J˜J˜—šŸœžœžœ!žœžœžœžœ˜uJ˜Jšœžœžœžœ˜/Jšœžœ˜"J˜(šžœžœž˜KšœžœI˜T—šžœ žœž˜Kšœžœ7˜B—J˜J˜—šŸ œžœžœ$žœžœžœžœ˜nJ˜2Kšžœ˜%J˜—K˜—™š Ÿœžœžœžœžœ ˜?Kšœžœ˜"Kšžœžœžœ ˜K˜K˜—šŸœžœžœžœ ˜6Kšœžœ˜"Kšžœ žœžœ˜K˜ K˜J˜—š Ÿœžœžœžœ žœžœ˜Wšžœžœžœ ˜Kšžœžœžœ˜Kšžœžœžœžœ˜)—K˜—K˜šŸœžœžœžœ+žœžœžœ˜lKšžœžœžœ˜+Kšžœžœ ˜J˜J˜—š Ÿœžœžœ(žœžœ˜YJšœžœžœžœ˜-K˜2K˜ Kšžœ˜K˜—K˜šŸœžœžœ!žœ˜WJšœžœžœžœ˜-Kšžœžœ˜!K˜K˜—š Ÿœžœžœ!žœžœ˜ZJšœžœžœžœ˜-Jšœžœ˜"Jšžœžœžœžœ˜#š žœžœžœ"žœžœž˜>šžœ žœžœžœ˜#Jšžœ˜J˜—Jšžœ˜—J˜K˜—š Ÿ œžœžœžœžœ˜Hš Ÿ œžœžœžœžœ-˜QJ™6šžœžœžœ˜Jšœžœžœ˜)Jšžœžœžœ žœ˜0J˜—J˜—Jšœžœžœžœ˜-Jšœžœ˜"Jš žœžœžœžœžœ˜Jšžœžœžœ ˜)š žœžœžœ"žœžœž˜>šžœžœžœžœ˜*J˜Jšžœ˜J˜—Jšžœ˜—Kšžœ˜ K˜K˜——™ ™ J™RJ™!J™5—J˜Jšœ žœ ˜Jšœ žœ˜Jšœ žœ˜Jšœžœ˜J˜š Ÿœžœ žœžœžœ˜,Jšžœžœ ˜Jšžœ˜J˜J˜—š Ÿœžœ žœžœžœžœ˜2Jšžœ˜J˜J˜—š Ÿœžœ žœžœžœ˜MJšœžœ˜šžœžœžœž˜-Jšœžœ˜)Jšœžœ˜)Jšžœ˜—J˜J˜J˜—š Ÿœžœ žœžœžœžœ˜LJšœžœ˜šž˜J˜Jšžœžœžœ˜J˜Jšœžœ ˜Jšžœžœžœžœ˜3J˜ Jšžœ˜—J˜J˜—šŸœžœ žœžœ˜IJ˜J˜J˜J˜šžœžœžœž˜"J˜;J˜Jšž˜—J˜J˜—š Ÿœžœ žœžœžœ˜RJšœžœ˜Jšœžœ˜Jšœžœ˜Jšœ žœ˜ šžœžœžœž˜J˜)šžœžœžœ˜Jšœžœ ˜Jšžœžœžœžœ˜4J˜ J˜—Jšž˜—J˜J˜ J˜—J˜š Ÿœžœ žœžœžœžœ˜DJ˜š žœžœžœ"žœžœž˜>J˜ šžœžœžœžœ˜#J˜J˜&J˜—Jšžœ˜—J˜J˜J˜—šŸœžœ žœžœžœžœžœžœ˜SJšœžœžœžœ˜%Jšœžœ˜Jšžœžœžœ˜J˜šžœž˜J˜5Jšžœžœžœžœ ˜%J˜Jšžœ˜—J˜—J˜š Ÿœžœžœ žœžœ˜>Jš œžœžœžœ žœ˜;Jšœžœžœ#˜0Jšžœ˜Jšžœ˜ J˜J˜—šŸœžœžœ žœ˜:Jšœžœžœžœ˜-Jš œžœžœžœ žœ˜CJ˜Jšžœ˜J˜—J˜šŸœž œžœžœ˜VJ™7Jšœžœžœžœ˜/Jšœžœžœžœ˜-š žœžœžœ"žœžœž˜>J˜7J˜Jšžœ˜—Jšžœ˜J˜J˜——šžœ˜J™—K˜—…—3(Iψ