[_CDCSL_93-16_]<1>Cedar>release>X11HandWriting>HandwritingImpl.mesa

HandwritingImpl.mesa

Christian Jacobi, September 8, 1992 4:05 pm PDT

Christian Jacobi, May 6, 1993 8:55 pm PDT

Handwriting recognition engine

DIRECTORY

Handwriting,

IO,

PFS,

Rope,

Xl USING [Point];

HandwritingImpl: CEDAR MONITOR

IMPORTS PFS, IO

EXPORTS Handwriting ~

BEGIN OPEN Handwriting;

Generic utilities

IsTrainingDB: PUBLIC PROC [x: REF] RETURNS [BOOL] = {

RETURN [ISTYPE[x, LIST OF NormalizedStrokes]];

};

NarrowTrainingDB: PUBLIC PROC [x: REF] RETURNS [TrainingDB] = {

RETURN [NARROW[x, LIST OF NormalizedStrokes]];

};

IsNormalized: PUBLIC PROC [x: REF] RETURNS [BOOL] = {

RETURN [ISTYPE[x, NormalizedStrokes]];

};

NarrowNormalized: PUBLIC PROC [x: REF] RETURNS [Normalized] = {

RETURN [NARROW[x, NormalizedStrokes]];

};

MCopyStroke: PROC [s: Stroke, dx: INT, dy: INT, f: INT] RETURNS [copy: Stroke ¬ NIL] = {

IF s#NIL THEN {

lag: Stroke ¬ copy ¬ LIST[[(s.first.x+dx)*f, (s.first.y+dy)*f]];

FOR seg: Stroke ¬ s.rest, seg.rest WHILE seg#NIL DO

lag.rest ¬ LIST[[(seg.first.x+dx)*f, (seg.first.y+dy)*f]];

lag ¬ lag.rest

ENDLOOP

}

};

MCopyStrokes: PROC [strokes: Strokes, dx: INT, dy: INT, f: INT] RETURNS [copy: Strokes ¬ NIL] = {

IF strokes#NIL THEN {

lag: Strokes ¬ copy ¬ LIST[MCopyStroke[strokes.first, dx, dy, f]];

FOR sl: Strokes ¬ strokes.rest, sl.rest WHILE sl#NIL DO

lag.rest ¬ LIST[MCopyStroke[sl.first, dx, dy, f]];

lag ¬ lag.rest

ENDLOOP

}

};

Normalization

normalPoints: NAT = 16;

EachPoint: TYPE = RECORD [x, y: INT];

NormalizedSingleStroke: TYPE = ARRAY [0..normalPoints) OF EachPoint;

NormalizedStrokes: TYPE = REF NormalizedStrokesRec;

NormalizedStrokesRec: TYPE = RECORD [

key: INT ¬ 0,

strokes: Strokes ¬ NIL,

dx, dy, f: INT,

included: BOOL ¬ FALSE,

data: SEQUENCE strokeCnt: NAT OF NormalizedSingleStroke

];

idxForLeng: NAT = 1;

normalizedLengthConst: INT = 010000H;

CopyNormalizedStrokes: PROC [ns: NormalizedStrokes] RETURNS [copy: NormalizedStrokes] = {

copy ¬ NEW[NormalizedStrokesRec[ns.strokeCnt]];

copy.key ¬ ns.key;

copy.strokes ¬ ns.strokes; --pointer copy because is readonly

copy.dx ¬ ns.dx;

copy.dy ¬ ns.dy;

copy.f ¬ ns.f;

copy.included ¬ FALSE;

FOR i: NAT IN [0..ns.strokeCnt) DO

copy.data[i] ¬ ns.data[i];

ENDLOOP;

};

CountStrokes: PROC [strokes: Strokes] RETURNS [cnt: INT ¬ 0] = {

FOR ss: Strokes ¬ strokes, ss.rest WHILE ss#NIL DO

cnt ¬ cnt+1;

ENDLOOP;

};

NormalizeStrokes: PROC [strokes: Strokes] RETURNS [ns: NormalizedStrokes] = {

factor: INT;

totalLeng: INT ¬ 0;

strokeCnt, strokeIdx: INT ¬ 0;

min: Xl.Point ¬ [INT.LAST, INT.LAST];

max: Xl.Point ¬ [INT.FIRST, INT.FIRST];

av: Xl.Point ¬ [0, 0];

--Count strokes and allocate memory

strokeCnt ¬ CountStrokes[strokes];

IF strokeCnt=0 THEN ERROR;

ns ¬ NEW[NormalizedStrokesRec[strokeCnt]];

--Find total bounding box and length

strokeIdx ¬ 0;

FOR sl: Strokes ¬ strokes, sl.rest WHILE sl#NIL DO

strokeLeng: INT ¬ 0;

lastP: Xl.Point ¬ sl.first.first;

FOR seg: Stroke ¬ sl.first, seg.rest WHILE seg#NIL DO

nextP: Xl.Point ~ seg.first;

min.x ¬ MIN[min.x, nextP.x]; min.y ¬ MIN[min.y, nextP.y];

max.x ¬ MAX[max.x, nextP.x]; max.y ¬ MAX[max.y, nextP.y];

strokeLeng ¬ strokeLeng + ABS[lastP.x-nextP.x] + ABS[lastP.y-nextP.y];

lastP ¬ nextP;

ENDLOOP;

ns.data[strokeIdx][idxForLeng].x ¬ strokeLeng;

totalLeng ¬ totalLeng+strokeLeng;

strokeIdx ¬ strokeIdx+1;

ENDLOOP;

--Copy with normalized size and position of original segments

ns.dx ¬ -(max.x+min.x)/2;

ns.dy ¬-(max.y+min.y)/2;

IF (max.x-min.x)<=3 AND (max.y-min.y)<=3 THEN {

--Special case for a dot

ns.f ¬ 1;

ns.strokes ¬ MCopyStrokes[strokes: strokes, dx: ns.dx, dy: ns.dy, f: 1];

FOR si: NAT IN [1..strokeCnt) DO

ns.data[si] ¬ ALL[[0, 0]];

ENDLOOP;

RETURN;

};

factor ¬ ns.f ¬ MAX[(normalizedLengthConst / MAX[totalLeng, 1]), 1];

strokes ¬ ns.strokes ¬ MCopyStrokes[strokes: strokes, dx: ns.dx, dy: ns.dy, f: factor];

--Interpolate points

strokeIdx ¬ 0;

FOR ss: Strokes ¬ strokes, ss.rest WHILE ss#NIL DO

seg: Stroke ¬ ss.first;

next: Stroke;

actualStrokeLeng: INT ¬ ns.data[strokeIdx][idxForLeng].x*factor;

desiredPieceLeng: INT ¬ actualStrokeLeng/(normalPoints-1);

sofarLeng: INT ¬ 0;

goalLeng: INT ¬ 0;

lastP: Xl.Point ¬ seg.first;

ns.data[strokeIdx][0].x ¬ lastP.x;

ns.data[strokeIdx][0].y ¬ lastP.y;

--Find each interpolated point

FOR i: NAT IN [1..normalPoints) DO

dx, dy, dl: INT ¬ 0;

goalLeng ¬ sofarLeng+desiredPieceLeng;

DO --eventually skip some segments

next ¬ seg.rest;

IF next=NIL THEN {

dl ¬ 0;

EXIT;

};

dx ¬ next.first.x-lastP.x;

dy ¬ next.first.y-lastP.y;

dl ¬ ABS[dx] + ABS[dy];

IF sofarLeng+dl>=goalLeng

THEN EXIT --found the right segment

ELSE {

sofarLeng ¬ sofarLeng+dl;

seg ¬ next;

lastP ¬ next.first;

};

ENDLOOP;

IF sofarLeng+dl<=goalLeng OR dl<=0

THEN {

IF next#NIL THEN lastP ¬ next.first;

}

ELSE {

desiredSegLeng: INT ¬ (goalLeng-sofarLeng);

lastP.x ¬ lastP.x+(desiredSegLeng*dx)/dl;

lastP.y ¬ lastP.y+(desiredSegLeng*dy)/dl;

};

sofarLeng ¬ goalLeng;

ns.data[strokeIdx][i].x ¬ lastP.x;

ns.data[strokeIdx][i].y ¬ lastP.y;

ENDLOOP;

strokeIdx ¬ strokeIdx+1;

ENDLOOP;

--reset offsets to move the origin to the mass center

IF strokeCnt>0 THEN {

tx, ty: INT ¬ 0;

FOR si: INT IN [0..strokeCnt) DO

FOR pi: INT IN [0..normalPoints) DO

tx ¬ tx + ns.data[si][pi].x; ty ¬ ty + ns.data[si][pi].y;

ENDLOOP;

tx ¬ tx/(normalPoints*strokeCnt);

ty ¬ ty/(normalPoints*strokeCnt);

FOR si: INT IN [0..strokeCnt) DO

FOR pi: INT IN [0..normalPoints) DO

ns.data[si][pi].x ¬ ns.data[si][pi].x - tx;

ns.data[si][pi].y ¬ ns.data[si][pi].y - ty;

ENDLOOP;

};

Normalize: PUBLIC PROC [strokes: Strokes, ch: INT ¬ -1] RETURNS [REF] = {

ns: NormalizedStrokes ¬ NormalizeStrokes[strokes];

ns.key ¬ ch;

RETURN [ns];

};

ToStrokes: PROC [ns: NormalizedStrokes, sizemax: INT ¬ 40] RETURNS [strokes: Strokes ¬ NIL] = {

normalize: INT;

min: Xl.Point ¬ [INT.LAST, INT.LAST];

max: Xl.Point ¬ [INT.FIRST, INT.FIRST];

FOR si: INT DECREASING IN [0..ns.strokeCnt) DO

FOR pi: INT DECREASING IN [0..normalPoints) DO

p: Xl.Point ¬ [ ns.data[si][pi].x, ns.data[si][pi].y ];

min.x ¬ MIN[min.x, p.x];

min.y ¬ MIN[min.y, p.y];

max.x ¬ MAX[max.x, p.x];

max.y ¬ MAX[max.y, p.y];

ENDLOOP;

normalize ¬ MAX[ABS[max.x], ABS[max.y], ABS[min.x], ABS[min.y], 1];

FOR si: INT DECREASING IN [0..ns.strokeCnt) DO

s: Stroke ¬ NIL;

FOR pi: INT DECREASING IN [0..normalPoints) DO

p: Xl.Point ¬ [ ns.data[si][pi].x, ns.data[si][pi].y ];

p.x ¬ (p.x*sizemax)/normalize;

p.y ¬ (p.y*sizemax)/normalize;

s ¬ CONS[p, s];

ENDLOOP;

strokes ¬ CONS[s, strokes];

ENDLOOP;

};

UnNormalize: PUBLIC PROC [x: REF, sizemax: INT ¬ 40] RETURNS [strokes: Strokes] = {

ns: NormalizedStrokes ¬ NARROW[x];

strokes ¬ ToStrokes[ns, sizemax];

};

Recognition

CompareFine: PROC [ns1, ns2: NormalizedStrokes, limit: INT ¬ LAST[INT]] RETURNS [b: INT ¬ 0] = {

CompI: PROC [n1, n2: NormalizedStrokes, sidx: NAT, pidx: [0..normalPoints)] RETURNS [INT] = INLINE {

RETURN [ABS[n1.data[sidx][pidx].x-n2.data[sidx][pidx].x] + ABS[n1.data[sidx][pidx].y-n2.data[sidx][pidx].y]]

};

IF ns1.strokeCnt#ns2.strokeCnt THEN RETURN [LAST[INT]];

FOR si: INT IN [0..ns1.strokeCnt) DO

FOR pi: NAT IN [0..normalPoints) DO

d: INT ~ CompI[ns1, ns2, si, pi];

b ¬ b + d * (1 + d / 256);

IF b>limit THEN RETURN [b];

ENDLOOP;

};

Evaluation: TYPE = RECORD [

b: INT ¬ INT.LAST,

ns: NormalizedStrokes ¬ NIL

];

Search: PROC [ns: NormalizedStrokes, list: LIST OF NormalizedStrokes] RETURNS [best, secondBest: Evaluation] = {

considerCount: NAT = 2;

cache: ARRAY [0..considerCount) OF Evaluation;

next: INT ¬ 0;

Insert1: PROC [e: Evaluation] = INLINE {

FOR i: NAT IN [0..considerCount) DO

IF e.b<cache[i].b THEN {

next ¬ MIN[next+1, considerCount];

FOR n: NAT DECREASING IN (i..next) DO

cache[n] ¬ cache[n-1];

ENDLOOP;

cache[i] ¬ e;

RETURN;

}

ENDLOOP

};

FOR l1: LIST OF NormalizedStrokes ¬ list, l1.rest WHILE l1#NIL DO

b: INT ¬ CompareFine[ns, l1.first];

Insert1[[ns: l1.first, b: b]];

ENDLOOP;

best ¬ cache[0];

secondBest ¬ cache[1];

};

RecognizeNormalized: PUBLIC PROC [db: TrainingDB, x: Normalized] RETURNS [list: LIST OF Handwriting.Report ¬ NIL] = {

best, secondBest: Evaluation;

dbList: LIST OF NormalizedStrokes ~ NARROW[db];

ns: NormalizedStrokes ~ NARROW[x];

[best, secondBest] ¬ Search[ns, dbList];

IF secondBest.ns#NIL THEN

list ¬ CONS[[ch: secondBest.ns.key, x: secondBest.ns, badness: secondBest.b], list];

IF best.ns#NIL THEN

list ¬ CONS[[ch: best.ns.key, x: best.ns, badness: best.b], list];

};

Recognize: PUBLIC PROC [db: TrainingDB, strokes: Strokes] RETURNS [list: LIST OF Handwriting.Report ¬ NIL] = {

ns: NormalizedStrokes ~ NormalizeStrokes[strokes];

RETURN [RecognizeNormalized[db, ns]];

};

Training

GetChar: PUBLIC PROC [x: Normalized] RETURNS [ch: INT ¬ -1] = {

ns: NormalizedStrokes ¬ NARROW[x];

IF ns#NIL THEN ch ¬ ns.key;

};

SetChar: PUBLIC PROC [x: Normalized, ch: INT ¬ -1] = {

ns: NormalizedStrokes ¬ NARROW[x];

IF ns.included THEN ERROR;

ns.key ¬ ch;

};

SetIncludeFlag: ENTRY PROC [ns: NormalizedStrokes] RETURNS [mustRaise: BOOL ¬ TRUE] = {

IF ns=NIL OR ns.included

THEN RETURN [TRUE]

ELSE {ns.included ¬ TRUE; RETURN [FALSE]}

};

AddToDB: PROC [db: LIST OF NormalizedStrokes, ns: NormalizedStrokes] RETURNS [LIST OF NormalizedStrokes] = {

IF SetIncludeFlag[ns].mustRaise THEN ERROR;

RETURN [CONS[ns, db]];

};

TrainDB: PUBLIC PROC [db: TrainingDB, strokes: Strokes, ch: INT] RETURNS [TrainingDB] = {

list: LIST OF NormalizedStrokes ¬ NARROW[db];

ns: NormalizedStrokes ¬ NormalizeStrokes[strokes];

ns.key ¬ ch;

RETURN [AddToDB[list, ns]];

};

TrainDBNormalized: PUBLIC PROC [db: TrainingDB, x: Normalized] RETURNS [TrainingDB] = {

list: LIST OF NormalizedStrokes ¬ NARROW[db];

RETURN [AddToDB[list, NARROW[x]]]

};

NextNormalized: PUBLIC PROC [db: TrainingDB, x: Normalized] RETURNS [Normalized ¬ NIL] = {

list: LIST OF NormalizedStrokes ¬ NARROW[db];

ns: NormalizedStrokes ¬ NARROW[x];

IF ns=NIL THEN RETURN [list.first];

FOR l: LIST OF NormalizedStrokes ¬ list, l.rest WHILE l#NIL DO

IF l.first=ns AND l.rest#NIL THEN {

RETURN [l.rest.first];

}

ENDLOOP;

};

UnTrainDB: PUBLIC PROC [db: TrainingDB, x: REF] RETURNS [TrainingDB] = {

RemoveNext: ENTRY PROC [lag: LIST OF NormalizedStrokes, x: NormalizedStrokes] = {

--Monitor protects the structure, but not the contents

IF lag#NIL THEN {

lr: LIST OF NormalizedStrokes ~ lag.rest;

IF lr#NIL AND lr.first=x THEN lag.rest ¬ lr.rest

};

list: LIST OF NormalizedStrokes ¬ NARROW[db];

ns: NormalizedStrokes ¬ NARROW[x];

IF list=NIL THEN RETURN [NIL];

IF list.first=ns THEN RETURN [list.rest];

FOR l: LIST OF NormalizedStrokes ¬ list, l.rest WHILE l#NIL DO

IF l.rest#NIL AND l.rest.first = ns THEN {

RemoveNext[l, ns];

RETURN [list];

}

ENDLOOP;

RETURN [list]

};

Input output

File format

Ascii because I can edit it (good only while no good data base software available)

Also shorter then 4 byte integers

Use original points so internal format can be changed

dbStartKey: INT ~ -1208567;

dbFinishKey: INT ~ -999;

nsStartKey: INT ~ -9;

strokeEndKey: INT ~ -8;

PutInt: PROC [stream: IO.STREAM, i: INT] = {

IO.Put1[stream, IO.int[i]];

IO.PutChar[stream, ' ];

};

GetInt: PROC [stream: IO.STREAM] RETURNS [INT] = {

RETURN [IO.GetInt[stream]];

};

PutSingleStroke: PROC [stream: IO.STREAM, stroke: Stroke, dx, dy, f: INT] = {

f ¬ MAX[f, 1];

FOR l: Stroke ¬ stroke, l.rest WHILE l#NIL DO

PutInt[stream, MAX[l.first.x/f-dx+1, 0]];

PutInt[stream, MAX[l.first.y/f-dy+1, 0]];

ENDLOOP;

PutInt[stream, strokeEndKey];

};

GetSingleStroke: PROC [stream: IO.STREAM] RETURNS [stroke: Stroke ¬ NIL] = {

x, y: INT; lag, list: Stroke;

x ¬ GetInt[stream];

IF x=strokeEndKey THEN EXIT;

y ¬ GetInt[stream];

list ¬ LIST[[x, y]];

IF lag=NIL THEN stroke ¬ list ELSE lag.rest ¬ list;

lag ¬ list;

ENDLOOP;

};

PutNormalizedStrokes: PROC [stream: IO.STREAM, ns: NormalizedStrokes] = {

strokes: Strokes ¬ ns.strokes;

PutInt[stream, ns.key];

PutInt[stream, 0];

PutInt[stream, ns.strokeCnt];

FOR i: INT IN [0..ns.strokeCnt) DO

PutSingleStroke[stream, strokes.first, ns.dx, ns.dy, ns.f];

strokes ¬ strokes.rest

ENDLOOP

};

GetNormalizedStrokes: PROC [stream: IO.STREAM] RETURNS [ns: NormalizedStrokes] = {

strokes, lag: Strokes ¬ NIL;

key: INT ¬ GetInt[stream];

unused: INT ¬ GetInt[stream];

strokeCnt: INT ¬ GetInt[stream];

FOR i: INT IN [0..strokeCnt) DO

stroke: Stroke ¬ GetSingleStroke[stream];

IF stroke#NIL THEN {

list: Strokes ¬ LIST[stroke];

IF lag=NIL THEN strokes ¬ list ELSE lag.rest ¬ list;

lag ¬ list;

};

ENDLOOP;

ns ¬ NormalizeStrokes[strokes];

ns.key ¬ key

};

PutDB: PROC [stream: IO.STREAM, list: LIST OF NormalizedStrokes] = {

PutInt[stream, dbStartKey];

FOR l: LIST OF NormalizedStrokes ¬ list, l.rest WHILE l#NIL DO

ns: NormalizedStrokes ~ l.first;

IF ns#NIL AND ns.strokeCnt>0 THEN {

PutInt[stream, nsStartKey];

PutNormalizedStrokes[stream, l.first];

};

ENDLOOP;

PutInt[stream, dbFinishKey];

};

GetDB: PROC [stream: IO.STREAM] RETURNS [list: LIST OF NormalizedStrokes ¬ NIL] = {

lag: LIST OF NormalizedStrokes ¬ NIL;

i: INT ¬ GetInt[stream];

IF i#dbStartKey THEN ERROR;

i ¬ GetInt[stream];

WHILE i=nsStartKey DO

ns: NormalizedStrokes ¬ GetNormalizedStrokes[stream];

IF ns#NIL THEN list ¬ CONS[ns, list];

i ¬ GetInt[stream];

ENDLOOP;

};

ReadDB: PUBLIC PROC [name: Rope.ROPE] RETURNS [TrainingDB] = {

stream: IO.STREAM ¬ PFS.StreamOpen[PFS.PathFromRope[name]];

list: LIST OF NormalizedStrokes ¬ GetDB[stream];

IO.Close[stream];

RETURN [list]

};

WriteDB: PUBLIC PROC [name: Rope.ROPE, db: TrainingDB] = {

list: LIST OF NormalizedStrokes ~ NARROW[db];

stream: IO.STREAM ¬ PFS.StreamOpen[PFS.PathFromRope[name], create];

PutDB[stream, list];

IO.Close[stream];

};

MergeDB: PUBLIC PROC [into: TrainingDB ¬ NIL, db: TrainingDB] RETURNS [TrainingDB] = {

Design rationale for copying db: prevents circular db's

inDB: LIST OF NormalizedStrokes ¬ NARROW[into];

list: LIST OF NormalizedStrokes ~ NARROW[db];

FOR l: LIST OF NormalizedStrokes ¬ list, l.rest WHILE l#NIL DO

ns: NormalizedStrokes ~ CopyNormalizedStrokes[l.first];

inDB ¬ AddToDB[inDB, ns];

ENDLOOP;

RETURN [inDB];

};

END.