-- SynthesizerImpl.mesa
-- Copyright (c) 1986 by Xerox Corporation.  All rights reserved.
-- Polle Zellweger (PTZ) July 28, 1986 11:15:41 pm PDT

-- This module is currently set up for the DECtalk.  July 17, 1986 PTZ

DIRECTORY
  IO,
  Rope USING [ Concat, Equal, Fetch, Length, ROPE, Substr ],
  RuntimeError USING [ BoundsFault ],
  Synthesizer,
  VoiceUtils USING [ Report ]
  ;

SynthesizerImpl: CEDAR PROGRAM
  IMPORTS Rope, RuntimeError, VoiceUtils
  EXPORTS Synthesizer = {

  BreakText: PUBLIC PROC [text: Rope.ROPE, maxlen: INT] RETURNS [packet, remainder: Rope.ROPE] = {
    -- Splits text into a leading packet and the remainder.
    -- If text has at most maxlen characters it is returned whole with an empty remainder.
    -- Otherwise the split point comes from FindTextBreak; if that finds no acceptable
    -- break, the text is cut at exactly maxlen characters and a warning is reported.
    -- When the split falls inside a phoneme command, the command terminator (ESC \) is
    -- appended to packet and a fresh phoneme command header (ESC P ; z) is prepended to
    -- remainder, so in that case packet is two characters longer than the break index.
    index: INT;
    insidePhonemes: BOOL;
    -- A packet of exactly maxlen characters is legal (the fallback below produces one),
    -- so a text of exactly maxlen characters needs no splitting.
    IF text.Length <= maxlen THEN RETURN[packet: text, remainder: ""];
    [index, insidePhonemes] _ FindTextBreak[text, maxlen];
    IF index=0 THEN {
      index _ maxlen;
      VoiceUtils.Report[Rope.Concat["Warning: No good place to break following string.\n", text.Substr[len: index]], $Finch];
      };
    packet _ text.Substr[len: index];
    remainder _ text.Substr[start: index];
    IF insidePhonemes THEN {  -- close the current phoneme command, break, and reopen it
      packet _ Rope.Concat[packet, "\033\\"];
      remainder _ Rope.Concat["\033P;z", remainder];
      };
    };

  FindTextBreak: PROC [text: Rope.ROPE, maxlen: INT] RETURNS [breakIndex: INT, indexInsidePhonemes: BOOL] = {
    -- Scans the first maxlen characters of text for the best place to split it.
    -- primary/secondaryBreak will be the first character of the next packet.
    -- Returns breakIndex = 0 if there is no good place to break the string.
    -- indexInsidePhonemes reports whether the chosen index lies inside a phoneme command.
    -- The scan keeps a three character window: prevc = text[i-2], c = text[i-1], nextc = text[i].
    cmdBoundary, primaryBreak, secondaryBreak: INT _ 0;
    insidePhonemes, primInsidePhonemes, secInsidePhonemes: BOOL _ FALSE;
    prevc: CHAR _ IO.SP;  -- i-2
    c: CHAR _ ProtectFetch[text, 0];  -- i-1; protected like every other fetch in this module
    FOR i: INT IN [1..maxlen] DO
      nextc: CHAR _ ProtectFetch[text, i];  -- i
      SELECT c FROM
        IO.ESC => {
          SELECT nextc FROM
            'P => {  -- c is the first character of a command; may be a phoneme command
              cmdBoundary _ i-1;
              IF PhonemeCmd[text, i+1] THEN insidePhonemes _ TRUE;
              };
            '\\ => {  -- nextc is the end of a DECtalk command
              -- Only record the boundary if it still lies within maxlen; on the last
              -- iteration i+1 = maxlen+1, which would yield an over-long packet.
              IF i+1 <= maxlen THEN cmdBoundary _ i+1;
              insidePhonemes _ FALSE;
              };
            -- '[, 'c, IO.SP => c may be the first character of some other DECtalk command,
            -- but the user is not permitted to send these commands.
            ENDCASE;  -- c is some other ESC (what is this silly user doing??)
          };
        IO.SP, IO.TAB, IO.CR, IO.LF => {
          -- Any white space is at least a secondary break; punctuation around it
          -- promotes it to a primary break.
          secondaryBreak _ i;
          secInsidePhonemes _ insidePhonemes;
          SELECT prevc FROM
            '. => IF NOT SpecialAbbrev[text, i-4] THEN {primaryBreak _ i; primInsidePhonemes _ insidePhonemes};
            ', , ';, '!, '?, '), '}, '], '", '' => {primaryBreak _ i; primInsidePhonemes _ insidePhonemes};
            ENDCASE => {
              SELECT nextc FROM
                '(, '{, '[, '", '', '` => {  -- the last one is an open quote
                  primaryBreak _ i;
                  primInsidePhonemes _ insidePhonemes;
                  };
                ENDCASE;
              };
          };
        ENDCASE;
      prevc _ c;
      c _ nextc;
      ENDLOOP;

    -- Now choose an appropriate spot to break the text....
    IF primaryBreak > 0 THEN RETURN [primaryBreak, primInsidePhonemes]  -- could be inside a dict cmd, oh well
    ELSE IF cmdBoundary > 0 THEN RETURN [cmdBoundary, FALSE]
    ELSE IF secondaryBreak > 0 THEN RETURN [secondaryBreak, secInsidePhonemes]
    ELSE RETURN [0, insidePhonemes]  -- this is a weird string: no good place to break
    };

  PhonemeCmd: PROC [text: Rope.ROPE, i: INT] RETURNS [BOOL _ FALSE] = {
    -- Returns TRUE if the characters of text starting at index i are the parameter part
    -- of a phoneme command header, i.e. the text that follows ESC P.
    -- A DECtalk phoneme command looks like
    --     ESC P 0 ; 0 z phonemic text ESC \
    -- The zeros are optional, but we only recognize the cases of both zeros there or
    -- both zeros missing.
    nextw: Rope.ROPE _ ProtectSubstr[text: text, start: i, len: 2];
    IF Rope.Equal[s1: nextw, s2: ";z", case: TRUE] THEN RETURN[TRUE];
    nextw _ ProtectSubstr[text: text, start: i, len: 4];
    IF Rope.Equal[s1: nextw, s2: "0;0z", case: TRUE] THEN RETURN[TRUE];
    -- otherwise falls out with the default result FALSE
    };

  SpecialAbbrev: PROC [text: Rope.ROPE, i: INT] RETURNS [BOOL _ FALSE] = {
    -- Returns TRUE if the two characters at text[i], text[i+1] form one of the
    -- abbreviations ft, st, dr (any case), preceded by white space, and the first letter
    -- found after the abbreviation's period is a capital.  The caller passes i such that
    -- text[i+2] is the period and text[i+3] the white space that follows it.
    -- DECtalk:
    --     St. followed by a capital letter = Saint; not followed by a capital letter = Street.
    --     Dr. followed by a capital letter = Doctor; not followed by a capital letter = Drive.
    -- Prose adds:
    --     Ft. followed by a capital letter = Fort; not followed by a capital letter = feet.
    --     (The DECtalk distinguishes between these latter two on the basis of
    --     capitalization: Ft. = Fort, ft. = feet.  But what about FT.?  Seems like a bug!)
    prevw: Rope.ROPE _ ProtectSubstr[text: text, start: i, len: 2];
    -- If we run off the beginning of the string, there must have been a legal break
    -- there (ProtectFetch then yields its default, IO.SP).
    prevc: CHAR _ ProtectFetch[text, i-1];
    SELECT prevc FROM
      IO.SP, IO.TAB, IO.CR, IO.LF => NULL;
      ENDCASE => RETURN[FALSE];
    IF Rope.Equal[s1: prevw, s2: "ft", case: FALSE]
      OR Rope.Equal[s1: prevw, s2: "st", case: FALSE]
      OR Rope.Equal[s1: prevw, s2: "dr", case: FALSE] THEN
      -- Start scanning just past the period (text[i+2]).  Starting at i+1 would examine
      -- the abbreviation's own second letter, so the loop would always stop at once and
      -- never look at the following word.
      -- Running off the end of the text yields the default 'a, i.e. FALSE, which also
      -- guarantees that the loop terminates.
      FOR j: INT _ i+3, j+1 DO
        scanc: CHAR _ ProtectFetch[text: text, index: j, default: 'a];
        SELECT scanc FROM
          IN ['A..'Z] => RETURN[TRUE];
          IN ['a..'z] => RETURN[FALSE];
          ENDCASE;
        ENDLOOP;
    };

  ProtectFetch: PROC [text: Rope.ROPE, index: INT, default: CHAR _ IO.SP] RETURNS [char: CHAR] ~ {
    -- Like text.Fetch[index], but returns default instead of letting
    -- RuntimeError.BoundsFault propagate.
    ENABLE RuntimeError.BoundsFault => {char _ default; CONTINUE};
    char _ text.Fetch[index];
    };

  ProtectSubstr: PROC [text: Rope.ROPE, start: INT, len: INT] RETURNS [substr: Rope.ROPE _ ""] ~ {
    -- Like text.Substr[start, len], but returns the empty rope instead of letting
    -- RuntimeError.BoundsFault propagate.
    ENABLE RuntimeError.BoundsFault => CONTINUE;
    substr _ text.Substr[start, len];
    };

  }.
Polle Zellweger (PTZ) July 17, 1986 6:02:46 pm PDT changes to: SynthesizerImpl, BreakText, FindTextBreak Polle Zellweger (PTZ) July 18, 1986 11:41:44 am PDT changes to: DIRECTORY, BreakText, FindTextBreak, PhonemeCmd, nextc (local of FindTextBreak), nextw (local of PhonemeCmd), prevw (local of SpecialAbbrev), SpecialAbbrev, scanc (local of SpecialAbbrev), SynthesizerImpl Polle Zellweger (PTZ) July 18, 1986 5:17:06 pm PDT changes to: BreakText, FindTextBreak, PhonemeCmd, prevw (local of SpecialAbbrev), prevc (local of SpecialAbbrev), SpecialAbbrev, nextw (local of PhonemeCmd) Polle Zellweger (PTZ) July 28, 1986 8:57:29 pm PDT changes to: nextc (local of FindTextBreak), nextw (local of PhonemeCmd), PhonemeCmd, prevw (local of SpecialAbbrev), prevc (local of SpecialAbbrev), scanc (local of SpecialAbbrev) Polle Zellweger (PTZ) July 28, 1986 9:30:23 pm PDT changes to: FindTextBreak, PhonemeCmd, SpecialAbbrev, ProtectFetch, ProtectSubstr Polle Zellweger (PTZ) July 28, 1986 10:34:46 pm PDT changes to: FindTextBreak Κά˜šœ™Icodešœ Οmœ1™šžœž˜Kšžœ žœžœ˜Kšžœ žœžœ˜Kšžœ˜—Kšžœ˜——K˜—K˜šŸ œžœ žœ žœ ž œžœžœžœ˜`Kšžœ.žœ˜>Kšœ˜K˜K˜—šŸ œžœ žœ žœžœžœžœ ˜`Kšžœž ˜,Kšœ!˜!K˜—K˜K˜™2Kšœ Οr)™5—™3Kš œ ‘6œ‘œ‘œ‘œ‘™Ψ—™2Kšœ ‘+œ‘œ‘œ™œ—K™™2Kš œ ‘œ‘œ‘œ‘œ‘œ™³—™2Kšœ ‘E™Q—™3Kšœ ‘ ™—K™—…—¬J