File: SpellingWordMapImpl.mesa
Last Edited by: Nix, October 24, 1983 10:47 am
DIRECTORY
SpellingWordMap,
Rope USING [ROPE, Size],
RopeReader USING [Create, SetPosition, Get, Ref, Backwards];
SpellingWordMapImpl: CEDAR MONITOR
EXPORTS
SpellingWordMap =
BEGIN
-- Local shorthand for the rope type taken from the Rope interface.
ROPE: TYPE = Rope.ROPE;
-- A single rope reader created once and shared by MapWordsInRope and MapWordsInRopeBackward.
-- Both are ENTRY procedures of this monitor and each repositions the reader before it scans,
-- so the monitor lock is what keeps concurrent callers from interleaving on it.
reader: RopeReader.Ref ← RopeReader.Create[]; -- To make word parsing efficient.
MapWordsInRope:
PUBLIC ENTRY PROC [words: ROPE, buffer: REF TEXT, f: PROC [REF TEXT] RETURNS [BOOL]]
RETURNS [premature: BOOLEAN ← FALSE, wordStart, wordEnd: INT ← 0, newBuffer: REF TEXT] = {
  -- Scans the rope words from front to back and applies f to each word found, stopping as
  -- soon as f returns TRUE or when the rope is exhausted.
  --
  -- A word is the longest non-empty run of letters ['A..'Z] and ['a..'z], with embedded
  -- apostrophes allowed: "abcd" is a word and "ab'cd" is a word, but for "abcd'" the word
  -- is just "abcd" (an apostrophe is kept only when another letter follows it).
  --
  -- words: the text to scan.
  -- buffer: scratch storage in which each word is accumulated and handed to f. Its length
  --   field is overwritten. When a word does not fit, a larger TEXT is allocated and used
  --   from then on.
  -- f: called once per word with the buffer, whose length is set to the length of the word.
  --   It is called while this monitor's lock is held.
  --
  -- premature: TRUE iff f returned TRUE for some word.
  -- wordStart, wordEnd: when premature is TRUE, the index of the first character of that
  --   word and the index just past its last character. Otherwise they denote nothing of
  --   interest.
  -- newBuffer: the buffer in use when the scan ended; either the caller's buffer or the
  --   larger replacement allocated here.

  -- f is client code and the reader may fault; without this catch phrase a signal that
  -- unwinds through this ENTRY procedure would leave the monitor locked forever.
  ENABLE UNWIND => NULL;

  HandleExpansion:
  PROC [] =
  INLINE {
    -- Makes room for buffer[wp]: when the buffer is full, moves the wp characters gathered
    -- so far into a TEXT of roughly twice the capacity and switches to it.
    IF wp >= buffer.maxLength
    THEN {
      newBuffer ← NEW[TEXT[2*buffer.maxLength + 1]];
      newBuffer.length ← newBuffer.maxLength;
      FOR j: NAT IN [0..wp)
      DO
        newBuffer[j] ← buffer[j];
      ENDLOOP;
      buffer ← newBuffer;
    };
  };

  -- InJunk: between words. InWord: the last character was a letter of the current word.
  -- Apostrophe: an apostrophe followed a letter and is being held back until the next
  -- character shows whether it is embedded in the word.
  ParseState: TYPE = {InWord, InJunk, Apostrophe};
  state: ParseState ← InJunk;
  wp: NAT ← 0; -- number of characters of the current word stored in buffer
  size: INT ← words.Size[];
  c: CHAR;

  newBuffer ← buffer;
  buffer.length ← buffer.maxLength;
  RopeReader.SetPosition[reader, words, 0];
  FOR pos: INT IN [0..size)
  DO
    c ← reader.Get[];
    IF c IN ['a..'z] OR c IN ['A..'Z]
    THEN {
      IF state = InJunk
      THEN {
        -- First letter of a new word.
        wordStart ← pos;
        wp ← 0;
      }
      ELSE
        IF state = Apostrophe
        THEN {
          -- The held apostrophe turned out to be embedded, so it belongs to the word.
          HandleExpansion[];
          buffer[wp] ← '\';
          wp ← wp + 1;
        };
      state ← InWord;
      HandleExpansion[];
      buffer[wp] ← c;
      wp ← wp + 1;
    }
    ELSE {
      IF state = InWord
      THEN {
        IF c = '\'
        THEN {
          state ← Apostrophe;
        }
        ELSE {
          -- The word ended at the previous character; pos is one past its end.
          buffer.length ← wp;
          wordEnd ← pos;
          premature ← f[buffer];
          IF premature THEN RETURN;
          state ← InJunk;
        };
      }
      ELSE
        IF state = Apostrophe
        THEN {
          -- The held apostrophe was trailing, so the word ended just before it.
          buffer.length ← wp;
          wordEnd ← pos-1;
          premature ← f[buffer];
          IF premature THEN RETURN;
          state ← InJunk;
        };
    };
  ENDLOOP;

  -- The rope ended in the middle of a word, or on an apostrophe that was being held.
  IF state = InWord
  THEN {
    buffer.length ← wp;
    wordEnd ← size;
    premature ← f[buffer];
  }
  ELSE
    IF state = Apostrophe
    THEN {
      buffer.length ← wp;
      wordEnd ← size-1;
      premature ← f[buffer];
    };
};
MapWordsInRopeBackward:
PUBLIC ENTRY PROC [words: ROPE, buffer: REF TEXT, f: PROC [REF TEXT] RETURNS [BOOL]]
RETURNS [premature: BOOLEAN ← FALSE, wordStart, wordEnd: INT ← 0, newBuffer: REF TEXT] = {
  -- Scans the rope words from back to front and applies f to each word found, last word
  -- first, stopping as soon as f returns TRUE or when the rope is exhausted.
  --
  -- A word is a longest non-empty run of letters ['A..'Z] and ['a..'z], with embedded
  -- apostrophes allowed: "abcd" is a word and "ab'cd" is a word. An apostrophe is kept
  -- only when it has a letter of the word on both sides.
  --
  -- words: the text to scan.
  -- buffer: scratch storage in which each word is accumulated and handed to f. Its length
  --   field is overwritten. When a word does not fit, a larger TEXT is allocated and used
  --   from then on.
  -- f: called once per word with the buffer, whose length is set to the length of the word.
  --   Characters are gathered in reverse while scanning and put back into reading order
  --   before f is called. f is called while this monitor's lock is held.
  --
  -- premature: TRUE iff f returned TRUE for some word.
  -- wordStart, wordEnd: when premature is TRUE, the index of the first character of that
  --   word and the index just past its last character. Otherwise they denote nothing of
  --   interest.
  -- newBuffer: the buffer in use when the scan ended; either the caller's buffer or the
  --   larger replacement allocated here.

  -- f is client code and the reader may fault; without this catch phrase a signal that
  -- unwinds through this ENTRY procedure would leave the monitor locked forever.
  ENABLE UNWIND => NULL;

  HandleExpansion:
  PROC [] =
  INLINE {
    -- Makes room for buffer[wp]: when the buffer is full, moves the wp characters gathered
    -- so far into a TEXT of roughly twice the capacity and switches to it.
    IF wp >= buffer.maxLength
    THEN {
      newBuffer ← NEW[TEXT[2*buffer.maxLength + 1]];
      newBuffer.length ← newBuffer.maxLength;
      FOR j: NAT IN [0..wp)
      DO
        newBuffer[j] ← buffer[j];
      ENDLOOP;
      buffer ← newBuffer;
    };
  };

  ReverseWord:
  PROC [] =
  INLINE {
    -- Reverses the first buffer.length characters of buffer in place by swapping the
    -- characters at i and j, with i moving up from the front and j moving down from the
    -- back. The loop bound is meant to be half of the initial length.
    j: NAT ← buffer.length;
    c: CHAR;
    FOR i: NAT IN [0..j/2)
    DO
      j ← j - 1;
      c ← buffer[i];
      buffer[i] ← buffer[j];
      buffer[j] ← c;
    ENDLOOP;
  };

  -- InJunk: between words. InWord: the last character read was a letter of the current
  -- word. Apostrophe: an apostrophe was met while in a word and is being held back until
  -- the next character read shows whether it is embedded in the word.
  ParseState: TYPE = {InWord, InJunk, Apostrophe};
  state: ParseState ← InJunk;
  wp: NAT ← 0; -- number of characters of the current word stored in buffer
  size: INT ← words.Size[];
  c: CHAR;

  newBuffer ← buffer;
  buffer.length ← buffer.maxLength;
  -- Start just past the last character; each Backwards call below yields the character
  -- at index pos.
  RopeReader.SetPosition[reader, words, size];
  FOR pos: INT DECREASING IN [0..size)
  DO
    c ← reader.Backwards[];
    IF c IN ['a..'z] OR c IN ['A..'Z]
    THEN {
      IF state = InJunk
      THEN {
        -- Last letter of a new word; wordEnd is one past it.
        wordEnd ← pos+1;
        wp ← 0;
      }
      ELSE
        IF state = Apostrophe
        THEN {
          -- The held apostrophe turned out to be embedded, so it belongs to the word.
          HandleExpansion[];
          buffer[wp] ← '\';
          wp ← wp + 1;
        };
      state ← InWord;
      HandleExpansion[];
      buffer[wp] ← c;
      wp ← wp + 1;
    }
    ELSE {
      IF state = InWord
      THEN {
        IF c = '\'
        THEN {
          state ← Apostrophe;
        }
        ELSE {
          -- The word begins at the character read just before this one.
          buffer.length ← wp;
          wordStart ← pos+1;
          ReverseWord[];
          premature ← f[buffer];
          IF premature THEN RETURN;
          state ← InJunk;
        };
      }
      ELSE
        IF state = Apostrophe
        THEN {
          -- The held apostrophe was leading, so the word begins just after it.
          buffer.length ← wp;
          wordStart ← pos+2;
          ReverseWord[];
          premature ← f[buffer];
          IF premature THEN RETURN;
          state ← InJunk;
        };
    };
  ENDLOOP;

  -- The rope began in the middle of a word, or with an apostrophe that was being held.
  IF state = InWord
  THEN {
    buffer.length ← wp;
    wordStart ← 0;
    ReverseWord[];
    premature ← f[buffer];
  }
  ELSE
    IF state = Apostrophe
    THEN {
      buffer.length ← wp;
      wordStart ← 1;
      ReverseWord[];
      premature ← f[buffer];
    };
};
END.
CHANGE LOG
Created by Nix on October 7, 1983 2:22 pm