DIRECTORY IO USING [Backup, GetChar, EndOfStream, RIS, STREAM], Process USING [CheckForAbort], RefTab USING [Create, EachPairAction, Fetch, Pairs, Ref, Store], Rope USING [ActionType, Length, Map, ROPE], ACFind; ACFindImpl: CEDAR PROGRAM IMPORTS IO, Process, RefTab, Rope EXPORTS ACFind ~ BEGIN OPEN ACFind; ROPE: TYPE ~ Rope.ROPE; STREAM: TYPE ~ IO.STREAM; Create: PUBLIC PROC [keys: LIST OF ROPE, caseSensitive: BOOL _ FALSE] RETURNS [self: Ref] ~ { ENABLE ABORTED => GOTO Done; goto: RefTab.Ref _ RefTab.Create[]; SymbolEntry: TYPE ~ RECORD [ state: State, c: CHAR ]; Goto: PROC [state: State, c: CHAR] RETURNS [next: State] ~ { val: REF LIST OF SymbolEntry _ NARROW[goto.Fetch[state].val]; IF val = NIL THEN RETURN [NIL]; FOR list: LIST OF SymbolEntry _ val^, list.rest UNTIL list = NIL DO IF c = list.first.c THEN RETURN [list.first.state]; ENDLOOP; RETURN [NIL]; }; MakeGoto: PROC [state: State, c: CHAR, next: State] ~ { Append: PROC [l: REF LIST OF SymbolEntry, item: SymbolEntry] ~ { tail: LIST OF SymbolEntry; IF l = NIL THEN { l _ NEW[LIST OF SymbolEntry]; [] _ RefTab.Store[goto, state, l]; }; IF l^ = NIL THEN { l^ _ LIST[item]; RETURN }; FOR tail _ l^, tail.rest UNTIL tail.rest = NIL DO ENDLOOP; tail.rest _ LIST[item]; }; Append[NARROW[goto.Fetch[state].val], [next, c]]; }; BuildGoto: PROC [keys: LIST OF ROPE] ~ { Enter: PROC [key: ROPE] ~ { ris: STREAM _ IO.RIS[key]; this: State _ self.initialState; next: State; { ENABLE IO.EndOfStream => GOTO Done; c: CHAR; UNTIL (next _ Goto[this, c _ GetChar[self, ris]]) = NIL DO this _ next; ENDLOOP; Backup[ris, c]; WHILE TRUE DO MakeGoto[this, GetChar[self, ris], (next _ NEW[StateRep])]; next.move _ NEW[MoveRep[LAST[CHAR]-FIRST[CHAR]+1]]; this _ next; ENDLOOP; EXITS Done => { IF this = NIL THEN this _ NEW[StateRep]; this.keys _ CONS[key, this.keys]; }; }; }; FOR list: LIST OF ROPE _ keys, list.rest UNTIL list = NIL DO Enter[list.first]; ENDLOOP; FOR c: CHAR IN CHAR DO IF Goto[self.initialState, c] = NIL THEN MakeGoto[self.initialState, c, self.initialState]; ENDLOOP; }; Traverse: PROC [] ~ { q: LIST OF State _ NIL; Append: PROC [q: LIST OF State, new: State] RETURNS [val: LIST OF State] ~ { temp: LIST OF State _ NIL; val _ LIST[new]; IF q = NIL THEN RETURN [val]; val _ CONS[q.first, val]; temp _ val; UNTIL (q _ q.rest) = NIL DO temp.rest _ CONS[q.first, temp.rest]; temp _ temp.rest; ENDLOOP; RETURN[val]; }; MakeMove: PROC [state: State, input: CHAR, next: State] ~ INLINE { state.move[input-state.offset] _ next; }; NilFailures: RefTab.EachPairAction ~ { this: State _ NARROW[key]; this.failure _ NIL; RETURN [FALSE]; }; CondenseMoves: RefTab.EachPairAction ~ { this: State _ NARROW[key]; newOffset: CHAR _ this.offset; lastChar: CHAR; newMove: REF MoveRep; IF this.failure # NIL THEN RETURN [FALSE]; this.failure _ Move[this, LAST[CHAR]]; WHILE Move[this, newOffset] = this.failure DO newOffset _ newOffset + 1; ENDLOOP; lastChar _ LAST[CHAR]; WHILE Move[this, lastChar] = this.failure DO lastChar _ lastChar - 1; ENDLOOP; newMove _ NEW[MoveRep[lastChar-newOffset+1]]; FOR c: CHAR IN [newOffset..lastChar] DO newMove[c-newOffset] _ Move[this, c]; ENDLOOP; this.offset _ newOffset; this.move _ newMove; RETURN [FALSE]; }; activeState: State; FOR c: CHAR IN CHAR DO MakeMove[self.initialState, c, Goto[self.initialState, c]]; IF (activeState _ Goto[self.initialState, c]) # self.initialState THEN { q _ Append[q, activeState]; activeState.failure _ self.initialState; }; ENDLOOP; WHILE q # NIL DO this: State _ q.first; next: State; q _ q.rest; FOR c: CHAR IN CHAR DO IF (next _ Goto[this, c]) # NIL THEN { temp: State _ this.failure; WHILE Goto[temp, c] = NIL DO temp _ temp.failure; ENDLOOP; q _ Append[q, next]; next.failure _ Goto[temp, c]; FOR list: LIST OF ROPE _ next.failure.keys, list.rest UNTIL list = NIL DO next.keys _ CONS[list.first, next.keys] ENDLOOP; MakeMove[this, c, next]; } ELSE MakeMove[this, c, Move[this.failure, c]]; ENDLOOP; ENDLOOP; [] _ RefTab.Pairs[goto, NilFailures]; [] _ RefTab.Pairs[goto, CondenseMoves]; }; self _ NEW[Rep]; self.initialState _ NEW[StateRep]; IF keys = NIL THEN { self.initialState.failure _ self.initialState; self.initialState.offset _ '\377; self.initialState.move _ NEW[MoveRep[0]]; RETURN[self]; }; self.initialState.move _ NEW[MoveRep[LAST[CHAR]-FIRST[CHAR]+1]]; self.caseSensitive _ caseSensitive; BuildGoto[keys]; Traverse[]; EXITS Done => NULL; }; Move: PROC [state: State, input: CHAR] RETURNS [State] ~ INLINE { IF NOT (input IN [state.offset..state.offset + state.move.l)) THEN RETURN [state.failure]; RETURN[state.move[input - state.offset]]; }; Find: PUBLIC PROC [self: Ref, text: ROPE, action: ActionProc] RETURNS [foundAny: BOOLEAN _ FALSE] ~ { pos: INT _ 0; MoveAction: Rope.ActionType ~ { ENABLE ABORTED => GOTO Aborted; Process.CheckForAbort[]; IF NOT self.caseSensitive THEN c _ toUpper[c]; state _ Move[state, c]; pos _ pos + 1; IF state.keys # NIL THEN { foundAny _ TRUE; FOR list: LIST OF ROPE _ state.keys, list.rest UNTIL list = NIL DO quit _ action[pos, list.first] OR quit; ENDLOOP; }; EXITS Aborted => RETURN [TRUE]; }; state: State _ self.initialState; [] _ Rope.Map[text, 0, text.Length, MoveAction]; }; lastChar: CHAR; toUpper: ARRAY CHAR OF CHAR; GetChar: PROC [self: Ref, stream: STREAM] RETURNS [CHAR] ~ { IF self.caseSensitive THEN RETURN [lastChar _ IO.GetChar[stream]]; RETURN[toUpper[lastChar _ IO.GetChar[stream]]]; }; Backup: PROC [s: STREAM, c: CHAR] ~ { IO.Backup[s, lastChar]; }; FOR c: CHAR IN CHAR DO toUpper[c] _ IF c IN ['a..'z] THEN c - ('a - 'A) ELSE c; ENDLOOP; END. €ACFindImpl.mesa Copyright c 1985 by Xerox Corporation. All rights reserved. Dave Rumph, June 18, 1986 9:19:34 am PDT Implements the Aho-Corasick pattern matching algorithm. Building FSA's Build the FSA, creating a new instance if required. Builds the Goto table of the non-deterministic FSA, to be traversed to build the Move table for the deterministic FSA. Return a do-nothing Finder Searching Find the next state, given the current state and the input character Step through the text, calling the ActionProc for each match. Utilities Return the next character, converting to upper case if not self.caseSensitive Initialization Κ ˜codešœ™Kšœ Οmœ1™