<<>> <> <> <> <<>> DIRECTORY Char, CharOps, Rope, TextFind, TextFindPrivate; TextFindImpl: CEDAR PROGRAM IMPORTS Char, CharOps, Rope EXPORTS TextFind ~ BEGIN OPEN TextFind; XCHAR: TYPE ~ Char.XCHAR; ROPE: TYPE ~ Rope.ROPE; Error: PUBLIC ERROR [index: INT, reason: ROPE] ~ CODE; refFalse: REF BOOL ~ NEW[BOOL ¬ FALSE]; <> LiteralSearch: PUBLIC PROC [direction: Direction, targetHash: PROC [INT] RETURNS [BYTE], targetStart: INT, targetLen: INT, objectHash: PROC [INT] RETURNS [BYTE], objectStart: INT, objectLen: INT, match: PROC [objectStart, targetStart, len: INT] RETURNS [BOOL], interrupt: REF BOOL ¬ NIL] RETURNS [found: BOOL ¬ FALSE, matchStart, matchEnd: INT ¬ 0] ~ { IF interrupt=NIL THEN interrupt ¬ refFalse; IF targetLen>=0 AND objectLen>=0 AND targetLen<=objectLen THEN { objectStop: INT ~ objectStart+objectLen-targetLen; matchStart ¬ IF direction=backward THEN objectStop ELSE objectStart; IF targetLen=0 THEN GOTO Found ELSE { -- 0 < targetLen <= objectLen skip: PACKED ARRAY BYTE OF BYTE ¬ ALL[MIN[targetLen, BYTE.LAST]]; delta: INT ~ targetLen-1; SELECT direction FROM forward => { h1: BYTE ~ targetHash[targetStart+delta]; FOR k: BYTE DECREASING IN[1..MIN[delta, BYTE.LAST]] DO skip[targetHash[targetStart+delta-k]] ¬ k; ENDLOOP; UNTIL matchStart>objectStop OR interrupt­ DO h2: BYTE ~ objectHash[matchStart+delta]; IF h1=h2 AND match[matchStart, targetStart, targetLen] THEN GOTO Found; matchStart ¬ matchStart+skip[h2]; ENDLOOP; }; backward => { h1: BYTE ~ targetHash[targetStart]; FOR k: BYTE DECREASING IN[1..MIN[delta, BYTE.LAST]] DO skip[targetHash[targetStart+k]] ¬ k; ENDLOOP; UNTIL matchStart { matchEnd ¬ matchStart+targetLen; found ¬ TRUE }; }; IF NOT found THEN RETURN[FALSE]; -- force default return values }; NewCharMap: PROC [proc: PROC [CHAR] RETURNS [CHAR]] RETURNS [CharMap] ~ { map: CharMap ~ NEW[CharMapRep]; FOR c: CHAR IN CHAR DO map[c] ¬ proc[c] ENDLOOP; RETURN[map]; }; MapIdentity: PROC [c: CHAR] RETURNS [CHAR] ~ { RETURN[c] }; MapLower: PROC [c: CHAR] RETURNS [CHAR] ~ { RETURN[CharOps.Lower[c]] }; charMapFromCase: ARRAY --case:--BOOL OF CharMap ~ [ FALSE: NewCharMap[MapLower], TRUE: NewCharMap[MapIdentity]]; CharMapFromCase: PUBLIC PROC [case: BOOL] RETURNS [CharMap] ~ { RETURN[charMapFromCase[case]]; }; <<>> <> <<>> Name: TYPE ~ TextFindPrivate.Name; NameRep: TYPE ~ TextFindPrivate.NameRep; Target: TYPE ~ TextFindPrivate.Target; TargetRep: PUBLIC TYPE ~ TextFindPrivate.TargetRep; Item: TYPE ~ TextFindPrivate.Item; ItemRep: TYPE ~ TextFindPrivate.ItemRep; <> <> <> < named subpattern>> xHash: XCHAR ~ Char.Widen['#]; xAt: XCHAR ~ Char.Widen['@]; xPercent: XCHAR ~ Char.Widen['%]; xAsterisk: XCHAR ~ Char.Widen['*]; xAmpersand: XCHAR ~ Char.Widen['&]; xDollar: XCHAR ~ Char.Widen['$]; xTilde: XCHAR ~ Char.Widen['~]; xSingleQuote: XCHAR ~ Char.Widen['']; xLeftBrace: XCHAR ~ Char.Widen['{]; xRightBrace: XCHAR ~ Char.Widen['}]; xLeftAngle: XCHAR ~ Char.Widen['<]; xRightAngle: XCHAR ~ Char.Widen['>]; xBar: XCHAR ~ Char.Widen['|]; xColon: XCHAR ~ Char.Widen[':]; leftSelItem: Item ~ NEW[ItemRep.leftSel ¬ [leftSel[]]]; rightSelItem: Item ~ NEW[ItemRep.rightSel ¬ [rightSel[]]]; leftBoundaryItem: Item ~ NEW[ItemRep.leftBoundary ¬ [leftBoundary[]]]; rightBoundaryItem: Item ~ NEW[ItemRep.rightBoundary ¬ [rightBoundary[]]]; MakeName: PROC [fetch: FetchProc, start, len: INT] RETURNS [name: Name] ~ { name ¬ NEW[NameRep[len]]; FOR i: NAT IN[0..name.length) DO name[i] ¬ fetch[start+i] ENDLOOP; }; CreateTarget: PUBLIC PROC [size: INT, start: INT ¬ 0, len: INT ¬ LAST[INT], fetch: FetchProc, substr: SubstrProc, pattern: BOOL ¬ FALSE] RETURNS [target: Target] ~ { i0: INT ~ MIN[MAX[0, start], size]; i1: INT ~ i0+MIN[MAX[0, len], size-i0]; list: LIST OF Item ¬ NIL; -- pattern items, in reverse order count: NAT ¬ 0; -- number of pattern items Cons: PROC [item: Item] ~ { list ¬ CONS[item, list]; count ¬ count+1 }; IF pattern THEN { state: {null, quote, tilde, tildeQuote, name} ¬ null; -- state of the lexical scan braceState: {none, left, right} ¬ none; -- any braces seen? pending: {none, seq, string} ¬ none; -- any pending item? seqType: WildType ¬ any; seqIndex: INT ¬ 0; -- for pending seq stringStart, stringLen: INT ¬ 0; -- for pending string Flush: PROC ~ { SELECT pending FROM none => NULL; seq => Cons[NEW[ItemRep.min ¬ [min[seqType, seqIndex-i0]]]]; string => Cons[NEW[ItemRep.string ¬ [string[stringStart-i0, stringLen]]]]; ENDCASE => ERROR; pending ¬ none; }; Seq: PROC [type: WildType, index: INT] ~ { IF pending=seq AND seqType=type THEN { Cons[NEW[ItemRep.max ¬ [max[seqType, seqIndex-i0]]]]; pending ¬ none } ELSE { Flush[]; seqType ¬ type; seqIndex ¬ index; pending ¬ seq }; }; Put: PROC [item: Item] ~ { Flush[]; Cons[item] }; One: PROC [type: WildType, i: INT] ~ { Put[NEW[ItemRep.one ¬ [one[type, i-i0]]]] }; Chr: PROC [eq: BOOL, i: INT] ~ { Put[NEW[ItemRep.char ¬ [char[eq, i-i0]]]] }; nameStart, nameLen: INT ¬ 0; name: Name ¬ NIL; -- current subpattern name inNamedPattern: BOOL ¬ FALSE; -- inside a subpattern? StartName: PROC [] ~ { name ¬ MakeName[fetch, nameStart, nameLen]; Put[NEW[ItemRep.startName ¬ [startName[name]]]]; inNamedPattern ¬ TRUE; }; EndName: PROC [implicit: BOOL] ~ { IF implicit THEN Put[NEW[ItemRep.min ¬ [min[any, nameStart]]]]; Put[NEW[ItemRep.endName ¬ [endName[name]]]]; inNamedPattern ¬ FALSE; }; FOR i: INT IN[i0..i1) DO char: XCHAR ~ fetch[i]; SELECT state FROM null => SELECT char FROM xTilde => state ¬ tilde; xSingleQuote => state ¬ quote; xHash => One[any, i]; xAt => One[alpha, i]; xPercent => One[blank, i]; xAsterisk => Seq[any, i]; xAmpersand => Seq[alpha, i]; xDollar => Seq[blank, i]; xLeftBrace => SELECT braceState FROM none => { Put[leftSelItem]; braceState ¬ left }; ENDCASE => ERROR Error[i, "misplaced { in pattern"]; xRightBrace => SELECT braceState FROM none, left => { Put[rightSelItem]; braceState ¬ right }; ENDCASE => ERROR Error[i, "misplaced } in pattern"]; xLeftAngle => IF inNamedPattern THEN ERROR Error[i, "misplaced < in pattern"] ELSE { nameStart ¬ i+1; nameLen ¬ 0; state ¬ name }; xRightAngle => IF inNamedPattern THEN EndName[implicit: FALSE] ELSE ERROR Error[i, "misplaced > in pattern"]; xBar => SELECT i FROM i0 => Put[leftBoundaryItem]; (i1-1) => Put[rightBoundaryItem]; ENDCASE => ERROR Error[i, "misplaced | in pattern"]; ENDCASE => SELECT pending FROM string => stringLen ¬ stringLen+1; ENDCASE => { Flush[]; stringStart ¬ i; stringLen ¬ 1; pending ¬ string }; tilde => SELECT char FROM xSingleQuote => state ¬ tildeQuote; xAt => { One[nonalpha, i]; state ¬ null }; xPercent => { One[nonblank, i]; state ¬ null }; xAmpersand => { Seq[nonalpha, i]; state ¬ null }; xDollar => { Seq[nonblank, i]; state ¬ null }; ENDCASE => { Chr[FALSE, i]; state ¬ null }; quote => { Chr[TRUE, i]; state ¬ null }; tildeQuote => { Chr[FALSE, i]; state ¬ null }; name => SELECT char FROM xColon => { StartName[]; state ¬ null }; xRightAngle => { StartName[]; EndName[implicit: TRUE]; state ¬ null }; ENDCASE => nameLen ¬ nameLen+1; ENDCASE => ERROR; ENDLOOP; SELECT state FROM null => Flush[]; -- ok tilde => ERROR Error[i1, "trailing ~ in pattern"]; quote => ERROR Error[i1, "trailing ' in pattern"]; tildeQuote => ERROR Error[i1, "trailing ~' in pattern"]; name => ERROR Error[i1, "pattern ends inside subpattern name"]; ENDCASE => ERROR; IF inNamedPattern THEN ERROR Error[i1, "pattern ends inside subpattern"]; } ELSE Cons[NEW[ItemRep.string ¬ [string[0, i1-i0]]]]; -- literal target ¬ NEW[TargetRep[count] ¬ [text: substr[i0, i1-i0], pattern: ]]; FOR i: NAT DECREASING IN [0..count) DO target[i] ¬ list.first; list ¬ list.rest ENDLOOP; }; -- of CreateTarget <<>> <> Subs: TYPE ~ TextFindPrivate.Subs; SubsRep: PUBLIC TYPE ~ TextFindPrivate.SubsRep; Sub: TYPE ~ TextFindPrivate.Sub; SubRep: TYPE ~ TextFindPrivate.SubRep; <<>> Search: PUBLIC PROC [direction: Direction, target: Target, size: INT, start: INT ¬ 0, len: INT ¬ INT.LAST, substr: SubstrProc, matchType: MatchTypeProc, matchString: MatchStringProc, matchProps: MatchStringProc ¬ NIL, matchBound: MatchBoundProc ¬ NIL, interrupt: REF BOOL ¬ NIL] RETURNS [found: BOOL ¬ FALSE, matchStart, matchEnd, selStart, selEnd: INT ¬ 0, subs: Subs ¬ NIL] ~ { list: LIST OF Sub ¬ NIL; count: NAT ¬ 0; Cons: PROC [name: Name, start, end: INT] ~ { sub: Sub ~ NEW[SubRep ¬ [name: name, start: start-matchStart, len: end-start]]; list ¬ CONS[sub, list]; count ¬ count+1; }; i0: INT ~ MIN[MAX[start, 0], size]; i1: INT ~ i0+MIN[MAX[len, 0], size-i0]; MatchType: PROC [i: INT, type: WildType] RETURNS [BOOL] ~ INLINE { RETURN[type=any OR matchType[i, type]]; }; MatchString: PROC [i, start, len: INT] RETURNS [BOOL] ~ INLINE { RETURN[matchString[i, target.text, start, len]]; }; MatchProps: PROC [i, start, len: INT] RETURNS [BOOL] ~ INLINE { RETURN[matchProps=NIL OR matchProps[i, target.text, start, len]]; }; MatchBound: PROC [i: INT, bound: Bound] RETURNS [BOOL] ~ INLINE { RETURN[matchBound=NIL OR matchBound[i, bound]]; }; nameStart, nameEnd: INT ¬ 0; IF interrupt=NIL THEN interrupt ¬ refFalse; SELECT direction FROM forward => { MatchForward: PROC [p: NAT, i: INT] RETURNS [ok: BOOL ¬ FALSE] ~ { IF p RETURN[(i1-i)>=item.len AND MatchString[i, item.start, item.len] AND MatchProps[i, item.start, item.len] AND MatchForward[p+1, i+item.len]]; item: REF ItemRep.char => RETURN[i RETURN[i FOR k: INT IN[i..i1] DO IF MatchForward[p+1, k] THEN RETURN[TRUE]; IF (k FOR m: INT IN[i..i1] DO IF (m IF (ok ¬ MatchForward[p+1, i]) THEN Cons[name: item.name, start: (nameStart ¬ i), end: nameEnd]; item: REF ItemRep.endName => IF (ok ¬ MatchForward[p+1, i]) THEN nameEnd ¬ i; item: REF ItemRep.leftSel => IF (ok ¬ MatchForward[p+1, i]) THEN selStart ¬ i; item: REF ItemRep.rightSel => IF (ok ¬ MatchForward[p+1, i]) THEN selEnd ¬ i; item: REF ItemRep.leftBoundary => RETURN[i=0 AND MatchForward[p+1, i]]; item: REF ItemRep.rightBoundary => RETURN[i=size AND MatchForward[p+1, i]]; ENDCASE => ERROR ELSE IF MatchBound[i, end] THEN { matchEnd ¬ selEnd ¬ i; RETURN[TRUE] }; }; FOR i: INT IN[i0..i1] UNTIL found OR interrupt­ DO IF MatchBound[i, start] THEN found ¬ MatchForward[0, matchStart ¬ selStart ¬ i]; ENDLOOP; }; backward => { MatchBackward: PROC [p: NAT, i: INT] RETURNS [ok: BOOL ¬ FALSE] ~ { IF p>0 THEN WITH target[p-1] SELECT FROM item: REF ItemRep.string => RETURN[(i-i0)>=item.len AND MatchString[i-item.len, item.start, item.len] AND MatchProps[i-item.len, item.start, item.len] AND MatchBackward[p-1, i-item.len]]; item: REF ItemRep.char => RETURN[i>i0 AND MatchString[i-1, item.index, 1]=item.eq AND MatchProps[i-1, item.index, 1] AND MatchBackward[p-1, i-1]]; item: REF ItemRep.one => RETURN[i>i0 AND MatchType[i-1, item.type] AND MatchProps[i-1, item.index, 1] AND MatchBackward[p-1, i-1]]; item: REF ItemRep.min => FOR k: INT DECREASING IN[i0..i] DO IF MatchBackward[p-1, k] THEN RETURN[TRUE]; IF (k>i0 AND MatchType[k-1, item.type] AND MatchProps[k-1, item.index, 1] ) THEN LOOP; RETURN[FALSE]; ENDLOOP; item: REF ItemRep.max => FOR m: INT DECREASING IN[i0..i] DO IF (m>i0 AND MatchType[m-1, item.type] AND MatchProps[m-1, item.index, 1] ) THEN LOOP; FOR k: INT IN[m..i] DO IF MatchBackward[p-1, k] THEN RETURN[TRUE]; ENDLOOP; RETURN[FALSE]; ENDLOOP; item: REF ItemRep.startName => IF (ok ¬ MatchBackward[p-1, i]) THEN nameStart ¬ i; item: REF ItemRep.endName => IF (ok ¬ MatchBackward[p-1, i]) THEN Cons[name: item.name, start: nameStart, end: (nameEnd ¬ i)]; item: REF ItemRep.leftSel => IF (ok ¬ MatchBackward[p-1, i]) THEN selStart ¬ i; item: REF ItemRep.rightSel => IF (ok ¬ MatchBackward[p-1, i]) THEN selEnd ¬ i; item: REF ItemRep.leftBoundary => RETURN[i=0 AND MatchBackward[p-1, i]]; item: REF ItemRep.rightBoundary => RETURN[i=size AND MatchBackward[p-1, i]]; ENDCASE => ERROR ELSE IF MatchBound[i, start] THEN { matchStart ¬ selStart ¬ i; RETURN[TRUE] }; }; FOR i: INT DECREASING IN[i0..i1] UNTIL found OR interrupt­ DO IF MatchBound[i, end] THEN found ¬ MatchBackward[target.size, matchEnd ¬ selEnd ¬ i]; ENDLOOP; }; ENDCASE => ERROR; IF count>0 THEN { subs ¬ NEW[SubsRep[count] ¬ [text: substr[matchStart, matchEnd-matchStart], subs: ]]; FOR i: NAT IN [0..count) DO subs[i] ¬ list.first; list ¬ list.rest; ENDLOOP }; }; <> NameEq: PROC [name: Name, fetch: FetchProc, start, len: INT] RETURNS [BOOL] ~ { IF name.length#len THEN RETURN[FALSE]; FOR i: NAT IN[0..name.length) DO IF name[i]#fetch[start+i] THEN RETURN[FALSE]; ENDLOOP; RETURN[TRUE]; }; FindSub: PROC [subs: Subs, fetch: FetchProc, start, len: INT] RETURNS [Sub] ~ { IF subs#NIL THEN FOR k: NAT IN[0..subs.size) DO sub: Sub ~ subs[k]; IF NameEq[sub.name, fetch, start, len] THEN RETURN[sub]; ENDLOOP; RETURN[NIL]; }; Replace: PUBLIC PROC [replace: ReplaceProc, substitute: SubstituteProc, size: INT, start: INT ¬ 0, len: INT ¬ INT.LAST, fetch: FetchProc, pattern: BOOL ¬ FALSE, subs: Subs ¬ NIL] ~ { i0: INT ~ MIN[MAX[start, 0], size]; i1: INT ~ i0+MIN[MAX[len, 0], size-i0]; IF pattern THEN { state: {null, quote, name} ¬ null; s0: INT ¬ 0; -- start of literal string or name pending: BOOL ¬ FALSE; -- literal string pending, starting at s0 Flush: PROC [s1: INT] ~ { IF pending THEN { replace[s0, s1-s0]; pending ¬ FALSE } }; FOR i: INT IN[i0..i1) DO char: XCHAR ~ fetch[i]; SELECT state FROM null => SELECT char FROM xSingleQuote => { Flush[i]; state ¬ quote }; xLeftAngle => { Flush[i]; s0 ¬ i+1; state ¬ name }; ENDCASE => IF pending THEN NULL ELSE { s0 ¬ i; pending ¬ TRUE }; quote => { replace[i, 1]; state ¬ null }; name => SELECT char FROM xRightAngle => { sub: Sub ~ FindSub[subs, fetch, s0, i-s0]; IF sub#NIL THEN substitute[subs.text, sub.start, sub.len, s0, i-s0]; state ¬ null; }; ENDCASE => NULL; ENDCASE => ERROR; ENDLOOP; Flush[i1]; } ELSE IF i1>i0 THEN replace[i0, i1-i0]; }; <> RopeLiteralSearch: PUBLIC PROC [direction: Direction, target: ROPE, targetStart: INT ¬ 0, targetLen: INT ¬ INT.LAST, object: ROPE, objectStart: INT ¬ 0, objectLen: INT ¬ INT.LAST, case: BOOL ¬ TRUE] RETURNS [found: BOOL, matchStart, matchEnd: INT] ~ { targetSize: INT ~ Rope.Size[target]; objectSize: INT ~ Rope.Size[object]; map: CharMap ~ CharMapFromCase[case]; targetHash: PROC [i: INT] RETURNS [BYTE] ~ { RETURN[ORD[map[Rope.Fetch[target, i]]]] }; objectHash: PROC [i: INT] RETURNS [BYTE] ~ { RETURN[ORD[map[Rope.Fetch[object, i]]]] }; match: PROC [objectStart, targetStart, len: INT] RETURNS [BOOL] ~ { RETURN[Rope.Run[s1: target, pos1: targetStart, s2: object, pos2: objectStart, case: case, len: targetLen]=targetLen]; }; targetStart ¬ MIN[MAX[0, targetStart], targetSize]; targetLen ¬ MIN[MAX[0, targetLen], targetSize-targetStart]; objectStart ¬ MIN[MAX[0, objectStart], objectSize]; objectLen ¬ MIN[MAX[0, objectLen], objectSize-objectStart]; RETURN LiteralSearch[direction: direction, targetHash: targetHash, targetStart: targetStart, targetLen: targetLen, objectHash: objectHash, objectStart: objectStart, objectLen: objectLen, match: match]; }; TargetFromRope: PUBLIC PROC [rope: ROPE, start: INT ¬ 0, len: INT ¬ LAST[INT], pattern: BOOL ¬ FALSE] RETURNS [Target] ~ { fetch: FetchProc ~ { RETURN[Char.Widen[Rope.Fetch[rope, index]]] }; substr: SubstrProc ~ { RETURN[Rope.Substr[rope, start, len]] }; RETURN CreateTarget[size: Rope.Size[rope], start: start, len: len, fetch: fetch, substr: substr, pattern: pattern]; }; RopeSearch: PUBLIC PROC [direction: Direction, target: Target, rope: ROPE, start: INT ¬ 0, len: INT ¬ INT.LAST, case: BOOL ¬ TRUE, word, def, all: BOOL ¬ FALSE, interrupt: REF BOOL ¬ NIL] RETURNS [found: BOOL, matchStart, matchEnd, selStart, selEnd: INT, subs: Subs] ~ { size: INT ~ Rope.Size[rope]; matchType: MatchTypeProc ~ { char: CHAR ~ Rope.Fetch[rope, index]; RETURN[SELECT type FROM any => TRUE, alpha => CharOps.AlphaNumeric[char], nonalpha => NOT CharOps.AlphaNumeric[char], blank => CharOps.Blank[char], nonblank => NOT CharOps.Blank[char], ENDCASE => ERROR]; }; matchString: MatchStringProc ~ { WITH text SELECT FROM text: ROPE => RETURN[Rope.Run[s1: rope, pos1: index, s2: text, pos2: start, case: case, len: len]=len]; ENDCASE => RETURN[FALSE]; }; matchBound: MatchBoundProc ~ { RETURN[NOT (SELECT bound FROM start => (all AND index#0) OR ((word OR def) AND index>0 AND matchType[index-1, alpha]), end => (all AND index#size) OR (word AND index