1   /*
2    *  ConstraintGroup.java - transducer class
3    *
4    *  Copyright (c) 1998-2005, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Hamish Cunningham, 24/07/98
12   *
13   *  $Id: ConstraintGroup.java,v 1.11 2005/01/11 13:51:36 ian Exp $
14   */
15  
16  
17  package gate.jape;
18  
19  import java.util.ArrayList;
20  import java.util.Iterator;
21  
22  import gate.AnnotationSet;
23  import gate.Document;
24  import gate.annotation.AnnotationSetImpl;
25  import gate.util.Strings;
26  
27  
28  /**
29    * A sequence of conjunctions of PatternElement that form a
30    * disjunction.
31    */
32  public class ConstraintGroup
33  extends PatternElement implements JapeConstants, java.io.Serializable
34  {
35    /** Debug flag */
36    private static final boolean DEBUG = false;
37  
38    /** Anonymous constructor. */
39    public ConstraintGroup() {
40      patternElementDisjunction1 = new ArrayList();
41      currentConjunction = new ArrayList();
42      patternElementDisjunction1.add(currentConjunction);
43    } // Anonymous constructor
44  
45    /** Need cloning for processing of macro references. See comments on
46      * <CODE>PatternElement.clone()</CODE>
47      */
48    public Object clone() {
49      ConstraintGroup newPE = (ConstraintGroup) super.clone();
50  
51      // created by createDisjunction
52      newPE.currentConjunction = null;
53  
54      newPE.patternElementDisjunction1 = new ArrayList();
55      // for each (conjunction) member of the pattern element discjunction
56      for(
57        Iterator disjunction = patternElementDisjunction1.iterator();
58        disjunction.hasNext();
59  
60      ) {
61  
62        newPE.createDisjunction();
63        // for each pattern element making up this conjunction
64        for(
65          Iterator conjunction = ((ArrayList) disjunction.next()).iterator();
66          conjunction.hasNext();
67  
68        ) {
69          PatternElement pat = (PatternElement) conjunction.next();
70  
71          newPE.addPatternElement((PatternElement) pat.clone());
72        } // for each element of the conjunction
73      } // for each conjunction (element of the disjunction)
74  
75      return newPE;
76    } // clone
77  
78    /** An array of arrays that represent PatternElement conjunctions
79      * during parsing of the .jape. Each conjunction is
80      * considered as being disjunct with the next. (I.e. they are
81      * or'd, in the same way as expressions around "||" in C and
82      * Java.) Set during parsing; replaced by finish().
83      */
84    private ArrayList patternElementDisjunction1;
85  
86    /** The pattern element disjunction for transduction - Java arrays. */
87    private PatternElement[][] patternElementDisjunction2;
88  
89    /** An array of PatternElements making up a conjunction. It is a member of
90      * patternElementDisjunction. This is the one we're adding to
91      * at present. Used during parsing, not matching.
92      */
93    private ArrayList currentConjunction;
94  
95    /** Make a new disjunction at this point. */
96    public void createDisjunction() {
97      currentConjunction = new ArrayList();
98      patternElementDisjunction1.add(currentConjunction);
99    } // createDisjunction
100 
101   /** Add an element to the current conjunction. */
102   public void addPatternElement(PatternElement pe) {
103     currentConjunction.add(pe);
104   } // addPatternElement
105 
106   /** Get an list of CPEs that we contain. */
107   protected Iterator getCPEs() {
108     ArrayList cpes = new ArrayList();
109 
110     // for each (conjunction) member of the pattern element discjunction
111     for(
112       Iterator disjunction = patternElementDisjunction1.iterator();
113       disjunction.hasNext();
114     ) {
115       // for each pattern element making up this conjunction
116       for(
117         Iterator conjunction = ((ArrayList) disjunction.next()).iterator();
118         conjunction.hasNext();
119       ) {
120         PatternElement pat = (PatternElement) conjunction.next();
121 
122         Iterator i = null;
123         if(pat instanceof ComplexPatternElement) {
124           cpes.add(pat);
125           i = ((ComplexPatternElement) pat).getCPEs();
126         }
127         else if(pat instanceof ConstraintGroup)
128           i = ((ConstraintGroup) pat).getCPEs();
129 
130         if(i != null)
131           for( ; i.hasNext(); )
132             cpes.add(i.next());
133       } // for each element of the conjunction
134     } // for each conjunction (element of the disjunction)
135 
136     return cpes.iterator();
137   } // getCPEs
138 
139   /** Finish: replace dynamic data structures with Java arrays; called
140     * after parsing.
141     */
142   public void finish() {
143 
144     // index into patternElementDisjunction2
145     int i = 0;
146 
147     // index into the conjunctions (second dimension of pED2)
148     int j = 0;
149 
150     patternElementDisjunction2 =
151       new PatternElement[patternElementDisjunction1.size()][];
152 
153     // for each (conjunction) member of the pattern element discjunction
154     for(
155       Iterator disjuncIter = patternElementDisjunction1.iterator();
156       disjuncIter.hasNext();
157       i++
158     ) {
159       ArrayList conjunction = (ArrayList) disjuncIter.next();
160       patternElementDisjunction2[i] = new PatternElement[conjunction.size()];
161       j = 0;
162 
163       // for each pattern element making up this conjunction
164       for(
165         Iterator conjIter = conjunction.iterator();
166         conjIter.hasNext();
167         j++
168       ) {
169         patternElementDisjunction2[i][j] = (PatternElement) conjIter.next();
170         patternElementDisjunction2[i][j].finish();
171       } // loop on conjunction
172 
173     } // loop on patternElementDisjunction1
174 
175     patternElementDisjunction1 = null;
176   } // finish
177 
178   /** Access to the annotations that have been matched by this group. */
179   public AnnotationSet getMatchedAnnots() {
180     AnnotationSet matchedAnnots = new AnnotationSetImpl((Document) null);
181     int pEDLen = patternElementDisjunction2.length;
182 
183     // for each (conjunction) member of the pattern element disjunction
184     for(int i = 0; i < pEDLen; i++) {
185       int conjLen = patternElementDisjunction2[i].length;
186 
187       // for each pattern element making up this conjunction
188       for(int j = 0; j < conjLen; j++) {
189         PatternElement pat = patternElementDisjunction2[i][j];
190         AnnotationSet patMatchedAnnots = pat.getMatchedAnnots();
191         if(patMatchedAnnots != null)
192           matchedAnnots.addAll(pat.getMatchedAnnots());
193       } // for each element of the conjunction
194 
195     } // for each conjunction (element of the disjunction)
196 
197     return matchedAnnots;
198   } // getMatchedAnnots
199 
200 
201   /** Clear all the annotations that have been matched by this group. */
202   public void reset() {
203     // Debug.pr(this, "CG reset, matchHistory.size() = " + matchHistory.size());
204     int pEDLen = patternElementDisjunction2.length;
205 
206     // for each (conjunction) member of the pattern element disjunction
207     for(int i = 0; i < pEDLen; i++) {
208       int conjLen = patternElementDisjunction2[i].length;
209 
210       // for each pattern element making up this conjunction
211       for(int j = 0; j < conjLen; j++)
212         patternElementDisjunction2[i][j].reset();
213     }
214 
215     super.reset(); // should be redundant: there for if PE.reset changes
216   } // reset
217 
218   /** Multilevel rollback of annot caches etc. */
219   public void rollback(int arity) {
220     // Debug.pr(this, "CG rollback(" + arity + "), matchHistory.size() = " +
221     //                   matchHistory.size());
222     for(int i=0; i<arity; i++) {
223       PatternElement[] conjunction = (PatternElement[]) matchHistory.pop();
224       int conjLen = conjunction.length;
225       for(int j = 0; j < conjLen; j++)
226         conjunction[j].rollback(1);
227     }
228   } // rollback
229 
230 
231   /** Does this element match the document at this position? */
232   public boolean matches(
233     Document doc, int position, MutableInteger newPosition
234   ) {
235     // if a whole conjunction matches, we set newPosition to the max of
236     // rightmost advance of all the composite elements that matched, and
237     // position.
238     int rightmostAdvance = position;
239 
240     // when we fail the whole disjunction, we set newPosition to the max of
241     // leftmost failure point, and position
242     int leftmostFailurePoint = Integer.MAX_VALUE;
243 
244     // outerLoop:
245     // for each conjunction
246     //   for each element in the conjunction
247     //     if it fails continue outerLoop;
248     //   return true;
249     // return false;
250 
251     // for each member of the disjunctions array
252     int savedPosition = position;
253     int pEDLen = patternElementDisjunction2.length;
254     outerLoop:
255     for(int i = 0; i < pEDLen; i++) {
256       int conjLen = patternElementDisjunction2[i].length;
257       position = savedPosition;
258       rightmostAdvance = position;
259 
260       // for each pattern element making up this conjunction
261       for(int j = 0; j < conjLen; j++) {
262         PatternElement pat = patternElementDisjunction2[i][j];
263 
264         if(! pat.matches(doc, position, newPosition)) {
265           // reset the last failure point to the furthest we got so far
266           leftmostFailurePoint =
267             Math.min(leftmostFailurePoint, newPosition.value);
268 
269           // rollback matches done in the previous elements of this conjunction
270           for(int k = j - 1; k >= 0; k--)
271             patternElementDisjunction2[i][k].rollback(1);
272 
273           // try the next conjunction
274           continue outerLoop;
275         }
276 
277         // reset our advance point to the furthest so far
278         position = rightmostAdvance =
279           Math.max(rightmostAdvance, newPosition.value);
280 
281       } // for each element of the conjunction
282 
283       // a whole conjunction matched: record advance and which conj succeeded
284       newPosition.value = rightmostAdvance;
285       matchHistory.push(patternElementDisjunction2[i]);
286       //Debug.pr(this, "CG matches: pushing");
287       return true;
288 
289     } // for each conjunction (element of the disjunction)
290 
291     // we reached the end of the disjunction without matching a
292     // whole conjunction
293     if(leftmostFailurePoint == Integer.MAX_VALUE)
294       leftmostFailurePoint = position + 1;
295     newPosition.value = Math.max(position + 1, leftmostFailurePoint);
296     return false; // annot caches have been rolled back already in inner loop
297   } // matches
298 
299 
300   /** Create a string representation of the object. */
301   public String toString() { return toString(""); }
302 
303   /** Create a string representation of the object. */
304   public String toString(String pad) {
305     String newline = Strings.getNl();
306 
307     StringBuffer buf =
308       new StringBuffer(pad + "CG: disjunction(" + newline);
309     String newPad = Strings.addPadding(pad, INDENT_PADDING);
310 
311     boolean firstTime = true;
312 
313     if(patternElementDisjunction1 != null) { // before finish()
314       // for each (conjunction) member of the pattern element discjunction
315       for(
316         Iterator disjunction = patternElementDisjunction1.iterator();
317         disjunction.hasNext();
318       ) {
319         if(firstTime) firstTime = false;
320         else buf.append(newline + pad + "|" + newline);
321 
322         // for each pattern element making up this conjunction
323         for(
324           Iterator conjunction = ((ArrayList) disjunction.next()).iterator();
325           conjunction.hasNext();
326         ) {
327           buf.append(
328             ((PatternElement) conjunction.next()).toString(newPad) + newline
329           );
330         } // for each element of the conjunction
331       } // for each conjunction (element of the disjunction)
332 
333     } else { // after finish
334       int pEDLen = patternElementDisjunction2.length;
335       if(firstTime) firstTime = false;
336       else buf.append(newline + pad + "|" + newline);
337 
338       for(int i = 0; i < pEDLen; i++) {
339         int conjLen = patternElementDisjunction2[i].length;
340         // for each pattern element making up this conjunction
341         for(int j = 0; j < conjLen; j++)
342           buf.append(
343             patternElementDisjunction2[i][j].toString(newPad) + newline
344           );
345       }
346     }
347 
348     buf.append(pad + ") CG." + newline);
349 
350     return buf.toString();
351   } // toString
352 
353 
354   //needed by FSM
355   public PatternElement[][] getPatternElementDisjunction(){
356     return patternElementDisjunction2;
357   }
358 
359 } // class ConstraintGroup
360 
361 
362 // $Log: ConstraintGroup.java,v $
363 // Revision 1.11  2005/01/11 13:51:36  ian
364 // Updating copyrights to 1998-2005 in preparation for v3.0
365 //
366 // Revision 1.10  2004/07/21 17:10:07  akshay
367 // Changed copyright from 1998-2001 to 1998-2004
368 //
369 // Revision 1.9  2004/03/25 13:01:15  valyt
370 // Imports optimisation throughout the Java sources
371 // (to get rid of annoying warnings in Eclipse)
372 //
373 // Revision 1.8  2001/09/13 12:09:50  kalina
374 // Removed completely the use of jgl.objectspace.Array and such.
375 // Instead all sources now use the new Collections, typically ArrayList.
376 // I ran the tests and I ran some documents and compared with keys.
377 // JAPE seems to work well (that's where it all was). If there are problems
378 // maybe look at those new structures first.
379 //
380 // Revision 1.7  2001/09/12 11:59:33  kalina
381 // Changed the old JAPE stuff to use the new Collections API,
382 // instead of com.objectspace stuff. Will eliminate that library
383 // completely very soon! Just one class left to re-implement,
384 //
385 // ParseCPSL.jj changed accordingly. All tested and no smoke.
386 //
387 // Revision 1.6  2000/11/08 16:35:02  hamish
388 // formatting
389 //
390 // Revision 1.5  2000/10/26 10:45:30  oana
391 // Modified in the code style
392 //
393 // Revision 1.4  2000/10/16 16:44:33  oana
394 // Changed the comment of DEBUG variable
395 //
396 // Revision 1.3  2000/10/10 15:36:35  oana
397 // Changed System.out in Out and System.err in Err;
398 // Added the DEBUG variable seted on false;
399 // Added in the header the licence;
400 //
401 // Revision 1.2  2000/04/14 18:02:46  valyt
402 // Added some gate.fsm classes
403 // added some accessor function in old jape classes
404 //
405 // Revision 1.1  2000/02/23 13:46:06  hamish
406 // added
407 //
408 // Revision 1.1.1.1  1999/02/03 16:23:01  hamish
409 // added gate2
410 //
411 // Revision 1.17  1998/11/24 16:18:29  hamish
412 // fixed toString for calls after finish
413 //
414 // Revision 1.16  1998/11/01 21:21:36  hamish
415 // use Java arrays in transduction where possible
416 //
417 // Revision 1.15  1998/11/01 14:55:54  hamish
418 // fixed lFP setting in matches
419 //
420 // Revision 1.14  1998/10/30 14:06:45  hamish
421 // added getTransducer
422 //
423 // Revision 1.13  1998/10/29 12:07:49  hamish
424 // toString change
425 //
426 // Revision 1.12  1998/10/06 16:16:10  hamish
427 // negation percolation during constrain add; position advance when none at end
428 //
429 // Revision 1.11  1998/10/01 16:06:30  hamish
430 // new appelt transduction style, replacing buggy version
431 //
432 // Revision 1.10  1998/09/26 09:19:16  hamish
433 // added cloning of PE macros
434 //
435 // Revision 1.9  1998/09/17 16:48:31  hamish
436 // added macro defs and macro refs on LHS
437 //
438 // Revision 1.8  1998/08/12 19:05:43  hamish
439 // fixed multi-part CG bug; set reset to real reset and fixed multi-doc bug
440 //
441 // Revision 1.7  1998/08/12 15:39:35  hamish
442 // added padding toString methods
443 //
444 // Revision 1.6  1998/08/05 21:58:06  hamish
445 // backend works on simple test
446 //
447 // Revision 1.5  1998/08/03 19:51:20  hamish
448 // rollback added
449 //
450 // Revision 1.4  1998/07/31 13:12:16  hamish
451 // done RHS stuff, not tested
452 //
453 // Revision 1.3  1998/07/30 11:05:16  hamish
454 // more jape
455 //
456 // Revision 1.2  1998/07/29 11:06:56  hamish
457 // first compiling version
458 //
459 // Revision 1.1.1.1  1998/07/28 16:37:46  hamish
460 // gate2 lives
461