ConstraintGroup.java |
1 /* 2 * ConstraintGroup.java - transducer class 3 * 4 * Copyright (c) 1998-2005, The University of Sheffield. 5 * 6 * This file is part of GATE (see http://gate.ac.uk/), and is free 7 * software, licenced under the GNU Library General Public License, 8 * Version 2, June 1991 (in the distribution as file licence.html, 9 * and also available at http://gate.ac.uk/gate/licence.html). 10 * 11 * Hamish Cunningham, 24/07/98 12 * 13 * $Id: ConstraintGroup.java,v 1.11 2005/01/11 13:51:36 ian Exp $ 14 */ 15 16 17 package gate.jape; 18 19 import java.util.ArrayList; 20 import java.util.Iterator; 21 22 import gate.AnnotationSet; 23 import gate.Document; 24 import gate.annotation.AnnotationSetImpl; 25 import gate.util.Strings; 26 27 28 /** 29 * A sequence of conjunctions of PatternElement that form a 30 * disjunction. 31 */ 32 public class ConstraintGroup 33 extends PatternElement implements JapeConstants, java.io.Serializable 34 { 35 /** Debug flag */ 36 private static final boolean DEBUG = false; 37 38 /** Anonymous constructor. */ 39 public ConstraintGroup() { 40 patternElementDisjunction1 = new ArrayList(); 41 currentConjunction = new ArrayList(); 42 patternElementDisjunction1.add(currentConjunction); 43 } // Anonymous constructor 44 45 /** Need cloning for processing of macro references. See comments on 46 * <CODE>PatternElement.clone()</CODE> 47 */ 48 public Object clone() { 49 ConstraintGroup newPE = (ConstraintGroup) super.clone(); 50 51 // created by createDisjunction 52 newPE.currentConjunction = null; 53 54 newPE.patternElementDisjunction1 = new ArrayList(); 55 // for each (conjunction) member of the pattern element discjunction 56 for( 57 Iterator disjunction = patternElementDisjunction1.iterator(); 58 disjunction.hasNext(); 59 60 ) { 61 62 newPE.createDisjunction(); 63 // for each pattern element making up this conjunction 64 for( 65 Iterator conjunction = ((ArrayList) disjunction.next()).iterator(); 66 conjunction.hasNext(); 67 68 ) { 69 PatternElement pat = (PatternElement) conjunction.next(); 70 71 newPE.addPatternElement((PatternElement) pat.clone()); 72 } // for each element of the conjunction 73 } // for each conjunction (element of the disjunction) 74 75 return newPE; 76 } // clone 77 78 /** An array of arrays that represent PatternElement conjunctions 79 * during parsing of the .jape. Each conjunction is 80 * considered as being disjunct with the next. (I.e. they are 81 * or'd, in the same way as expressions around "||" in C and 82 * Java.) Set during parsing; replaced by finish(). 83 */ 84 private ArrayList patternElementDisjunction1; 85 86 /** The pattern element disjunction for transduction - Java arrays. */ 87 private PatternElement[][] patternElementDisjunction2; 88 89 /** An array of PatternElements making up a conjunction. It is a member of 90 * patternElementDisjunction. This is the one we're adding to 91 * at present. Used during parsing, not matching. 92 */ 93 private ArrayList currentConjunction; 94 95 /** Make a new disjunction at this point. */ 96 public void createDisjunction() { 97 currentConjunction = new ArrayList(); 98 patternElementDisjunction1.add(currentConjunction); 99 } // createDisjunction 100 101 /** Add an element to the current conjunction. */ 102 public void addPatternElement(PatternElement pe) { 103 currentConjunction.add(pe); 104 } // addPatternElement 105 106 /** Get an list of CPEs that we contain. */ 107 protected Iterator getCPEs() { 108 ArrayList cpes = new ArrayList(); 109 110 // for each (conjunction) member of the pattern element discjunction 111 for( 112 Iterator disjunction = patternElementDisjunction1.iterator(); 113 disjunction.hasNext(); 114 ) { 115 // for each pattern element making up this conjunction 116 for( 117 Iterator conjunction = ((ArrayList) disjunction.next()).iterator(); 118 conjunction.hasNext(); 119 ) { 120 PatternElement pat = (PatternElement) conjunction.next(); 121 122 Iterator i = null; 123 if(pat instanceof ComplexPatternElement) { 124 cpes.add(pat); 125 i = ((ComplexPatternElement) pat).getCPEs(); 126 } 127 else if(pat instanceof ConstraintGroup) 128 i = ((ConstraintGroup) pat).getCPEs(); 129 130 if(i != null) 131 for( ; i.hasNext(); ) 132 cpes.add(i.next()); 133 } // for each element of the conjunction 134 } // for each conjunction (element of the disjunction) 135 136 return cpes.iterator(); 137 } // getCPEs 138 139 /** Finish: replace dynamic data structures with Java arrays; called 140 * after parsing. 141 */ 142 public void finish() { 143 144 // index into patternElementDisjunction2 145 int i = 0; 146 147 // index into the conjunctions (second dimension of pED2) 148 int j = 0; 149 150 patternElementDisjunction2 = 151 new PatternElement[patternElementDisjunction1.size()][]; 152 153 // for each (conjunction) member of the pattern element discjunction 154 for( 155 Iterator disjuncIter = patternElementDisjunction1.iterator(); 156 disjuncIter.hasNext(); 157 i++ 158 ) { 159 ArrayList conjunction = (ArrayList) disjuncIter.next(); 160 patternElementDisjunction2[i] = new PatternElement[conjunction.size()]; 161 j = 0; 162 163 // for each pattern element making up this conjunction 164 for( 165 Iterator conjIter = conjunction.iterator(); 166 conjIter.hasNext(); 167 j++ 168 ) { 169 patternElementDisjunction2[i][j] = (PatternElement) conjIter.next(); 170 patternElementDisjunction2[i][j].finish(); 171 } // loop on conjunction 172 173 } // loop on patternElementDisjunction1 174 175 patternElementDisjunction1 = null; 176 } // finish 177 178 /** Access to the annotations that have been matched by this group. */ 179 public AnnotationSet getMatchedAnnots() { 180 AnnotationSet matchedAnnots = new AnnotationSetImpl((Document) null); 181 int pEDLen = patternElementDisjunction2.length; 182 183 // for each (conjunction) member of the pattern element disjunction 184 for(int i = 0; i < pEDLen; i++) { 185 int conjLen = patternElementDisjunction2[i].length; 186 187 // for each pattern element making up this conjunction 188 for(int j = 0; j < conjLen; j++) { 189 PatternElement pat = patternElementDisjunction2[i][j]; 190 AnnotationSet patMatchedAnnots = pat.getMatchedAnnots(); 191 if(patMatchedAnnots != null) 192 matchedAnnots.addAll(pat.getMatchedAnnots()); 193 } // for each element of the conjunction 194 195 } // for each conjunction (element of the disjunction) 196 197 return matchedAnnots; 198 } // getMatchedAnnots 199 200 201 /** Clear all the annotations that have been matched by this group. */ 202 public void reset() { 203 // Debug.pr(this, "CG reset, matchHistory.size() = " + matchHistory.size()); 204 int pEDLen = patternElementDisjunction2.length; 205 206 // for each (conjunction) member of the pattern element disjunction 207 for(int i = 0; i < pEDLen; i++) { 208 int conjLen = patternElementDisjunction2[i].length; 209 210 // for each pattern element making up this conjunction 211 for(int j = 0; j < conjLen; j++) 212 patternElementDisjunction2[i][j].reset(); 213 } 214 215 super.reset(); // should be redundant: there for if PE.reset changes 216 } // reset 217 218 /** Multilevel rollback of annot caches etc. */ 219 public void rollback(int arity) { 220 // Debug.pr(this, "CG rollback(" + arity + "), matchHistory.size() = " + 221 // matchHistory.size()); 222 for(int i=0; i<arity; i++) { 223 PatternElement[] conjunction = (PatternElement[]) matchHistory.pop(); 224 int conjLen = conjunction.length; 225 for(int j = 0; j < conjLen; j++) 226 conjunction[j].rollback(1); 227 } 228 } // rollback 229 230 231 /** Does this element match the document at this position? */ 232 public boolean matches( 233 Document doc, int position, MutableInteger newPosition 234 ) { 235 // if a whole conjunction matches, we set newPosition to the max of 236 // rightmost advance of all the composite elements that matched, and 237 // position. 238 int rightmostAdvance = position; 239 240 // when we fail the whole disjunction, we set newPosition to the max of 241 // leftmost failure point, and position 242 int leftmostFailurePoint = Integer.MAX_VALUE; 243 244 // outerLoop: 245 // for each conjunction 246 // for each element in the conjunction 247 // if it fails continue outerLoop; 248 // return true; 249 // return false; 250 251 // for each member of the disjunctions array 252 int savedPosition = position; 253 int pEDLen = patternElementDisjunction2.length; 254 outerLoop: 255 for(int i = 0; i < pEDLen; i++) { 256 int conjLen = patternElementDisjunction2[i].length; 257 position = savedPosition; 258 rightmostAdvance = position; 259 260 // for each pattern element making up this conjunction 261 for(int j = 0; j < conjLen; j++) { 262 PatternElement pat = patternElementDisjunction2[i][j]; 263 264 if(! pat.matches(doc, position, newPosition)) { 265 // reset the last failure point to the furthest we got so far 266 leftmostFailurePoint = 267 Math.min(leftmostFailurePoint, newPosition.value); 268 269 // rollback matches done in the previous elements of this conjunction 270 for(int k = j - 1; k >= 0; k--) 271 patternElementDisjunction2[i][k].rollback(1); 272 273 // try the next conjunction 274 continue outerLoop; 275 } 276 277 // reset our advance point to the furthest so far 278 position = rightmostAdvance = 279 Math.max(rightmostAdvance, newPosition.value); 280 281 } // for each element of the conjunction 282 283 // a whole conjunction matched: record advance and which conj succeeded 284 newPosition.value = rightmostAdvance; 285 matchHistory.push(patternElementDisjunction2[i]); 286 //Debug.pr(this, "CG matches: pushing"); 287 return true; 288 289 } // for each conjunction (element of the disjunction) 290 291 // we reached the end of the disjunction without matching a 292 // whole conjunction 293 if(leftmostFailurePoint == Integer.MAX_VALUE) 294 leftmostFailurePoint = position + 1; 295 newPosition.value = Math.max(position + 1, leftmostFailurePoint); 296 return false; // annot caches have been rolled back already in inner loop 297 } // matches 298 299 300 /** Create a string representation of the object. */ 301 public String toString() { return toString(""); } 302 303 /** Create a string representation of the object. */ 304 public String toString(String pad) { 305 String newline = Strings.getNl(); 306 307 StringBuffer buf = 308 new StringBuffer(pad + "CG: disjunction(" + newline); 309 String newPad = Strings.addPadding(pad, INDENT_PADDING); 310 311 boolean firstTime = true; 312 313 if(patternElementDisjunction1 != null) { // before finish() 314 // for each (conjunction) member of the pattern element discjunction 315 for( 316 Iterator disjunction = patternElementDisjunction1.iterator(); 317 disjunction.hasNext(); 318 ) { 319 if(firstTime) firstTime = false; 320 else buf.append(newline + pad + "|" + newline); 321 322 // for each pattern element making up this conjunction 323 for( 324 Iterator conjunction = ((ArrayList) disjunction.next()).iterator(); 325 conjunction.hasNext(); 326 ) { 327 buf.append( 328 ((PatternElement) conjunction.next()).toString(newPad) + newline 329 ); 330 } // for each element of the conjunction 331 } // for each conjunction (element of the disjunction) 332 333 } else { // after finish 334 int pEDLen = patternElementDisjunction2.length; 335 if(firstTime) firstTime = false; 336 else buf.append(newline + pad + "|" + newline); 337 338 for(int i = 0; i < pEDLen; i++) { 339 int conjLen = patternElementDisjunction2[i].length; 340 // for each pattern element making up this conjunction 341 for(int j = 0; j < conjLen; j++) 342 buf.append( 343 patternElementDisjunction2[i][j].toString(newPad) + newline 344 ); 345 } 346 } 347 348 buf.append(pad + ") CG." + newline); 349 350 return buf.toString(); 351 } // toString 352 353 354 //needed by FSM 355 public PatternElement[][] getPatternElementDisjunction(){ 356 return patternElementDisjunction2; 357 } 358 359 } // class ConstraintGroup 360 361 362 // $Log: ConstraintGroup.java,v $ 363 // Revision 1.11 2005/01/11 13:51:36 ian 364 // Updating copyrights to 1998-2005 in preparation for v3.0 365 // 366 // Revision 1.10 2004/07/21 17:10:07 akshay 367 // Changed copyright from 1998-2001 to 1998-2004 368 // 369 // Revision 1.9 2004/03/25 13:01:15 valyt 370 // Imports optimisation throughout the Java sources 371 // (to get rid of annoying warnings in Eclipse) 372 // 373 // Revision 1.8 2001/09/13 12:09:50 kalina 374 // Removed completely the use of jgl.objectspace.Array and such. 375 // Instead all sources now use the new Collections, typically ArrayList. 376 // I ran the tests and I ran some documents and compared with keys. 377 // JAPE seems to work well (that's where it all was). If there are problems 378 // maybe look at those new structures first. 379 // 380 // Revision 1.7 2001/09/12 11:59:33 kalina 381 // Changed the old JAPE stuff to use the new Collections API, 382 // instead of com.objectspace stuff. Will eliminate that library 383 // completely very soon! Just one class left to re-implement, 384 // 385 // ParseCPSL.jj changed accordingly. All tested and no smoke. 386 // 387 // Revision 1.6 2000/11/08 16:35:02 hamish 388 // formatting 389 // 390 // Revision 1.5 2000/10/26 10:45:30 oana 391 // Modified in the code style 392 // 393 // Revision 1.4 2000/10/16 16:44:33 oana 394 // Changed the comment of DEBUG variable 395 // 396 // Revision 1.3 2000/10/10 15:36:35 oana 397 // Changed System.out in Out and System.err in Err; 398 // Added the DEBUG variable seted on false; 399 // Added in the header the licence; 400 // 401 // Revision 1.2 2000/04/14 18:02:46 valyt 402 // Added some gate.fsm classes 403 // added some accessor function in old jape classes 404 // 405 // Revision 1.1 2000/02/23 13:46:06 hamish 406 // added 407 // 408 // Revision 1.1.1.1 1999/02/03 16:23:01 hamish 409 // added gate2 410 // 411 // Revision 1.17 1998/11/24 16:18:29 hamish 412 // fixed toString for calls after finish 413 // 414 // Revision 1.16 1998/11/01 21:21:36 hamish 415 // use Java arrays in transduction where possible 416 // 417 // Revision 1.15 1998/11/01 14:55:54 hamish 418 // fixed lFP setting in matches 419 // 420 // Revision 1.14 1998/10/30 14:06:45 hamish 421 // added getTransducer 422 // 423 // Revision 1.13 1998/10/29 12:07:49 hamish 424 // toString change 425 // 426 // Revision 1.12 1998/10/06 16:16:10 hamish 427 // negation percolation during constrain add; position advance when none at end 428 // 429 // Revision 1.11 1998/10/01 16:06:30 hamish 430 // new appelt transduction style, replacing buggy version 431 // 432 // Revision 1.10 1998/09/26 09:19:16 hamish 433 // added cloning of PE macros 434 // 435 // Revision 1.9 1998/09/17 16:48:31 hamish 436 // added macro defs and macro refs on LHS 437 // 438 // Revision 1.8 1998/08/12 19:05:43 hamish 439 // fixed multi-part CG bug; set reset to real reset and fixed multi-doc bug 440 // 441 // Revision 1.7 1998/08/12 15:39:35 hamish 442 // added padding toString methods 443 // 444 // Revision 1.6 1998/08/05 21:58:06 hamish 445 // backend works on simple test 446 // 447 // Revision 1.5 1998/08/03 19:51:20 hamish 448 // rollback added 449 // 450 // Revision 1.4 1998/07/31 13:12:16 hamish 451 // done RHS stuff, not tested 452 // 453 // Revision 1.3 1998/07/30 11:05:16 hamish 454 // more jape 455 // 456 // Revision 1.2 1998/07/29 11:06:56 hamish 457 // first compiling version 458 // 459 // Revision 1.1.1.1 1998/07/28 16:37:46 hamish 460 // gate2 lives 461