TestJape2.java |
1 /* 2 * TestJape2.java (Java Annotation Patterns Engine) 3 * 4 * Copyright (c) 1998-2005, The University of Sheffield. 5 * 6 * This file is part of GATE (see http://gate.ac.uk/), and is free 7 * software, licenced under the GNU Library General Public License, 8 * Version 2, June 1991 (in the distribution as file licence.html, 9 * and also available at http://gate.ac.uk/gate/licence.html). 10 * 11 * Hamish Cunningham, 23/02/2000 12 * 13 * $Id: TestJape2.java,v 1.13 2005/01/11 13:51:36 ian Exp $ 14 * 15 * Description: Test class for JAPE. 16 */ 17 18 package gate.jape; 19 20 import java.io.File; 21 import java.util.ArrayList; 22 import java.util.Iterator; 23 24 import gate.*; 25 import gate.annotation.AnnotationSetImpl; 26 import gate.creole.ResourceInstantiationException; 27 import gate.util.Err; 28 import gate.util.Out; 29 30 /** 31 * Second test harness for JAPE. 32 * Uses the Sheffield Tokeniser and Gazetteer, and must be run 33 * from the gate directory. 34 * @author Hamish Cunningham 35 */ 36 public class TestJape2 { 37 38 /** Debug flag */ 39 private static final boolean DEBUG = false; 40 41 /** How much noise to make. */ 42 static private boolean verbose = false; 43 44 45 /** Take a list of text files and a collection name, and 46 * call tokeniser/gazetteer/jape on them, creating the 47 * collection. 48 */ 49 static public void main(String[] args) { 50 51 // turn debug output on/off 52 //Debug.setDebug(true); 53 //Debug.setDebug(AnnotationSet.class, true); 54 //Debug.setDebug(BasicPatternElement.class, true); 55 //Debug.setDebug(ComplexPatternElement.class, true); 56 //Debug.setDebug(ConstraintGroup.class, true); 57 //Debug.setDebug(SinglePhaseTransducer.class, true); 58 59 // variables to parse the command line options into 60 String collName = null; 61 String japeName = null; 62 ArrayList fileNames = null; 63 64 // process options 65 for(int i=0; i<args.length; i++) { 66 if(args[i].equals("-c") && ++i < args.length) // -c = coll name 67 collName = args[i]; 68 else if(args[i].equals("-j") && ++i < args.length) // -j: .jape name 69 japeName = args[i]; 70 else if(args[i].equals("-v")) // -v = verbose 71 verbose = true; 72 else { // a list of files 73 fileNames = new ArrayList(); 74 do { 75 fileNames.add(args[i++]); 76 } while(i < args.length); 77 } 78 } // for each arg 79 80 // did they give valid options? 81 message("checking options"); 82 if(collName == null || japeName == null || fileNames == null) 83 usage("you must supply collection, transducer and file names"); 84 85 // create a collection and run the tokeniser 86 message("creating coll, tokenising and gazetteering"); 87 Corpus coll = null; 88 try { 89 coll = tokAndGaz(collName, fileNames); 90 } catch(ResourceInstantiationException e) { 91 usage("couldn't open collection: " + e); 92 } 93 /* 94 // run the parser test 95 message("parsing the .jape file (or deserialising the .ser file)"); 96 Batch batch = null; 97 try { batch = new Batch(japeName); 98 } catch(JapeException e) { 99 usage("can't create transducer " + e.getMessage()); 100 } 101 */ 102 /*Transducer transducer = parseJape(japeName); 103 //Out.println(transducer); 104 if(transducer == null) 105 System.exit(1);*/ 106 107 // test the transducers from the parser 108 /* 109 message("running the transducer"); 110 try { batch.transduce(coll); } catch(JapeException e) { 111 usage("couldn't run transducer " + e.getMessage()); 112 } 113 //runTransducer(transducer, coll); 114 //Out.println(transducer); 115 116 message("done\n\r"); 117 //System.exit(0); 118 */ 119 } // main 120 121 122 /** 123 * Create a collection and put tokenised and gazetteered docs in it. 124 */ 125 static public Corpus tokAndGaz(String collName, ArrayList fileNames) 126 throws ResourceInstantiationException { 127 128 // create or overwrite the collection 129 Corpus collection = null; 130 File collDir = new File(collName); 131 collection = Factory.newCorpus( 132 collDir.getAbsolutePath() 133 ); 134 135 // add all the documents 136 for(Iterator i = fileNames.iterator(); i.hasNext(); ) { 137 String fname = (String) i.next(); 138 139 File f = new File(fname); 140 FeatureMap attrs = Factory.newFeatureMap(); 141 Document doc = null; 142 143 try { 144 AnnotationSet annots = new AnnotationSetImpl(doc); 145 collection.add( 146 Factory.newDocument(f.getAbsolutePath()) 147 ); 148 } catch(ResourceInstantiationException e) { 149 e.printStackTrace(); 150 } 151 152 /* 153 // Tokenise the document 154 Tokeniser tokeniser = new Tokeniser(doc, Tokeniser.HMM); 155 try { tokeniser.hmmTokenSequence(); } 156 catch(sheffield.creole.tokeniser.ParseException ex) { 157 ex.printStackTrace(); 158 return null; 159 } catch (CreoleException ex) { 160 ex.printStackTrace(); 161 return null; 162 } 163 164 // Gazetteer the document 165 gate.creole.Annotator gazetteer = new GazetteerAnnotator(); 166 gazetteer.annotate(doc, null); 167 */ 168 } // for each doc name 169 170 // return the annotated collection 171 return collection; 172 173 } //tokAndGaz 174 175 176 /** 177 * Must be run from the gate directory. 178 * Parse the .jape file. 179 */ 180 /* 181 static public Transducer parseJape(String japeName) { 182 Transducer transducer = null; 183 184 if(japeName.endsWith(".ser")) { // it's compiled already 185 message("deserialising " + japeName); 186 File f = new File(japeName); 187 if(! f.exists()) 188 Out.println(japeName + " not found"); 189 190 try { 191 FileInputStream fis = new FileInputStream(f.getPath()); 192 ObjectInputStream ois = new ObjectInputStream(fis); 193 transducer = (Transducer) ois.readObject(); 194 ois.close(); 195 } catch (Exception ex) { 196 Err.println( 197 "Can't read from " + f.getName() + ": " + ex.toString() 198 ); 199 } 200 } else { // parse it 201 message("parsing " + japeName); 202 try { 203 ParseCpsl cpslParser = new ParseCpsl(japeName); 204 transducer = cpslParser.MultiPhaseTransducer(); 205 } catch(IOException e) { 206 e.printStackTrace(); 207 } catch(gate.jape.parser.ParseException ee) { 208 Err.println("Error parsing transducer: " + ee.getMessage()); 209 } 210 } 211 212 return transducer; 213 } // parseJape 214 215 216 static public void runTransducer( 217 Transducer transducer, Corpus coll 218 ) { 219 220 try { 221 Document doc = coll.firstDocument(); 222 do { 223 message("doing document " + doc.getId()); 224 transducer.transduce(doc); 225 // Out.println(transducer.toString()); 226 } while( (doc = coll.nextDocument()) != null ); 227 } catch(JdmException e) { 228 e.printStackTrace(); 229 } catch(JapeException e) { 230 e.printStackTrace(); 231 } 232 } // runTransducer 233 */ 234 235 /** You got something wrong, dumbo. */ 236 public static void usage(String errorMessage) { 237 String usageMessage = 238 "usage: java gate.jape.TestJape2.main [-v] " + 239 "-j JapePatternFile -c CollectionName FileName(s)"; 240 241 Err.println(errorMessage); 242 Err.println(usageMessage); 243 //System.exit(1); 244 245 } // usage 246 247 248 /** Hello? Anybody there?? */ 249 public static void message(String mess) { 250 if(verbose) Out.println("TestJape2: " + mess); 251 } // message 252 253 } // class TestJape2 254 255 256 // $Log: TestJape2.java,v $ 257 // Revision 1.13 2005/01/11 13:51:36 ian 258 // Updating copyrights to 1998-2005 in preparation for v3.0 259 // 260 // Revision 1.12 2004/07/21 17:10:08 akshay 261 // Changed copyright from 1998-2001 to 1998-2004 262 // 263 // Revision 1.11 2004/03/25 13:01:14 valyt 264 // Imports optimisation throughout the Java sources 265 // (to get rid of annoying warnings in Eclipse) 266 // 267 // Revision 1.10 2001/09/13 12:09:50 kalina 268 // Removed completely the use of jgl.objectspace.Array and such. 269 // Instead all sources now use the new Collections, typically ArrayList. 270 // I ran the tests and I ran some documents and compared with keys. 271 // JAPE seems to work well (that's where it all was). If there are problems 272 // maybe look at those new structures first. 273 // 274 // Revision 1.9 2001/02/08 13:46:06 valyt 275 // Added full Unicode support for the gazetteer and Jape 276 // converted the gazetteer files to UTF-8 277 // 278 // Revision 1.8 2001/01/30 14:18:02 hamish 279 // fixed some hard-coded paths 280 // 281 // Revision 1.7 2000/11/08 16:35:04 hamish 282 // formatting 283 // 284 // Revision 1.6 2000/10/26 10:45:31 oana 285 // Modified in the code style 286 // 287 // Revision 1.5 2000/10/23 21:50:42 hamish 288 // cleaned up exception handling in gate.creole and added 289 // ResourceInstantiationException; 290 // 291 // changed Factory.newDocument(URL u) to use the new instantiation 292 // facilities; 293 // 294 // added COMMENT to resource metadata / ResourceData; 295 // 296 // changed Document and DocumentImpl to follow beans style, and moved 297 // constructor logic to init(); changed all the Factory newDocument methods to 298 // use the new resource creation stuff; 299 // 300 // added builtin document and corpus metadata to creole/creole.xml (copied from 301 // gate.ac.uk/tests/creole.xml); 302 // 303 // changed Corpus to the new style too; 304 // 305 // removed CreoleRegister.init() 306 // 307 // Revision 1.4 2000/10/18 13:26:48 hamish 308 // Factory.createResource now working, with a utility method that uses reflection (via java.beans.Introspector) to set properties on a resource from the 309 // parameter list fed to createResource. 310 // resources may now have both an interface and a class; they are indexed by interface type; the class is used to instantiate them 311 // moved createResource from CR to Factory 312 // removed Transients; use Factory instead 313 // 314 // Revision 1.3 2000/10/16 16:44:34 oana 315 // Changed the comment of DEBUG variable 316 // 317 // Revision 1.2 2000/10/10 15:36:37 oana 318 // Changed System.out in Out and System.err in Err; 319 // Added the DEBUG variable seted on false; 320 // Added in the header the licence; 321 // 322 // Revision 1.1 2000/02/23 13:46:12 hamish 323 // added 324 // 325 // Revision 1.1.1.1 1999/02/03 16:23:03 hamish 326 // added gate2 327 // 328 // Revision 1.9 1998/10/29 12:13:55 hamish 329 // reorganised to use Batch 330 // 331 // Revision 1.8 1998/10/01 16:06:41 hamish 332 // new appelt transduction style, replacing buggy version 333 // 334 // Revision 1.7 1998/09/26 09:19:21 hamish 335 // added cloning of PE macros 336 // 337 // Revision 1.6 1998/09/23 12:48:03 hamish 338 // negation added; noncontiguous BPEs disallowed 339 // 340 // Revision 1.5 1998/09/17 12:53:09 hamish 341 // fixed for new tok; new construction pattern 342 // 343 // Revision 1.4 1998/09/17 10:24:05 hamish 344 // added options support, and Appelt-style rule application 345 // 346 // Revision 1.3 1998/08/19 20:21:46 hamish 347 // new RHS assignment expression stuff added 348 // 349 // Revision 1.2 1998/08/18 14:37:45 hamish 350 // added some messages 351 // 352 // Revision 1.1 1998/08/18 12:43:11 hamish 353 // fixed SPT bug, not advancing newPosition 354