| TestJape2.java |
1 /*
2 * TestJape2.java (Java Annotation Patterns Engine)
3 *
4 * Copyright (c) 1998-2005, The University of Sheffield.
5 *
6 * This file is part of GATE (see http://gate.ac.uk/), and is free
7 * software, licenced under the GNU Library General Public License,
8 * Version 2, June 1991 (in the distribution as file licence.html,
9 * and also available at http://gate.ac.uk/gate/licence.html).
10 *
11 * Hamish Cunningham, 23/02/2000
12 *
13 * $Id: TestJape2.java,v 1.13 2005/01/11 13:51:36 ian Exp $
14 *
15 * Description: Test class for JAPE.
16 */
17
18 package gate.jape;
19
20 import java.io.File;
21 import java.util.ArrayList;
22 import java.util.Iterator;
23
24 import gate.*;
25 import gate.annotation.AnnotationSetImpl;
26 import gate.creole.ResourceInstantiationException;
27 import gate.util.Err;
28 import gate.util.Out;
29
30 /**
31 * Second test harness for JAPE.
32 * Uses the Sheffield Tokeniser and Gazetteer, and must be run
33 * from the gate directory.
34 * @author Hamish Cunningham
35 */
36 public class TestJape2 {
37
38 /** Debug flag */
39 private static final boolean DEBUG = false;
40
41 /** How much noise to make. */
42 static private boolean verbose = false;
43
44
45 /** Take a list of text files and a collection name, and
46 * call tokeniser/gazetteer/jape on them, creating the
47 * collection.
48 */
49 static public void main(String[] args) {
50
51 // turn debug output on/off
52 //Debug.setDebug(true);
53 //Debug.setDebug(AnnotationSet.class, true);
54 //Debug.setDebug(BasicPatternElement.class, true);
55 //Debug.setDebug(ComplexPatternElement.class, true);
56 //Debug.setDebug(ConstraintGroup.class, true);
57 //Debug.setDebug(SinglePhaseTransducer.class, true);
58
59 // variables to parse the command line options into
60 String collName = null;
61 String japeName = null;
62 ArrayList fileNames = null;
63
64 // process options
65 for(int i=0; i<args.length; i++) {
66 if(args[i].equals("-c") && ++i < args.length) // -c = coll name
67 collName = args[i];
68 else if(args[i].equals("-j") && ++i < args.length) // -j: .jape name
69 japeName = args[i];
70 else if(args[i].equals("-v")) // -v = verbose
71 verbose = true;
72 else { // a list of files
73 fileNames = new ArrayList();
74 do {
75 fileNames.add(args[i++]);
76 } while(i < args.length);
77 }
78 } // for each arg
79
80 // did they give valid options?
81 message("checking options");
82 if(collName == null || japeName == null || fileNames == null)
83 usage("you must supply collection, transducer and file names");
84
85 // create a collection and run the tokeniser
86 message("creating coll, tokenising and gazetteering");
87 Corpus coll = null;
88 try {
89 coll = tokAndGaz(collName, fileNames);
90 } catch(ResourceInstantiationException e) {
91 usage("couldn't open collection: " + e);
92 }
93 /*
94 // run the parser test
95 message("parsing the .jape file (or deserialising the .ser file)");
96 Batch batch = null;
97 try { batch = new Batch(japeName);
98 } catch(JapeException e) {
99 usage("can't create transducer " + e.getMessage());
100 }
101 */
102 /*Transducer transducer = parseJape(japeName);
103 //Out.println(transducer);
104 if(transducer == null)
105 System.exit(1);*/
106
107 // test the transducers from the parser
108 /*
109 message("running the transducer");
110 try { batch.transduce(coll); } catch(JapeException e) {
111 usage("couldn't run transducer " + e.getMessage());
112 }
113 //runTransducer(transducer, coll);
114 //Out.println(transducer);
115
116 message("done\n\r");
117 //System.exit(0);
118 */
119 } // main
120
121
122 /**
123 * Create a collection and put tokenised and gazetteered docs in it.
124 */
125 static public Corpus tokAndGaz(String collName, ArrayList fileNames)
126 throws ResourceInstantiationException {
127
128 // create or overwrite the collection
129 Corpus collection = null;
130 File collDir = new File(collName);
131 collection = Factory.newCorpus(
132 collDir.getAbsolutePath()
133 );
134
135 // add all the documents
136 for(Iterator i = fileNames.iterator(); i.hasNext(); ) {
137 String fname = (String) i.next();
138
139 File f = new File(fname);
140 FeatureMap attrs = Factory.newFeatureMap();
141 Document doc = null;
142
143 try {
144 AnnotationSet annots = new AnnotationSetImpl(doc);
145 collection.add(
146 Factory.newDocument(f.getAbsolutePath())
147 );
148 } catch(ResourceInstantiationException e) {
149 e.printStackTrace();
150 }
151
152 /*
153 // Tokenise the document
154 Tokeniser tokeniser = new Tokeniser(doc, Tokeniser.HMM);
155 try { tokeniser.hmmTokenSequence(); }
156 catch(sheffield.creole.tokeniser.ParseException ex) {
157 ex.printStackTrace();
158 return null;
159 } catch (CreoleException ex) {
160 ex.printStackTrace();
161 return null;
162 }
163
164 // Gazetteer the document
165 gate.creole.Annotator gazetteer = new GazetteerAnnotator();
166 gazetteer.annotate(doc, null);
167 */
168 } // for each doc name
169
170 // return the annotated collection
171 return collection;
172
173 } //tokAndGaz
174
175
176 /**
177 * Must be run from the gate directory.
178 * Parse the .jape file.
179 */
180 /*
181 static public Transducer parseJape(String japeName) {
182 Transducer transducer = null;
183
184 if(japeName.endsWith(".ser")) { // it's compiled already
185 message("deserialising " + japeName);
186 File f = new File(japeName);
187 if(! f.exists())
188 Out.println(japeName + " not found");
189
190 try {
191 FileInputStream fis = new FileInputStream(f.getPath());
192 ObjectInputStream ois = new ObjectInputStream(fis);
193 transducer = (Transducer) ois.readObject();
194 ois.close();
195 } catch (Exception ex) {
196 Err.println(
197 "Can't read from " + f.getName() + ": " + ex.toString()
198 );
199 }
200 } else { // parse it
201 message("parsing " + japeName);
202 try {
203 ParseCpsl cpslParser = new ParseCpsl(japeName);
204 transducer = cpslParser.MultiPhaseTransducer();
205 } catch(IOException e) {
206 e.printStackTrace();
207 } catch(gate.jape.parser.ParseException ee) {
208 Err.println("Error parsing transducer: " + ee.getMessage());
209 }
210 }
211
212 return transducer;
213 } // parseJape
214
215
216 static public void runTransducer(
217 Transducer transducer, Corpus coll
218 ) {
219
220 try {
221 Document doc = coll.firstDocument();
222 do {
223 message("doing document " + doc.getId());
224 transducer.transduce(doc);
225 // Out.println(transducer.toString());
226 } while( (doc = coll.nextDocument()) != null );
227 } catch(JdmException e) {
228 e.printStackTrace();
229 } catch(JapeException e) {
230 e.printStackTrace();
231 }
232 } // runTransducer
233 */
234
235 /** You got something wrong, dumbo. */
236 public static void usage(String errorMessage) {
237 String usageMessage =
238 "usage: java gate.jape.TestJape2.main [-v] " +
239 "-j JapePatternFile -c CollectionName FileName(s)";
240
241 Err.println(errorMessage);
242 Err.println(usageMessage);
243 //System.exit(1);
244
245 } // usage
246
247
248 /** Hello? Anybody there?? */
249 public static void message(String mess) {
250 if(verbose) Out.println("TestJape2: " + mess);
251 } // message
252
253 } // class TestJape2
254
255
256 // $Log: TestJape2.java,v $
257 // Revision 1.13 2005/01/11 13:51:36 ian
258 // Updating copyrights to 1998-2005 in preparation for v3.0
259 //
260 // Revision 1.12 2004/07/21 17:10:08 akshay
261 // Changed copyright from 1998-2001 to 1998-2004
262 //
263 // Revision 1.11 2004/03/25 13:01:14 valyt
264 // Imports optimisation throughout the Java sources
265 // (to get rid of annoying warnings in Eclipse)
266 //
267 // Revision 1.10 2001/09/13 12:09:50 kalina
268 // Removed completely the use of jgl.objectspace.Array and such.
269 // Instead all sources now use the new Collections, typically ArrayList.
270 // I ran the tests and I ran some documents and compared with keys.
271 // JAPE seems to work well (that's where it all was). If there are problems
272 // maybe look at those new structures first.
273 //
274 // Revision 1.9 2001/02/08 13:46:06 valyt
275 // Added full Unicode support for the gazetteer and Jape
276 // converted the gazetteer files to UTF-8
277 //
278 // Revision 1.8 2001/01/30 14:18:02 hamish
279 // fixed some hard-coded paths
280 //
281 // Revision 1.7 2000/11/08 16:35:04 hamish
282 // formatting
283 //
284 // Revision 1.6 2000/10/26 10:45:31 oana
285 // Modified in the code style
286 //
287 // Revision 1.5 2000/10/23 21:50:42 hamish
288 // cleaned up exception handling in gate.creole and added
289 // ResourceInstantiationException;
290 //
291 // changed Factory.newDocument(URL u) to use the new instantiation
292 // facilities;
293 //
294 // added COMMENT to resource metadata / ResourceData;
295 //
296 // changed Document and DocumentImpl to follow beans style, and moved
297 // constructor logic to init(); changed all the Factory newDocument methods to
298 // use the new resource creation stuff;
299 //
300 // added builtin document and corpus metadata to creole/creole.xml (copied from
301 // gate.ac.uk/tests/creole.xml);
302 //
303 // changed Corpus to the new style too;
304 //
305 // removed CreoleRegister.init()
306 //
307 // Revision 1.4 2000/10/18 13:26:48 hamish
308 // Factory.createResource now working, with a utility method that uses reflection (via java.beans.Introspector) to set properties on a resource from the
309 // parameter list fed to createResource.
310 // resources may now have both an interface and a class; they are indexed by interface type; the class is used to instantiate them
311 // moved createResource from CR to Factory
312 // removed Transients; use Factory instead
313 //
314 // Revision 1.3 2000/10/16 16:44:34 oana
315 // Changed the comment of DEBUG variable
316 //
317 // Revision 1.2 2000/10/10 15:36:37 oana
318 // Changed System.out in Out and System.err in Err;
319 // Added the DEBUG variable seted on false;
320 // Added in the header the licence;
321 //
322 // Revision 1.1 2000/02/23 13:46:12 hamish
323 // added
324 //
325 // Revision 1.1.1.1 1999/02/03 16:23:03 hamish
326 // added gate2
327 //
328 // Revision 1.9 1998/10/29 12:13:55 hamish
329 // reorganised to use Batch
330 //
331 // Revision 1.8 1998/10/01 16:06:41 hamish
332 // new appelt transduction style, replacing buggy version
333 //
334 // Revision 1.7 1998/09/26 09:19:21 hamish
335 // added cloning of PE macros
336 //
337 // Revision 1.6 1998/09/23 12:48:03 hamish
338 // negation added; noncontiguous BPEs disallowed
339 //
340 // Revision 1.5 1998/09/17 12:53:09 hamish
341 // fixed for new tok; new construction pattern
342 //
343 // Revision 1.4 1998/09/17 10:24:05 hamish
344 // added options support, and Appelt-style rule application
345 //
346 // Revision 1.3 1998/08/19 20:21:46 hamish
347 // new RHS assignment expression stuff added
348 //
349 // Revision 1.2 1998/08/18 14:37:45 hamish
350 // added some messages
351 //
352 // Revision 1.1 1998/08/18 12:43:11 hamish
353 // fixed SPT bug, not advancing newPosition
354