1   /*
2    *  Batch.java - transducer class
3    *
4    *  Copyright (c) 1998-2005, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Hamish Cunningham, 10/08/98
12   *
13   *  $Id: Batch.java,v 1.40 2005/10/10 10:39:51 valyt Exp $
14   *
15   *  DEVELOPER NOTES:
16   *
17   *  This is one that got away; the relation between constructors,
18   *  initTransducer and parseTransducer are totally screwy and get worse
19   *  every time I add something (e.g. support for resource loading).
20   *  We should probably junk this whole thing and start again....
21   */
22  
23  package gate.jape;
24  
25  import java.io.IOException;
26  import java.net.URL;
27  import java.util.Iterator;
28  import java.util.Vector;
29  
30  import gate.*;
31  import gate.creole.ExecutionException;
32  import gate.event.ProgressListener;
33  import gate.event.StatusListener;
34  import gate.util.Err;
35  import gate.util.Out;
36  
37  /** Batch processing of JAPE transducers against documents or collections.
38    * Construction will parse or deserialise a transducer as required.
39    */
40  public class Batch implements JapeConstants {
41    /** Debug flag */
42    private static final boolean DEBUG = false;
43  
44    /** The name of the transducer file, a .jape or .ser. */
45  //  private String japeFileName;
46  
47    /** The URL that points to a .jape file */
48    private URL japeURL;
49  
50    /**The encoding used for reading the grammar file(s)*/
51    private String encoding;
52  
53    /** The JAPE transducer. */
54    private Transducer transducer;
55  
56    /** A stream connected to the JAPE file (often null). */
57  //  private InputStream japeStream = null;
58  
59    /** Create non-initialised instance (private, used in main). */
60    private Batch() { }
61  
62    /** Create a fully initialised instance.
63      * <P><CODE>japeFileName</CODE>: the name of a .jape or .ser transducer
64      * file. This may be an absolute path, or may a .jar
65      * that lives somewhere on the classpath.
66      */
67    public Batch(URL url, String encoding) throws JapeException {
68      this.japeURL = url;
69      this.encoding =  encoding;
70      parseJape();
71      linkListeners();
72    } // full init constructor
73  
74    public Batch(URL url, String encoding, StatusListener sListener)
75           throws JapeException {
76  
77      this.addStatusListener(sListener);
78      this.japeURL = url;
79      this.encoding =  encoding;
80      parseJape();
81      linkListeners();
82    } // full init constructor
83  
84    private void readObject(java.io.ObjectInputStream in)
85    throws IOException, ClassNotFoundException{
86      in.defaultReadObject();
87      //now recreate the listeners
88      linkListeners();
89    }
90    
91    /**
92     * Creates inner listeners that forward events from the transducer object 
93     *  to our own listeners.
94     */
95    protected void linkListeners(){
96      if(transducer != null){
97        transducer.addStatusListener(new StatusListener(){
98          public void statusChanged(String text){
99            fireStatusChanged(text);
100         }
101       });
102 
103       transducer.addProgressListener(new ProgressListener(){
104         public void progressChanged(int value){
105           fireProgressChanged(value);
106         }
107 
108         public void processFinished(){
109           fireProcessFinished();
110         }
111       });
112     }    
113   }
114   
115   /**
116    * Notifies this PR that it should stop its execution as soon as possible.
117    */
118   public synchronized void interrupt(){
119     transducer.interrupt();
120   }
121   /** Create a fully initialised instance.
122     * <P><CODE>japeFileName</CODE>: the name of a .jape or .ser transducer
123     * file. This may be an absolute path, or may a .jar
124     * that lives somewhere on the classpath.
125     */
126 /*
127   public Batch(String japeFileName) throws JapeException {
128     this.japeFileName = japeFileName;
129     initTransducer();
130   } // full init constructor
131 */
132 /*
133   public Batch(String japeFileName, StatusListener sListener)
134                                                         throws JapeException {
135     this.japeFileName = japeFileName;
136     this.addStatusListener(sListener);
137     initTransducer();
138   } // full init constructor
139 */
140 
141   /** Create a fully initialised instance from an InputStream connected
142     * to the JAPE file.
143     */
144 /*
145   public Batch(InputStream japeStream) throws JapeException {
146     if(japeStream == null)
147       throw new JapeException(
148         "attempt to create a batch parser with null input stream"
149       );
150     this.japeFileName = "stream";
151     this.japeStream = japeStream;
152     initTransducer();
153   } // full init constructor
154 */
155   /** Create a fully initialised instance from a resource path and resource
156     * name.
157     */
158 /*
159   public Batch(String resPath, String resName) throws JapeException {
160     fromResource = true;
161     this.japeFileName = resName;
162     this.resPath = resPath;
163     initTransducer();
164   } // full init constructor
165 */
166 
167   /** Get the transducer. */
168   public Transducer getTransducer() { return transducer; }
169 
170   /** Instantiate transducer member as necessary. */
171 /*
172   private void initTransducer()
173   throws JapeException {
174     if(fromResource) {
175       parseJape(resPath, japeFileName);
176     } else if(japeFileName.endsWith(".ser") || japeFileName.endsWith(".SER"))
177       deserialiseJape(new File(japeFileName));
178     else if(japeFileName.endsWith(".jape") || japeFileName.endsWith(".JAPE"))
179       parseJape();
180     else if(japeFileName.endsWith(".jar") || japeFileName.endsWith(".JAR"))
181       deserialiseJape();
182     else if(japeFileName.equals("stream"))
183       parseJape(japeStream);
184     else
185       throw new JapeException(
186         "unknown file type (not .jape, .ser or .jar):" + japeFileName
187       );
188     if(transducer != null) transducer.addStatusListener(new StatusListener() {
189       public void statusChanged(String text){
190         fireStatusChangedEvent(text);
191       }
192     });
193   }
194 */
195   /** Parse a jape file from {@link #japeURL} and store the transducer. */
196   private void parseJape() throws JapeException {
197     try {
198       gate.jape.parser.ParseCpsl parser = Factory.newJapeParser(japeURL, encoding);
199 
200       StatusListener listener = null;
201       listener = new StatusListener(){
202         public void statusChanged(String text){
203           fireStatusChanged(text);
204         }
205       };
206       parser.addStatusListener(listener);
207       transducer = parser.MultiPhaseTransducer();
208       parser.removeStatusListener(listener);
209       //the call to finish needs to be handled from here now as it
210       //was removed from the .jj file
211       transducer.addStatusListener(listener);
212       transducer.finish();
213       transducer.removeStatusListener(listener);
214 
215     } catch (gate.jape.parser.ParseException e) {
216       throw new
217         JapeException("Batch: error parsing transducer: " + e.getMessage());
218     } catch (java.io.IOException e) {
219       throw new
220         JapeException("Batch: couldn't open JAPE file: " + e.getMessage());
221     }
222   } // parseJape
223 
224   /** Parse a jape file from an InputStream and store the transducer. */
225 /*
226   private void parseJape(InputStream japeStream) throws JapeException {
227     try {
228       gate.jape.parser.ParseCpsl parser =
229         new gate.jape.parser.ParseCpsl(japeFileName, japeStream);
230       transducer = parser.MultiPhaseTransducer();
231     } catch (gate.jape.parser.ParseException e) {
232       throw new
233         JapeException("Batch: error parsing transducer: " + e.getMessage());
234     } catch (java.io.IOException e) {
235       throw new
236         JapeException("Batch: couldn't read JAPE stream: " + e.getMessage());
237     }
238   } // parseJape(InputStream)
239 */
240   /** Parse a jape file from a resource and store the transducer. */
241 /*
242   private void parseJape(String resPath, String resName) throws JapeException {
243     try {
244       gate.jape.parser.ParseCpsl parser =
245         new gate.jape.parser.ParseCpsl(resPath, resName);
246       transducer = parser.MultiPhaseTransducer();
247     } catch (gate.jape.parser.ParseException e) {
248       throw new
249         JapeException("Batch: error parsing transducer: " + e.getMessage());
250     } catch (java.io.IOException e) {
251       throw new
252         JapeException("Batch: couldn't read JAPE resource: " + e.getMessage());
253     }
254   } // parseJape(resPath, resName)
255 */
256 
257   /** Deserialise from a .ser file. */
258 /*
259   private void deserialiseJape(File japeFile) throws JapeException {
260 
261     // set up a file input stream
262     FileInputStream japeInputStream = null;
263     try {
264       japeInputStream = new FileInputStream(japeFile.getPath());
265     } catch (IOException e) {
266       throw new JapeException(
267         "Can't read from " + japeFile.getPath() + ": " + e.getMessage()
268       );
269     }
270 
271     // call the input stream deserialise method
272     deserialiseJape(japeInputStream);
273   } // deserialiseJape(File)
274 */
275   /** Deserialise from a JAR file. */
276 /*
277   private void deserialiseJape() throws JapeException {
278     // find the jar from CLASSPATH
279     //SearchPath classPath =
280     //  new SearchPath(System.getProperty("java.class.path"), ".");
281     File jarFile = new File(japeFileName); //classPath.getFile(japeFileName);
282     if(jarFile == null)
283       throw new JapeException("Batch: can't find " + japeFileName);
284 
285     // get a byte array input stream with the .ser in out of the jar file
286     JarFile jar = null;
287     BufferedInputStream japeInputStream = null;
288     try {
289       jar = new JarFile(jarFile.getPath());
290       japeInputStream = new BufferedInputStream(
291         jar.getInputStream(jar.getJarEntry(jarNameToSerName(japeFileName)))
292       );
293     } catch(IOException e) {
294       throw new JapeException("couldn't read jar file " + japeFileName);
295     }
296 
297 
298     // call the input stream deserialise method
299     deserialiseJape(japeInputStream);
300   } // deserialiseJape()
301 */
302   /** Create a transducer from an object input stream (deserialisation). */
303 /*
304   private void deserialiseJape(InputStream japeInputStream)
305   throws JapeException {
306     try {
307       ObjectInputStream ois = new ObjectInputStream(japeInputStream);
308       transducer = (Transducer) ois.readObject();
309       ois.close();
310       japeInputStream.close(); // redundant?
311     } catch (IOException e) {
312       throw new JapeException(
313         "Batch: can't deserialise InputStream (1): " + e.getMessage()
314       );
315     } catch (ClassNotFoundException e) {
316       throw new JapeException(
317         "Batch: can't deserialise InputStream (2): " + e.getMessage()
318       );
319     }
320   } // deserialise(OIS)
321 */
322   /** Create a .ser name from a .jar name. */
323 /*
324   private String jarNameToSerName(String jarName) {
325     return jarName.substring(0, jarName.length() - 4) + ".ser";
326   } // jarNameToSerName
327 */
328 
329   /** Process the given collection. */
330   public void transduce(Corpus coll) throws JapeException, ExecutionException {
331     // for each doc run the transducer
332     Iterator iter = coll.iterator();
333     while(iter.hasNext()) {
334       Document doc = (Document) iter.next();
335       // transducer.transduce(doc);
336       transduce(doc, doc.getAnnotations(), doc.getAnnotations());
337     }
338   } // transduce(coll)
339 
340   /** Process a single document. */
341   public void transduce(Document doc) throws JapeException, ExecutionException {
342     transducer.transduce(doc, doc.getAnnotations(), doc.getAnnotations());
343   } // transduce(doc)
344 
345   /** Process a single document. */
346   public void transduce(Document doc, AnnotationSet inputAS,
347                         AnnotationSet outputAS) throws JapeException,
348                                                        ExecutionException {
349     //no need to transduce empty document
350     if (inputAS == null || inputAS.isEmpty())
351       return;
352     transducer.transduce(doc, inputAS, outputAS);
353 
354   } // transduce(doc)
355 
356   /** Process a single text. */
357 /*
358   public Document transduce(String text) throws JapeException {
359     Document doc = null;
360     try {
361       doc = Factory.newDocument(text);
362     } catch (ResourceInstantiationException e) {
363       throw new JapeException(e.toString());
364     }
365     transducer.transduce(doc, doc.getAnnotations());
366     return doc;
367   } // transduce(text)
368 */
369   /** Process a single file. */
370 /*
371   public Document transduce(File textFile) throws JapeException {
372     String text = null;
373     try {
374       text = gate.util.Files.getString(textFile);
375     } catch(IOException e) { throw new JapeException(e.toString()); }
376     return transduce(text);
377   } // transduce(textFile)
378 */
379   /** Process a set of files. */
380 /*
381   public Corpus transduce(String[] textFileNames) throws JapeException {
382     Corpus coll = null;
383     try {
384       coll = Factory.newCorpus("JAPE batch corpus");
385       Document doc = null;
386       for(int i = 0; i < textFileNames.length; i++) {
387           doc = Factory.newDocument(textFileNames[i]);
388           doc.setFeatures(Factory.newFeatureMap());
389           /*coll.createDocument(
390             textFileNames[i],
391             null, // the text - should get read from disk
392             new AnnotationSetImpl(doc),
393             Factory.newFeatureMap(),
394             Document.COPIED
395           );*/
396 /*
397         transducer.transduce(doc, doc.getAnnotations());
398       }
399     } catch(ResourceInstantiationException e) {
400       throw new JapeException(e.toString());
401     }
402     return coll;
403   } // transduce(textFileNames)
404 */
405   /** This is where it all happens. This is <I>the</I> place to be. Take
406     * your summer holidays here. Visit on Saturday nights. Buy a season
407     * ticket from <CODE>www.programmer.gone.insane.com</CODE>.
408     * <P>
409     * Takes a .jape/.jar/.ser
410     *  file name (-j option) which is assumed to hold a pattern
411     * grammar for a multi-phase transducer, and a collection
412     * name (-c option) or a list of files. As needed it then parses and
413     * compiles the transducer, then transduces all the documents in the
414     * collection and saves it to disk.
415     */
416   public static void main(String args[]) {
417 /*
418     // oh great bug in the sky give us this day our daily fuckup
419     //gate.util.Debug.setDebug(true);
420     //gate.util.Debug.setDebug(Rule.class, true);
421     //gate.util.Debug.setDebug(LeftHandSide.class, true);
422     //gate.util.Debug.setDebug(BasicPatternElement.class, true);
423     //gate.util.Debug.setDebug(AnnotationSet.class, true);
424 
425     // The persistent name of the collection.
426     String persCollName = null;;
427 
428     // The collection to process.
429     Corpus collection = null;
430 
431     // create one of us
432     Batch batch = new Batch();
433 
434     // process the options
435     int i = 0;
436     for( ; i<args.length; i++) {
437       if(args[i].equals("-c") && ++i < args.length) // -c = coll name
438         persCollName = args[i];
439       else if(args[i].equals("-j") && ++i < args.length)// -j = transducer name
440         batch.japeFileName = args[i];
441       else if(args[i].equals("-v")) // -v = verbose
442         batch.setVerbose(true);
443       else if(args[i].startsWith("-"))
444         batch.usage("unknown option " + args[i]);
445       else
446         break;
447     } // for each arg
448 
449     // file name list
450     String[] fileNames = null;
451     if(args.length > i) {
452       fileNames = new String[args.length - i];
453       for(int j = 0; i<args.length; j++, i++)
454         fileNames[j] = args[i];
455     }
456 
457     // did they give valid options?
458     if(batch.japeFileName == null)
459       batch.usage("you must supply a transducer name");
460     if(fileNames != null && persCollName != null)
461       batch.usage("can't read a collection AND process a file list");
462 
463     // parse the transducer or bomb
464     batch.message("parsing the transducer");
465     try { batch.initTransducer(); }
466     catch(JapeException e) {
467       batch.usage("oops: " + e.toString());
468     }
469 
470     Corpus coll = null;
471     if(persCollName != null) { // we got a collection name, not a list of files
472 
473       // open the collection or bomb
474       coll = null;
475       batch.message("opening the collection");
476       try {
477         coll = Factory.newCorpus(persCollName);
478       } catch(ResourceInstantiationException e) {
479         batch.usage("oops (x): " + e);
480       }
481 
482       // transduce
483       batch.message("calling transducer");
484       try { batch.transduce(coll); }
485       catch(JapeException e) {
486         batch.usage("oops (1): " + e.toString());
487       }
488 
489       // save to disk
490       batch.message("saving the collection");
491       batch.usage("couldn't sync coll ");
492 
493     // we got a list of files, not a collection
494     } else {
495       batch.message("transducing transient collection");
496       try {
497         coll = batch.transduce(fileNames);
498       } catch(JapeException e) {
499         batch.usage("oops (2): " + e.toString());
500       }
501     }
502 
503     // we won! we won! we can smash up all the computers now!
504     batch.message("done");
505     //System.exit(0);
506 */
507   } // main
508 
509 
510   /** Whether to print progress messages or not. */
511   private boolean verbose = false;
512 
513   /** Set verbosity. */
514   public void setVerbose(boolean turtleSoup) { verbose = turtleSoup; }
515 
516   /** You got something wrong, dumbo. */
517   public void usage(String errorMessage) {
518     String usageMessage =
519       "usage: java gate.jape.Batch.main [-v] " +
520         "-j japefile(.ser|.jape|.jar) " +
521         "(-c CollectionName | filenames)";
522 
523     Err.println(errorMessage);
524     Err.println(usageMessage);
525     // System.exit(1);
526 
527   } // usage
528 
529   /** Hello? Anybody there?? */
530   public void message(String mess) {
531     if(verbose) Out.println("Batch: " + mess);
532   } // message
533 
534   public void setFeatures(gate.FeatureMap newFeatures) {
535     features = newFeatures;
536   }
537   public gate.FeatureMap getFeatures() {
538     return features;
539   }
540   public synchronized void removeProgressListener(ProgressListener l) {
541     if (progressListeners != null && progressListeners.contains(l)) {
542       Vector v = (Vector) progressListeners.clone();
543       v.removeElement(l);
544       progressListeners = v;
545     }
546   }
547   public synchronized void addProgressListener(ProgressListener l) {
548     Vector v = progressListeners == null ? new Vector(2) : (Vector) progressListeners.clone();
549     if (!v.contains(l)) {
550       v.addElement(l);
551       progressListeners = v;
552     }
553   }
554 
555   //ProcessProgressReporter implementation ends here
556 
557   /** Are we initialising from a resource? */
558 //  private boolean fromResource = false;
559 
560   /** Path to the resources tree */
561 //  private String resPath = null;
562 
563 
564   private gate.FeatureMap features;
565   private transient Vector progressListeners;
566   private transient Vector statusListeners;
567   private boolean enableDebugging;
568 
569   protected void fireProgressChanged(int e) {
570     if (progressListeners != null) {
571       Vector listeners = progressListeners;
572       int count = listeners.size();
573       for (int i = 0; i < count; i++) {
574         ((ProgressListener) listeners.elementAt(i)).progressChanged(e);
575       }
576     }
577   }
578   protected void fireProcessFinished() {
579     if (progressListeners != null) {
580       Vector listeners = progressListeners;
581       int count = listeners.size();
582       for (int i = 0; i < count; i++) {
583         ((ProgressListener) listeners.elementAt(i)).processFinished();
584       }
585     }
586   }
587   public synchronized void removeStatusListener(StatusListener l) {
588     if (statusListeners != null && statusListeners.contains(l)) {
589       Vector v = (Vector) statusListeners.clone();
590       v.removeElement(l);
591       statusListeners = v;
592     }
593   }
594   public synchronized void addStatusListener(StatusListener l) {
595     Vector v = statusListeners == null ? new Vector(2) : (Vector) statusListeners.clone();
596     if (!v.contains(l)) {
597       v.addElement(l);
598       statusListeners = v;
599     }
600   }
601   protected void fireStatusChanged(String e) {
602     if (statusListeners != null) {
603       Vector listeners = statusListeners;
604       int count = listeners.size();
605       for (int i = 0; i < count; i++) {
606         ((StatusListener) listeners.elementAt(i)).statusChanged(e);
607       }
608     }
609   }
610 
611   /**
612    * Sets the ontology to be used by the transducers
613    * @param ontology
614    */
615   public void setOntology(gate.creole.ontology.Ontology ontology) {
616     transducer.setOntology(ontology);
617   }
618   public boolean isEnableDebugging() {
619     return enableDebugging;
620   }
621   public void setEnableDebugging(boolean enableDebugging) {
622     this.enableDebugging = enableDebugging;
623     //propagate
624     if(transducer != null) transducer.setEnableDebugging(enableDebugging);
625   }
626 
627   
628   /*
629   private void writeObject(ObjectOutputStream oos) throws IOException {
630     Out.prln("writing batch");
631     oos.defaultWriteObject();
632     Out.prln("finished writing batch");
633   } // writeObject
634   */
635 
636 } // class Batch
637 
638