1   /*
2    *  TestJape.java
3    *
4    *  Copyright (c) 1998-2005, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Hamish Cunningham, 23/Feb/00
12   *
13   *  $Id: TestJape.java,v 1.50 2005/01/11 13:51:36 ian Exp $
14   */
15  
16  package gate.jape;
17  
18  import java.io.IOException;
19  import java.util.Date;
20  import java.util.Iterator;
21  
22  import junit.framework.*;
23  
24  import gate.*;
25  import gate.creole.ResourceInstantiationException;
26  import gate.creole.gazetteer.DefaultGazetteer;
27  import gate.creole.tokeniser.DefaultTokeniser;
28  import gate.util.*;
29  
30  
31  
32  /** Tests for the Corpus classes
33    */
34  public class TestJape extends TestCase
35  {
36    /** Debug flag */
37    private static final boolean DEBUG = false;
38  
39    /** Construction */
40    public TestJape(String name) { super(name); }
41  
42    /** Fixture set up */
43    public void setUp() {
44      //Out.println("TestJape.setUp()");
45    } // setUp
46  
47    /** Test using the large "combined" grammar from the gate/resources
48      * tree.
49      */
50    public void _testCombined() throws IOException, GateException, Exception {
51      DoTestBigGrammar("AveShort");
52  
53      /*
54      Corpus c = Factory.newCorpus("TestJape corpus");
55      c.add(
56        Factory.newDocument(Files.getResourceAsString("texts/doc0.html"))
57      );
58  
59      //add some annotations on the first (only) document in corpus c
60      Document doc = (Document) c.first();
61      AnnotationSet defaultAS = doc.getAnnotations();
62      FeatureMap feat = Factory.newFeatureMap();
63      defaultAS.add(new Long( 2), new Long( 4), "A",feat);
64      defaultAS.add(new Long( 4), new Long(6), "B",feat);
65      defaultAS.add(new Long(6), new Long(8), "C",feat);
66      defaultAS.add(new Long(8), new Long(10), "C",feat);
67  
68      // run the parser test
69      Gate.init();
70      Batch batch = null;
71      batch = new Batch("jape/combined/", "main.jape");
72  
73      // test the transducers
74      batch.transduce(c);
75      //Out.println(batch.getTransducer());
76  
77      // check the results
78      doc = (Document)c.first();
79      */
80    } // testCombined()
81  
82    /** Batch run */
83    public void testBatch() throws Exception{
84      Corpus c = Factory.newCorpus("TestJape corpus");
85      c.add(
86        Factory.newDocument(Files.getGateResourceAsString("texts/doc0.html"))
87      );
88      //add some annotations on the first (only) document in corpus c
89      Document doc = (Document)c.get(0);
90      AnnotationSet defaultAS = doc.getAnnotations();
91  
92      try {
93        FeatureMap feat = Factory.newFeatureMap();
94        // defaultAS.add(new Long( 0), new Long( 2), "A",feat);
95        defaultAS.add(new Long( 2), new Long( 4), "A",feat);
96        // defaultAS.add(new Long( 4), new Long( 6), "A",feat);
97        // defaultAS.add(new Long( 6), new Long( 8), "A",feat);
98        defaultAS.add(new Long( 4), new Long(6), "B",feat);
99        // defaultAS.add(new Long(10), new Long(12), "B",feat);
100       // defaultAS.add(new Long(12), new Long(14), "B",feat);
101       // defaultAS.add(new Long(14), new Long(16), "B",feat);
102       // defaultAS.add(new Long(16), new Long(18), "B",feat);
103       defaultAS.add(new Long(6), new Long(8), "C",feat);
104       defaultAS.add(new Long(8), new Long(10), "C",feat);
105       // defaultAS.add(new Long(22), new Long(24), "C",feat);
106       // defaultAS.add(new Long(24), new Long(26), "C",feat);
107     } catch(gate.util.InvalidOffsetException ioe) {
108       ioe.printStackTrace(Err.getPrintWriter());
109     }
110 /*
111     // run the parser test
112     Batch batch = null;
113     // String japeFileName = "/gate/jape/Test11.jape";
114     String japeFileName = Files.getResourcePath() + "/jape/TestABC.jape";
115     // String japeFileName = "/gate/jape/Country.jape";
116     InputStream japeFileStream = Files.getResourceAsStream(japeFileName);
117     if(japeFileStream == null)
118       throw new JapeException("couldn't open " + japeFileName);
119 */
120     Batch batch = new Batch(TestJape.class.getResource(
121               Files.getResourcePath() + "/jape/TestABC.jape"), "UTF-8");
122     // test code: print the first line of the jape stream
123     // Out.println(
124     //   new BufferedReader(new InputStreamReader(japeFileStream)).readLine()
125     // );
126 
127     // test the transducers
128     batch.transduce(c);
129     // check the results
130     doc = (Document)c.get(0);
131     // defaultAS = doc.getAnnotations();
132     // Out.println(defaultAS);
133   } // testBatch()
134 
135   public void DoTestBigGrammar(String textName) throws GateException, Exception{
136     long startCorpusLoad = 0, startCorpusTokenization = 0,
137          startGazeteerLoad = 0, startLookup = 0,
138          startJapeFileOpen = 0, startCorpusTransduce = 0,
139          endProcess = 0;
140     Out.print("Procesing " + textName + "...\n" +
141                      "Started at: " + (new Date()) + "\n");
142     startCorpusLoad = System.currentTimeMillis();
143     Out.print("Loading corpus... ");
144     Corpus corpus = Factory.newCorpus("Jape Corpus");
145     try {
146     corpus.add(Factory.newDocument(
147         Files.getGateResourceAsString("jape/InputTexts/" + textName)));
148     } catch(IOException ioe) {
149       ioe.printStackTrace(Err.getPrintWriter());
150     }
151 
152     if(corpus.isEmpty()) {
153       Err.println("Missing corpus !");
154       return;
155     }
156 
157     //tokenize all documents
158     gate.creole.tokeniser.DefaultTokeniser tokeniser = null;
159     try {
160       //create a default tokeniser
161       FeatureMap params = Factory.newFeatureMap();
162       tokeniser = (DefaultTokeniser) Factory.createResource(
163                             "gate.creole.tokeniser.DefaultTokeniser", params);
164       /*Files.getResourceAsStream("creole/tokeniser/DefaultTokeniser.rules"));*/
165     } catch(ResourceInstantiationException re) {
166       re.printStackTrace(Err.getPrintWriter());
167     }
168     startCorpusTokenization = System.currentTimeMillis();
169     Out.print(": " +
170                        (startCorpusTokenization - startCorpusLoad) +
171                        "ms\n");
172 
173     Out.print("Tokenizing the corpus... ");
174     int progress = 0;
175     int docCnt = corpus.size();
176     Iterator docIter = corpus.iterator();
177     Document currentDoc;
178     while(docIter.hasNext()){
179       currentDoc = (Document)docIter.next();
180       tokeniser.setDocument(currentDoc);
181       //use the default anotation set
182       tokeniser.setAnnotationSetName(null);
183       tokeniser.execute();
184       // Verfy if all annotations from the default annotation set are consistent
185       gate.corpora.TestDocument.verifyNodeIdConsistency(currentDoc);
186     }
187 
188     startJapeFileOpen = System.currentTimeMillis();
189     Out.print(": " + (startJapeFileOpen - startCorpusTokenization) +
190                      "ms\n");
191 
192     //Do gazeteer lookup
193     gate.creole.gazetteer.DefaultGazetteer gazeteer = null;
194     startGazeteerLoad = startLookup = System.currentTimeMillis();
195     Out.print("Loading gazeteer lists...");
196     try {
197       //create a default gazetteer
198       FeatureMap params = Factory.newFeatureMap();
199       gazeteer = (DefaultGazetteer) Factory.createResource(
200                             "gate.creole.gazetteer.DefaultGazetteer", params);
201       gazeteer.init();
202       startLookup = System.currentTimeMillis();
203       Out.print(": " +
204                          (startLookup - startGazeteerLoad) +
205                          "ms\n");
206 
207       Out.print("Doing gazeteer lookup... ");
208       docIter = corpus.iterator();
209       while(docIter.hasNext()){
210         currentDoc = (Document)docIter.next();
211         gazeteer.setDocument(currentDoc);
212         gazeteer.execute();
213         // Verfy if all annotations from the default annotation set are consistent
214         gate.corpora.TestDocument.verifyNodeIdConsistency(currentDoc);
215       }
216     } catch(ResourceInstantiationException re) {
217       Err.println("Cannot read the gazeteer lists!" +
218                          "\nAre the GATE resources in place?\n" + re);
219     }
220 
221     startJapeFileOpen = System.currentTimeMillis();
222     Out.print(": " + (startJapeFileOpen - startLookup) +
223                      "ms\n");
224 
225 
226     //do the jape stuff
227     Gate.init();
228 
229 
230     try {
231       Out.print("Opening Jape grammar... ");
232       Batch batch = new Batch(TestJape.class.getResource(
233         Files.getResourcePath() + "/jape/combined/main.jape"), "UTF-8");
234       /*
235       Batch batch = new Batch("jape/combined/", "brian-soc-loc1.jape");
236       Batch batch =
237         new Batch("z:/gate/src/gate/resources/jape/combined/main.jape");
238       Batch batch = new Batch("jape/", "Country.jape");
239       */
240       startCorpusTransduce = (new Date()).getTime();
241       Out.print(": " + (startCorpusTransduce - startJapeFileOpen) +
242                        "ms\n");
243       Out.print("Transducing the corpus... ");
244       batch.transduce(corpus);
245       endProcess = System.currentTimeMillis();
246       Out.print(": " + (endProcess - startCorpusTransduce) + "ms\n");
247     } catch(JapeException je) {
248       je.printStackTrace(Err.getPrintWriter());
249     }
250   } // DoBugTestGrammar
251 
252   /**
253    * This test sets up a JAPE transducer based on a grammar
254    * (RhsError.jape) that will throw a null pointer exception.
255    * The test succeeds so long as we get that exception.
256    */
257   public void testRhsErrorMessages() {
258     boolean gotException = false;
259 
260     try {
261       if(DEBUG) {
262         Out.print(
263           "Opening Jape grammar... " + Gate.getUrl("tests/RhsError.jape")
264         );
265       }
266       // a JAPE batcher
267       Batch batch = new Batch(Gate.getUrl("tests/RhsError.jape"), "UTF-8");
268 
269       // a document with an annotation
270       Document doc = Factory.newDocument("This is a Small Document.");
271       FeatureMap features = Factory.newFeatureMap();
272       features.put("orth", "upperInitial");
273       doc.getAnnotations().add(new Long(0), new Long(8), "Token", features);
274 
275       // run jape on the document
276       batch.transduce(doc);
277     } catch(Exception e) {
278       if(DEBUG) Out.prln(e);
279       gotException = true;
280     }
281 
282     assertTrue("Bad JAPE grammar didn't throw an exception", gotException);
283 
284   }  // testRhsErrorMessages
285 
286 //  /**
287 //   * This test sets up a JAPE transducer based on a grammar
288 //   * (RhsError2.jape) that will throw a compiler error.
289 //   * The test succeeds so long as we get that exception.
290 //   */
291 //  public void testRhsErrorMessages2() {
292 //    boolean gotException = false;
293 //
294 //    // disable System.out so that the compiler can't splash its error on screen
295 //    if(DEBUG) System.out.println("hello 1");
296 //    PrintStream sysout = System.out;
297 //    System.setOut(new PrintStream(new ByteArrayOutputStream()));
298 //    if(DEBUG) System.out.println("hello 2");
299 //
300 //    // run a JAPE batch on the faulty grammar
301 //    try {
302 //      if(DEBUG) {
303 //        Out.print(
304 //          "Opening Jape grammar... " + Gate.getUrl("tests/RhsError2.jape")
305 //        );
306 //      }
307 //      // a JAPE batcher
308 //      Batch batch = new Batch(Gate.getUrl("tests/RhsError2.jape"), "UTF-8");
309 //    } catch(Exception e) {
310 //      if(DEBUG) Out.prln(e);
311 //      gotException = true;
312 //    } finally {
313 //
314 //      // re-enable System.out
315 //      System.setOut(sysout);
316 //      if(DEBUG) System.out.println("hello 3");
317 //    }
318 //
319 //    assertTrue("Bad JAPE grammar (2) didn't throw an exception", gotException);
320 //
321 //  }  // testRhsErrorMessages2
322 //
323 
324   /** Test suite routine for the test runner */
325   public static Test suite() {
326     return new TestSuite(TestJape.class);
327   } // suite
328 
329   //main method for running this test as a standalone test
330   public static void main(String[] args) {
331     for(int i = 0; i < 6; i++){
332     System.gc();
333     Out.println("Run " + i + "   ==============");
334       try{
335         TestJape testJape = new TestJape("Test Jape");
336         testJape.setUp();
337         if(args.length < 1) testJape.DoTestBigGrammar("AveShort");
338        else testJape.DoTestBigGrammar(args[0]);
339       } catch(Exception e) {
340         e.printStackTrace(Err.getPrintWriter());
341       }
342     }
343   }
344 } // class TestJape
345