1
15
16 package gate.jape;
17
18 import java.io.IOException;
19 import java.util.Date;
20 import java.util.Iterator;
21
22 import junit.framework.*;
23
24 import gate.*;
25 import gate.creole.ResourceInstantiationException;
26 import gate.creole.gazetteer.DefaultGazetteer;
27 import gate.creole.tokeniser.DefaultTokeniser;
28 import gate.util.*;
29
30
31
32
34 public class TestJape extends TestCase
35 {
36
37 private static final boolean DEBUG = false;
38
39
40 public TestJape(String name) { super(name); }
41
42
43 public void setUp() {
44 }
47
50 public void _testCombined() throws IOException, GateException, Exception {
51 DoTestBigGrammar("AveShort");
52
53
80 }
82
83 public void testBatch() throws Exception{
84 Corpus c = Factory.newCorpus("TestJape corpus");
85 c.add(
86 Factory.newDocument(Files.getGateResourceAsString("texts/doc0.html"))
87 );
88 Document doc = (Document)c.get(0);
90 AnnotationSet defaultAS = doc.getAnnotations();
91
92 try {
93 FeatureMap feat = Factory.newFeatureMap();
94 defaultAS.add(new Long( 2), new Long( 4), "A",feat);
96 defaultAS.add(new Long( 4), new Long(6), "B",feat);
99 defaultAS.add(new Long(6), new Long(8), "C",feat);
104 defaultAS.add(new Long(8), new Long(10), "C",feat);
105 } catch(gate.util.InvalidOffsetException ioe) {
108 ioe.printStackTrace(Err.getPrintWriter());
109 }
110
120 Batch batch = new Batch(TestJape.class.getResource(
121 Files.getResourcePath() + "/jape/TestABC.jape"), "UTF-8");
122
127 batch.transduce(c);
129 doc = (Document)c.get(0);
131 }
135 public void DoTestBigGrammar(String textName) throws GateException, Exception{
136 long startCorpusLoad = 0, startCorpusTokenization = 0,
137 startGazeteerLoad = 0, startLookup = 0,
138 startJapeFileOpen = 0, startCorpusTransduce = 0,
139 endProcess = 0;
140 Out.print("Procesing " + textName + "...\n" +
141 "Started at: " + (new Date()) + "\n");
142 startCorpusLoad = System.currentTimeMillis();
143 Out.print("Loading corpus... ");
144 Corpus corpus = Factory.newCorpus("Jape Corpus");
145 try {
146 corpus.add(Factory.newDocument(
147 Files.getGateResourceAsString("jape/InputTexts/" + textName)));
148 } catch(IOException ioe) {
149 ioe.printStackTrace(Err.getPrintWriter());
150 }
151
152 if(corpus.isEmpty()) {
153 Err.println("Missing corpus !");
154 return;
155 }
156
157 gate.creole.tokeniser.DefaultTokeniser tokeniser = null;
159 try {
160 FeatureMap params = Factory.newFeatureMap();
162 tokeniser = (DefaultTokeniser) Factory.createResource(
163 "gate.creole.tokeniser.DefaultTokeniser", params);
164
165 } catch(ResourceInstantiationException re) {
166 re.printStackTrace(Err.getPrintWriter());
167 }
168 startCorpusTokenization = System.currentTimeMillis();
169 Out.print(": " +
170 (startCorpusTokenization - startCorpusLoad) +
171 "ms\n");
172
173 Out.print("Tokenizing the corpus... ");
174 int progress = 0;
175 int docCnt = corpus.size();
176 Iterator docIter = corpus.iterator();
177 Document currentDoc;
178 while(docIter.hasNext()){
179 currentDoc = (Document)docIter.next();
180 tokeniser.setDocument(currentDoc);
181 tokeniser.setAnnotationSetName(null);
183 tokeniser.execute();
184 gate.corpora.TestDocument.verifyNodeIdConsistency(currentDoc);
186 }
187
188 startJapeFileOpen = System.currentTimeMillis();
189 Out.print(": " + (startJapeFileOpen - startCorpusTokenization) +
190 "ms\n");
191
192 gate.creole.gazetteer.DefaultGazetteer gazeteer = null;
194 startGazeteerLoad = startLookup = System.currentTimeMillis();
195 Out.print("Loading gazeteer lists...");
196 try {
197 FeatureMap params = Factory.newFeatureMap();
199 gazeteer = (DefaultGazetteer) Factory.createResource(
200 "gate.creole.gazetteer.DefaultGazetteer", params);
201 gazeteer.init();
202 startLookup = System.currentTimeMillis();
203 Out.print(": " +
204 (startLookup - startGazeteerLoad) +
205 "ms\n");
206
207 Out.print("Doing gazeteer lookup... ");
208 docIter = corpus.iterator();
209 while(docIter.hasNext()){
210 currentDoc = (Document)docIter.next();
211 gazeteer.setDocument(currentDoc);
212 gazeteer.execute();
213 gate.corpora.TestDocument.verifyNodeIdConsistency(currentDoc);
215 }
216 } catch(ResourceInstantiationException re) {
217 Err.println("Cannot read the gazeteer lists!" +
218 "\nAre the GATE resources in place?\n" + re);
219 }
220
221 startJapeFileOpen = System.currentTimeMillis();
222 Out.print(": " + (startJapeFileOpen - startLookup) +
223 "ms\n");
224
225
226 Gate.init();
228
229
230 try {
231 Out.print("Opening Jape grammar... ");
232 Batch batch = new Batch(TestJape.class.getResource(
233 Files.getResourcePath() + "/jape/combined/main.jape"), "UTF-8");
234
240 startCorpusTransduce = (new Date()).getTime();
241 Out.print(": " + (startCorpusTransduce - startJapeFileOpen) +
242 "ms\n");
243 Out.print("Transducing the corpus... ");
244 batch.transduce(corpus);
245 endProcess = System.currentTimeMillis();
246 Out.print(": " + (endProcess - startCorpusTransduce) + "ms\n");
247 } catch(JapeException je) {
248 je.printStackTrace(Err.getPrintWriter());
249 }
250 }
252
257 public void testRhsErrorMessages() {
258 boolean gotException = false;
259
260 try {
261 if(DEBUG) {
262 Out.print(
263 "Opening Jape grammar... " + Gate.getUrl("tests/RhsError.jape")
264 );
265 }
266 Batch batch = new Batch(Gate.getUrl("tests/RhsError.jape"), "UTF-8");
268
269 Document doc = Factory.newDocument("This is a Small Document.");
271 FeatureMap features = Factory.newFeatureMap();
272 features.put("orth", "upperInitial");
273 doc.getAnnotations().add(new Long(0), new Long(8), "Token", features);
274
275 batch.transduce(doc);
277 } catch(Exception e) {
278 if(DEBUG) Out.prln(e);
279 gotException = true;
280 }
281
282 assertTrue("Bad JAPE grammar didn't throw an exception", gotException);
283
284 }
286
324
325 public static Test suite() {
326 return new TestSuite(TestJape.class);
327 }
329 public static void main(String[] args) {
331 for(int i = 0; i < 6; i++){
332 System.gc();
333 Out.println("Run " + i + " ==============");
334 try{
335 TestJape testJape = new TestJape("Test Jape");
336 testJape.setUp();
337 if(args.length < 1) testJape.DoTestBigGrammar("AveShort");
338 else testJape.DoTestBigGrammar(args[0]);
339 } catch(Exception e) {
340 e.printStackTrace(Err.getPrintWriter());
341 }
342 }
343 }
344 }