1
15
16
17 package gate.util;
18
19 import java.awt.Color;
20 import java.io.*;
21 import java.net.*;
22 import java.net.URI;
23 import java.net.URL;
24 import java.util.*;
25 import java.util.prefs.Preferences;
26 import java.util.zip.GZIPInputStream;
27 import java.util.zip.GZIPOutputStream;
28
29 import javax.swing.UIManager;
30 import org.pdfbox.pdmodel.PDDocument;
31 import org.pdfbox.util.PDFTextStripper;
32
33 import gate.*;
34 import gate.creole.*;
35 import gate.creole.ANNIEConstants;
36 import gate.creole.Transducer;
37 import gate.creole.gazetteer.DefaultGazetteer;
38 import gate.creole.ir.*;
39 import gate.creole.tokeniser.DefaultTokeniser;
40 import gate.gui.MainFrame;
41 import gate.gui.docview.AnnotationSetsView;
42 import gate.persist.SerialDataStore;
43 import gate.util.persistence.PersistenceManager;
44
45
47 public class Scratch
48 {
49
50 private static final boolean DEBUG = false;
51
52
53 public static void docFromString(){
54 try{
55 Gate.init();
56 SerialAnalyserController annie = (SerialAnalyserController)
57 PersistenceManager.loadObjectFromFile(new File("d:/tmp/annie.gapp"));
58
59 Corpus corpus = Factory.newCorpus("A Corpus");
60 Document doc = Factory.newDocument("US President George W Bush has said he is seeking a $600m (£323m) boost in aid to nations hit by the Asian tsunami.");
61 corpus.add(doc);
62 annie.setCorpus(corpus);
63 annie.execute();
64
65 Iterator annIter = doc.getAnnotations().iterator();
67 while(annIter.hasNext()){
68 System.out.println(annIter.next());
69 }
70
71 }catch(Exception e){
72 e.printStackTrace();
73 }
74 }
75
76
77 public static void main(String args[]) throws Exception {
78
79 File file = new File("Z:/gate/bin");
80 System.out.println("Canonical path: " + file.getCanonicalPath());
81 System.out.println("URL: " + file.toURL());
82
83 URL url = new URL("jar:file:/Z:/gate/bin/gate.jar!/gate/Gate.class");
84 System.out.println(url);
85 System.out.println("Path: " + url.getPath());
86 System.out.println("File: " + url.getFile());
87 System.out.println("Host: " + url.getHost());
88 System.out.println("Proto: " + url.getProtocol());
89
90 url = Thread.currentThread().getContextClassLoader().
91 getResource("gate/Gate.class");
92 System.out.println(url);
93 System.out.println("Path: " + url.getPath());
94 System.out.println("File: " + url.getFile());
95 System.out.println("Host: " + url.getHost());
96 System.out.println("Proto: " + url.getProtocol());
97
98 Map defaultsMap = UIManager.getLookAndFeelDefaults();
99 System.out.println(defaultsMap.keySet());
100
101
102
104 Gate.init();
105 Document doc = Factory.newDocument("ala bala portocala");
106 AnnotationSet set = doc.getAnnotations();
107 Integer annId =
108 set.add(new Long(3), new Long(5), "FooBar", Factory.newFeatureMap());
109 Annotation ann = set.get(annId);
110 set.remove(ann);
112
113 AnnotationSet resSet = set.get(new Long(0), new Long(10));
114
115 System.out.println(resSet);
117
118 System.out.println("==============================================");
119
120
121 Map listsMap = new HashMap();
122 listsMap.put("blah", new ArrayList());
123 List theList = (List)listsMap.get("blah");
124 System.out.println(theList);
125 theList.add("object");
126 theList = (List)listsMap.get("blah");
127 System.out.println(theList);
128
129
130
131 File home = new File("z:/gate/plugins");
132 File tok = new File(home, "ANNIE/resources/tokeniser/Default.rul");
133 System.out.println(tok);
134
135 Preferences prefRoot = Preferences.userNodeForPackage(AnnotationSetsView.class);
136 System.out.println(prefRoot.keys().length);
137 prefRoot.removeNode();
138 prefRoot = Preferences.userNodeForPackage(AnnotationSetsView.class);
139 System.out.println(prefRoot.keys().length);
140 Color col = new Color(100, 101, 102, 103);
141 int rgb = col.getRGB();
142 int alpha = col.getAlpha();
143 int rgba = rgb | (alpha << 24);
144 Color col1 = new Color(rgba, true);
145 System.out.println(col + " a: " + col.getAlpha());
146 System.out.println(col1+ " a: " + col1.getAlpha());
147 System.out.println(col.equals(col1));
148
151
152
157
162
226
231
238
239
243
246 }
249
250 public static void exitTimeHook() {
251 Runtime.getRuntime().addShutdownHook(new Thread() {
252 public void run() {
253 System.out.println("shutting down");
254 System.out.flush();
255
256 File stateFile = new File("z:\\tmp", "GateGuiState.gzsr");
258
259 try {
261 ObjectOutputStream oos = new ObjectOutputStream(
262 new GZIPOutputStream(new FileOutputStream(stateFile))
263 );
264 System.out.println("writing main frame");
265 System.out.flush();
266 oos.writeObject(Main.getMainFrame());
267 oos.close();
268 } catch(Exception e) {
269 System.out.println("Couldn't write to state file: " + e);
270 }
271
272 System.out.println("done");
273 System.out.flush();
274 }
275 });
276 }
278
284 public static void dumpGuiState() {
285 System.out.println("dumping gui state...");
286 System.out.flush();
287
288 File stateFile = new File("z:\\tmp", "GateGuiState.gzsr");
290
291 try {
293 ObjectOutputStream oos = new ObjectOutputStream(
294 new GZIPOutputStream(new FileOutputStream(stateFile))
295 );
296 MainFrame mf = Main.getMainFrame();
297
298 long startTime = System.currentTimeMillis();
300 long timeNow = System.currentTimeMillis();
301 while(timeNow - startTime < 3000){
302 try {
303 Thread.sleep(150);
304 timeNow = System.currentTimeMillis();
305 } catch(InterruptedException ie) {}
306 }
307
308 System.out.println("writing main frame");
309 System.out.flush();
310 oos.writeObject(mf);
311 oos.close();
312 } catch(Exception e) {
313 System.out.println("Couldn't write to state file: " + e);
314 }
315
316 System.out.println("...done gui dump");
317 System.out.flush();
318 }
320
325 public void runNerc() throws Exception {
326 long startTime = System.currentTimeMillis();
327
328 Out.prln("gate init");
329 Gate.setLocalWebServer(false);
330 Gate.setNetConnected(false);
331 Gate.init();
332
333 Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
334 Out.prln("creating resources");
335
336 Controller c1 = (Controller) Factory.createResource(
338 "gate.creole.SerialController",
339 Factory.newFeatureMap()
340 );
341 c1.setName("Scratch controller");
342
343 FeatureMap params = Factory.newFeatureMap();
345 params.put(Document.DOCUMENT_URL_PARAMETER_NAME, Gate.getUrl("tests/doc0.html"));
346 params.put(Document.DOCUMENT_MARKUP_AWARE_PARAMETER_NAME, "false");
347 Document doc = (Document)Factory.createResource("gate.corpora.DocumentImpl",
348 params);
349
350 params = Factory.newFeatureMap();
352 params.put(DefaultTokeniser.DEF_TOK_TOKRULES_URL_PARAMETER_NAME,
353 "gate:/creole/tokeniser/DefaultTokeniser.rules");
354 params.put(DefaultTokeniser.DEF_TOK_ENCODING_PARAMETER_NAME, "UTF-8");
355 params.put(DefaultTokeniser.DEF_TOK_DOCUMENT_PARAMETER_NAME, doc);
356 ProcessingResource tokeniser = (ProcessingResource) Factory.createResource(
357 "gate.creole.tokeniser.DefaultTokeniser", params
358 );
359
360 params = Factory.newFeatureMap();
362 params.put(DefaultGazetteer.DEF_GAZ_DOCUMENT_PARAMETER_NAME, doc);
363 params.put(DefaultGazetteer.DEF_GAZ_LISTS_URL_PARAMETER_NAME,
364 "gate:/creole/gazeteer/default/lists.def");
365 ProcessingResource gaz = (ProcessingResource) Factory.createResource(
366 "gate.creole.gazetteer.DefaultGazetteer", params
367 );
368
369 params = Factory.newFeatureMap();
371 params.put(Transducer.TRANSD_DOCUMENT_PARAMETER_NAME, doc);
372 ProcessingResource trans = (ProcessingResource) Factory.createResource(
374 "gate.creole.Transducer", params
375 );
376
377 c1.getPRs().add(tokeniser);
379 c1.getPRs().add(gaz);
380 c1.getPRs().add(trans);
381
382 Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
383 Out.prln("dumping state");
384
385 File stateFile = new File("z:\\tmp", "SerialisedGateState.gzsr");
387
388 try {
390 ObjectOutputStream oos = new ObjectOutputStream(
391 new GZIPOutputStream(new FileOutputStream(stateFile))
392 );
393 oos.writeObject(new SessionState());
394 oos.close();
395 } catch(IOException e) {
396 throw new GateException("Couldn't write to state file: " + e);
397 }
398
399 Out.prln(System.getProperty("user.home"));
400
401 Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
402 Out.prln("reinstating");
403
404 try {
405 FileInputStream fis = new FileInputStream(stateFile);
406 GZIPInputStream zis = new GZIPInputStream(fis);
407 ObjectInputStream ois = new ObjectInputStream(zis);
408 SessionState state = (SessionState) ois.readObject();
409 ois.close();
410 } catch(IOException e) {
411 throw
412 new GateException("Couldn't read file "+stateFile+": "+e);
413 } catch(ClassNotFoundException ee) {
414 throw
415 new GateException("Couldn't find class: "+ee);
416 }
417
418 Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
419 Out.prln("done");
420 }
422
423
424
425 class SessionState implements Serializable {
426 SessionState() {
427 cr = Gate.getCreoleRegister();
428 dsr = Gate.getDataStoreRegister();
429 }
430
431 CreoleRegister cr;
432
433 DataStoreRegister dsr;
434
435 }
438
439 protected static int random() {
440 return randomiser.nextInt(9999);
441 }
443
447 public static void createIndex() throws Exception{
448 String dsURLString = "file:///d:/temp/ds";
449 String indexLocation = "d:/temp/ds.idx";
450
451 Gate.init();
452
453 SerialDataStore sds = (SerialDataStore)Factory.openDataStore(
455 "gate.persist.SerialDataStore", dsURLString);
456 sds.open();
457 List corporaIds = sds.getLrIds("gate.corpora.SerialCorpusImpl");
458 IndexedCorpus corpus = (IndexedCorpus)
459 sds.getLr("gate.corpora.SerialCorpusImpl",
460
461 corporaIds.get(0));
462 DefaultIndexDefinition did = new DefaultIndexDefinition();
463 did.setIrEngineClassName(gate.creole.ir.lucene.
464 LuceneIREngine.class.getName());
465
466 did.setIndexLocation(indexLocation);
467 did.addIndexField(new IndexField("body", new ContentPropertyReader(), false));
468
469 corpus.setIndexDefinition(did);
470
471 Out.prln("removing old index");
472 corpus.getIndexManager().deleteIndex();
473 Out.prln("building new index");
474 corpus.getIndexManager().createIndex();
475 Out.prln("optimising new index");
476 corpus.getIndexManager().optimizeIndex();
477 Out.prln("saving corpus");
478 sds.sync(corpus);
479 Out.prln("done!");
480 }
481
482
486 public static void tokeniseFile(File file) throws Exception{
487 Gate.init();
489 Document doc = Factory.newDocument(file.toURL());
491 DefaultTokeniser tokeniser = (DefaultTokeniser)Factory.createResource(
493 "gate.creole.tokeniser.DefaultTokeniser");
494
495 tokeniser.setParameterValue(DefaultTokeniser.DEF_TOK_DOCUMENT_PARAMETER_NAME, doc);
497 tokeniser.execute();
498
499 Set annotationTypes = new HashSet();
502 annotationTypes.add(ANNIEConstants.TOKEN_ANNOTATION_TYPE);
503 annotationTypes.add(ANNIEConstants.SPACE_TOKEN_ANNOTATION_TYPE);
504
505 List tokenList = new ArrayList(doc.getAnnotations().get(annotationTypes));
506 Collections.sort(tokenList, new OffsetComparator());
507
508 Iterator tokIter = tokenList.iterator();
510 while(tokIter.hasNext()){
511 Annotation anAnnotation = (Annotation)tokIter.next();
512 System.out.println("Annotation: (" +
513 anAnnotation.getStartNode().getOffset().toString() +
514 ", " + anAnnotation.getEndNode().getOffset().toString() +
515 "[type: " + anAnnotation.getType() +
516 ", features: " + anAnnotation.getFeatures().toString()+
517 "]" );
518 }
519 }
520
521
522 public static class ContentPropertyReader implements PropertyReader{
523 public String getPropertyValue(gate.Document doc){
524 return doc.getContent().toString();
525 }
526 }
527
528
529 protected static Random randomiser = new Random();
530
531 }
533
534