1   /*
2    *  Scratch.java
3    *
4    *  Copyright (c) 1998-2005, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Hamish Cunningham, 22/03/00
12   *
13   *  $Id: Scratch.java,v 1.85 2005/02/14 16:36:30 valyt Exp $
14   */
15  
16  
17  package gate.util;
18  
19  import java.awt.Color;
20  import java.io.*;
21  import java.net.*;
22  import java.net.URI;
23  import java.net.URL;
24  import java.util.*;
25  import java.util.prefs.Preferences;
26  import java.util.zip.GZIPInputStream;
27  import java.util.zip.GZIPOutputStream;
28  
29  import javax.swing.UIManager;
30  import org.pdfbox.pdmodel.PDDocument;
31  import org.pdfbox.util.PDFTextStripper;
32  
33  import gate.*;
34  import gate.creole.*;
35  import gate.creole.ANNIEConstants;
36  import gate.creole.Transducer;
37  import gate.creole.gazetteer.DefaultGazetteer;
38  import gate.creole.ir.*;
39  import gate.creole.tokeniser.DefaultTokeniser;
40  import gate.gui.MainFrame;
41  import gate.gui.docview.AnnotationSetsView;
42  import gate.persist.SerialDataStore;
43  import gate.util.persistence.PersistenceManager;
44  
45  /** A scratch pad for experimenting.
46    */
47  public class Scratch
48  {
49    /** Debug flag */
50    private static final boolean DEBUG = false;
51  
52    
53    public static void docFromString(){
54      try{
55        Gate.init();
56        SerialAnalyserController annie = (SerialAnalyserController)
57          PersistenceManager.loadObjectFromFile(new File("d:/tmp/annie.gapp"));
58        
59        Corpus corpus = Factory.newCorpus("A Corpus");
60        Document doc = Factory.newDocument("US President George W Bush has said he is seeking a $600m (£323m) boost in aid to nations hit by the Asian tsunami.");
61        corpus.add(doc);
62        annie.setCorpus(corpus);
63        annie.execute();
64        
65        //get the annotations
66        Iterator annIter = doc.getAnnotations().iterator();
67        while(annIter.hasNext()){
68          System.out.println(annIter.next());
69        }
70        
71      }catch(Exception e){
72        e.printStackTrace();
73      }
74    }
75    
76      
77    public static void main(String args[]) throws Exception {   
78      
79      File file = new File("Z:/gate/bin");
80      System.out.println("Canonical path: " + file.getCanonicalPath());
81      System.out.println("URL: " + file.toURL());
82      
83      URL url = new URL("jar:file:/Z:/gate/bin/gate.jar!/gate/Gate.class");
84      System.out.println(url);
85      System.out.println("Path: " + url.getPath());
86      System.out.println("File: " + url.getFile());
87      System.out.println("Host: " + url.getHost());
88      System.out.println("Proto: " + url.getProtocol());
89      
90      url = Thread.currentThread().getContextClassLoader().
91        getResource("gate/Gate.class");
92      System.out.println(url);
93      System.out.println("Path: " + url.getPath());
94      System.out.println("File: " + url.getFile());
95      System.out.println("Host: " + url.getHost());
96      System.out.println("Proto: " + url.getProtocol());
97      
98      Map defaultsMap = UIManager.getLookAndFeelDefaults();
99      System.out.println(defaultsMap.keySet());
100 
101     
102     //test for a bug reported by Luc Plamondon
103     
104     Gate.init();
105     Document doc = Factory.newDocument("ala bala portocala");
106     AnnotationSet set = doc.getAnnotations();
107     Integer annId = 
108       set.add(new Long(3), new Long(5), "FooBar", Factory.newFeatureMap());
109     Annotation ann = set.get(annId);
110     //remove the annotation 
111     set.remove(ann);
112     
113     AnnotationSet resSet = set.get(new Long(0), new Long(10));
114     
115     //this set is empty so the bug was fixed.
116     System.out.println(resSet);
117     
118     System.out.println("==============================================");
119     
120     
121     Map listsMap = new HashMap();
122     listsMap.put("blah", new ArrayList());
123     List theList = (List)listsMap.get("blah");
124     System.out.println(theList);
125     theList.add("object");
126     theList = (List)listsMap.get("blah");
127     System.out.println(theList);
128     
129     
130     
131     File home = new File("z:/gate/plugins");
132     File tok = new File(home, "ANNIE/resources/tokeniser/Default.rul");
133     System.out.println(tok);
134     
135     Preferences prefRoot = Preferences.userNodeForPackage(AnnotationSetsView.class);
136     System.out.println(prefRoot.keys().length);
137     prefRoot.removeNode();
138     prefRoot = Preferences.userNodeForPackage(AnnotationSetsView.class);
139     System.out.println(prefRoot.keys().length);
140     Color col = new Color(100, 101, 102, 103);
141     int rgb = col.getRGB();
142     int alpha = col.getAlpha();
143     int rgba = rgb | (alpha << 24);
144     Color col1 = new Color(rgba, true);
145     System.out.println(col + " a: " + col.getAlpha());
146     System.out.println(col1+ " a: " + col1.getAlpha());
147     System.out.println(col.equals(col1));
148 //    Map defaultsMap = UIManager.getLookAndFeelDefaults();
149 //    System.out.println(defaultsMap.keySet());
150     
151     
152 //    double a = 16.99;
153 //    double b = 9.99;
154 //    double c = a - b;
155 //    System.out.println(c);
156 
157 //    Runtime.getRuntime().exec(new String[]{"cmd",
158 //                                           "C:\\Program Files\\GATE 2.2\\bin\\gate.bat"},
159 //                              null,
160 //                              new File("C:\\Program Files\\GATE 2.2\\bin"));
161 
162 //    Gate.init();
163 //    Document doc = Factory.newDocument("The quick brown fox jumped over the lazy dog");
164 //    AnnotationSet annSet1 = doc.getAnnotations("Set1");
165 //    annSet1.add(new Long(1), new Long(5), "Foo", Factory.newFeatureMap());
166 //
167 //    AnnotationSet annSet2 = doc.getAnnotations("Set2");
168 //    annSet2.add(new Long(1), new Long(5), "Bar", Factory.newFeatureMap());
169 //    annSet2.addAll(annSet1);
170 //
171 //    List annotations = new ArrayList(annSet2);
172 //    Collections.sort(annotations, new OffsetComparator());
173 //    Iterator annIter = annotations.iterator();
174 //    while(annIter.hasNext()){
175 //      Annotation ann =(Annotation)annIter.next();
176 //      System.out.print("Start node: ID = " + ann.getStartNode().getId());
177 //      System.out.println(" Offset = " + ann.getStartNode().getOffset());
178 //      System.out.print("End node: ID = " + ann.getEndNode().getId());
179 //      System.out.println(" Offset = " + ann.getEndNode().getOffset());
180 //
181 //    }
182 //    File tempFile = File.createTempFile("gaga", "");
183 //    tempFile.delete();
184 //    tempFile.mkdir();
185 //    tempFile.deleteOnExit();
186 //    File tempFile2 = File.createTempFile("fil", ".tmp", tempFile);
187 //    tempFile2.deleteOnExit();
188 //System.out.println(tempFile.getCanonicalPath());
189 //    Thread.sleep(100000);
190 //
191 //    Map charsets = java.nio.charset.Charset.availableCharsets();
192 //    Iterator namesIter = charsets.keySet().iterator();
193 //    while(namesIter.hasNext()){
194 //      String name = (String)namesIter.next();
195 //      System.out.println(name + " : " + charsets.get(name));
196 //    }
197 //    System.out.println(System.getProperty("file.encoding"));
198 //    System.out.println(java.nio.charset.Charset.forName(System.getProperty("file.encoding")).name());
199 //    System.out.println(new Character((char)0xa3));
200 //    Gate.init();
201 //
202 //    List classes = Tools.findSubclasses(gate.creole.ir.Search.class);
203 //    if(classes != null) for(int i = 0; i < classes.size(); i++){
204 //      Out.prln(classes.get(i).toString());
205 //    }
206 //    createIndex();
207 //    URL anURL = new URL("file:/z:/a/b/c/d.txt");
208 //    URL anotherURL = new URL("file:/z:/a/b/c/d.txt");
209 //    String relPath = gate.util.persistence.PersistenceManager.
210 //                     getRelativePath(anURL, anotherURL);
211 //    Out.prln("Context: " + anURL);
212 //    Out.prln("Target: " + anotherURL);
213 //    Out.prln("Relative path: " + relPath);
214 //    Out.prln("Result " + new URL(anURL, relPath));
215 //    javax.swing.text.FlowView fv;
216 //    javax.swing.UIManager.setLookAndFeel(javax.swing.UIManager.getSystemLookAndFeelClassName());
217 //    Map uidefaults  = (Map)javax.swing.UIManager.getDefaults();
218 //    List keys = new ArrayList(uidefaults.keySet());
219 //    Collections.sort(keys);
220 //    Iterator keyIter = keys.iterator();
221 //    while(keyIter.hasNext()){
222 //      Object key = keyIter.next();
223 //      System.out.println(key + " : " + uidefaults.get(key));
224 //    }
225 
226     // initialise the thing
227 //    Gate.setNetConnected(false);
228 //    Gate.setLocalWebServer(false);
229 //    Gate.init();
230 
231 //    Scratch oneOfMe = new Scratch();
232 //    try{
233 //      oneOfMe.runNerc();
234 //    } catch (Exception e) {
235 //      e.printStackTrace(Out.getPrintWriter());
236 //    }
237 
238 
239 //    CreoleRegister reg = Gate.getCreoleRegister();
240 //System.out.println("Instances for " + reg.getLrInstances("gate.creole.AnnotationSchema"));
241 //System.out.println("Instances for " + reg.getAllInstances ("gate.creole.AnnotationSchema"));
242 
243 //System.out.println("VRs for " + reg.getAnnotationVRs("Tree"));
244 //System.out.println("VRs for " + reg.getAnnotationVRs());
245 
246 //System.out.println(reg.getLargeVRsForResource("gate.corpora.DocumentImpl"));
247   } // main
248 
249   /** Example of using an exit-time hook. */
250   public static void exitTimeHook() {
251     Runtime.getRuntime().addShutdownHook(new Thread() {
252       public void run() {
253         System.out.println("shutting down");
254         System.out.flush();
255 
256         // create a File to store the state in
257         File stateFile = new File("z:\\tmp", "GateGuiState.gzsr");
258 
259         // dump the state into the new File
260         try {
261           ObjectOutputStream oos = new ObjectOutputStream(
262             new GZIPOutputStream(new FileOutputStream(stateFile))
263           );
264           System.out.println("writing main frame");
265           System.out.flush();
266           oos.writeObject(Main.getMainFrame());
267           oos.close();
268         } catch(Exception e) {
269           System.out.println("Couldn't write to state file: " + e);
270         }
271 
272         System.out.println("done");
273         System.out.flush();
274       }
275     });
276   } // exitTimeHook()
277 
278   /**
279    * ***** <B>Failed</B> *****
280    * attempt to serialise whole gui state - various swing components
281    * don't like to be serialised :-(. might be worth trying again when
282    * jdk1.4 arrives.
283    */
284   public static void dumpGuiState() {
285     System.out.println("dumping gui state...");
286     System.out.flush();
287 
288     // create a File to store the state in
289     File stateFile = new File("z:\\tmp", "GateGuiState.gzsr");
290 
291     // dump the state into the new File
292     try {
293       ObjectOutputStream oos = new ObjectOutputStream(
294         new GZIPOutputStream(new FileOutputStream(stateFile))
295       );
296       MainFrame mf = Main.getMainFrame();
297 
298       // wait for 1 sec
299       long startTime = System.currentTimeMillis();
300       long timeNow = System.currentTimeMillis();
301       while(timeNow - startTime < 3000){
302         try {
303           Thread.sleep(150);
304           timeNow = System.currentTimeMillis();
305         } catch(InterruptedException ie) {}
306       }
307 
308       System.out.println("writing main frame");
309       System.out.flush();
310       oos.writeObject(mf);
311       oos.close();
312     } catch(Exception e) {
313       System.out.println("Couldn't write to state file: " + e);
314     }
315 
316     System.out.println("...done gui dump");
317     System.out.flush();
318   } // dumpGuiState
319 
320   /**
321    * Run NERC and print out the various stages (doesn't actually
322    * use Nerc but the individual bits), and serialise then deserialise
323    * the NERC system.
324    */
325   public void runNerc() throws Exception {
326     long startTime = System.currentTimeMillis();
327 
328     Out.prln("gate init");
329     Gate.setLocalWebServer(false);
330     Gate.setNetConnected(false);
331     Gate.init();
332 
333     Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
334     Out.prln("creating resources");
335 
336     // a controller
337     Controller c1 = (Controller) Factory.createResource(
338       "gate.creole.SerialController",
339       Factory.newFeatureMap()
340     );
341     c1.setName("Scratch controller");
342 
343     //get a document
344     FeatureMap params = Factory.newFeatureMap();
345     params.put(Document.DOCUMENT_URL_PARAMETER_NAME, Gate.getUrl("tests/doc0.html"));
346     params.put(Document.DOCUMENT_MARKUP_AWARE_PARAMETER_NAME, "false");
347     Document doc = (Document)Factory.createResource("gate.corpora.DocumentImpl",
348                                                     params);
349 
350     //create a default tokeniser
351     params = Factory.newFeatureMap();
352     params.put(DefaultTokeniser.DEF_TOK_TOKRULES_URL_PARAMETER_NAME,
353       "gate:/creole/tokeniser/DefaultTokeniser.rules");
354     params.put(DefaultTokeniser.DEF_TOK_ENCODING_PARAMETER_NAME, "UTF-8");
355     params.put(DefaultTokeniser.DEF_TOK_DOCUMENT_PARAMETER_NAME, doc);
356     ProcessingResource tokeniser = (ProcessingResource) Factory.createResource(
357       "gate.creole.tokeniser.DefaultTokeniser", params
358     );
359 
360     //create a default gazetteer
361     params = Factory.newFeatureMap();
362     params.put(DefaultGazetteer.DEF_GAZ_DOCUMENT_PARAMETER_NAME, doc);
363     params.put(DefaultGazetteer.DEF_GAZ_LISTS_URL_PARAMETER_NAME,
364       "gate:/creole/gazeteer/default/lists.def");
365     ProcessingResource gaz = (ProcessingResource) Factory.createResource(
366       "gate.creole.gazetteer.DefaultGazetteer", params
367     );
368 
369     //create a default transducer
370     params = Factory.newFeatureMap();
371     params.put(Transducer.TRANSD_DOCUMENT_PARAMETER_NAME, doc);
372     //params.put("grammarURL", new File("z:\\tmp\\main.jape").toURL());
373     ProcessingResource trans = (ProcessingResource) Factory.createResource(
374       "gate.creole.Transducer", params
375     );
376 
377     // get the controller to encapsulate the tok and gaz
378     c1.getPRs().add(tokeniser);
379     c1.getPRs().add(gaz);
380     c1.getPRs().add(trans);
381 
382     Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
383     Out.prln("dumping state");
384 
385     // create a File to store the state in
386     File stateFile = new File("z:\\tmp", "SerialisedGateState.gzsr");
387 
388     // dump the state into the new File
389     try {
390       ObjectOutputStream oos = new ObjectOutputStream(
391         new GZIPOutputStream(new FileOutputStream(stateFile))
392       );
393       oos.writeObject(new SessionState());
394       oos.close();
395     } catch(IOException e) {
396       throw new GateException("Couldn't write to state file: " + e);
397     }
398 
399     Out.prln(System.getProperty("user.home"));
400 
401     Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
402     Out.prln("reinstating");
403 
404     try {
405       FileInputStream fis = new FileInputStream(stateFile);
406       GZIPInputStream zis = new GZIPInputStream(fis);
407       ObjectInputStream ois = new ObjectInputStream(zis);
408       SessionState state = (SessionState) ois.readObject();
409       ois.close();
410     } catch(IOException e) {
411       throw
412         new GateException("Couldn't read file "+stateFile+": "+e);
413     } catch(ClassNotFoundException ee) {
414       throw
415         new GateException("Couldn't find class: "+ee);
416     }
417 
418     Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
419     Out.prln("done");
420   } // runNerc()
421 
422   
423  
424   /** Inner class for holding CR and DSR for serialisation experiments */
425   class SessionState implements Serializable {
426     SessionState() {
427       cr = Gate.getCreoleRegister();
428       dsr = Gate.getDataStoreRegister();
429     }
430 
431     CreoleRegister cr;
432 
433     DataStoreRegister dsr;
434 
435     // other state from Gate? and elsewhere?
436   } // SessionState
437 
438   /** Generate a random integer for file naming. */
439   protected static int random() {
440     return randomiser.nextInt(9999);
441   } // random
442 
443   /**
444    * Generates an index for a corpus in a datastore on Valy's computer in order
445    * to have some test data.
446    */
447   public static void createIndex() throws Exception{
448     String dsURLString = "file:///d:/temp/ds";
449     String indexLocation = "d:/temp/ds.idx";
450 
451     Gate.init();
452 
453     //open the datastore
454     SerialDataStore sds = (SerialDataStore)Factory.openDataStore(
455                             "gate.persist.SerialDataStore", dsURLString);
456     sds.open();
457     List corporaIds = sds.getLrIds("gate.corpora.SerialCorpusImpl");
458     IndexedCorpus corpus = (IndexedCorpus)
459                            sds.getLr("gate.corpora.SerialCorpusImpl",
460 
461                                      corporaIds.get(0));
462     DefaultIndexDefinition did = new DefaultIndexDefinition();
463     did.setIrEngineClassName(gate.creole.ir.lucene.
464                              LuceneIREngine.class.getName());
465 
466     did.setIndexLocation(indexLocation);
467     did.addIndexField(new IndexField("body", new ContentPropertyReader(), false));
468 
469     corpus.setIndexDefinition(did);
470 
471     Out.prln("removing old index");
472     corpus.getIndexManager().deleteIndex();
473     Out.prln("building new index");
474     corpus.getIndexManager().createIndex();
475     Out.prln("optimising new index");
476     corpus.getIndexManager().optimizeIndex();
477     Out.prln("saving corpus");
478     sds.sync(corpus);
479     Out.prln("done!");
480   }
481 
482   /**
483    *
484    * @param file a TXT file containing the text
485    */
486   public static void tokeniseFile(File file) throws Exception{
487     //initialise GATE (only call it once!!)
488     Gate.init();
489     //create the document
490     Document doc = Factory.newDocument(file.toURL());
491     //create the tokeniser
492     DefaultTokeniser tokeniser = (DefaultTokeniser)Factory.createResource(
493       "gate.creole.tokeniser.DefaultTokeniser");
494 
495     //tokenise the document
496     tokeniser.setParameterValue(DefaultTokeniser.DEF_TOK_DOCUMENT_PARAMETER_NAME, doc);
497     tokeniser.execute();
498 
499     //extract data from document
500     //we need tokens and spaces
501     Set annotationTypes = new HashSet();
502     annotationTypes.add(ANNIEConstants.TOKEN_ANNOTATION_TYPE);
503     annotationTypes.add(ANNIEConstants.SPACE_TOKEN_ANNOTATION_TYPE);
504 
505     List tokenList = new ArrayList(doc.getAnnotations().get(annotationTypes));
506     Collections.sort(tokenList, new OffsetComparator());
507 
508     //iterate through the tokens
509     Iterator tokIter = tokenList.iterator();
510     while(tokIter.hasNext()){
511       Annotation anAnnotation = (Annotation)tokIter.next();
512       System.out.println("Annotation: (" +
513                         anAnnotation.getStartNode().getOffset().toString() +
514                         ", " + anAnnotation.getEndNode().getOffset().toString() +
515                         "[type: " + anAnnotation.getType() +
516                          ", features: " + anAnnotation.getFeatures().toString()+
517                          "]" );
518     }
519   }
520 
521 
522   public static class ContentPropertyReader implements PropertyReader{
523     public String getPropertyValue(gate.Document doc){
524       return doc.getContent().toString();
525     }
526   }
527 
528   /** Random number generator */
529   protected static Random randomiser = new Random();
530 
531 } // class Scratch
532 
533 
534