1   package gate.creole.tokeniser;
2   
3   import gate.*;
4   import gate.creole.*;
5   import gate.event.ProgressListener;
6   import gate.event.StatusListener;
7   import gate.util.Out;
8   
9   /**
10   * A composed tokeniser containing a {@link SimpleTokeniser} and a
11   * {@link gate.creole.Transducer}.
12   * The simple tokeniser tokenises the document and the transducer processes its
13   * output.
14   */
15  public class DefaultTokeniser extends AbstractLanguageAnalyser {
16  
17    public static final String
18      DEF_TOK_DOCUMENT_PARAMETER_NAME = "document";
19  
20    public static final String
21      DEF_TOK_ANNOT_SET_PARAMETER_NAME = "annotationSetName";
22  
23    public static final String
24      DEF_TOK_TOKRULES_URL_PARAMETER_NAME = "tokeniserRulesURL";
25  
26    public static final String
27      DEF_TOK_GRAMRULES_URL_PARAMETER_NAME = "transducerGrammarURL";
28  
29    public static final String
30      DEF_TOK_ENCODING_PARAMETER_NAME = "encoding";
31  
32    public DefaultTokeniser() {
33    }
34  
35  
36    /** Initialise this resource, and return it. */
37    public Resource init() throws ResourceInstantiationException{
38      try{
39        //init super object
40        super.init();
41        //create all the componets
42        FeatureMap params;
43        FeatureMap features;
44  
45        //tokeniser
46        fireStatusChanged("Creating a tokeniser");
47        params = Factory.newFeatureMap();
48        if(tokeniserRulesURL != null)
49          params.put(SimpleTokeniser.SIMP_TOK_RULES_URL_PARAMETER_NAME,
50                     tokeniserRulesURL);
51        params.put(SimpleTokeniser.SIMP_TOK_ENCODING_PARAMETER_NAME, encoding);
52        if(DEBUG) Out.prln("Parameters for the tokeniser: \n" + params);
53        features = Factory.newFeatureMap();
54        Gate.setHiddenAttribute(features, true);
55        tokeniser = (SimpleTokeniser)Factory.createResource(
56                      "gate.creole.tokeniser.SimpleTokeniser",
57                      params, features);
58        tokeniser.setName("Tokeniser " + System.currentTimeMillis());
59  
60        fireProgressChanged(50);
61  
62        //transducer
63        fireStatusChanged("Creating a Jape transducer");
64        params.clear();
65        if(transducerGrammarURL != null)
66         params.put(Transducer.TRANSD_GRAMMAR_URL_PARAMETER_NAME,
67                                                    transducerGrammarURL);
68        params.put(Transducer.TRANSD_ENCODING_PARAMETER_NAME, encoding);
69        if(DEBUG) Out.prln("Parameters for the transducer: \n" + params);
70        features.clear();
71        Gate.setHiddenAttribute(features, true);
72        transducer = (Transducer)Factory.createResource("gate.creole.Transducer",
73                                                        params, features);
74        fireProgressChanged(100);
75        fireProcessFinished();
76        transducer.setName("Transducer " + System.currentTimeMillis());
77      }catch(ResourceInstantiationException rie){
78        throw rie;
79      }catch(Exception e){
80        throw new ResourceInstantiationException(e);
81      }
82      return this;
83    }
84  
85    public void execute() throws ExecutionException{
86      interrupted = false;
87      //set the parameters
88      try{
89        FeatureMap params = Factory.newFeatureMap();
90        fireProgressChanged(0);
91        //tokeniser
92        params.put(SimpleTokeniser.SIMP_TOK_DOCUMENT_PARAMETER_NAME, document);
93        params.put(
94          SimpleTokeniser.SIMP_TOK_ANNOT_SET_PARAMETER_NAME, annotationSetName);
95        tokeniser.setParameterValues(params);
96  
97        //transducer
98        params.clear();
99        params.put(Transducer.TRANSD_DOCUMENT_PARAMETER_NAME, document);
100       params.put(Transducer.TRANSD_INPUT_AS_PARAMETER_NAME, annotationSetName);
101       params.put(Transducer.TRANSD_OUTPUT_AS_PARAMETER_NAME, annotationSetName);
102       transducer.setParameterValues(params);
103     }catch(ResourceInstantiationException rie){
104       throw new ExecutionException(rie);
105     }
106 
107     ProgressListener pListener = null;
108     StatusListener sListener = null;
109     fireProgressChanged(5);
110     pListener = new IntervalProgressListener(5, 50);
111     sListener = new StatusListener(){
112       public void statusChanged(String text){
113         fireStatusChanged(text);
114       }
115     };
116 
117     //tokeniser
118     if(isInterrupted()) throw new ExecutionInterruptedException(
119         "The execution of the \"" + getName() +
120         "\" tokeniser has been abruptly interrupted!");
121     tokeniser.addProgressListener(pListener);
122     tokeniser.addStatusListener(sListener);
123     try{
124       tokeniser.execute();
125     }catch(ExecutionInterruptedException eie){
126       throw new ExecutionInterruptedException(
127         "The execution of the \"" + getName() +
128         "\" tokeniser has been abruptly interrupted!");
129     }
130     tokeniser.removeProgressListener(pListener);
131     tokeniser.removeStatusListener(sListener);
132 
133   //transducer
134     if(isInterrupted()) throw new ExecutionInterruptedException(
135         "The execution of the \"" + getName() +
136         "\" tokeniser has been abruptly interrupted!");
137     pListener = new IntervalProgressListener(50, 100);
138     transducer.addProgressListener(pListener);
139     transducer.addStatusListener(sListener);
140 
141     transducer.execute();
142     transducer.removeProgressListener(pListener);
143     transducer.removeStatusListener(sListener);
144   }//execute
145 
146 
147   /**
148    * Notifies all the PRs in this controller that they should stop their
149    * execution as soon as possible.
150    */
151   public synchronized void interrupt(){
152     interrupted = true;
153     tokeniser.interrupt();
154     transducer.interrupt();
155   }
156 
157   public void setTokeniserRulesURL(java.net.URL tokeniserRulesURL) {
158     this.tokeniserRulesURL = tokeniserRulesURL;
159   }
160   public java.net.URL getTokeniserRulesURL() {
161     return tokeniserRulesURL;
162   }
163   public void setEncoding(String encoding) {
164     this.encoding = encoding;
165   }
166   public String getEncoding() {
167     return encoding;
168   }
169   public void setTransducerGrammarURL(java.net.URL transducerGrammarURL) {
170     this.transducerGrammarURL = transducerGrammarURL;
171   }
172   public java.net.URL getTransducerGrammarURL() {
173     return transducerGrammarURL;
174   }
175  // init()
176 
177   private static final boolean DEBUG = false;
178 
179   /** the simple tokeniser used for tokenisation*/
180   protected SimpleTokeniser tokeniser;
181 
182   /** the transducer used for post-processing*/
183   protected Transducer transducer;
184   private java.net.URL tokeniserRulesURL;
185   private String encoding;
186   private java.net.URL transducerGrammarURL;
187   private String annotationSetName;
188 
189 
190   public void setAnnotationSetName(String annotationSetName) {
191     this.annotationSetName = annotationSetName;
192   }
193   public String getAnnotationSetName() {
194     return annotationSetName;
195   }/////////class CustomProgressListener implements ProgressListener
196 }