1 package gate.creole.tokeniser;
2
3 import gate.*;
4 import gate.creole.*;
5 import gate.event.ProgressListener;
6 import gate.event.StatusListener;
7 import gate.util.Out;
8
9
15 public class DefaultTokeniser extends AbstractLanguageAnalyser {
16
17 public static final String
18 DEF_TOK_DOCUMENT_PARAMETER_NAME = "document";
19
20 public static final String
21 DEF_TOK_ANNOT_SET_PARAMETER_NAME = "annotationSetName";
22
23 public static final String
24 DEF_TOK_TOKRULES_URL_PARAMETER_NAME = "tokeniserRulesURL";
25
26 public static final String
27 DEF_TOK_GRAMRULES_URL_PARAMETER_NAME = "transducerGrammarURL";
28
29 public static final String
30 DEF_TOK_ENCODING_PARAMETER_NAME = "encoding";
31
32 public DefaultTokeniser() {
33 }
34
35
36
37 public Resource init() throws ResourceInstantiationException{
38 try{
39 super.init();
41 FeatureMap params;
43 FeatureMap features;
44
45 fireStatusChanged("Creating a tokeniser");
47 params = Factory.newFeatureMap();
48 if(tokeniserRulesURL != null)
49 params.put(SimpleTokeniser.SIMP_TOK_RULES_URL_PARAMETER_NAME,
50 tokeniserRulesURL);
51 params.put(SimpleTokeniser.SIMP_TOK_ENCODING_PARAMETER_NAME, encoding);
52 if(DEBUG) Out.prln("Parameters for the tokeniser: \n" + params);
53 features = Factory.newFeatureMap();
54 Gate.setHiddenAttribute(features, true);
55 tokeniser = (SimpleTokeniser)Factory.createResource(
56 "gate.creole.tokeniser.SimpleTokeniser",
57 params, features);
58 tokeniser.setName("Tokeniser " + System.currentTimeMillis());
59
60 fireProgressChanged(50);
61
62 fireStatusChanged("Creating a Jape transducer");
64 params.clear();
65 if(transducerGrammarURL != null)
66 params.put(Transducer.TRANSD_GRAMMAR_URL_PARAMETER_NAME,
67 transducerGrammarURL);
68 params.put(Transducer.TRANSD_ENCODING_PARAMETER_NAME, encoding);
69 if(DEBUG) Out.prln("Parameters for the transducer: \n" + params);
70 features.clear();
71 Gate.setHiddenAttribute(features, true);
72 transducer = (Transducer)Factory.createResource("gate.creole.Transducer",
73 params, features);
74 fireProgressChanged(100);
75 fireProcessFinished();
76 transducer.setName("Transducer " + System.currentTimeMillis());
77 }catch(ResourceInstantiationException rie){
78 throw rie;
79 }catch(Exception e){
80 throw new ResourceInstantiationException(e);
81 }
82 return this;
83 }
84
85 public void execute() throws ExecutionException{
86 interrupted = false;
87 try{
89 FeatureMap params = Factory.newFeatureMap();
90 fireProgressChanged(0);
91 params.put(SimpleTokeniser.SIMP_TOK_DOCUMENT_PARAMETER_NAME, document);
93 params.put(
94 SimpleTokeniser.SIMP_TOK_ANNOT_SET_PARAMETER_NAME, annotationSetName);
95 tokeniser.setParameterValues(params);
96
97 params.clear();
99 params.put(Transducer.TRANSD_DOCUMENT_PARAMETER_NAME, document);
100 params.put(Transducer.TRANSD_INPUT_AS_PARAMETER_NAME, annotationSetName);
101 params.put(Transducer.TRANSD_OUTPUT_AS_PARAMETER_NAME, annotationSetName);
102 transducer.setParameterValues(params);
103 }catch(ResourceInstantiationException rie){
104 throw new ExecutionException(rie);
105 }
106
107 ProgressListener pListener = null;
108 StatusListener sListener = null;
109 fireProgressChanged(5);
110 pListener = new IntervalProgressListener(5, 50);
111 sListener = new StatusListener(){
112 public void statusChanged(String text){
113 fireStatusChanged(text);
114 }
115 };
116
117 if(isInterrupted()) throw new ExecutionInterruptedException(
119 "The execution of the \"" + getName() +
120 "\" tokeniser has been abruptly interrupted!");
121 tokeniser.addProgressListener(pListener);
122 tokeniser.addStatusListener(sListener);
123 try{
124 tokeniser.execute();
125 }catch(ExecutionInterruptedException eie){
126 throw new ExecutionInterruptedException(
127 "The execution of the \"" + getName() +
128 "\" tokeniser has been abruptly interrupted!");
129 }
130 tokeniser.removeProgressListener(pListener);
131 tokeniser.removeStatusListener(sListener);
132
133 if(isInterrupted()) throw new ExecutionInterruptedException(
135 "The execution of the \"" + getName() +
136 "\" tokeniser has been abruptly interrupted!");
137 pListener = new IntervalProgressListener(50, 100);
138 transducer.addProgressListener(pListener);
139 transducer.addStatusListener(sListener);
140
141 transducer.execute();
142 transducer.removeProgressListener(pListener);
143 transducer.removeStatusListener(sListener);
144 }
146
147
151 public synchronized void interrupt(){
152 interrupted = true;
153 tokeniser.interrupt();
154 transducer.interrupt();
155 }
156
157 public void setTokeniserRulesURL(java.net.URL tokeniserRulesURL) {
158 this.tokeniserRulesURL = tokeniserRulesURL;
159 }
160 public java.net.URL getTokeniserRulesURL() {
161 return tokeniserRulesURL;
162 }
163 public void setEncoding(String encoding) {
164 this.encoding = encoding;
165 }
166 public String getEncoding() {
167 return encoding;
168 }
169 public void setTransducerGrammarURL(java.net.URL transducerGrammarURL) {
170 this.transducerGrammarURL = transducerGrammarURL;
171 }
172 public java.net.URL getTransducerGrammarURL() {
173 return transducerGrammarURL;
174 }
175
177 private static final boolean DEBUG = false;
178
179
180 protected SimpleTokeniser tokeniser;
181
182
183 protected Transducer transducer;
184 private java.net.URL tokeniserRulesURL;
185 private String encoding;
186 private java.net.URL transducerGrammarURL;
187 private String annotationSetName;
188
189
190 public void setAnnotationSetName(String annotationSetName) {
191 this.annotationSetName = annotationSetName;
192 }
193 public String getAnnotationSetName() {
194 return annotationSetName;
195 }}