1
13
14
15 package gate.creole.splitter;
16
17 import gate.*;
18 import gate.creole.*;
19 import gate.creole.gazetteer.DefaultGazetteer;
20 import gate.event.ProgressListener;
21 import gate.event.StatusListener;
22 import gate.util.GateRuntimeException;
23 import gate.util.InvalidOffsetException;
24 import gate.util.LuckyException;
25
32 public class SentenceSplitter extends AbstractLanguageAnalyser{
33
34 public static final String
35 SPLIT_DOCUMENT_PARAMETER_NAME = "document";
36
37 public static final String
38 SPLIT_INPUT_AS_PARAMETER_NAME = "inputASName";
39
40 public static final String
41 SPLIT_OUTPUT_AS_PARAMETER_NAME = "outputASName";
42
43 public static final String
44 SPLIT_ENCODING_PARAMETER_NAME = "encoding";
45
46 public static final String
47 SPLIT_GAZ_URL_PARAMETER_NAME = "gazetteerListsURL";
48
49 public static final String
50 SPLIT_TRANSD_URL_PARAMETER_NAME = "transducerURL";
51
52 public Resource init()throws ResourceInstantiationException{
53 FeatureMap params;
55 FeatureMap features;
56
57 fireStatusChanged("Creating the gazetteer");
59 params = Factory.newFeatureMap();
60 if(gazetteerListsURL != null)
61 params.put(DefaultGazetteer.DEF_GAZ_LISTS_URL_PARAMETER_NAME,
62 gazetteerListsURL);
63 params.put(DefaultGazetteer.DEF_GAZ_ENCODING_PARAMETER_NAME, encoding);
64 features = Factory.newFeatureMap();
65 Gate.setHiddenAttribute(features, true);
66
67
68 gazetteer = (DefaultGazetteer)Factory.createResource(
69 "gate.creole.gazetteer.DefaultGazetteer",
70 params, features);
71 gazetteer.setName("Gazetteer " + System.currentTimeMillis());
72 fireProgressChanged(10);
73
74 fireStatusChanged("Creating the JAPE transducer");
76
77 params = Factory.newFeatureMap();
78 if(transducerURL != null)
79 params.put(Transducer.TRANSD_GRAMMAR_URL_PARAMETER_NAME, transducerURL);
80 params.put(Transducer.TRANSD_ENCODING_PARAMETER_NAME, encoding);
81 features = Factory.newFeatureMap();
82 Gate.setHiddenAttribute(features, true);
83
84 transducer = (Transducer)Factory.createResource(
85 "gate.creole.Transducer",
86 params, features);
87 transducer.setName("Transducer " + System.currentTimeMillis());
88
89 fireProgressChanged(100);
90 fireProcessFinished();
91
92 return this;
93 }
94
95 public void execute() throws ExecutionException{
96 interrupted = false;
97 FeatureMap params;
99 if(inputASName != null && inputASName.equals("")) inputASName = null;
100 if(outputASName != null && outputASName.equals("")) outputASName = null;
101 try{
102 fireProgressChanged(0);
103 params = Factory.newFeatureMap();
104 params.put(DefaultGazetteer.DEF_GAZ_DOCUMENT_PARAMETER_NAME, document);
105 params.put(DefaultGazetteer.DEF_GAZ_ANNOT_SET_PARAMETER_NAME, inputASName);
106 gazetteer.setParameterValues(params);
107
108 params = Factory.newFeatureMap();
109 params.put(Transducer.TRANSD_DOCUMENT_PARAMETER_NAME, document);
110 params.put(Transducer.TRANSD_INPUT_AS_PARAMETER_NAME, inputASName);
111 params.put(Transducer.TRANSD_OUTPUT_AS_PARAMETER_NAME, inputASName);
112 transducer.setParameterValues(params);
113 }catch(Exception e){
114 throw new ExecutionException(e);
115 }
116 ProgressListener pListener = null;
117 StatusListener sListener = null;
118 fireProgressChanged(5);
119
120 if(isInterrupted()) throw new ExecutionInterruptedException(
122 "The execution of the \"" + getName() +
123 "\" sentence splitter has been abruptly interrupted!");
124 pListener = new IntervalProgressListener(5, 10);
125 sListener = new StatusListener(){
126 public void statusChanged(String text){
127 fireStatusChanged(text);
128 }
129 };
130 gazetteer.addProgressListener(pListener);
131 gazetteer.addStatusListener(sListener);
132 gazetteer.execute();
133 gazetteer.removeProgressListener(pListener);
134 gazetteer.removeStatusListener(sListener);
135
136 if(isInterrupted()) throw new ExecutionInterruptedException(
138 "The execution of the \"" + getName() +
139 "\" sentence splitter has been abruptly interrupted!");
140 pListener = new IntervalProgressListener(11, 90);
141 transducer.addProgressListener(pListener);
142 transducer.addStatusListener(sListener);
143 transducer.execute();
144 transducer.removeProgressListener(pListener);
145 transducer.removeStatusListener(sListener);
146
147 AnnotationSet inputAS = (inputASName == null) ?
149 document.getAnnotations() :
150 document.getAnnotations(inputASName);
151
152 AnnotationSet outputAS = (outputASName == null) ?
153 document.getAnnotations() :
154 document.getAnnotations(outputASName);
155
156 if(inputAS != outputAS){
158 outputAS.addAll(inputAS.get(SENTENCE_ANNOTATION_TYPE));
159 }
160
161 AnnotationSet sentences = outputAS.get(SENTENCE_ANNOTATION_TYPE);
163 if(sentences == null || sentences.isEmpty()){
164 try{
166 outputAS.add(new Long(0), document.getContent().size(),
167 SENTENCE_ANNOTATION_TYPE, Factory.newFeatureMap());
168 }catch(InvalidOffsetException ioe){
169 throw new GateRuntimeException(ioe);
170 }
171 }else{
172 Long endSentences = sentences.lastNode().getOffset();
174 AnnotationSet remainingTokens = inputAS.get(TOKEN_ANNOTATION_TYPE, endSentences,
175 inputAS.lastNode().getOffset());
176 if(remainingTokens != null && !remainingTokens.isEmpty()){
177 try{
178 outputAS.add(remainingTokens.firstNode().getOffset(),
179 remainingTokens.lastNode().getOffset(),
180 SENTENCE_ANNOTATION_TYPE,
181 Factory.newFeatureMap());
182 }catch(InvalidOffsetException ioe){
183 throw new ExecutionException(ioe);
184 }
185 }
186 }
187 fireProcessFinished();
188 }
190
194 public synchronized void interrupt(){
195 interrupted = true;
196 gazetteer.interrupt();
197 transducer.interrupt();
198 }
199
200 public void setTransducerURL(java.net.URL newTransducerURL) {
201 transducerURL = newTransducerURL;
202 }
203 public java.net.URL getTransducerURL() {
204 return transducerURL;
205 }
206 DefaultGazetteer gazetteer;
207 Transducer transducer;
208 private java.net.URL transducerURL;
209 private String encoding;
210 private java.net.URL gazetteerListsURL;
211
212
213 public void setEncoding(String newEncoding) {
214 encoding = newEncoding;
215 }
216 public String getEncoding() {
217 return encoding;
218 }
219 public void setGazetteerListsURL(java.net.URL newGazetteerListsURL) {
220 gazetteerListsURL = newGazetteerListsURL;
221 }
222 public java.net.URL getGazetteerListsURL() {
223 return gazetteerListsURL;
224 }
225 public void setInputASName(String newInputASName) {
226 inputASName = newInputASName;
227 }
228
229 public String getInputASName() {
230 return inputASName;
231 }
232 public void setOutputASName(String newOutputASName) {
233 outputASName = newOutputASName;
234 }
235 public String getOutputASName() {
236 return outputASName;
237 }
238
239
240
241 private static final boolean DEBUG = false;
242 private String inputASName;
243 private String outputASName;
244 }