1 package gate.creole.morph;
2
3
4
20
21
22 import java.net.URL;
23 import java.util.Iterator;
24
25 import gate.*;
26 import gate.creole.*;
27 import gate.util.GateRuntimeException;
28
29
33 public class Morph
34 extends AbstractLanguageAnalyser
35 implements ProcessingResource {
36
37
38
39 private gate.Document document;
40
41
42 private URL rulesFile;
43
44
45 private Interpret interpret;
46
47
48 private String rootFeatureName;
49
50
51 private String affixFeatureName;
52
53
54 private String annotationSetName;
55
56
57 private Boolean caseSensitive;
58
59 private Boolean considerPOSTag;
60
61
62 public Morph() {
63 }
64
65
72 public Resource init() throws ResourceInstantiationException {
73 interpret = new Interpret();
74 if (rulesFile == null) {
75 throw new ResourceInstantiationException("\n\n No Rule File Provided");
77 }
78
79 interpret.init(rulesFile);
81
82 return this;
83 }
84
85
96 public void execute() throws ExecutionException {
97 fireProgressChanged(0);
99
100 if (document == null) {
102 fireProcessFinished();
103 throw new GateRuntimeException("No document to process!");
104 }
105
106 AnnotationSet inputAs = (annotationSetName == null ||
109 annotationSetName.length() == 0) ?
110 document.getAnnotations() :
111 document.getAnnotations(annotationSetName);
112
113 AnnotationSet tokens = inputAs.get(TOKEN_ANNOTATION_TYPE);
116 if (tokens == null || tokens.isEmpty()) {
117 fireProcessFinished();
118 throw new ExecutionException("Either "+document.getName()+" does not have any contents or \n run the POS Tagger first and then Morpher");
119 }
122
123 Iterator tokensIter = tokens.iterator();
125
126 int tokenSize = tokens.size();
128 int tokensProcessed = 0;
129 int lastReport = 0;
130
131 while (tokensIter != null && tokensIter.hasNext()) {
133 Annotation currentToken = (Annotation) tokensIter.next();
134 String tokenValue = (String) (currentToken.getFeatures().
135 get(TOKEN_STRING_FEATURE_NAME));
136 if(considerPOSTag != null && considerPOSTag.booleanValue() && !currentToken.getFeatures().containsKey(TOKEN_CATEGORY_FEATURE_NAME)) {
137 fireProcessFinished();
138 throw new ExecutionException("please run the POS Tagger first and then Morpher");
139 }
142
143 String posCategory = (String) (currentToken.getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME));
144 if(posCategory == null) {
145 posCategory = "*";
146 }
147
148 if(considerPOSTag == null || !considerPOSTag.booleanValue()) {
149 posCategory = "*";
150 }
151
152 if(!caseSensitive.booleanValue()) {
154 tokenValue = tokenValue.toLowerCase();
155 }
156
157 String baseWord = interpret.runMorpher(tokenValue, posCategory);
158 String affixWord = interpret.getAffix();
159
160 if (affixWord != null) {
162 currentToken.getFeatures().put(affixFeatureName, affixWord);
163 }
164 currentToken.getFeatures().put(rootFeatureName, baseWord);
166
167 tokensProcessed++;
169 if(tokensProcessed - lastReport > 100){
170 lastReport = tokensProcessed;
171 fireProgressChanged(tokensProcessed * 100 /tokenSize);
172 }
173 }
174 fireProcessFinished();
176 }
177
178
183 public void setDocument(gate.Document document) {
184 this.document = document;
185 }
186
187
188
193 public String findBaseWord(String word, String cat) {
194 return interpret.runMorpher(word, cat);
195 }
196
197
202 public String findAffix(String word, String cat) {
203 interpret.runMorpher(word, cat);
204 return interpret.getAffix();
205 }
206
207
208
211 public gate.Document getDocument() {
212 return this.document;
213 }
214
215
219 public void setRulesFile(URL rulesFile) {
220 this.rulesFile = rulesFile;
221 }
222
223
226 public URL getRulesFile() {
227 return this.rulesFile;
228 }
229
230
234 public String getRootFeatureName() {
235 return rootFeatureName;
236 }
237
238
242 public void setRootFeatureName(String rootFeatureName) {
243 this.rootFeatureName = rootFeatureName;
244 }
245
246
250 public String getAffixFeatureName() {
251 return affixFeatureName;
252 }
253
254
258 public void setAffixFeatureName(String affixFeatureName) {
259 this.affixFeatureName = affixFeatureName;
260 }
261
262
266 public String getAnnotationSetName() {
267 return annotationSetName;
268 }
269
270
274 public void setAnnotationSetName(String annotationSetName) {
275 this.annotationSetName = annotationSetName;
276 }
277
278
282 public Boolean getCaseSensitive() {
283 return this.caseSensitive;
284 }
285
286
290 public void setCaseSensitive(java.lang.Boolean value) {
291 this.caseSensitive = value;
292 }
293
294 public Boolean getConsiderPOSTag() {
295 return this.considerPOSTag;
296 }
297
298 public void setConsiderPOSTag(Boolean value) {
299 this.considerPOSTag = value;
300 }
301 }
302