package gate.creole.morph;

import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;

import junit.framework.*;

import gate.*;
import gate.creole.*;
import gate.creole.tokeniser.DefaultTokeniser;
import gate.util.*;
import gate.util.Files;
import gate.util.OffsetComparator;

/**
 * <p>Title: TestMorph</p>
 * <p>Description: JUnit tests for the morphological analyser (Morph)
 * processing resource, checking that verb and noun roots are found
 * correctly.</p>
 * <p>Copyright: Copyright (c) 2000</p>
 * <p>Company: University of Sheffield</p>
 * @author not attributable
 * @version 1.0
 */

public class TestMorph
    extends TestCase {

  private Morph morpher;
  private Document verbDocumentToTest, verbDocumentWithAnswers,
      nounDocumentToTest, nounDocumentWithAnswers;
  private FeatureMap params;
  private DefaultTokeniser tokeniser;
  private int counter = 0;
  private int outOf = 0;

  public TestMorph(String dummy) {
    super(dummy);
  }

  /**
   * Sets up the documents to be tested, initialises the tokeniser and
   * configures the other parameters needed by the morphological analyser.
   */
  protected void setUp() {
    try {
      // make sure the right plugin is loaded
      File pluginsHome = new File(System.getProperty(
              GateConstants.GATE_HOME_PROPERTY_NAME),
              "plugins");
      try {
        Gate.getCreoleRegister().registerDirectories(
                new File(pluginsHome, "Tools").toURI().toURL());
      } catch(Exception e) {
        throw new GateRuntimeException(e);
      }
      // create the test and answer documents
      verbDocumentToTest = Factory.newDocument(
        Gate.class.getResource(Files.getResourcePath() +
        "/gate.ac.uk/tests/morph/verbTest.dat"));
      verbDocumentWithAnswers = Factory.newDocument(
              Gate.class.getResource(Files.getResourcePath() +
              "/gate.ac.uk/tests/morph/verbAnswer.dat"));
      nounDocumentToTest = Factory.newDocument(
              Gate.class.getResource(Files.getResourcePath() +
              "/gate.ac.uk/tests/morph/nounTest.dat"));
      nounDocumentWithAnswers = Factory.newDocument(
              Gate.class.getResource(Files.getResourcePath() +
              "/gate.ac.uk/tests/morph/nounAnswer.dat"));
      // create an instance of the morphological analyser
      morpher = (Morph) Factory.createResource("gate.creole.morph.Morph");
    } catch(ResourceInstantiationException rie) {
      throw new GateRuntimeException(rie);
    }

    // set the feature name parameters for the morpher
    morpher.setAffixFeatureName("affix");
    morpher.setRootFeatureName("root");

    try {
      // finally create the tokeniser
      tokeniser = (DefaultTokeniser) Factory.createResource(
          "gate.creole.tokeniser.DefaultTokeniser");
    }
    catch (ResourceInstantiationException rie) {
      fail("Resources cannot be created for the tokeniser");
    }
  }

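  /**
   * Releases the resources created in {@link #setUp()}. This is a minimal
   * cleanup sketch added for illustration: it assumes that
   * Factory.deleteResource is sufficient to free the documents and
   * processing resources created above.
   */
  protected void tearDown() {
    Factory.deleteResource(verbDocumentToTest);
    Factory.deleteResource(verbDocumentWithAnswers);
    Factory.deleteResource(nounDocumentToTest);
    Factory.deleteResource(nounDocumentWithAnswers);
    Factory.deleteResource(tokeniser);
    Factory.deleteResource(morpher);
  }
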
  /**
   * Tests the morpher on verbs, checking whether their roots are
   * identified correctly.
   */
  public void testVerbs() {

    // run the tokeniser on the verb test document
    tokeniser.setDocument(verbDocumentToTest);
    tokeniser.setAnnotationSetName("TokeniserAS");
    try {
      tokeniser.execute();
    }
    catch (ExecutionException ee) {
      fail("Error while executing the tokeniser on the test document");
    }

    // run the tokeniser on the verb answer document
    tokeniser.setDocument(verbDocumentWithAnswers);
    tokeniser.setAnnotationSetName("TokeniserAS");
    try {
      tokeniser.execute();
    }
    catch (ExecutionException ee) {
      fail("Error while executing the tokeniser on the answer document");
    }

    // check that both documents were tokenised correctly
    assertTrue(!verbDocumentToTest.getAnnotations("TokeniserAS").isEmpty());
    assertTrue(!verbDocumentWithAnswers.getAnnotations("TokeniserAS").isEmpty());

    // tokenisation is done; now run the morphological analyser on the
    // test document
    morpher.setDocument(verbDocumentToTest);

    // compile the rules and check that the resource is created successfully
    try {
      ProcessingResource pr = (ProcessingResource) morpher.init();
      assertNotNull(pr);
    }
    catch (ResourceInstantiationException rie) {
      fail("Error occurred while compiling rules for the morphological " +
           "analyser using the default.rul file");
    }

    // collect the tokens from the test document, sorted by offset
    AnnotationSet inputAs = verbDocumentToTest.getAnnotations("TokeniserAS");
    List queryTokens = new ArrayList(inputAs.get(ANNIEConstants.
                                                 TOKEN_ANNOTATION_TYPE));
    Comparator offsetComparator = new OffsetComparator();
    Collections.sort(queryTokens, offsetComparator);

    // same procedure with the answer document
    AnnotationSet inputAs1 = verbDocumentWithAnswers.getAnnotations(
        "TokeniserAS");
    List answerTokens = new ArrayList(inputAs1.get(ANNIEConstants.
        TOKEN_ANNOTATION_TYPE));
    Collections.sort(answerTokens, offsetComparator);

    // iterate over the two token lists in parallel
    Iterator queryTokensIter = queryTokens.iterator();
    Iterator answerTokensIter = answerTokens.iterator();

    while (queryTokensIter.hasNext() && answerTokensIter.hasNext()) {

      // get the word to test
      Annotation currentQueryToken = (Annotation) queryTokensIter.next();
      String queryTokenValue = (String) currentQueryToken.getFeatures().
                                        get(ANNIEConstants.
                                            TOKEN_STRING_FEATURE_NAME);

      // get the expected root for this word
      Annotation currentAnswerToken = (Annotation) answerTokensIter.next();
      String answerTokenValue = (String) currentAnswerToken.getFeatures().
                                         get(ANNIEConstants.
                                             TOKEN_STRING_FEATURE_NAME);

      // run the morpher and compare its output with the expected root
      String rootWord = morpher.findBaseWord(queryTokenValue, "VB");
      assertEquals(answerTokenValue, rootWord);
    }
  }

  /**
   * Tests the morpher on nouns, checking whether their roots are
   * identified correctly.
   */
  public void testNouns() {

    // run the tokeniser on the noun test document
    tokeniser.setDocument(nounDocumentToTest);
    tokeniser.setAnnotationSetName("TokeniserAS");
    try {
      tokeniser.execute();
    }
    catch (ExecutionException ee) {
      fail("Error while executing the tokeniser on the test document");
    }

    // run the tokeniser on the noun answer document
    tokeniser.setDocument(nounDocumentWithAnswers);
    tokeniser.setAnnotationSetName("TokeniserAS");
    try {
      tokeniser.execute();
    }
    catch (ExecutionException ee) {
      fail("Error while executing the tokeniser on the answer document");
    }

    // check that both documents were tokenised correctly
    assertTrue(!nounDocumentToTest.getAnnotations("TokeniserAS").isEmpty());
    assertTrue(!nounDocumentWithAnswers.getAnnotations("TokeniserAS").isEmpty());

    // tokenisation is done; now run the morphological analyser on the
    // test document
    morpher.setDocument(nounDocumentToTest);

    // compile the rules and check that the resource is created successfully
    try {
      ProcessingResource pr = (ProcessingResource) morpher.init();
      assertNotNull(pr);
    }
    catch (ResourceInstantiationException rie) {
      fail("Error occurred while compiling rules for the morphological " +
           "analyser using the default.rul file");
    }

    // collect the tokens from the test document, sorted by offset
    AnnotationSet inputAs = nounDocumentToTest.getAnnotations("TokeniserAS");
    List queryTokens = new ArrayList(inputAs.get(ANNIEConstants.
                                                 TOKEN_ANNOTATION_TYPE));
    Comparator offsetComparator = new OffsetComparator();
    Collections.sort(queryTokens, offsetComparator);

    // same procedure with the answer document
    AnnotationSet inputAs1 = nounDocumentWithAnswers.getAnnotations(
        "TokeniserAS");
    List answerTokens = new ArrayList(inputAs1.get(ANNIEConstants.
        TOKEN_ANNOTATION_TYPE));
    Collections.sort(answerTokens, offsetComparator);

    // iterate over the two token lists in parallel
    Iterator queryTokensIter = queryTokens.iterator();
    Iterator answerTokensIter = answerTokens.iterator();

    while (queryTokensIter.hasNext() && answerTokensIter.hasNext()) {

      // get the word to test
      Annotation currentQueryToken = (Annotation) queryTokensIter.next();
      String queryTokenValue = (String) currentQueryToken.getFeatures().
                                        get(ANNIEConstants.
                                            TOKEN_STRING_FEATURE_NAME);

      // get the expected root for this word
      Annotation currentAnswerToken = (Annotation) answerTokensIter.next();
      String answerTokenValue = (String) currentAnswerToken.getFeatures().
                                         get(ANNIEConstants.
                                             TOKEN_STRING_FEATURE_NAME);

      // run the morpher and compare its output with the expected root
      String rootWord = morpher.findBaseWord(queryTokenValue, "NN");
      assertEquals(answerTokenValue, rootWord);
    }
  }

  public static Test suite() {
    return new TestSuite(TestMorph.class);
  }
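
  /**
   * Allows the suite to be run standalone. This is a minimal sketch added
   * for convenience: it assumes the gate.home property is set so that
   * Gate.init() can find its configuration and the Tools plugin.
   */
  public static void main(String[] args) throws Exception {
    Gate.init();
    junit.textui.TestRunner.run(suite());
  }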
}