1   package com.ontotext.gate.gazetteer;
2   /*
 *  TestHashGazetteer.java
4    *
5    *  OntoText Lab.
6    *
7    *  borislav popov , 09/11/2001
8    *
9    *  $Id: TestHashGazetteer.java,v 1.2 2004/07/27 16:14:32 kalina Exp $
10   */
11  
12  import java.util.*;
13  import java.io.*;
14  import java.net.*;
15  import java.beans.*;
16  import java.lang.reflect.*;
17  import junit.framework.*;
18  
19  import gate.*;
20  import gate.util.*;
21  import gate.creole.*;
22  import gate.corpora.TestDocument;
23  
24  /**
25   * Tests the HashGazetteer.
26   */
27  public class TestHashGazetteer extends TestCase {
28  
29    private static final String GAZ_AS = "GazetteerAS";
30    public TestHashGazetteer(String name) {
31      super(name);
32    }
33  
34    /** Fixture set up */
35    public void setUp() throws Exception {
36    }
37  
38    public void tearDown() throws Exception {
39    } // tearDown
40  
41    /** Test the default tokeniser */
42    public void testHashGazetteer() throws Exception {
43      //get a document
44      Document doc = Factory.newDocument(
45        new URL(TestDocument.getTestServerName() + "tests/doc0.html")
46      );
47  
48      //create a default gazetteer
49      FeatureMap params = Factory.newFeatureMap();
50      HashGazetteer gaz = (HashGazetteer) Factory.createResource(
51                            "com.ontotext.gate.gazetteer.HashGazetteer", params);
52  
53      //runtime stuff
54      gaz.setDocument(doc);
55      gaz.setAnnotationSetName(GAZ_AS);
56      gaz.execute();
57  
58  //    dumpAnnotationSet(doc.getAnnotations(Gaz_AS));
59  
60      assertTrue("the Annotation set resulting of the execution of the OntoText "
61              +"Natural Gazetteer is empty."
62              ,!doc.getAnnotations(GAZ_AS).isEmpty());
63      //check whether the annotations are as expected
64  
65  
66  //    assertTrue("Found in "+ doc.getSourceUrl().getFile()+ " "+
67  //      doc.getAnnotations(GAZ_AS).size() +
68  //      " Lookup annotations, instead of the expected 53.",
69  //      doc.getAnnotations(GAZ_AS).size()== 53);
70  
71  /*very complex compare */
72  //    assertTrue("the Annotation set resulting from the OntoText Natural Gazetteer "
73  //        +"is not exactly the same as expected. Possible reasons: change in the test file "
74  //        +"doc0.html or malfunctioning of the gazetteer"
75  //        ,EqualAnnotationSets(doc.getAnnotations(GAZ_AS)));
76  
77    } // testHashGazetteer();
78  
79    /** Test suite routine for the test runner */
80    public static Test suite() {
81      return new TestSuite(TestHashGazetteer.class);
82    } // suite
83  
84    public static void main(String[] args) {
85      try{
86        Gate.init();
87        TestHashGazetteer testGaz = new TestHashGazetteer("");
88        testGaz.setUp();
89        testGaz.testHashGazetteer();
90        testGaz.tearDown();
91      } catch(Exception e) {
92        e.printStackTrace();
93      }
94    } // main
95  
96  
97    /** dumps the annotation set to system ouput
98     * @param marks an annotation set
99     */
100   private void dumpAnnotationSet(AnnotationSet marks) {
101     if (marks != null) {
102         Iterator iter = marks.iterator();
103         while(iter.hasNext()) {
104           Annotation lookup = (Annotation) iter.next();
105           FeatureMap lookFeats = lookup.getFeatures();
106           String majorStr = (String) lookFeats.get("majorType");
107           String minorStr = (String) lookFeats.get("minorType");
108           String position = " "+lookup.getStartNode().getOffset()+"-"+ lookup.getEndNode().getOffset();
109           System.out.println(position+":"+majorStr + "." + minorStr + Strings.getNl());
110         }
111     } //if
112   } // void dumpAnnotationSet(AnnotationSet set)
113 
114   /** Tests whether the annotation set has the same elements
115    *  as statet in DESIRED_ANNOTATIONS
116    *  @param marks an annotation set
117    *  @return true if they match, false otherwise.
118    */
119   private boolean EqualAnnotationSets(AnnotationSet marks) {
120     boolean areEqual = true;
121     String currentMark = null;
122     int index = 0;
123 
124     areEqual = areEqual && (marks.size() == DESIRED_ANNOTATIONS.length);
125 
126     if (marks != null) {
127       Iterator iter = marks.iterator();
128 
129       while(iter.hasNext() & areEqual) {
130         Annotation lookup = (Annotation) iter.next();
131         FeatureMap lookFeats = lookup.getFeatures();
132         String majorStr = (String) lookFeats.get("majorType");
133         String minorStr = (String) lookFeats.get("minorType");
134         String position = ""+lookup.getStartNode().getOffset()+"-"+ lookup.getEndNode().getOffset();
135 
136         currentMark = position+":"+majorStr + "." + minorStr;
137         areEqual = areEqual && (currentMark.equals(DESIRED_ANNOTATIONS[index]));
138         index++;
139       }
140     } else {
141       areEqual = false;
142     } // else
143 
144 
145     return areEqual;
146   } //  boolean testGazAnnotationSet(AnnotationSet marks) {
147 
148   private static String [] DESIRED_ANNOTATIONS =
149   {
150     "1067-1072:date_unit.null",
151 
152     "1033-1038:person_first.male",
153 
154     "1029-1032:title.male",
155 
156     "1014-1023:jobtitle.null",
157 
158     "1008-1013:jobtitle.null",
159 
160     "995-1003:jobtitle.null",
161 
162     "846-853:number.null",
163 
164     "814-822:date.month",
165 
166     "799-802:title.male",
167 
168     "765-768:org_ending.null",
169 
170     "765-768:cdg.null",
171 
172     "753-764:org_key.null",
173 
174     "738-741:org_ending.null",
175 
176     "738-741:cdg.null",
177 
178     "723-737:org_key.null",
179 
180     "713-722:organization.company",
181 
182     "696-701:cdg.null",
183 
184     "677-686:organization.company",
185 
186     "664-673:jobtitle.null",
187 
188     "658-663:jobtitle.null",
189 
190     "645-653:jobtitle.null",
191 
192     "636-641:date_unit.null",
193 
194     "614-616:stop.null",
195 
196     "603-613:organization.company",
197 
198     "582-587:cdg.null",
199 
200     "555-576:organization.company",
201 
202     "546-549:org_ending.null",
203 
204     "546-549:cdg.null",
205 
206     "529-538:jobtitle.null",
207 
208     "523-528:jobtitle.null",
209 
210     "510-518:jobtitle.null",
211 
212     "484-487:title.male",
213 
214     "465-473:jobtitle.null",
215 
216     "424-429:person_first.male",
217 
218     "414-420:person_first.male",
219 
220     "394-399:date_unit.null",
221 
222     "379-382:title.male",
223 
224     "350-373:jobtitle.null",
225 
226     "337-345:jobtitle.null",
227 
228     "320-325:person_first.male",
229 
230     "295-298:org_ending.null",
231 
232     "295-298:cdg.null",
233 
234     "274-277:location.province",
235 
236     "265-272:location.city",
237 
238     "182-189:cdg.null",
239 
240     "161-165:person_first.female",
241 
242     "100-115:title.civilian",
243 
244     "100-115:jobtitle.null",
245 
246     "87-95:title.civilian"
247   }; // private static String [] DESIRED_ANNOTATIONS
248 } // TestHashGazetteer
249