1
15
16 package gate.corpora;
17
18 import java.io.*;
19 import java.net.URL;
20 import java.net.UnknownHostException;
21 import java.util.*;
22
23 import junit.framework.*;
24
25 import gate.*;
26 import gate.util.Err;
27 import gate.util.GateException;
28 import gate.util.SimpleFeatureMapImpl;
29
30
32 public class TestDocument extends TestCase
33 {
34
35
36 private static final boolean DEBUG = false;
37
38
39 public TestDocument(String name) { super(name); setUp();}
40
41
42 protected static String testServer = null;
43
44
45 protected String testDocument1;
46
47
48 public void setUp() {
49
50 try{
51 testServer = Gate.getUrl().toExternalForm();
53 } catch (GateException e){
54 e.printStackTrace(Err.getPrintWriter());
55 }
56
57 testDocument1 = "tests/html/test2.htm";
58 }
60
61 public static String getTestServerName() {
62 if(testServer != null) return testServer;
63 else{
64 try { testServer = Gate.getUrl().toExternalForm(); }
65 catch(Exception e) { }
66 return testServer;
67 }
68 }
69
70
71 public void testCompareTo() throws Exception{
72 Document doc1 = null;
73 Document doc2 = null;
74 Document doc3 = null;
75
76
77 doc1 = Factory.newDocument(new URL(testServer + "tests/def"));
78 doc2 = Factory.newDocument(new URL(testServer + "tests/defg"));
79 doc3 = Factory.newDocument(new URL(testServer + "tests/abc"));
80
81 assertTrue(doc1.compareTo(doc2) < 0);
82 assertTrue(doc1.compareTo(doc1) == 0);
83 assertTrue(doc1.compareTo(doc3) > 0);
84
85 }
87
88
89 public void testOriginalContentPreserving() throws Exception {
90 Document doc = null;
91 FeatureMap params;
92 String encoding = "UTF-8";
93 String origContent;
94
95 params = Factory.newFeatureMap();
97 params.put(Document.DOCUMENT_URL_PARAMETER_NAME, new URL(testServer + testDocument1));
98 params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, encoding);
99 doc =
100 (Document) Factory.createResource("gate.corpora.DocumentImpl", params);
101
102 origContent = (String) doc.getFeatures().get(
103 GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME);
104
105 assertNull(
106 "The original content should not be preserved without demand.",
107 origContent);
108
109 params = Factory.newFeatureMap();
110 params.put(Document.DOCUMENT_URL_PARAMETER_NAME,
111 new URL(testServer + testDocument1));
112 params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, encoding);
113 params.put(Document.DOCUMENT_PRESERVE_CONTENT_PARAMETER_NAME, new Boolean(true));
114 doc =
115 (Document) Factory.createResource("gate.corpora.DocumentImpl", params);
116
117 origContent = (String) doc.getFeatures().get(
118 GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME);
119
120 assertNotNull("The original content is not preserved on demand.",
121 origContent);
122
123 assertTrue("The original content size is zerro.", origContent.length()>0);
124 }
126
127 public void testLotsOfThings() {
128
129 URL u = null;
131 try{
132 u = new URL(testServer + testDocument1);
133 } catch (Exception e){
134 e.printStackTrace(Err.getPrintWriter());
135 }
136
137 BufferedReader uReader = null;
139 try {
140 uReader = new BufferedReader(new InputStreamReader(u.openStream()));
141 assertEquals(uReader.readLine(), "<HTML>");
142 } catch(UnknownHostException e) { return;
144 } catch(IOException e) {
145 fail(e.toString());
146 }
147
157 }
159
160 public void testDocRender() throws Exception
161 {
162 Document doc = Factory.newDocument("Hi Mom");
163 doc.getAnnotations().add(new Long(0), new Long(2),
164 "Foo", new SimpleFeatureMapImpl());
165 String content = doc.toXml(doc.getAnnotations(), false);
166
167 assertEquals("<Foo>Hi</Foo> Mom", content);
169 }
170
171
172
176 public static void verifyNodeIdConsistency(gate.Document doc)throws Exception{
177 if (doc == null) return;
178 Map offests2NodeId = new HashMap();
179 AnnotationSet annotSet = doc.getAnnotations();
181 verifyNodeIdConsistency(annotSet,offests2NodeId, doc);
182 if (doc.getNamedAnnotationSets() != null){
184 Iterator namedAnnotSetsIter =
185 doc.getNamedAnnotationSets().values().iterator();
186 while(namedAnnotSetsIter.hasNext()){
187 verifyNodeIdConsistency((gate.AnnotationSet) namedAnnotSetsIter.next(),
188 offests2NodeId,
189 doc);
190 } } offests2NodeId = null;
194 }
196
202 public static void verifyNodeIdConsistency(gate.AnnotationSet annotSet,
203 Map offests2NodeId,
204 gate.Document doc)
205 throws Exception{
206
207 if (annotSet == null || offests2NodeId == null) return;
208
209 Iterator iter = annotSet.iterator();
210 while(iter.hasNext()){
211 Annotation annot = (Annotation) iter.next();
212 String annotSetName = (annotSet.getName() == null)? "Default":
213 annotSet.getName();
214 if (offests2NodeId.containsKey(annot.getStartNode().getOffset())){
216 assertEquals("Found two different node IDs for the same offset( "+
217 annot.getStartNode().getOffset()+ " ).\n" +
218 "START NODE is buggy for annotation(" + annot +
219 ") from annotation set " + annotSetName + " of GATE document :" +
220 doc.getSourceUrl(),
221 annot.getStartNode().getId(),
222 (Integer) offests2NodeId.get(annot.getStartNode().getOffset()));
223 } if (offests2NodeId.containsKey(annot.getEndNode().getOffset())){
226 assertEquals("Found two different node IDs for the same offset("+
227 annot.getEndNode().getOffset()+ ").\n" +
228 "END NODE is buggy for annotation(" + annot+ ") from annotation"+
229 " set " + annotSetName +" of GATE document :" + doc.getSourceUrl(),
230 annot.getEndNode().getId(),
231 (Integer) offests2NodeId.get(annot.getEndNode().getOffset()));
232 } offests2NodeId.put(annot.getStartNode().getOffset(),
234 annot.getStartNode().getId());
235 offests2NodeId.put(annot.getEndNode().getOffset(),
236 annot.getEndNode().getId());
237 } }
240
241 public static Test suite() {
242 return new TestSuite(TestDocument.class);
243 }
245 }