1   /*
2    *  TestDocument.java
3    *
4    *  Copyright (c) 1998-2005, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Hamish Cunningham, 21/Jan/00
12   *
13   *  $Id: TestDocument.java,v 1.31 2005/03/30 16:01:08 julien Exp $
14   */
15  
16  package gate.corpora;
17  
18  import java.io.*;
19  import java.net.URL;
20  import java.net.UnknownHostException;
21  import java.util.*;
22  
23  import junit.framework.*;
24  
25  import gate.*;
26  import gate.util.Err;
27  import gate.util.GateException;
28  import gate.util.SimpleFeatureMapImpl;
29  
30  /** Tests for the Document classes
31    */
32  public class TestDocument extends TestCase
33  {
34  
35    /** Debug flag */
36    private static final boolean DEBUG = false;
37  
38    /** Construction */
39    public TestDocument(String name) { super(name); setUp();}
40  
41    /** Base of the test server URL */
42    protected static String testServer = null;
43  
44    /** Name of test document 1 */
45    protected String testDocument1;
46  
47    /** Fixture set up */
48    public void setUp() {
49  
50      try{
51  //      Gate.init();
52        testServer = Gate.getUrl().toExternalForm();
53      } catch (GateException e){
54        e.printStackTrace(Err.getPrintWriter());
55      }
56  
57      testDocument1 = "tests/html/test2.htm";
58    } // setUp
59  
60    /** Get the name of the test server */
61    public static String getTestServerName() {
62      if(testServer != null) return testServer;
63      else{
64        try { testServer = Gate.getUrl().toExternalForm(); }
65        catch(Exception e) { }
66        return testServer;
67      }
68    }
69  
70    /** Test ordering */
71    public void testCompareTo() throws Exception{
72      Document doc1 = null;
73      Document doc2 = null;
74      Document doc3 = null;
75  
76  
77      doc1 = Factory.newDocument(new URL(testServer + "tests/def"));
78      doc2 = Factory.newDocument(new URL(testServer + "tests/defg"));
79      doc3 = Factory.newDocument(new URL(testServer + "tests/abc"));
80  
81      assertTrue(doc1.compareTo(doc2) < 0);
82      assertTrue(doc1.compareTo(doc1) == 0);
83      assertTrue(doc1.compareTo(doc3) > 0);
84  
85    } // testCompareTo()
86  
87    /** Test loading of the original document content */
88  
89    public void testOriginalContentPreserving() throws Exception {
90      Document doc = null;
91      FeatureMap params;
92      String encoding = "UTF-8";
93      String origContent;
94  
95      // test the default value of preserve content flag
96      params = Factory.newFeatureMap();
97      params.put(Document.DOCUMENT_URL_PARAMETER_NAME, new URL(testServer + testDocument1));
98      params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, encoding);
99      doc =
100       (Document) Factory.createResource("gate.corpora.DocumentImpl", params);
101 
102     origContent = (String) doc.getFeatures().get(
103       GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME);
104 
105     assertNull(
106       "The original content should not be preserved without demand.",
107       origContent);
108 
109     params = Factory.newFeatureMap();
110     params.put(Document.DOCUMENT_URL_PARAMETER_NAME,
111       new URL(testServer + testDocument1));
112     params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, encoding);
113     params.put(Document.DOCUMENT_PRESERVE_CONTENT_PARAMETER_NAME, new Boolean(true));
114     doc =
115       (Document) Factory.createResource("gate.corpora.DocumentImpl", params);
116 
117     origContent = (String) doc.getFeatures().get(
118       GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME);
119 
120     assertNotNull("The original content is not preserved on demand.",
121               origContent);
122 
123     assertTrue("The original content size is zerro.", origContent.length()>0);
124   } // testOriginalContentPreserving()
125 
126   /** A comprehensive test */
127   public void testLotsOfThings() {
128 
129     // check that the test URL is available
130     URL u = null;
131     try{
132       u = new URL(testServer + testDocument1);
133     } catch (Exception e){
134       e.printStackTrace(Err.getPrintWriter());
135     }
136 
137     // get some text out of the test URL
138     BufferedReader uReader = null;
139     try {
140       uReader = new BufferedReader(new InputStreamReader(u.openStream()));
141       assertEquals(uReader.readLine(), "<HTML>");
142     } catch(UnknownHostException e) { // no network connection
143       return;
144     } catch(IOException e) {
145       fail(e.toString());
146     }
147     /*
148     Document doc = new TextualDocument(testServer + testDocument1);
149     AnnotationGraph ag = new AnnotationGraphImpl();
150 
151     Tokeniser t = ...   doc.getContent()
152     tokenise doc using java stream tokeniser
153 
154     add several thousand token annotation
155     select a subset
156     */
157   } // testLotsOfThings
158 
159   
160   public void testDocRender() throws Exception
161   {
162       Document doc = Factory.newDocument("Hi Mom");
163       doc.getAnnotations().add(new Long(0), new Long(2),
164           "Foo", new SimpleFeatureMapImpl());
165       String content = doc.toXml(doc.getAnnotations(), false);
166 
167       // Will fail, content is "<Foo>Hi Mom</Foo>"
168       assertEquals("<Foo>Hi</Foo> Mom", content);
169   }
170   
171   
172   /** The reason this is method begins with verify and not with test is that it
173    *  gets called by various other test methods. It is somehow a utility test
174    *  method. It should be called on all gate documents having annotation sets.
175    */
176   public static void verifyNodeIdConsistency(gate.Document doc)throws Exception{
177       if (doc == null) return;
178       Map offests2NodeId = new HashMap();
179       // Test the default annotation set
180       AnnotationSet annotSet = doc.getAnnotations();
181       verifyNodeIdConsistency(annotSet,offests2NodeId, doc);
182       // Test all named annotation sets
183       if (doc.getNamedAnnotationSets() != null){
184         Iterator namedAnnotSetsIter =
185                               doc.getNamedAnnotationSets().values().iterator();
186         while(namedAnnotSetsIter.hasNext()){
187          verifyNodeIdConsistency((gate.AnnotationSet) namedAnnotSetsIter.next(),
188                                                                  offests2NodeId,
189                                                                  doc);
190         }// End while
191       }// End if
192       // Test suceeded. The map is not needed anymore.
193       offests2NodeId = null;
194   }// verifyNodeIdConsistency();
195 
196   /** This metod runs the test over an annotation Set. It is called from her
197    *  older sister. Se above.
198    *  @param annotSet is the annotation set being tested.
199    *  @param offests2NodeId is the Map used to test the consistency.
200    *  @param doc is used in composing the assert error messsage.
201    */
202   public static void verifyNodeIdConsistency(gate.AnnotationSet annotSet,
203                                              Map  offests2NodeId,
204                                              gate.Document doc)
205                                                               throws Exception{
206 
207       if (annotSet == null || offests2NodeId == null) return;
208 
209       Iterator iter = annotSet.iterator();
210       while(iter.hasNext()){
211         Annotation annot = (Annotation) iter.next();
212         String annotSetName = (annotSet.getName() == null)? "Default":
213                                                           annotSet.getName();
214         // check the Start node
215         if (offests2NodeId.containsKey(annot.getStartNode().getOffset())){
216              assertEquals("Found two different node IDs for the same offset( "+
217              annot.getStartNode().getOffset()+ " ).\n" +
218              "START NODE is buggy for annotation(" + annot +
219              ") from annotation set " + annotSetName + " of GATE document :" +
220              doc.getSourceUrl(),
221              annot.getStartNode().getId(),
222              (Integer) offests2NodeId.get(annot.getStartNode().getOffset()));
223         }// End if
224         // Check the End node
225         if (offests2NodeId.containsKey(annot.getEndNode().getOffset())){
226              assertEquals("Found two different node IDs for the same offset("+
227              annot.getEndNode().getOffset()+ ").\n" +
228              "END NODE is buggy for annotation(" + annot+ ") from annotation"+
229              " set " + annotSetName +" of GATE document :" + doc.getSourceUrl(),
230              annot.getEndNode().getId(),
231              (Integer) offests2NodeId.get(annot.getEndNode().getOffset()));
232         }// End if
233         offests2NodeId.put(annot.getStartNode().getOffset(),
234                                                   annot.getStartNode().getId());
235         offests2NodeId.put(annot.getEndNode().getOffset(),
236                                                     annot.getEndNode().getId());
237     }// End while
238   }//verifyNodeIdConsistency();
239 
240   /** Test suite routine for the test runner */
241   public static Test suite() {
242     return new TestSuite(TestDocument.class);
243   } // suite
244 
245 } // class TestDocument
246