1   /*
2    *  Copyright (c) 1998-2005, The University of Sheffield.
3    *
4    *  This file is part of GATE (see http://gate.ac.uk/), and is free
5    *  software, licenced under the GNU Library General Public License,
6    *  Version 2, June 1991 (in the distribution as file licence.html,
7    *  and also available at http://gate.ac.uk/gate/licence.html).
8    *
9    *  Valentin Tablan 17/05/01
10   *
11   *  $Id: TestSplitterTagger.java,v 1.13 2005/01/11 13:51:31 ian Exp $
12   */
13  package gate.creole;
14  
15  import java.net.URL;
16  import java.util.Iterator;
17  
18  import junit.framework.*;
19  
20  import gate.*;
21  import gate.corpora.TestDocument;
22  import gate.creole.splitter.SentenceSplitter;
23  import gate.creole.tokeniser.DefaultTokeniser;
24  import gate.util.GateException;
25  
26  /**
27   * Test code for the SentenceSplitter and the POS tagger.
28   */
29  public class TestSplitterTagger extends TestCase{
30  
31  /** Construction */
32    public TestSplitterTagger(String name) { super(name); }
33  
34    /** Fixture set up */
35    public void setUp() throws GateException {
36    } // setUp
37  
38    /** Put things back as they should be after running tests
39      * (reinitialise the CREOLE register).
40      */
41    public void tearDown() throws Exception {
42    } // tearDown
43  
44    /** Test suite routine for the test runner */
45    public static Test suite() {
46      return new TestSuite(TestSplitterTagger.class);
47    } // suite
48  
49  
50  
51    public void testSplitterTagger() throws Exception{
52      //get a document
53      Document doc = Factory.newDocument(
54        new URL(TestDocument.getTestServerName() + "tests/doc0.html")
55      );
56  
57      //tokenise the document
58      //create a tokeniser
59      FeatureMap params = Factory.newFeatureMap();
60      DefaultTokeniser tokeniser = (DefaultTokeniser) Factory.createResource(
61                            "gate.creole.tokeniser.DefaultTokeniser", params);
62      //runtime stuff
63      tokeniser.setDocument(doc);
64      tokeniser.setAnnotationSetName("testAS");
65      tokeniser.execute();
66  
67  
68      //create a splitter
69      params = Factory.newFeatureMap();
70      SentenceSplitter splitter = (SentenceSplitter) Factory.createResource(
71                            "gate.creole.splitter.SentenceSplitter", params);
72  
73      //runtime stuff
74      splitter.setDocument(doc);
75      splitter.setOutputASName("testAS");
76      splitter.setInputASName("testAS");
77      splitter.execute();
78      assertTrue(!doc.getAnnotations("testAS").
79        get(ANNIEConstants.SENTENCE_ANNOTATION_TYPE).isEmpty());
80  
81      //now check the tagger
82      //create a tagger
83      params = Factory.newFeatureMap();
84      POSTagger tagger = (POSTagger) Factory.createResource(
85                            "gate.creole.POSTagger", params);
86  
87      //runtime stuff
88      tagger.setDocument(doc);
89      tagger.setInputASName("testAS");
90      tagger.execute();
91      Iterator tokIter =doc.getAnnotations("testAS").
92        get(ANNIEConstants.TOKEN_ANNOTATION_TYPE).iterator();
93      while(tokIter.hasNext()){
94        Annotation token = (Annotation)tokIter.next();
95        String kind = (String)token.getFeatures().
96          get(ANNIEConstants.TOKEN_KIND_FEATURE_NAME);
97        if(kind.equals(ANNIEConstants.TOKEN_KIND_FEATURE_NAME))
98          assertNotNull(token.getFeatures().
99            get(ANNIEConstants.TOKEN_CATEGORY_FEATURE_NAME));
100     }
101   }
102 }