| Lexicon.java |
1 /*
2 * Lexicon.java
3 *
4 * Copyright (c) 2001-2005, The University of Sheffield.
5 *
6 * This file is part of GATE (see http://gate.ac.uk/), and is free
7 * software, licenced under the GNU Library General Public License,
8 * Version 2, June 1991 (in the distribution as file licence.html,
9 * and also available at http://gate.ac.uk/gate/licence.html).
10 *
11 * HepTag was originally written by Mark Hepple, this version contains
12 * modifications by Valentin Tablan and Niraj Aswani.
13 *
14 * $Id: Lexicon.java,v 1.1 2005/09/30 14:48:12 ian_roberts Exp $
15 */
16 package hepple.postag;
17
18 /**
19 * Title: HepTag
20 * Description: Mark Hepple's POS tagger
21 * Copyright: Copyright (c) 2001
22 * Company: University of Sheffield
23 * @author Mark Hepple
24 * @version 1.0
25 */
26
27 import java.util.*;
28 import java.io.*;
29 import java.net.URL;
30
/**
 * A {@link java.util.HashMap} that maps from a lexical entry
 * ({@link java.lang.String}) to its possible POS categories
 * (a {@link java.util.List} of {@link java.lang.String}).
 *
 * The map is populated once, at construction time, from a whitespace
 * separated text file: the first token on each line is the entry and the
 * remaining tokens on that line are its categories.
 */
class Lexicon extends HashMap {

  /** The character encoding requested for reading the lexicon, or null. */
  private final String encoding;

  /**
   * Always throws: the encoding cannot be changed after construction.
   *
   * @param encoding ignored.
   * @throws IllegalStateException always.
   * @deprecated The lexicon file is read at construction time, so setting the
   * encoding later will have no effect. Use the two argument constructor to
   * set the encoding.
   */
  public void setEncoding(String encoding) {
    throw new IllegalStateException("Cannot change encoding once POS tagger "
                                    + "has been constructed. Use the three "
                                    + "argument constructor to specify "
                                    + "encoding.");
  }

  /**
   * Constructor. Reads the lexicon using the platform default character
   * encoding.
   *
   * @param lexiconURL a URL for the file containing the lexicon.
   * @throws IOException if the lexicon cannot be opened or read.
   */
  public Lexicon(URL lexiconURL) throws IOException {
    this(lexiconURL, null);
  }

  /**
   * Constructor. Reads the whole lexicon into this map and closes the
   * underlying stream before returning, whether or not reading succeeded.
   *
   * Lines that contain no tokens (empty or whitespace only) are skipped.
   * An entry with no categories is mapped to an empty list. If the same
   * entry occurs on several lines, the last occurrence wins.
   *
   * @param lexiconURL a URL for the file containing the lexicon.
   * @param encoding the character encoding to use for reading the lexicon,
   * or null to use the platform default encoding.
   * @throws IOException if the lexicon cannot be opened or read, or if the
   * named encoding is not supported.
   */
  public Lexicon(URL lexiconURL, String encoding) throws IOException {
    this.encoding = encoding;

    InputStream lexiconStream = lexiconURL.openStream();
    try {
      Reader decoder;
      if(encoding == null) {
        decoder = new InputStreamReader(lexiconStream);
      } else {
        decoder = new InputStreamReader(lexiconStream, encoding);
      }
      BufferedReader lexiconReader = new BufferedReader(decoder);

      String line = lexiconReader.readLine();
      while(line != null) {
        StringTokenizer tokens = new StringTokenizer(line);
        // A blank line has no entry token; skip it rather than letting
        // nextToken() throw NoSuchElementException.
        if(tokens.hasMoreTokens()) {
          String entry = tokens.nextToken();
          List categories = new ArrayList();
          while(tokens.hasMoreTokens()) {
            categories.add(tokens.nextToken());
          }
          put(entry, categories);
        }
        line = lexiconReader.readLine();
      }
    } finally {
      // Closing the raw stream releases the connection even when creating
      // the reader failed (e.g. unsupported encoding name).
      lexiconStream.close();
    }
  }

}//class Lexicon
93