Lexicon.java |
1 /* 2 * Lexicon.java 3 * 4 * Copyright (c) 2001-2005, The University of Sheffield. 5 * 6 * This file is part of GATE (see http://gate.ac.uk/), and is free 7 * software, licenced under the GNU Library General Public License, 8 * Version 2, June 1991 (in the distribution as file licence.html, 9 * and also available at http://gate.ac.uk/gate/licence.html). 10 * 11 * HepTag was originally written by Mark Hepple, this version contains 12 * modifications by Valentin Tablan and Niraj Aswani. 13 * 14 * $Id: Lexicon.java,v 1.1 2005/09/30 14:48:12 ian_roberts Exp $ 15 */ 16 package hepple.postag; 17 18 /** 19 * Title: HepTag 20 * Description: Mark Hepple's POS tagger 21 * Copyright: Copyright (c) 2001 22 * Company: University of Sheffield 23 * @author Mark Hepple 24 * @version 1.0 25 */ 26 27 import java.util.*; 28 import java.io.*; 29 import java.net.URL; 30 31 /** 32 * A {@link java.util.HashMap} that maps from lexical entry 33 * ({@link java.lang.String}) to possible POS categories 34 * ({@link java.util.List} 35 */ 36 class Lexicon extends HashMap { 37 38 /** Niraj */ 39 private String encoding; 40 41 42 /** 43 * @deprecated The lexicon file is read at construction time, so setting the 44 * encoding later will have no effect. Use the two argument constructor to 45 * set the encoding. 46 */ 47 public void setEncoding(String encoding) { 48 throw new IllegalStateException("Cannot change encoding once POS tagger " 49 + "has been constructed. Use the three " 50 + "argument constructor to specify " 51 + "encoding."); 52 } 53 /* End */ 54 55 /** 56 * Constructor. 57 * @param lexiconURL an URL for the file contianing the lexicon. 58 */ 59 public Lexicon(URL lexiconURL) throws IOException{ 60 this(lexiconURL, null); 61 } 62 63 /** 64 * Constructor. 65 * @param lexiconURL an URL for the file contianing the lexicon. 66 * @param encoding the character encoding to use for reading the lexicon. 67 */ 68 public Lexicon(URL lexiconURL, String encoding) throws IOException{ 69 this.encoding = encoding; 70 String line; 71 BufferedReader lexiconReader; 72 if(encoding == null) { 73 lexiconReader = new BufferedReader(new InputStreamReader(lexiconURL.openStream())); 74 } else { 75 lexiconReader = new BufferedReader(new InputStreamReader(lexiconURL.openStream(),encoding)); 76 } 77 78 line = lexiconReader.readLine(); 79 String entry; 80 List categories; 81 while(line != null){ 82 StringTokenizer tokens = new StringTokenizer(line); 83 entry = tokens.nextToken(); 84 categories = new ArrayList(); 85 while(tokens.hasMoreTokens()) categories.add(tokens.nextToken()); 86 put(entry, categories); 87 88 line = lexiconReader.readLine(); 89 }//while(line != null) 90 }//public Lexicon(URL lexiconURL) throws IOException 91 92 }//class Lexicon 93