GateEventStream.java |
1 /* 2 * Copyright (c) 2004, The University of Sheffield. 3 * 4 * This file is part of GATE (see http://gate.ac.uk/), and is free 5 * software, licenced under the GNU Library General Public License, 6 * Version 2, June 1991 (in the distribution as file licence.html, 7 * and also available at http://gate.ac.uk/gate/licence.html). 8 * 9 * Mike Dowman 30-03-2004 10 * 11 * $Id: GateEventStream.java,v 1.2 2004/04/28 11:25:29 valyt Exp $ 12 * 13 */ 14 15 package gate.creole.ml.maxent; 16 17 /** 18 * This class is used by MaxentWrapper. When created, it is passed a data 19 * structure containg all the training data for the classifier. It can then 20 * provide this data to the maxent model itself, as needed. 21 */ 22 public class GateEventStream implements opennlp.maxent.EventStream { 23 24 boolean DEBUG=false; 25 26 final java.util.List trainingData; 27 final int indexOfOutcome; 28 29 int index=0; 30 31 /** 32 * This constructor stores all the training data in the object when the object 33 * is created. 34 * 35 * @param newTrainingData A List of Lists of String objects. Each String is 36 * a maxent feature or outcome. 37 * @param newIndexOfOutcome This is the index of the String objects that are 38 * the outcomes. 39 */ 40 GateEventStream(java.util.List newTrainingData, int newIndexOfOutcome) { 41 trainingData=newTrainingData; 42 indexOfOutcome=newIndexOfOutcome; 43 } 44 45 /** 46 * Extract the next instance from those stored in this object, and advance 47 * the objects internal index to point at the next instance. 48 * 49 * An exception will be thrown if this method is called when there are no 50 * more instances to extract. 51 * 52 * @return The next instance. 53 */ 54 public opennlp.maxent.Event nextEvent() { 55 ++index; 56 return instance2Event((java.util.List)trainingData.get(index-1)); 57 } 58 59 /** 60 * See whether there are any more instances to be extracted from this object. 61 * 62 * @return true if there are more instances, false otherwise. 63 */ 64 public boolean hasNext() { 65 return index<trainingData.size(); 66 } 67 68 /** 69 * Convert an instance into an Event object, taking note of the position of 70 * the outcome (class attribute) stored in this object. 71 * 72 * @param instance The instance in the form of a list of String objects. 73 * @return A maxent Event object containing the outcome (class attribute) and 74 * the features (other attributes). 75 */ 76 private opennlp.maxent.Event instance2Event(java.util.List instance) { 77 // Store the outcome separately - and make sure that if it's null then 78 // it gets converted to the String "null". 79 java.lang.String outcome=""+(java.lang.String)instance.get(indexOfOutcome); 80 81 // Then make a new list which doesn't contain the outcome. 82 java.util.List features= 83 new java.util.ArrayList(instance.subList(0, indexOfOutcome)); 84 features.addAll(instance.subList(indexOfOutcome+1, instance.size())); 85 86 if (DEBUG) { 87 System.out.println("New event: outcome="+outcome); 88 System.out.println("features="+instance); 89 } 90 91 // Now make the Event and return it. 92 return new opennlp.maxent.Event(outcome, 93 (String[])features.toArray(new String[0])); 94 } 95 } 96 97