1   /*
2    *  Copyright (c) 2004, The University of Sheffield.
3    *
4    *  This file is part of GATE (see http://gate.ac.uk/), and is free
5    *  software, licenced under the GNU Library General Public License,
6    *  Version 2, June 1991 (in the distribution as file licence.html,
7    *  and also available at http://gate.ac.uk/gate/licence.html).
8    *
9    *  Mike Dowman 30-03-2004
10   *
11   *  $Id: GateEventStream.java,v 1.2 2004/04/28 11:25:29 valyt Exp $
12   *
13   */
14  
15  package gate.creole.ml.maxent;
16  
17  /**
18   * This class is used by MaxentWrapper. When created, it is passed a data
19   * structure containg all the training data for the classifier. It can then
20   * provide this data to the maxent model itself, as needed.
21   */
22  public class GateEventStream implements opennlp.maxent.EventStream {
23  
24    boolean DEBUG=false;
25  
26    final java.util.List trainingData;
27    final int indexOfOutcome;
28  
29    int index=0;
30  
31    /**
32     * This constructor stores all the training data in the object when the object
33     * is created.
34     *
35     * @param newTrainingData A List of Lists of String objects. Each String is
36     * a maxent feature or outcome.
37     * @param newIndexOfOutcome This is the index of the String objects that are
38     * the outcomes.
39     */
40    GateEventStream(java.util.List newTrainingData, int newIndexOfOutcome) {
41      trainingData=newTrainingData;
42      indexOfOutcome=newIndexOfOutcome;
43    }
44  
45    /**
46     * Extract the next instance from those stored in this object, and advance
47     * the objects internal index to point at the next instance.
48     *
49     * An exception will be thrown if this method is called when there are no
50     * more instances to extract.
51     *
52     * @return The next instance.
53     */
54    public opennlp.maxent.Event nextEvent() {
55      ++index;
56      return instance2Event((java.util.List)trainingData.get(index-1));
57    }
58  
59    /**
60     * See whether there are any more instances to be extracted from this object.
61     *
62     * @return true if there are more instances, false otherwise.
63     */
64    public boolean hasNext() {
65      return index<trainingData.size();
66    }
67  
68    /**
69     * Convert an instance into an Event object, taking note of the position of
70     * the outcome (class attribute) stored in this object.
71     *
72     * @param instance The instance in the form of a list of String objects.
73     * @return A maxent Event object containing the outcome (class attribute) and
74     * the features (other attributes).
75     */
76    private opennlp.maxent.Event instance2Event(java.util.List instance) {
77      // Store the outcome separately - and make sure that if it's null then
78      // it gets converted to the String "null".
79      java.lang.String outcome=""+(java.lang.String)instance.get(indexOfOutcome);
80  
81      // Then make a new list which doesn't contain the outcome.
82      java.util.List features=
83          new java.util.ArrayList(instance.subList(0, indexOfOutcome));
84      features.addAll(instance.subList(indexOfOutcome+1, instance.size()));
85  
86      if (DEBUG) {
87        System.out.println("New event: outcome="+outcome);
88        System.out.println("features="+instance);
89      }
90  
91      // Now make the Event and return it.
92      return new opennlp.maxent.Event(outcome,
93                                      (String[])features.toArray(new String[0]));
94    }
95  }
96  
97