gate.creole.tokeniser.DFSMState (Java2HTML)

1   /*
2    *  DFSMState.java
3    *
4    *  Copyright (c) 1998-2005, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Valentin Tablan, 27/06/2000
12   *
13   *  $Id: DFSMState.java,v 1.21 2005/01/11 13:51:33 ian Exp $
14   */
15  
16   /*
17      modified by OntoText, Aug 29
18  
19   */
20  
21  package gate.creole.tokeniser;
22  
23  import java.util.*;
24  
25  /** Implements a state of the deterministic finite state machine of the
26    * tokeniser.
27    * It differs from {@link FSMState FSMState} by the definition of the
28    * transition function which in this case maps character types to other states
29    * as oposed to the transition function from FSMState which maps character
30    * types to sets of states, hence the nondeterministic character.
31    * @see FSMState
32    */
33  class DFSMState implements java.io.Serializable { //extends FSMState{
34  
35    /** Debug flag */
36    private static final boolean DEBUG = false;
37  
38    /** Constructs a new DFSMState object and adds it to the list of deterministic
39      * states of the {@link DefaultTokeniser DefaultTokeniser} provided as owner.
40      * @param owner a {@link DefaultTokeniser DefaultTokeniser} object
41      */
42    public DFSMState(SimpleTokeniser owner){
43      myIndex = index++;
44      owner.dfsmStates.add(this);
45    }
46  
47    /** Adds a new mapping in the transition function of this state
48      * @param type the UnicodeType for this mapping
49      * @param state the next state of the FSM Machine when a character of type type
50      * is read from the input.
51      */
52    void put(UnicodeType type, DFSMState state){
53      put(type.type, state);
54    } // put(UnicodeType type, DFSMState state)
55  
56    /** Adds a new mapping using the actual index in the internal array.
57      * This method is for internal use only. Use
58      * {@link #put(gate.creole.tokeniser.UnicodeType,
59      *             gate.creole.tokeniser.DFSMState)} instead.
60      */
61    void put(int index, DFSMState state){
62      transitionFunction[index] = state;
63    } // put(int index, DFSMState state)
64  
65    /** This method is used to access the transition function of this state.
66      * @param type the Unicode type identifier as the corresponding static value
67      * on {@link java.lang.Character}
68      */
69    DFSMState next(int type){//UnicodeType type){
70      return transitionFunction[type];
71    } // next
72  
73    /** Returns a GML (Graph Modelling Language) representation of the edges
74      * emerging from this state
75      */
76    String getEdgesGML(){
77      ///String res = "";
78      //OT
79      StringBuffer res = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
80      Set nextSet;
81      Iterator nextSetIter;
82      DFSMState nextState;
83  
84      for(int i = 0; i< transitionFunction.length; i++){
85        nextState = transitionFunction[i];
86        if(null != nextState){
87          /*
88          res += "edge [ source " + myIndex +
89          " target " + nextState.getIndex() +
90          " label \"";
91          res += SimpleTokeniser.typeMnemonics[i];
92          res += "\" ]\n";
93          */
94          //OT
95          res.append("edge [ source ");
96          res.append(myIndex);
97          res.append(" target ");
98          res.append(nextState.getIndex());
99          res.append(" label \"");
100         res.append(SimpleTokeniser.typeMnemonics[i]);
101         res.append("\" ]\n");
102       }
103     };
104     return res.toString();
105   } // getEdgesGML
106 
107   /** Builds the token description for the token that will be generated when
108     * this <b>final</b> state will be reached and the action associated with it
109     * will be fired.
110     * See also {@link #setRhs(String)}.
111     */
112   void buildTokenDesc() throws TokeniserException{
113     String ignorables = " \t\f";
114     String token = null,
115            type = null,
116            attribute = null,
117            value = null
118            ///prefix = null,
119            ///read =""
120            ;
121     //OT
122     StringBuffer prefix = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
123     StringBuffer read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
124 
125     LinkedList attributes = new LinkedList(),
126                values = new LinkedList();
127     StringTokenizer mainSt =
128       new StringTokenizer(rhs, ignorables + "\\\";=", true);
129 
130     int descIndex = 0;
131     //phase means:
132     //0 == looking for type;
133     //1 == looking for attribute;
134     //2 == looking for value;
135     //3 == write the attr/value pair
136     int phase = 0;
137 
138     while(mainSt.hasMoreTokens()) {
139       token = SimpleTokeniser.skipIgnoreTokens(mainSt);
140 
141       if(token.equals("\\")){
142         if(null == prefix)
143             ///prefix = mainSt.nextToken();
144         //OT
145             prefix = new StringBuffer(mainSt.nextToken());
146         else ///prefix += mainSt.nextToken();
147         //OT
148             prefix.append(mainSt.nextToken());
149         continue;
150       } else if(null != prefix) {
151         ///read += prefix;
152         //OT
153         read.append(prefix.toString());
154         prefix = null;
155       }
156 
157       if(token.equals("\"")){
158         ///read = mainSt.nextToken("\"");
159         //OT
160         read = new StringBuffer(mainSt.nextToken("\""));
161         if(read.equals("\"")) ///read = "";
162             read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
163         else {
164           //delete the remaining enclosing quote and restore the delimiters
165           mainSt.nextToken(ignorables + "\\\";=");
166         }
167 
168       } else if(token.equals("=")) {
169 
170         if(phase == 1){
171           ///attribute = read;
172           //OT
173           attribute = read.toString();
174           ///read = "";
175           //OT
176           read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
177           phase = 2;
178         }else throw new TokeniserException("Invalid attribute format: " +
179                                            read);
180       } else if(token.equals(";")) {
181         if(phase == 0){
182           ///type = read;
183           type = read.toString();
184           ///read = "";
185           read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
186           //Out.print("Type: " + type);
187           attributes.addLast(type);
188           values.addLast("");
189           phase = 1;
190         } else if(phase == 2) {
191           ///value = read;
192           value = read.toString();
193           ///read = "";
194           read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
195           phase = 3;
196         } else throw new TokeniserException("Invalid value format: " +
197                                            read);
198       } else ///read += token;
199             read.append(token);
200 
201       if(phase == 3) {
202         // Out.print("; " + attribute + "=" + value);
203         attributes.addLast(attribute);
204         values.addLast(value);
205         phase = 1;
206       }
207     }
208     //Out.println();
209     if(attributes.size() < 1)
210       throw new InvalidRuleException("Invalid right hand side " + rhs);
211     tokenDesc = new String[attributes.size()][2];
212 
213     for(int i = 0; i < attributes.size(); i++) {
214       tokenDesc[i][0] = (String)attributes.get(i);
215       tokenDesc[i][1] = (String)values.get(i);
216     }
217 
218     // for(int i = 0; i < attributes.size(); i++){
219     //    Out.println(tokenDesc[i][0] + "=" +
220     //                  tokenDesc[i][1]);
221     // }
222   } // buildTokenDesc
223 
224   /** Sets the right hand side associated with this state. The RHS is
225     * represented as a string value that will be parsed by the
226     * {@link #buildTokenDesc()} method being converted in a table of strings
227     * with 2 columns and as many lines as necessary.
228     * @param rhs the RHS string
229     */
230   void setRhs(String rhs) { this.rhs = rhs; }
231 
232   /** Returns the RHS string*/
233   String getRhs(){return rhs;}
234 
235   /** Checks whether this state is a final one*/
236   boolean isFinal() { return (null != rhs); }
237 
238   /** Returns the unique ID of this state.*/
239   int getIndex() { return myIndex; }
240 
241   /** Returns the token description associated with this state. This description
242     * is built by {@link #buildTokenDesc()} method and consists of a table of
243     * strings having two columns.
244     * The first line of the table contains the annotation type on the first
245     * position and nothing on the second.
246     * Each line after the first one contains a attribute on the first position
247     * and its associated value on the second.
248     */
249   String[][] getTokenDesc() {
250     return tokenDesc;
251   }
252 
253   /** A table of strings describing an annotation.
254     * The first line of the table contains the annotation type on the first
255     * position and nothing on the second.
256     * Each line after the first one contains a attribute on the first position
257     * and its associated value on the second.
258     */
259   String[][] tokenDesc;
260 
261   /** The transition function of this state.
262     */
263   DFSMState[] transitionFunction = new DFSMState[SimpleTokeniser.maxTypeId];
264 
265   /** The string of the RHS of the rule from which the token
266     * description is built
267     */
268   String rhs;
269 
270   /** The unique index of this state*/
271   int myIndex;
272 
273   /** Used to generate unique indices for all the objects of this class*/
274   static int index;
275 
276   static {
277     index = 0;
278   }
279 
280 } // class DFSMState
281