DFSMState.java |
/*
 *  DFSMState.java
 *
 *  Copyright (c) 1998-2005, The University of Sheffield.
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  Valentin Tablan, 27/06/2000
 *
 *  $Id: DFSMState.java,v 1.21 2005/01/11 13:51:33 ian Exp $
 */

/*
  modified by OntoText, Aug 29

*/

package gate.creole.tokeniser;

import java.util.*;

/** Implements a state of the deterministic finite state machine of the
  * tokeniser.
  * It differs from {@link FSMState FSMState} by the definition of the
  * transition function which in this case maps character types to other states
  * as opposed to the transition function from FSMState which maps character
  * types to sets of states, hence the nondeterministic character.
  * @see FSMState
  */
class DFSMState implements java.io.Serializable { //extends FSMState{

  /** Debug flag */
  private static final boolean DEBUG = false;

  /** Parsing phase of {@link #buildTokenDesc()}: the annotation type has not
    * been read yet.
    */
  private static final int LOOKING_FOR_TYPE = 0;

  /** Parsing phase of {@link #buildTokenDesc()}: an attribute name is
    * expected next.
    */
  private static final int LOOKING_FOR_ATTRIBUTE = 1;

  /** Parsing phase of {@link #buildTokenDesc()}: the value of the attribute
    * read last is expected next.
    */
  private static final int LOOKING_FOR_VALUE = 2;

  /** Constructs a new DFSMState object and adds it to the list of deterministic
    * states of the {@link SimpleTokeniser SimpleTokeniser} provided as owner.
    * The new state takes the next free value of the class-wide index counter
    * as its unique index.
    * @param owner a {@link SimpleTokeniser SimpleTokeniser} object
    */
  public DFSMState(SimpleTokeniser owner) {
    myIndex = index++;
    owner.dfsmStates.add(this);
  }

  /** Adds a new mapping in the transition function of this state
    * @param type the UnicodeType for this mapping
    * @param state the next state of the FSM Machine when a character of type type
    * is read from the input.
    */
  void put(UnicodeType type, DFSMState state) {
    put(type.type, state);
  } // put(UnicodeType type, DFSMState state)

  /** Adds a new mapping using the actual index in the internal array.
    * This method is for internal use only. Use
    * {@link #put(gate.creole.tokeniser.UnicodeType,
    *                 gate.creole.tokeniser.DFSMState)} instead.
    * @param index the position in the transition table to write to
    * @param state the state stored at that position
    */
  void put(int index, DFSMState state) {
    transitionFunction[index] = state;
  } // put(int index, DFSMState state)

  /** This method is used to access the transition function of this state.
    * @param type the Unicode type identifier as the corresponding static value
    * on {@link java.lang.Character}
    * @return the state registered for the given type, or <code>null</code> if
    * no transition has been registered for it
    */
  DFSMState next(int type) { //UnicodeType type){
    return transitionFunction[type];
  } // next

  /** Returns a GML (Graph Modelling Language) representation of the edges
    * emerging from this state.
    * One <code>edge [ source .. target .. label ".." ]</code> line is produced
    * for every entry of the transition table that is not <code>null</code>;
    * the label is the mnemonic of the character type triggering the edge.
    * @return the GML text, empty if this state has no outgoing edges
    */
  String getEdgesGML() {
    StringBuffer res = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);

    for (int i = 0; i < transitionFunction.length; i++) {
      DFSMState nextState = transitionFunction[i];
      if (null != nextState) {
        res.append("edge [ source ");
        res.append(myIndex);
        res.append(" target ");
        res.append(nextState.getIndex());
        res.append(" label \"");
        res.append(SimpleTokeniser.typeMnemonics[i]);
        res.append("\" ]\n");
      }
    }
    return res.toString();
  } // getEdgesGML

  /** Builds the token description for the token that will be generated when
    * this <b>final</b> state will be reached and the action associated with it
    * will be fired.
    * See also {@link #setRhs(String)}.
    * <p>
    * The RHS is read as <code>type;attribute=value;attribute=value;...</code>.
    * A backslash takes the token that follows it literally, and text between
    * double quotes is taken verbatim (an empty pair of quotes yields the empty
    * string). The result is stored in {@link #tokenDesc}.
    * @throws TokeniserException if a <code>=</code> or <code>;</code> appears
    * where it is not expected, if a quoted string is not closed, or if the
    * RHS ends in a backslash
    * @throws InvalidRuleException if the RHS does not contain a type
    */
  void buildTokenDesc() throws TokeniserException {
    final String ignorables = " \t\f";
    final String delimiters = ignorables + "\\\";=";

    String attribute = null;
    // Escaped text waiting to be appended to 'read'; null when there is none.
    StringBuffer prefix = null;
    StringBuffer read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);

    // Array-backed lists: they are read back by position at the end.
    List attributes = new ArrayList();
    List values = new ArrayList();

    // Delimiters are returned as tokens so that they can drive the parser.
    StringTokenizer mainSt = new StringTokenizer(rhs, delimiters, true);

    int phase = LOOKING_FOR_TYPE;

    while (mainSt.hasMoreTokens()) {
      String token = SimpleTokeniser.skipIgnoreTokens(mainSt);

      if (token.equals("\\")) {
        // The token following a backslash is taken literally.
        if (!mainSt.hasMoreTokens()) {
          throw new TokeniserException(
            "Dangling escape character at the end of: " + rhs);
        }
        if (null == prefix) {
          prefix = new StringBuffer(mainSt.nextToken());
        } else {
          prefix.append(mainSt.nextToken());
        }
        continue;
      }

      if (null != prefix) {
        // Flush the escaped text before handling the current token.
        read.append(prefix.toString());
        prefix = null;
      }

      if (token.equals("\"")) {
        // Quoted text replaces whatever has been accumulated so far.
        read = readQuoted(mainSt);
      } else if (token.equals("=")) {
        if (phase != LOOKING_FOR_ATTRIBUTE) {
          throw new TokeniserException("Invalid attribute format: " +
                                       read);
        }
        attribute = read.toString();
        read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
        phase = LOOKING_FOR_VALUE;
      } else if (token.equals(";")) {
        if (phase == LOOKING_FOR_TYPE) {
          // First row of the table: the annotation type with an empty value.
          attributes.add(read.toString());
          values.add("");
        } else if (phase == LOOKING_FOR_VALUE) {
          attributes.add(attribute);
          values.add(read.toString());
        } else {
          throw new TokeniserException("Invalid value format: " +
                                       read);
        }
        read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
        phase = LOOKING_FOR_ATTRIBUTE;
      } else {
        read.append(token);
      }
    }

    if (attributes.size() < 1) {
      throw new InvalidRuleException("Invalid right hand side " + rhs);
    }

    tokenDesc = new String[attributes.size()][2];
    for (int i = 0; i < attributes.size(); i++) {
      tokenDesc[i][0] = (String)attributes.get(i);
      tokenDesc[i][1] = (String)values.get(i);
    }
  } // buildTokenDesc

  /** Reads the remainder of a quoted string; the opening quote has already
    * been consumed by the caller. The closing quote is consumed and is not
    * part of the result.
    * <p>
    * The tokenizer is read with its current delimiter set, which contains the
    * double quote and returns delimiters as tokens. Concatenating the tokens
    * up to the next quote therefore reproduces the quoted text exactly,
    * without having to switch the delimiter set and restore it afterwards.
    * @param st the tokenizer positioned just after the opening quote
    * @return the text found between the quotes, possibly empty
    * @throws TokeniserException if the input ends before the closing quote
    */
  private StringBuffer readQuoted(StringTokenizer st)
      throws TokeniserException {
    StringBuffer content = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
    while (st.hasMoreTokens()) {
      String piece = st.nextToken();
      if (piece.equals("\"")) {
        return content;
      }
      content.append(piece);
    }
    throw new TokeniserException("Unterminated quoted string in: " + rhs);
  } // readQuoted

  /** Sets the right hand side associated with this state. The RHS is
    * represented as a string value that will be parsed by the
    * {@link #buildTokenDesc()} method being converted in a table of strings
    * with 2 columns and as many lines as necessary.
    * @param rhs the RHS string
    */
  void setRhs(String rhs) { this.rhs = rhs; }

  /** Returns the RHS string*/
  String getRhs() { return rhs; }

  /** Checks whether this state is a final one, that is whether a RHS has been
    * set for it.
    */
  boolean isFinal() { return (null != rhs); }

  /** Returns the unique ID of this state.*/
  int getIndex() { return myIndex; }

  /** Returns the token description associated with this state. This description
    * is built by {@link #buildTokenDesc()} method and consists of a table of
    * strings having two columns.
    * The first line of the table contains the annotation type on the first
    * position and nothing on the second.
    * Each line after the first one contains a attribute on the first position
    * and its associated value on the second.
    * @return the table, or <code>null</code> if {@link #buildTokenDesc()} has
    * not stored one yet
    */
  String[][] getTokenDesc() {
    return tokenDesc;
  }

  /** A table of strings describing an annotation.
    * The first line of the table contains the annotation type on the first
    * position and nothing on the second.
    * Each line after the first one contains a attribute on the first position
    * and its associated value on the second.
    */
  String[][] tokenDesc;

  /** The transition function of this state: one slot per character type
    * identifier, <code>null</code> where no transition is defined.
    */
  DFSMState[] transitionFunction = new DFSMState[SimpleTokeniser.maxTypeId];

  /** The string of the RHS of the rule from which the token
    * description is built
    */
  String rhs;

  /** The unique index of this state*/
  int myIndex;

  /** Used to generate unique indices for all the objects of this class*/
  static int index;

  static {
    index = 0;
  }

} // class DFSMState