ParsingFunctions.java |
1 package gate.creole.morph; 2 3 import java.util.Stack; 4 /** 5 * <p>Title: ParsingFunctions.java </p> 6 * <p>Description: This class implements all static methods, which can be used 7 * for various purposes, like converting rules defined by users into the regular 8 * expressions, finding varilable type from its value type etc. </p> 9 */ 10 public class ParsingFunctions { 11 12 /** 13 * Default Constructor 14 */ 15 public ParsingFunctions() { 16 17 } 18 19 /** 20 * This method takes the value of the variable and tells the user 21 * what type of value is from CharacterRange, CharacterSet, StringSet 22 * @param varValue value for which to find the variable type 23 * @return ERROR_CODE = -4, STRING_SET_CODE = 0, CHARACTER_RANGE_CODE = 1, 24 * CHARACTER_SET_CODE = 2; 25 */ 26 public static int findVariableType(String varValue) { 27 // if the value starts with " it is string set 28 // if the value starts with "[-" it is a character range 29 // if the value starts with "[" it is a character set 30 // otherwise error 31 if(varValue==null) { 32 return Codes.ERROR_CODE; 33 } 34 35 if(varValue.length()>=3 && varValue.charAt(0)=='\"' 36 && (varValue.lastIndexOf('\"')==(varValue.length()-1))) { 37 // for string set it should be greater than 3 because 38 // it requires at least one character to make the string 39 // first and the last character should be " 40 return Codes.STRING_SET_CODE; 41 42 } else if(varValue.length()>=6 && (((varValue.length()-3)%3)==0) 43 && varValue.substring(0,2).equals("[-") 44 && varValue.charAt(varValue.length()-1)==']') { 45 // for the character range it should be greater than 6 because 46 // three characters as "[-" and "]" 47 // and finally to define the range character-character 48 return Codes.CHARACTER_RANGE_CODE; 49 50 } else if(varValue.length()>=3 && varValue.charAt(0)=='[' 51 && varValue.charAt(varValue.length()-1)==']') { 52 // for the character set it should be greater than 3 characters because 53 // it requires at least one character 54 // first and the last character should be [ and ] respectively 55 if(varValue.charAt(1)=='-') { 56 return Codes.ERROR_CODE; 57 } else { 58 return Codes.CHARACTER_SET_CODE; 59 } 60 61 } else { 62 // there are some errors 63 return Codes.ERROR_CODE; 64 } 65 66 } 67 68 /** 69 * This method checks for the string if it is a valid integer value 70 * @param value value to be checked for its type to be integer 71 * @return if value is an integer returns true, false otherwise 72 */ 73 public static boolean isInteger(String value) { 74 try { 75 int no = Integer.parseInt(value); 76 } catch(NumberFormatException nfe) { 77 return false; 78 } 79 return true; 80 } 81 82 /** 83 * This method checks for the string if it is a valid integer value 84 * @param value value to be checked for its type to be integer 85 * @return if value is an integer returns true, false otherwise 86 */ 87 public static boolean isBoolean(String value) { 88 if(value.equals("false") || value.equals("true")) { 89 return true; 90 } else { 91 return false; 92 } 93 } 94 95 /** 96 * This method convert the expression which has been entered by the user 97 * in the .rul file (i.e. rules defined by the user), into the expression 98 * which are recognized by the regular expression Patterns 99 * @param line rule defined by the user 100 * @param storage this method internally requires values of the used 101 * variables to replace the them with their values in the expression 102 * @return newly generated regular expression 103 */ 104 public static String convertToRegExp(String line,Storage storage) { 105 // replace all OR with | 106 line = line.replaceAll("( OR )", "|"); 107 line = line.replaceAll("(\\[\\-)","["); 108 109 // we will use the stack concept here 110 // for every occurence of '{', or '(' we will add that into the stack 111 // and for every occurence of '}' or ')' we will remove that element from 112 // the stack 113 // if the value found between the bracket is an integer value 114 // we won't replace those brackets 115 StringBuffer newExpr = new StringBuffer(line); 116 Stack stack = new Stack(); 117 Stack bracketIndexes = new Stack(); 118 119 for (int i = 0; i < newExpr.length(); i++) { 120 if (newExpr.charAt(i) == '{') { 121 // add it to the stack 122 stack.add("{"); 123 bracketIndexes.add(new Integer(i)); 124 125 } 126 else if (newExpr.charAt(i) == '(') { 127 // add it to the stack 128 stack.add("("); 129 bracketIndexes.add(new Integer(i)); 130 131 } 132 else if (newExpr.charAt(i) == '[') { 133 // add it to the stack 134 stack.add("["); 135 bracketIndexes.add(new Integer(i)); 136 137 } 138 else if (newExpr.charAt(i) == '\"') { 139 // before adding it to the stack, check if this is the closing one 140 if (stack.isEmpty() || !(((String)(stack.get(stack.size() - 1))).equals("\""))) { 141 // yes this is the opening one 142 // add it to the stack 143 stack.add("\""); 144 bracketIndexes.add(new Integer(i)); 145 } else { 146 // this is the closing one 147 String bracket = (String)(stack.pop()); 148 int index = ((Integer)(bracketIndexes.pop())).intValue(); 149 newExpr.setCharAt(index, '('); 150 newExpr.setCharAt(i, ')'); 151 } 152 } 153 else if (newExpr.charAt(i) == '}') { 154 // remove the element from the stack 155 // it must be '{', otherwise generate the error 156 String bracket = (String) (stack.pop()); 157 int index = ((Integer)(bracketIndexes.pop())).intValue(); 158 if (!bracket.equals("{")) { 159 return null; 160 } 161 162 // now check if the value between these brackets is integer, that means 163 // we don't need to change the brackets, otherwise change them to 164 // '(' and ')' 165 if (isInteger(newExpr.substring(index + 1, i))) { 166 // yes it is an integer 167 // continue 168 continue; 169 } 170 else { 171 // no it is string 172 newExpr.setCharAt(index, '('); 173 newExpr.setCharAt(i, ')'); 174 } 175 176 } 177 else if (newExpr.charAt(i) == ')') { 178 // remove the element from the stack 179 // it must be ')', otherwise generate the error 180 String bracket = (String) (stack.pop()); 181 int index = ( (Integer) (bracketIndexes.pop())).intValue(); 182 if (!bracket.equals("(")) { 183 return null; 184 } 185 continue; 186 } 187 else if (newExpr.charAt(i) == ']') { 188 // remove the element from the stack 189 // it must be '[', otherwise generate the error 190 String bracket = (String) (stack.pop()); 191 int index = ( (Integer) (bracketIndexes.pop())).intValue(); 192 if (!bracket.equals("[")) { 193 return null; 194 } 195 } 196 } 197 // check if all the stacks are empty then and only then the written 198 // expression is true, otherwise it is incorrect 199 if(!stack.empty() || !bracketIndexes.empty()) { 200 return null; 201 } 202 //System.out.println(line+" "+newExpr); 203 // now we need to replace the variables with their values 204 // but how would we know which is the variable 205 // so get the variable list and check if it is available in the expression 206 String [] varNames = storage.getVarNames(); 207 for(int i=0;i<varNames.length;i++) { 208 // check for the occurance of each varName in the expression 209 int index = -1; 210 String myString = "{[()]} "; 211 while((index=newExpr.indexOf(varNames[i],index+1))!=-1) { 212 //System.out.println(index + " "+newExpr.length()); 213 // now check for the left and right characters 214 if(index>0) { 215 if(myString.indexOf(newExpr.charAt(index-1))==-1) { 216 index = index +varNames[i].length()-1; 217 // this is not the varilable 218 continue; 219 } 220 } 221 if((varNames[i].length()+index)<newExpr.length()) { 222 if(myString.indexOf(newExpr.charAt(varNames[i].length()+index))==-1) { 223 index = index +varNames[i].length()-1; 224 // this is not the variable 225 continue; 226 } 227 } 228 229 // yes it is a variable 230 String replaceWith = "("+(String)(storage.get(varNames[i]))+")"; 231 newExpr.replace(index,(varNames[i].length()+index),replaceWith); 232 index = index + replaceWith.length(); 233 } 234 } 235 return new String(newExpr); 236 } 237 }