1   package gate.creole.morph;
2   
3   import java.util.Stack;
4   /**
5    * <p>Title: ParsingFunctions.java </p>
 * <p>Description: This class provides static utility methods used for various
 * purposes, such as converting user-defined rules into regular expressions and
 * determining the type of a variable from its value. </p>
9    */
10  public class ParsingFunctions {
11  
12    /**
13     * Default Constructor
14     */
15    public ParsingFunctions() {
16  
17    }
18  
19    /**
20     * This method takes the value of the variable and tells the user
21     * what type of value is from CharacterRange, CharacterSet, StringSet
22     * @param varValue value for which to find the variable type
23     * @return ERROR_CODE = -4, STRING_SET_CODE = 0, CHARACTER_RANGE_CODE = 1,
24     * CHARACTER_SET_CODE = 2;
25     */
26    public static int findVariableType(String varValue) {
27      // if the value starts with " it is string set
28      // if the value starts with "[-" it is a character range
29      // if the value starts with "[" it is a character set
30      // otherwise error
31      if(varValue==null) {
32        return Codes.ERROR_CODE;
33      }
34  
35      if(varValue.length()>=3 && varValue.charAt(0)=='\"'
36         && (varValue.lastIndexOf('\"')==(varValue.length()-1))) {
37        // for string set it should be greater than 3 because
38        // it requires at least one character to make the string
39        // first and the last character should be "
40        return Codes.STRING_SET_CODE;
41  
42      } else if(varValue.length()>=6 && (((varValue.length()-3)%3)==0)
43                && varValue.substring(0,2).equals("[-")
44                && varValue.charAt(varValue.length()-1)==']') {
45       // for the character range it should be greater than 6 because
46       // three characters as "[-" and "]"
47       // and finally to define the range character-character
48       return Codes.CHARACTER_RANGE_CODE;
49  
50      } else if(varValue.length()>=3 && varValue.charAt(0)=='['
51                && varValue.charAt(varValue.length()-1)==']') {
52          // for the character set it should be greater than 3 characters because
53          // it requires at least one character
54          // first and the last character should be [ and ] respectively
55          if(varValue.charAt(1)=='-') {
56            return Codes.ERROR_CODE;
57          } else {
58            return Codes.CHARACTER_SET_CODE;
59          }
60  
61      } else {
62        // there are some errors
63        return Codes.ERROR_CODE;
64      }
65  
66    }
67  
68    /**
69     * This method checks for the string if it is a valid integer value
70     * @param value value to be checked for its type to be integer
71     * @return if value is an integer returns true, false otherwise
72     */
73    public static boolean isInteger(String value) {
74      try {
75        int no = Integer.parseInt(value);
76      } catch(NumberFormatException nfe) {
77        return false;
78      }
79      return true;
80    }
81  
82    /**
83      * This method checks for the string if it is a valid integer value
84      * @param value value to be checked for its type to be integer
85      * @return if value is an integer returns true, false otherwise
86      */
87     public static boolean isBoolean(String value) {
88       if(value.equals("false") || value.equals("true")) {
89         return true;
90       } else {
91         return false;
92       }
93     }
94  
95     /**
96      * This method convert the expression which has been entered by the user
97      * in the .rul file (i.e. rules defined by the user), into the expression
98      * which are recognized by the regular expression Patterns
99      * @param line rule defined by the user
100     * @param storage this method internally requires values of the used
101     * variables to replace the them with their values in the expression
102     * @return newly generated regular expression
103     */
104    public static String convertToRegExp(String line,Storage storage) {
105      // replace all OR with |
106      line = line.replaceAll("( OR )", "|");
107      line = line.replaceAll("(\\[\\-)","[");
108 
109      // we will use the stack concept here
110      // for every occurence of '{', or '(' we will add that into the stack
111      // and for every occurence of '}' or ')' we will remove that element from
112      // the stack
113      // if the value found between the bracket is an integer value
114      // we won't replace those brackets
115      StringBuffer newExpr = new StringBuffer(line);
116      Stack stack = new Stack();
117      Stack bracketIndexes = new Stack();
118 
119      for (int i = 0; i < newExpr.length(); i++) {
120        if (newExpr.charAt(i) == '{') {
121          // add it to the stack
122          stack.add("{");
123          bracketIndexes.add(new Integer(i));
124 
125        }
126        else if (newExpr.charAt(i) == '(') {
127          // add it to the stack
128          stack.add("(");
129          bracketIndexes.add(new Integer(i));
130 
131        }
132        else if (newExpr.charAt(i) == '[') {
133          // add it to the stack
134          stack.add("[");
135          bracketIndexes.add(new Integer(i));
136 
137        }
138        else if (newExpr.charAt(i) == '\"') {
139          // before adding it to the stack, check if this is the closing one
140          if (stack.isEmpty() || !(((String)(stack.get(stack.size() - 1))).equals("\""))) {
141            // yes this is the opening one
142            // add it to the stack
143            stack.add("\"");
144            bracketIndexes.add(new Integer(i));
145          } else {
146            // this is the closing one
147            String bracket = (String)(stack.pop());
148            int index = ((Integer)(bracketIndexes.pop())).intValue();
149            newExpr.setCharAt(index, '(');
150            newExpr.setCharAt(i, ')');
151         }
152        }
153        else if (newExpr.charAt(i) == '}') {
154          // remove the element from the stack
155          // it must be '{', otherwise generate the error
156          String bracket = (String) (stack.pop());
157          int index = ((Integer)(bracketIndexes.pop())).intValue();
158          if (!bracket.equals("{")) {
159            return null;
160          }
161 
162          // now check if the value between these brackets is integer, that means
163          // we don't need to change the brackets, otherwise change them to
164          // '(' and ')'
165          if (isInteger(newExpr.substring(index + 1, i))) {
166            // yes it is an integer
167            // continue
168            continue;
169          }
170          else {
171            // no it is string
172            newExpr.setCharAt(index, '(');
173            newExpr.setCharAt(i, ')');
174          }
175 
176        }
177        else if (newExpr.charAt(i) == ')') {
178          // remove the element from the stack
179          // it must be ')', otherwise generate the error
180          String bracket = (String) (stack.pop());
181          int index = ( (Integer) (bracketIndexes.pop())).intValue();
182          if (!bracket.equals("(")) {
183            return null;
184          }
185          continue;
186        }
187        else if (newExpr.charAt(i) == ']') {
188          // remove the element from the stack
189          // it must be '[', otherwise generate the error
190          String bracket = (String) (stack.pop());
191          int index = ( (Integer) (bracketIndexes.pop())).intValue();
192          if (!bracket.equals("[")) {
193            return null;
194          }
195        }
196      }
197      // check if all the stacks are empty then and only then the written
198      // expression is true, otherwise it is incorrect
199      if(!stack.empty() || !bracketIndexes.empty()) {
200        return null;
201      }
202      //System.out.println(line+"  "+newExpr);
203      // now we need to replace the variables with their values
204      // but how would we know which is the variable
205      // so get the variable list and check if it is available in the expression
206      String [] varNames = storage.getVarNames();
207      for(int i=0;i<varNames.length;i++) {
208        // check for the occurance of each varName in the expression
209        int index = -1;
210        String myString = "{[()]} ";
211        while((index=newExpr.indexOf(varNames[i],index+1))!=-1) {
212          //System.out.println(index + "  "+newExpr.length());
213          // now check for the left and right characters
214          if(index>0) {
215            if(myString.indexOf(newExpr.charAt(index-1))==-1) {
216              index = index +varNames[i].length()-1;
217              // this is not the varilable
218              continue;
219            }
220          }
221          if((varNames[i].length()+index)<newExpr.length()) {
222            if(myString.indexOf(newExpr.charAt(varNames[i].length()+index))==-1) {
223              index = index +varNames[i].length()-1;
224              // this is not the variable
225              continue;
226           }
227          }
228 
229          // yes it is a variable
230          String replaceWith = "("+(String)(storage.get(varNames[i]))+")";
231          newExpr.replace(index,(varNames[i].length()+index),replaceWith);
232          index = index + replaceWith.length();
233        }
234      }
235      return new String(newExpr);
236    }
237 }