1   package gate.creole.morph;
2   
3   import java.lang.reflect.Method;
4   import java.net.MalformedURLException;
5   import java.net.URL;
6   import java.util.regex.Matcher;
7   import java.util.regex.Pattern;
8   
9   import gate.creole.ResourceInstantiationException;
10  
11  /**
12   * <p>Title: Interpret.java </p>
13   * <p>Description: This is the main class which which should be invoked
14   * to load the rule file in the system and then to execute the program
15   * to find the root word and the affix to it.</p>
16   */
17  public class Interpret {
18  
19    /**
20     * instance of the ReadFile class which reads the file and stores each line of
21     * the given program in the arraylist which can be read using different
22     * methods of the ReadFile class
23     */
24    private ReadFile file;
25  
26    /** Boolean variables to keep track on which section is being read */
27    private boolean isDefineVarSession, isDefineRulesSession;
28  
29    /** Instance of Storage class, which is used store all the variables details*/
30    private Storage variables;
31  
32    /** This variables keeps the record of available methods for the morphing */
33    private Method[] methods;
34  
35    /** This varilable stores the compiles versions of rules */
36    private CompiledRules rules;
37  
38    /** This variables holds the affix */
39    private String affix;
40  
41    /**
42     * Constructor
43     */
44    public Interpret() {
45  
46    }
47  
48    /**
49     * It starts the actual program
50     * @param ruleFileName
51     */
52    public void init(URL ruleFileURL) throws ResourceInstantiationException {
53      variables = new Storage();
54      prepareListOfMorphMethods();
55      rules = new CompiledRules();
56      file = new ReadFile(ruleFileURL);
57      affix = null;
58      isDefineRulesSession = false;
59      isDefineVarSession = false;
60      readProgram();
61      interpretProgram();
62    }
63  
64    /**
65     * Once all the rules have been loaded in the system, now its time to start
66     * the morpher, which will find out the base word rule
67     * @param word input to the program
68     * @return root word
69     */
70    public String runMorpher(String word, String category) {
71      affix = null;
72      rules.resetPointer();
73      // do the pattern matching process with each and every available pattern
74      // until the pattern is found which can accomodate the given word
75      while (rules.hasNext()) {
76  
77  
78        // load the pattern
79        MyPattern myPatInst = rules.getNext();
80  
81        // first check if this pattern should be considered to match
82        if(!myPatInst.isSameCategory(category)) {
83          continue;
84        }
85  
86        // proceed only iof the pattern is not null
87        if (myPatInst != null) {
88  
89          // find the actual pattern
90          Pattern pat = myPatInst.getPattern();
91          Matcher m = pat.matcher(word);
92  
93          // check if this pattern can accomodate the given word
94          if (m.matches()) {
95            // yes it can, so find the name of the function which should be
96            // called to find it's root word
97            String function = myPatInst.getFunction();
98  
99            // call the appropriate function
100           String methodName = getMethodName(function);
101           String[] parameters = getParameterValues(function);
102 
103           // check all the available in built methods and run the
104           // appropriate one
105           for (int i = 0; i < methods.length; i++) {
106 
107             // preparing paramters for the comparision of two
108             // methods
109             int len = methodName.length();
110             String currentMethod = methods[i].toString();
111             int len1 = currentMethod.length();
112 
113             if (len < len1) {
114               currentMethod = currentMethod.substring(len1 - len + 1, len1);
115               if (currentMethod.trim().equals(methodName.trim())) {
116 
117                 // yes two methods are equivalent
118                 // so call that method of MorphFunctions
119                 MorphFunctions morphInst = new MorphFunctions();
120 
121                 // set the given word in that morph program
122                 morphInst.setInput(word);
123 
124                 // and finally call the appropriate method to find the root
125                 // word and the affix
126                 if (methods[i].getName().equals("irreg_stem")) {
127                   String answer = morphInst.irreg_stem(parameters[0],
128                                                        parameters[1]);
129                   this.affix = morphInst.getAffix();
130                   return answer;
131                 }
132                 else if (methods[i].getName().equals("null_stem")) {
133                   //return word;
134                   String answer = morphInst.null_stem();
135                   this.affix = morphInst.getAffix();
136                   return answer;
137                 }
138                 else if (methods[i].getName().equals("semi_reg_stem")) {
139                   String answer = morphInst.semi_reg_stem(
140                                     Integer.parseInt(parameters[0]),
141                                     parameters[1]);
142                   this.affix = morphInst.getAffix();
143                   return answer;
144                 }
145                 else if (methods[i].getName().equals("stem")) {
146                   String answer = morphInst.stem(
147                                     Integer.parseInt(parameters[0]),
148                                     parameters[1],
149                                     parameters[2]);
150                   this.affix = morphInst.getAffix();
151                   return answer;
152                 }
153               }
154             }
155           }
156         }
157       }
158     }
159     // no rule matched so say no matching found
160     affix = null;
161     return word;
162 
163   }
164 
165   /**
166    * This method is used to find the method definition
167    * But it can recognize only String, boolean and int types
168    * for Example: stem(2,"ed","d") ==>
169    *              stem(int,java.lang.String,java.lang.String);
170    * @param method
171    * @return the definition of the method
172    */
173   private String getMethodName(String method) {
174     // find the first index of '('
175     int index = method.indexOf('(');
176     String methodName = method.substring(0, index) + "(";
177 
178     // now get the parameter types
179     String[] parameters =
180         method.substring(index + 1, method.length() - 1).split(",");
181 
182     // find the approapriate type
183     for (int i = 0; i < parameters.length; i++) {
184       if (parameters[i].startsWith("\"") && parameters[i].endsWith("\"")) {
185         methodName = methodName + "java.lang.String";
186       }
187       else if (ParsingFunctions.isBoolean(parameters[i])) {
188         methodName = methodName + "boolean";
189       }
190       else if (ParsingFunctions.isInteger(parameters[i])) {
191         methodName = methodName + "int";
192       }
193       if ( (i + 1) < parameters.length) {
194         methodName = methodName + ",";
195       }
196     }
197     methodName = methodName + ")";
198     return methodName;
199   }
200 
201   /**
202    * This method finds the actual parameter values
203    * @param method from which parameters are required to be found
204    * @return parameter values
205    */
206   private String[] getParameterValues(String method) {
207     // now first find the name of the method
208     // their parameters and their types
209     int index = method.indexOf("(");
210 
211     // now get the parameters
212     String[] parameters =
213         method.substring(index + 1, method.length() - 1).split(",");
214 
215     // process each parameter
216     for (int i = 0; i < parameters.length; i++) {
217       // we need to remove " from String
218       if (parameters[i].startsWith("\"") && parameters[i].endsWith("\"")) {
219         parameters[i] = parameters[i].substring(1, parameters[i].length() - 1);
220         continue;
221       }
222     }
223     return parameters;
224   }
225 
226   /**
227    * This method prepares the list of available methods in the MorphFunctions
228    * class
229    */
230   private void prepareListOfMorphMethods()
231       throws ResourceInstantiationException  {
232     methods = MorphFunctions.class.getDeclaredMethods();
233   }
234 
235   /**
236    * read the program file
237    */
238   private void readProgram() throws ResourceInstantiationException {
239     // read the program file
240     boolean readStatus = file.read();
241 
242     // check if read was success
243     if (!readStatus) {
244       //not it wasn't so simply display the message and ask user to check it
245       generateError("Some errors reading program file.. please check the" +
246                          "program and try again");
247     }
248   }
249 
250   /**
251    * This method reads each line of the program and interpret them
252    */
253   private void interpretProgram() throws ResourceInstantiationException  {
254     // read each line and parse it
255     while (file.hasNext()) {
256       String currentLine = file.getNext();
257 
258       if (currentLine == null || currentLine.trim().length() == 0) {
259         continue;
260       }
261 
262       // remove all the leading spaces
263       currentLine = currentLine.trim();
264 
265       /* if commandType is 0 ==> defineVars command
266        * if commandType is 1 ==> defineRules command
267        * if commandType is 2 ==> variable declaration
268        * if commandType is 3 ==> rule declaration
269        * otherwise // unknown generate error
270        */
271       int commandType = findCommandType(currentLine);
272       switch (commandType) {
273         case -1:
274           //comment command
275           continue;
276         case 0:
277           //defineVars command
278           defineVarsCommand();
279           break;
280         case 1:
281           //defineRules command
282           defineRulesCommand();
283           break;
284         case 2:
285           //variable declaration
286           variableDeclarationCommand(currentLine);
287           break;
288         case 3:
289           // rule declaration
290           ruleDeclarationCommand(currentLine);
291           break;
292         default:
293           generateError("Syntax Error at line " + file.getPointer()
294                         + " : " + currentLine);
295           break;
296       }
297     } // end while
298   }
299 
300   /**
301    * This method interprets the line and finds out the type of command and
302    * returns the integer indicating the type of the command
303    * @param line The program command to be interpreted
304    * @return and <tt>int</tt> value
305    */
306   private int findCommandType(String line) {
307 
308     // check for the comment command
309     if (line.substring(0, 2).equals("//") || line.charAt(0)=='#') {
310       return -1;
311     }
312     else if (line.equals("defineVars")) {
313       return 0;
314     }
315     else if (line.equals("defineRules")) {
316       return 1;
317     }
318     else if (isDefineVarSession && line.split("==>").length == 2) {
319       return 2;
320     }
321     else if (isDefineRulesSession &&
322              /*(line.charAt(0) == '{' ||
323               line.charAt(0) == '[' ||
324               line.charAt(0) == '(' ||
325               line.charAt(0) == '\"')*/ (line.charAt(0) == '<') &&
326              line.split("==>").length == 2) {
327       return 3;
328     }
329     else {
330       return Codes.ERROR_CODE;
331     }
332   }
333 
334   /**
335    * This method processes the command to define the variable section
336    */
337   private void defineVarsCommand()  throws ResourceInstantiationException {
338 
339     // variable section can only be defined once
340     if (isDefineVarSession) {
341       generateError("Variable Section already defined - " +
342                     "see line " + file.getPointer());
343     }
344     else if (isDefineRulesSession) {
345       generateError("Variable section must be declared before the Rule " +
346                     "Section - see line " + file.getPointer());
347     }
348     else {
349       isDefineVarSession = true;
350     }
351   }
352 
353   /**
354    * This method processes the command to define the rule section
355    */
356   private void defineRulesCommand() throws ResourceInstantiationException  {
357     if (isDefineRulesSession) {
358       generateError("Rule Section already defined - see " +
359                     "line " + file.getPointer());
360     }
361     else {
362       isDefineVarSession = false;
363       isDefineRulesSession = true;
364     }
365   }
366 
367   /**
368    * This method processes the command to declare the variable
369    * @param line
370    */
371   private void variableDeclarationCommand(String line)
372       throws ResourceInstantiationException  {
373     // ok so first find the variable name and the value for it
374     String varName = (line.split("==>"))[0].trim();
375     String varValue = (line.split("==>"))[1].trim();
376 
377     // find the type of variable it is
378     int valueType = ParsingFunctions.findVariableType(varValue.trim());
379     if (valueType == Codes.ERROR_CODE) {
380       generateError(varName + " - Variable Syntax Error - see " +
381                     "line" + file.getPointer() + " : " + line);
382     }
383 
384     // based on the variable type create the instance
385     Variable varInst = null;
386     switch (valueType) {
387       case Codes.CHARACTER_RANGE_CODE:
388         varInst = new CharacterRange();
389         break;
390       case Codes.CHARACTER_SET_CODE:
391         varInst = new CharacterSet();
392         break;
393       case Codes.STRING_SET_CODE:
394         varInst = new StringSet();
395         break;
396     }
397 
398     // set the values in the variable
399     if (!varInst.set(varName, varValue)) {
400       generateError(varName + " - Syntax Error while assigning value to the " +
401                     "variable - see line" + file.getPointer() + " : " + line);
402     }
403 
404     // and finally add the variable in
405     if (!variables.add(varName, varInst.getPattern())) {
406       generateError(varName.trim() + " - Variable already defined - see " +
407                     "line " + file.getPointer() + " : " + line);
408     }
409 
410     varInst.resetPointer();
411   }
412 
413   /**
414    * This method processes the command to declare the rule
415    * @param line
416    */
417   private void ruleDeclarationCommand(String line)
418       throws ResourceInstantiationException  {
419     // lets divide the rule into two parts
420     // LHS and RHS.
421     // LHS is a part which requires to be parsed and
422     // RHS should be checked for the legal function name and valid arguments
423     // we process RHS first and then the LHS
424     int LHS = 0;
425     int RHS = 1;
426     String[] ruleParts = line.split("==>");
427     if (ruleParts.length != 2) {
428       generateError("Error in declaring rule at line : " +
429                     file.getPointer() + " : " + line);
430     }
431 
432     // now check if the method which has been called in this rule actually
433     // available in the MorphFunction Class
434     String methodCalled = ruleParts[1].trim();
435     if (!isMethodAvailable(methodCalled)) {
436 
437       // no method is not available so print the syntax error
438       generateError("Syntax error - method does not exists - see " +
439                     "line " + file.getPointer() + " : " + line);
440     }
441 
442     // so RHS part is Ok
443     // now we need to check if LHS is written properly
444     // and convert it to the pattern that is recognized by the java
445     String category = "";
446     // we need to find out the category
447     int i = 1;
448     for(;i<ruleParts[0].length();i++) {
449       if(ruleParts[0].charAt(i) == '>')
450         break;
451       category = category + ruleParts[0].charAt(i);
452     }
453     if(i >= ruleParts[0].length()) {
454       generateError("Syntax error - pattern not written properly - see " +
455                     "line " + file.getPointer() + " : " + line);
456     }
457 
458     ruleParts[0] = ruleParts[0].substring(i+1, ruleParts[0].length());
459     String newPattern = ParsingFunctions.convertToRegExp(ruleParts[0],
460                                                          variables);
461     if (newPattern == null) {
462       generateError("Syntax error - pattern not written properly - see " +
463                     "line " + file.getPointer() + " : " + line);
464     }
465 
466     // we need to compile this pattern and finally add into the compiledRules
467     boolean result = rules.add(newPattern, ruleParts[1], category);
468     if (!result) {
469       // there was some error in the expression so generate the error
470       generateError("Syntax error - pattern not declared properly - see" +
471                     "line " + file.getPointer() + " : " + line);
472     }
473   }
474 
475   /**
476    * This method takes a method signature and searches if the method
477    * @param method
478    * @return a <tt>boolean</tt> value.
479    */
480   private boolean isMethodAvailable(String method) {
481     // now first find the name of the method
482     // their parameters and their types
483     int index = method.indexOf("(");
484     if (index == -1 || index == 0 ||
485         method.charAt(method.length() - 1) != ')') {
486       return false;
487     }
488 
489     String methodName = method.substring(0, index);
490     // now get the parameters
491 
492     String[] parameters;
493     int[] userMethodParams;
494 
495     String arguments = method.substring(index + 1, method.length() - 1);
496     if (arguments == null || arguments.trim().length() == 0) {
497       parameters = null;
498       userMethodParams = null;
499     }
500     else {
501       parameters = method.substring(index + 1, method.length() - 1).split(",");
502       userMethodParams = new int[parameters.length];
503     }
504 
505     // find the parameter types
506     // here we define only three types of arguments
507     // String, boolean and int
508     if (parameters != null) {
509       for (int i = 0; i < parameters.length; i++) {
510         if (parameters[i].startsWith("\"") && parameters[i].endsWith("\"")) {
511           userMethodParams[i] = 7;
512           parameters[i] = "java.lang.String";
513           continue;
514         }
515         else if (ParsingFunctions.isBoolean(parameters[i])) {
516           userMethodParams[i] = 6;
517           parameters[i] = "boolean";
518         }
519         else if (ParsingFunctions.isInteger(parameters[i])) {
520           userMethodParams[i] = 2;
521           parameters[i] = "int";
522         }
523         else {
524           // type cannot be recognized so generate error
525           return false;
526         }
527       }
528     }
529 
530     // now parameters have been found, so check them with the available methods
531     // in the morph function
532     Outer:for (int i = 0; i < methods.length; i++) {
533       if (methods[i].getName().equals(methodName)) {
534         // yes method has found now check for the parameters compatibility
535         Class[] methodParams = methods[i].getParameterTypes();
536         // first check for the number of parameters
537         if (methods[i].getName().equals("null_stem")) {
538           return true;
539         }
540         if (methodParams.length == parameters.length) {
541           // yes arity has matched
542           // now set the precedence
543           int[] paramPrecedence = new int[methodParams.length];
544 
545           // assign precedence
546           for (int j = 0; j < methodParams.length; j++) {
547             if (methodParams[j].getName().equals("java.lang.String"))
548               paramPrecedence[j] = 7;
549             else if (methodParams[j].getName().equals("boolean"))
550               paramPrecedence[j] = 6;
551             else if (methodParams[j].getName().equals("int"))
552               paramPrecedence[j] = 2;
553             else
554               return false;
555           }
556 
557           // if we are here that means all the type matched
558           // so valid method declaration
559           return true;
560         }
561       }
562     }
563     // if we are here that means method doesnot found
564     return false;
565   }
566 
567   /**
568    * Generates the error and stop the execution
569    * @param mess - message to be displayed as an error on the standard output
570    */
571   private void generateError(String mess)
572       throws ResourceInstantiationException {
573     System.out.println("\n\n" + mess);
574     System.out.println("Program terminated...");
575     throw new ResourceInstantiationException("\n\n"+mess);
576   }
577 
578   /**
579    * Main method
580    * @param args
581    */
582   public static void main(String[] args)
583       throws ResourceInstantiationException {
584     if (args == null || args.length < 3) {
585       System.out.println("Usage : Interpret <Rules fileName> <word> <POS>");
586       System.exit( -1);
587     }
588     Interpret interpret = new Interpret();
589     try{
590       interpret.init(new URL((String)args[0]));
591     }catch(MalformedURLException mue){
592       throw new RuntimeException(mue);
593     }
594     String rootWord = interpret.runMorpher(args[1], args[2]);
595     String affix = interpret.getAffix();
596     System.out.println("Root : "+rootWord);
597     System.out.println("affix : "+affix);
598   }
599 
600   /**
601    * This method tells what was the affix to the provided word
602    * @return affix
603    */
604   public String getAffix() {
605     return this.affix;
606   }
607 }