1   /*
2    *  GateFormatXmlDocumentHandler.java
3    *
4    *  Copyright (c) 1998-2005, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Cristian URSU,  22 Nov 2000
12   *
13   *  $Id: GateFormatXmlDocumentHandler.java,v 1.32 2006/02/21 17:31:46 cursu Exp $
14   */
15  
16  package gate.xml;
17  
18  import java.lang.reflect.Constructor;
19  import java.util.*;
20  
21  import org.xml.sax.*;
22  import org.xml.sax.helpers.DefaultHandler;
23  
24  import gate.*;
25  import gate.corpora.DocumentContentImpl;
26  import gate.corpora.DocumentImpl;
27  import gate.event.StatusListener;
28  import gate.util.*;
29  
30  
31  /**
32    * Implements the behaviour of the XML reader. This is the reader for
33    * Gate Xml documents saved with DocumentImplementation.toXml() method.
34    */
35  public class GateFormatXmlDocumentHandler extends DefaultHandler{
36    /** Debug flag */
37    private static final boolean DEBUG = false;
38  
39    /** This is used to capture all data within two tags before calling the actual characters method */
40    private StringBuffer contentBuffer = new StringBuffer("");
41  
42    /** This is a variable that shows if characters have been read */
43    private boolean readCharacterStatus = false;
44  
45  
46    /** An OLD GATE XML format is the one in which Annotations IDs are not present */
47    private static final int OLD = 1;
48    /** A NEW GATE XML format is the one in which Annotations IDs are present */
49    private static final int NEW = 2;
50    /** This value signifies that the document being read can be either OLD or NEW*/
51    private static final int UNDEFINED = 0;
52  
53    /** In the beginning we don't know the type of GATE XML format that we read.
54     * We need to be able to read both types, but not a mixture of them
55     */
56    private int gateXmlFormatType = UNDEFINED;
57  
58    /** A Set recording every annotation ID read from the XML file.
59     * It is used to check the consistency of the annotations being read.
60     * At the end we need the maximum ID in order to set the annotation
61     * ID generator on the document. This is why we need a TreeSet.
62     */
63    private TreeSet annotationIdSet = new TreeSet();
64  
65    /**
66      */
67    public GateFormatXmlDocumentHandler(gate.Document aDocument){
68      // This string contains the plain text (the text without markup)
69      tmpDocContent = new StringBuffer(aDocument.getContent().size().intValue());
70  
71      // Colector is used later to transform all custom objects into annotation
72      // objects
73      colector = new LinkedList();
74  
75      // The Gate document
76      doc = aDocument;
77      currentAnnotationSet = doc.getAnnotations();
78    }//GateFormatXmlDocumentHandler
79  
80    /**
81      * This method is called when the SAX parser encounts the beginning of the
82      * XML document.
83      */
84    public void startDocument() throws org.xml.sax.SAXException {
85    }// startDocument
86  
87    /**
88      * This method is called when the SAX parser encounts the end of the
89      * XML document.
90      * Here we set the content of the gate Document to be the one generated
91      * inside this class (tmpDocContent).
92      * After that we use the colector to generate all the annotation reffering
93      * this new gate document.
94      */
95    public void endDocument() throws org.xml.sax.SAXException {
96  
97      // replace the document content with the one without markups
98      doc.setContent(new DocumentContentImpl(tmpDocContent.toString()));
99      //long docSize = doc.getContent().size().longValue();
100 
101     // If annotations were present in the NEW GATE XML document format,
102     // set the document generator to start from th next MAX Annot ID value
103     if (gateXmlFormatType == NEW && !annotationIdSet.isEmpty()){
104       // Because  annotationIdSet is a TreeSet its elements are already sorted.
105       // The last element will contain the maximum value
106       Integer maxAnnotID = (Integer) annotationIdSet.last();
107       // Set the document generator to start from the maxAnnotID value
108       ((DocumentImpl)doc).setNextAnnotationId(maxAnnotID.intValue() + 1);
109       // Dispose of the annotationIdSet
110       annotationIdSet = null;
111     }//fi
112 
113     // fire the status listener
114     fireStatusChangedEvent("Total elements: " + elements);
115 
116   }// endDocument
117 
118   /**
119     * This method is called when the SAX parser encounts the beginning of an
120     * XML element.
121     */
122   public void startElement (String uri, String qName, String elemName,
123                                                              Attributes atts) throws SAXException {
124 
125     // call characterActions
126     if(readCharacterStatus) {
127       readCharacterStatus = false;
128       charactersAction(new String(contentBuffer).toCharArray(),0,contentBuffer.length());
129     }
130 
131     // Inform the progress listener to fire only if no of elements processed
132     // so far is a multiple of ELEMENTS_RATE
133     if ((++elements % ELEMENTS_RATE) == 0 )
134         fireStatusChangedEvent("Processed elements : " + elements);
135 
136     // Set the curent element being processed
137     currentElementStack.add(elemName);
138 
139     if("AnnotationSet".equals(elemName))
140       processAnnotationSetElement(atts);
141 
142     if("Annotation".equals(elemName))
143       processAnnotationElement(atts);
144 
145     if("Feature".equals(elemName))
146       processFeatureElement(atts);
147 
148     if("Name".equals(elemName))
149       processNameElement(atts);
150 
151     if("Value".equals(elemName))
152       processValueElement(atts);
153 
154     if("Node".equals(elemName))
155       processNodeElement(atts);
156   }// startElement
157 
158   /**
159     * This method is called when the SAX parser encounts the end of an
160     * XML element.
161     */
162     public void endElement (String uri, String qName, String elemName )
163                                                            throws SAXException{
164 
165       // call characterActions
166       if(readCharacterStatus) {
167         readCharacterStatus = false;
168         charactersAction(new String(contentBuffer).toCharArray(),0,contentBuffer.length());
169       }
170 
171     currentElementStack.pop();
172     // Deal with Annotation
173     if ("Annotation".equals(elemName)){
174       if (currentFeatureMap == null)
175         currentFeatureMap = Factory.newFeatureMap();
176       currentAnnot.setFM(currentFeatureMap);
177       colector.add(currentAnnot);
178       // Reset current Annot and current featue map
179       currentAnnot = null;
180       currentFeatureMap = null;
181       return;
182     }// End if
183     // Deal with Value
184     if ("Value".equals(elemName) && "Feature".equals(
185                         (String)currentElementStack.peek())){
186       // If the Value tag was empty, then an empty string will be created.
187       if (currentFeatureValue == null) currentFeatureValue = "";
188     }// End if
189     // Deal with Feature
190     if ("Feature".equals(elemName)){
191       if(currentFeatureName == null){
192         // Cannot add the (key,value) pair to the map
193         // One of them is null something was wrong in the XML file.
194         throw new GateSaxException("A feature name was empty." +
195           "The annotation that cause it is " +
196           currentAnnot +
197           ".Please check the document with a text editor before trying again.");
198       }else {
199         if (currentFeatureMap == null){
200           // The XMl file was somehow altered and a start Feature wasn't found.
201           throw new GateSaxException("Document not consistent. A start"+
202           " feature element is missing. " +
203           "The annotation that cause it is " +
204           currentAnnot +
205           "Please check the document with a text editor before trying again.");
206         }// End if
207         // Create the appropiate feature name and values
208         // If those object cannot be created, their string representation will
209         // be used.
210         currentFeatureMap.put(createFeatKey(),createFeatValue());
211 //        currentFeatureMap.put(currentFeatureName,currentFeatureValue);
212         // Reset current key
213         currentFeatureKeyClassName = null;
214         currentFeatureKeyItemClassName = null;
215         currentFeatureName = null;
216         // Reset current value
217         currentFeatureValueClassName = null;
218         currentFeatureValueItemClassName = null;
219         currentFeatureValue = null;
220       }// End if
221       // Reset the Name & Value pair.
222       currentFeatureName = null;
223       currentFeatureValue = null;
224       return;
225     }//End if
226     // Deal GateDocumentFeatures
227     if ("GateDocumentFeatures".equals(elemName)){
228       if (currentFeatureMap == null)
229         currentFeatureMap = Factory.newFeatureMap();
230       doc.setFeatures(currentFeatureMap);
231       currentFeatureMap = null;
232       return;
233     }// End if
234 
235     // Deal with AnnotationSet
236     if ("AnnotationSet".equals(elemName)){
237       // Create and add annotations to the currentAnnotationSet
238       Iterator iterator = colector.iterator();
239       while (iterator.hasNext()){
240         AnnotationObject annot = (AnnotationObject) iterator.next();
241         // Clear the annot from the colector
242         iterator.remove();
243 
244         // Create a new annotation and add it to the annotation set
245         try{
246 
247           // This is the result of a code-fix.The XML writter has been modified
248           // to serialize the annotation ID.In order to keep backward compatibility
249           // with previously saved documents we had to keep the old code(where the id
250           // is not added) in place.
251           // If the document presents a mixture of the two formats, then error is signaled
252 
253           // Check if the Annotation ID is present or not
254           if (annot.getId() == null){
255             //Annotation without ID. We assume the OLD format.
256 
257             // If we previously detected a NEW format, then we have a mixture of the two
258             if (gateXmlFormatType == NEW)
259               // Signal the error to the user
260               throw new GateSaxException("Found an annotation without ID while " +
261                       "previous annotations had one." + "The NEW GATE XML document format requires" +
262                       " all annotations to have an UNIQUE ID." +
263                       " The offending annotation was of [type=" + annot.getElemName() +
264                       ", startOffset=" + annot.getStart() +
265                       ", endOffset=" + annot.getEnd() + "]");
266 
267             // We are reading OLD format document
268             gateXmlFormatType = OLD;
269             currentAnnotationSet.add( annot.getStart(),
270                                       annot.getEnd(),
271                                       annot.getElemName(),
272                                       annot.getFM());
273           }else{
274             // Annotation with ID. We assume the NEW format
275 
276             // If we previously detected an OLD format, then it means we have a mixture of the two
277             if (gateXmlFormatType == OLD)
278               // Signal the error to the user
279               throw new GateSaxException("Found an annotation with ID while " +
280                     "previous annotations didn't have one." + "The OLD GATE XML" +
281                      "document format requires all annotations NOT to have an ID." +
282                     " The offending annotation was of [Id=" + annot.getId() +
283                     ", type=" + annot.getElemName() +
284                     ", startOffset=" + annot.getStart() +
285                     ", endOffset=" + annot.getEnd() + "]");
286 
287             gateXmlFormatType = NEW;
288             // Test for the unicity of the annotation ID being used
289             // If the ID is not Unique, the method will throw an exception
290             testAnnotationIdUnicity(annot.getId());
291 
292             // Add the annotation
293             currentAnnotationSet.add( annot.getId(),
294                                       annot.getStart(),
295                                       annot.getEnd(),
296                                       annot.getElemName(),
297                                       annot.getFM());
298           }
299         }catch (gate.util.InvalidOffsetException e){
300           throw new GateSaxException(e);
301         }// End try
302       }// End while
303       // The colector is empty and ready for the next AnnotationSet
304       return;
305     }// End if
306 
307 
308   }//endElement
309 
310   /**
311     * This method is called when the SAX parser encounts text in the XML doc.
312     * Here we calculate the end indices for all the elements present inside the
313     * stack and update with the new values.
314     */
315    public void characters(char [] text,int start,int length) throws SAXException {
316      if(!readCharacterStatus) {
317        contentBuffer = new StringBuffer(new String(text,start,length));
318      } else {
319        contentBuffer.append(new String(text,start,length));
320      }
321      readCharacterStatus = true;
322    }
323 
324    /**
325      * This method is called when all characters between specific tags have been read completely
326      */
327   public void charactersAction( char[] text,int start,int length) throws SAXException{
328     // Create a string object based on the reported text
329     String content = new String(text, start, length);
330     if ("TextWithNodes".equals((String)currentElementStack.peek())){
331       processTextOfTextWithNodesElement(content);
332       return;
333     }// End if
334     if ("Name".equals((String)currentElementStack.peek())){
335       processTextOfNameElement(content);
336       return;
337     }// End if
338     if ("Value".equals((String)currentElementStack.peek())){
339 //if (currentFeatureName != null && "string".equals(currentFeatureName) &&
340 //currentAnnot!= null && "Token".equals(currentAnnot.getElemName()) &&
341 //currentAnnot.getEnd().longValue() == 1063)
342 //System.out.println("Content=" + content + " start="+ start + " length=" + length);
343       processTextOfValueElement(content);
344       return;
345     }// End if
346   }//characters
347 
348   /**
349     * This method is called when the SAX parser encounts white spaces
350     */
351   public void ignorableWhitespace(char ch[],int start,int length) throws
352                                                                    SAXException{
353   }//ignorableWhitespace
354 
355   /**
356     * Error method.We deal with this exception inside SimpleErrorHandler class
357     */
358   public void error(SAXParseException ex) throws SAXException {
359     // deal with a SAXParseException
360     // see SimpleErrorhandler class
361     _seh.error(ex);
362   }//error
363 
364   /**
365     * FatalError method.
366     */
367   public void fatalError(SAXParseException ex) throws SAXException {
368     // deal with a SAXParseException
369     // see SimpleErrorhandler class
370     _seh.fatalError(ex);
371   }//fatalError
372 
373   /**
374     * Warning method comment.
375     */
376   public void warning(SAXParseException ex) throws SAXException {
377     // deal with a SAXParseException
378     // see SimpleErrorhandler class
379     _seh.warning(ex);
380   }//warning
381 
382   // Custom methods section
383 
384 
385   /** This method deals with a AnnotationSet element. */
386   private void processAnnotationSetElement(Attributes atts){
387     if (atts != null){
388       for (int i = 0; i < atts.getLength(); i++) {
389        // Extract name and value
390        String attName  = atts.getLocalName(i);
391        String attValue = atts.getValue(i);
392        if ("Name".equals(attName))
393           currentAnnotationSet = doc.getAnnotations(attValue);
394       }// End for
395     }// End if
396   }//processAnnotationSetElement
397 
398   /** This method deals with the start of a Name element*/
399   private void processNameElement(Attributes atts){
400     if (atts == null) return;
401     currentFeatureKeyClassName = atts.getValue("className");
402     currentFeatureKeyItemClassName = atts.getValue("itemClassName");
403   }// End processNameElement();
404 
405   /** This method deals with the start of a Value element*/
406   private void processValueElement(Attributes atts){
407     if (atts == null) return;
408     currentFeatureValueClassName = atts.getValue("className");
409     currentFeatureValueItemClassName = atts.getValue("itemClassName");
410   }// End processValueElement();
411 
412   /** This method deals with a Annotation element. */
413   private void processAnnotationElement(Attributes atts){
414     if (atts != null){
415       currentAnnot = new AnnotationObject();
416       for (int i = 0; i < atts.getLength(); i++) {
417        // Extract name and value
418        String attName  = atts.getLocalName(i);
419        String attValue = atts.getValue(i);
420 
421        if ("Id".equals(attName))
422          currentAnnot.setId(new Integer(attValue));
423 
424        if ("Type".equals(attName))
425          currentAnnot.setElemName(attValue);
426 
427        try{
428          if ("StartNode".equals(attName)){
429           Integer id = new Integer(attValue);
430           Long offset = (Long)id2Offset.get(id);
431           if (offset == null){
432             throw new GateRuntimeException("Couldn't found Node with id = " +
433             id +
434             ".It was specified in annot " +
435             currentAnnot+
436             " as a start node!" +
437             "Check the document with a text editor or something"+
438             " before trying again.");
439 
440           }else
441             currentAnnot.setStart(offset);
442          }// Endif
443          if ("EndNode".equals(attName)){
444           Integer id = new Integer(attValue);
445           Long offset = (Long) id2Offset.get(id);
446           if (offset == null){
447             throw new GateRuntimeException("Couldn't found Node with id = " +
448             id+
449             ".It was specified in annot " +
450             currentAnnot+
451             " as a end node!" +
452             "Check the document with a text editor or something"+
453             " before trying again.");
454           }else
455             currentAnnot.setEnd(offset);
456          }// End if
457        } catch (NumberFormatException e){
458           throw new GateRuntimeException("Offsets problems.Couldn't create"+
459           " Integers from" + " id[" +
460           attValue + "]) in annot " +
461           currentAnnot+
462           "Check the document with a text editor or something,"+
463           " before trying again");
464        }// End try
465       }// End For
466     }// End if
467   }//processAnnotationElement
468 
469   /** This method deals with a Features element. */
470   private void processFeatureElement(Attributes atts){
471     // The first time feature is calle it will create a features map.
472     if (currentFeatureMap == null)
473       currentFeatureMap = Factory.newFeatureMap();
474   }//processFeatureElement
475 
476   /** This method deals with a Node element. */
477   private void processNodeElement(Attributes atts){
478     if (atts != null){
479       for (int i = 0; i < atts.getLength(); i++) {
480         // Extract name and value
481         String attName  = atts.getLocalName(i);
482         String attValue = atts.getValue(i);
483 //System.out.println("Node : " + attName + "=" +attValue);
484         if ("id".equals(attName)){
485           try{
486             Integer id = new Integer(attValue);
487             id2Offset.put(id,new Long(tmpDocContent.length()));
488           }catch(NumberFormatException e){
489             throw new GateRuntimeException("Coudn't create a node from " +
490                         attValue + " Expected an integer.");
491           }// End try
492         }// End if
493       }// End for
494     }// End if
495   }// processNodeElement();
496 
497   /** This method deals with a Text belonging to TextWithNodes element. */
498   private void processTextOfTextWithNodesElement(String text){
499     text = recoverNewLineSequence(text);
500     tmpDocContent.append(text);
501   }//processTextOfTextWithNodesElement
502 
503   /** Restore new line as in the original document if needed */
504   private String recoverNewLineSequence(String text) {
505     String result = text;
506 
507     // check for new line
508     if(text.indexOf('\n') != -1) {
509       String newLineType =
510         (String) doc.getFeatures().get(GateConstants.DOCUMENT_NEW_LINE_TYPE);
511 
512       if("LF".equalsIgnoreCase(newLineType)) {
513         newLineType = null;
514       }
515 
516       // exit with the same text if the change isn't necessary
517       if(newLineType == null) return result;
518 
519       String newLine = "\n";
520       if("CRLF".equalsIgnoreCase(newLineType)) {
521         newLine = "\r\n";
522       }
523       if("CR".equalsIgnoreCase(newLineType)) {
524         newLine = "\r";
525       }
526       if("LFCR".equalsIgnoreCase(newLineType)) {
527         newLine = "\n\r";
528       }
529 
530       StringBuffer buff = new StringBuffer(text);
531       int index = text.lastIndexOf('\n');
532       while(index != -1) {
533         buff.replace(index, index+1, newLine);
534         index = text.lastIndexOf('\n', index-1);
535       } // while
536       result = buff.toString();
537     } // if
538 
539     return result;
540   } // recoverNewLineSequence(String text)
541 
542   /** This method deals with a Text belonging to Name element. */
543   private void processTextOfNameElement(String text) throws GateSaxException{
544     if (currentFeatureMap == null)
545       throw new GateSaxException("GATE xml format processing error:" +
546       " Found a Name element that is not enclosed into a Feature one while" +
547       " analyzing the annotation " +
548       currentAnnot +
549       "Please check the document with a text editor or something before" +
550       " trying again.");
551     else{
552       // In the entities case, characters() gets called separately for each
553       // entity so the text needs to be appended.
554       if (currentFeatureName == null)
555           currentFeatureName = text;
556       else
557         currentFeatureName = currentFeatureName + text;
558     }// End If
559   }//processTextOfNameElement();
560 
561   /** This method deals with a Text belonging to Value element. */
562   private void processTextOfValueElement(String text) throws GateSaxException{
563     if (currentFeatureMap == null)
564       throw new GateSaxException("GATE xml format processing error:" +
565       " Found a Value element that is not enclosed into a Feature one while" +
566       " analyzing the annotation " +
567       currentAnnot+
568       "Please check the document with a text editor or something before" +
569       " trying again.");
570     else{
571       // In the entities case, characters() gets called separately for each
572       // entity so the text needs to be appended.
573       if (currentFeatureValue == null)
574         currentFeatureValue = text;
575       else
576         currentFeatureValue = currentFeatureValue + text;
577     }// End If
578   }//processTextOfValueElement();
579 
580   /** Creates a feature key using this information:
581     * currentFeatureKeyClassName, currentFeatureKeyItemClassName,
582     * currentFeatureName. See createFeatObject() method for more details.
583     */
584   private Object createFeatKey(){
585     return createFeatObject(currentFeatureKeyClassName,
586                             currentFeatureKeyItemClassName,
587                             currentFeatureName);
588   }//createFeatKey()
589 
590   /** Creates a feature value using this information:
591     * currentFeatureValueClassName, currentFeatureValueItemClassName,
592     * currentFeatureValue. See createFeatObject() method for more details.
593     */
594   private Object createFeatValue(){
595     return createFeatObject(currentFeatureValueClassName,
596                             currentFeatureValueItemClassName,
597                             currentFeatureValue);
598   }//createFeatValue()
599 
600   /** This method tries to reconstruct an object given its class name and its
601    *  string representation. If the object is a Collection then the items
602    *  from its string representation must be separated by a ";". In that
603    *  case, the currentFeatureValueItemClassName is used to create items
604    *  belonging to this class.
605    *  @param aFeatClassName represents the name of the class of
606    *  the feat object being created. If it is null then the javaLang.String will
607    *  be used as default.
608    *  @param aFeatItemClassName is it used only if aFeatClassName is a
609    *  collection.If it is null then java.lang.String will be used as default;
610    *  @param aFeatStringRepresentation sais it all
611    *  @return an Object created from  aFeatClassName and its
612    *  aFeatStringRepresentation. If not possible, then aFeatStringRepresentation
613    *  is returned.
614    *  @throws GateRuntimeException If it can't create an item, that
615    *  does not comply with its class definition, to add to the
616    *  collection.
617    */
618   private Object createFeatObject( String aFeatClassName,
619                                    String aFeatItemClassName,
620                                    String aFeatStringRepresentation){
621     // If the string rep is null then the object will be null;
622     if (aFeatStringRepresentation == null) return null;
623     if (aFeatClassName == null) aFeatClassName = "java.lang.String";
624     if (aFeatItemClassName == null) aFeatItemClassName = "java.lang.String";
625     Class currentFeatClass = null;
626     try{
627       currentFeatClass = Gate.getClassLoader().loadClass(aFeatClassName);
628     }catch (ClassNotFoundException cnfex){
629       return aFeatStringRepresentation;
630     }// End try
631     if (java.util.Collection.class.isAssignableFrom(currentFeatClass)){
632       Class itemClass = null;
633       Collection featObject = null;
634       try{
635         featObject = (Collection) currentFeatClass.newInstance();
636         try{
637           itemClass = Gate.getClassLoader().loadClass(aFeatItemClassName);
638         }catch(ClassNotFoundException cnfex){
639           Out.prln("Warning: Item class "+ aFeatItemClassName + " not found."+
640           "Adding items as Strings to the feature called \"" + currentFeatureName
641           + "\" in the annotation " + currentAnnot);
642           itemClass = java.lang.String.class;
643         }// End try
644         // Let's detect if itemClass takes a constructor with a String as param
645         Class[] paramsArray = new Class[1];
646         paramsArray[0] = java.lang.String.class;
647         Constructor itemConstructor = null;
648         boolean addItemAsString = false;
649         try{
650          itemConstructor = itemClass.getConstructor(paramsArray);
651         }catch (NoSuchMethodException  nsme){
652           addItemAsString = true;
653         }catch (SecurityException se){
654           addItemAsString = true;
655         }// End try
656         StringTokenizer strTok = new StringTokenizer(
657                                                 aFeatStringRepresentation,";");
658         Object[] params = new Object[1];
659         Object itemObj = null;
660         while (strTok.hasMoreTokens()){
661           String itemStrRep = strTok.nextToken();
662           if (addItemAsString) featObject.add(itemStrRep);
663           else{
664             params[0] = itemStrRep;
665             try{
666               itemObj = itemConstructor.newInstance(params);
667             }catch (Exception e){
668               throw new GateRuntimeException("An item("+
669                itemStrRep +
670               ")  does not comply with its class" +
671               " definition("+aFeatItemClassName+").Happened while tried to"+
672               " add feature: " +
673               aFeatStringRepresentation + " to the annotation " + currentAnnot);
674             }// End try
675             featObject.add(itemObj);
676           }// End if
677         }// End while
678       }catch(InstantiationException instex ){
679         return aFeatStringRepresentation;
680       }catch (IllegalAccessException iae){
681         return aFeatStringRepresentation;
682       }// End try
683       return featObject;
684     }// End if
685     // If currentfeatClass is not a Collection,test to see if
686     // it has a constructor that takes a String as param
687     Class[] params = new Class[1];
688     params[0] = java.lang.String.class;
689     try{
690       Constructor featConstr = currentFeatClass.getConstructor(params);
691       Object[] featConstrParams = new Object[1];
692       featConstrParams[0] = aFeatStringRepresentation;
693       Object featObject = featConstr.newInstance(featConstrParams);
694       return featObject;
695     } catch(Exception e){
696       return aFeatStringRepresentation;
697     }// End try
698   }// createFeatObject()
699 
700   /**
701    * This method tests if the Annotation ID has been used previously (in which case
702    * will rase an exception) and also adds the ID being tested to the annotationIdSet
703    * @param anAnnotId An Integer representing an annotation ID to be tested
704    * @throws GateSaxException if there is already an annotation wit the same ID
705    */
706   private void testAnnotationIdUnicity(Integer anAnnotId) throws GateSaxException{
707 
708     if (annotationIdSet.contains(anAnnotId))
709       throw new GateSaxException("Found two or possibly more annotations with" +
710               " the same ID! The offending ID was " + anAnnotId );
711     else  annotationIdSet.add(anAnnotId);
712   }// End of testAnnotationIdUnicity()
713 
714 
715   /**
716     * This method is called when the SAX parser encounts a comment
717     * It works only if the XmlDocumentHandler implements a
718     * com.sun.parser.LexicalEventListener
719     */
720   public void comment(String text) throws SAXException {
721   }//comment
722 
723   /**
724     * This method is called when the SAX parser encounts a start of a CDATA
725     * section
726     * It works only if the XmlDocumentHandler implements a
727     * com.sun.parser.LexicalEventListener
728     */
729   public void startCDATA()throws SAXException {
730   }//startCDATA
731 
732   /**
733     * This method is called when the SAX parser encounts the end of a CDATA
734     * section.
735     * It works only if the XmlDocumentHandler implements a
736     * com.sun.parser.LexicalEventListener
737     */
738   public void endCDATA() throws SAXException {
739   }//endCDATA
740 
741   /**
742     * This method is called when the SAX parser encounts a parsed Entity
743     * It works only if the XmlDocumentHandler implements a
744     * com.sun.parser.LexicalEventListener
745     */
746   public void startParsedEntity(String name) throws SAXException {
747   }//startParsedEntity
748 
749   /**
750     * This method is called when the SAX parser encounts a parsed entity and
751     * informs the application if that entity was parsed or not
752     * It's working only if the CustomDocumentHandler implements a
753     *  com.sun.parser.LexicalEventListener
754     */
755   public void endParsedEntity(String name, boolean included)throws SAXException{
756   }//endParsedEntity
757 
758   //StatusReporter Implementation
759 
760   /**
761     * This methos is called when a listener is registered with this class
762     */
763   public void addStatusListener(StatusListener listener){
764     myStatusListeners.add(listener);
765   }//addStatusListener
766   /**
767     * This methos is called when a listener is removed
768     */
769   public void removeStatusListener(StatusListener listener){
770     myStatusListeners.remove(listener);
771   }//removeStatusListener
772   /**
773     * This methos is called whenever we need to inform the listener about an
774     * event.
775   */
776   protected void fireStatusChangedEvent(String text){
777     Iterator listenersIter = myStatusListeners.iterator();
778     while(listenersIter.hasNext())
779       ((StatusListener)listenersIter.next()).statusChanged(text);
780   }//fireStatusChangedEvent
781 
782   // XmlDocumentHandler member data
783 
784   /** This constant indicates when to fire the status listener.
785     * This listener will add an overhead and we don't want a big overhead.
786     * It will be callled from ELEMENTS_RATE to ELEMENTS_RATE
787     */
788   final static  int ELEMENTS_RATE = 128;
789 
790   /** This object indicates what to do when the parser encounts an error */
791   private SimpleErrorHandler _seh = new SimpleErrorHandler();
792 
793   /** The content of the XML document, without any tag */
794   private StringBuffer tmpDocContent = new StringBuffer("");
795 
796   /** A gate document */
797   private gate.Document doc = null;
798 
799   /** Listeners for status report */
800   protected List myStatusListeners = new LinkedList();
801 
802   /** This reports the the number of elements that have beed processed so far*/
803   private int elements = 0;
804 
805   /** We need a colection to retain all the CustomObjects that will be
806     * transformed into annotation over the gate document...
807     * At the end of every annotation set read the objects in the colector are
808     * transformed into annotations...
809     */
810   private List colector = null;
811   /** Maps nodes Ids to their offset in the document text. Those offsets will
812     * be used when creating annotations
813     */
814   private Map id2Offset = new TreeMap();
815   /** Holds the current element read.*/
816   private Stack currentElementStack = new Stack();
817   /** This inner objects maps an annotation object. When an annotation from the
818     * xml document was read this structure is filled out
819     */
820   private AnnotationObject currentAnnot = null;
821   /** A map holding current annotation's features*/
822   private FeatureMap  currentFeatureMap = null;
823   /** A key of the current feature*/
824   private String currentFeatureName = null;
825   /** The value of the current feature*/
826   private String currentFeatureValue = null;
827   /** The class name of the key in the current feature*/
828   private String currentFeatureKeyClassName = null;
829   /** If the key is a collection then we need to know the class name of the
830     * items present in this collection. The next field holds just that.
831     */
832   private String currentFeatureKeyItemClassName = null;
833   /** The class name for the value in the current feature*/
834   private String currentFeatureValueClassName = null;
835   /** If the value is a collection then we need to know the class name of the
836     * items present in this collection. The next field holds just that.
837     */
838   private String currentFeatureValueItemClassName = null;
839   /** the current annotation set that is being created and filled with
840     * annotations
841     */
842   private AnnotationSet currentAnnotationSet = null;
843 
844   /** An inner class modeling the information contained by an annotation.*/
845   class  AnnotationObject {
846     /** Constructor */
847     public AnnotationObject(){}//AnnotationObject
848 
849     /** Accesor for the annotation type modeled here as ElemName */
850     public String getElemName(){
851       return elemName;
852     }//getElemName
853     /** Accesor for the feature map*/
854     public FeatureMap getFM(){
855       return fm;
856     }// getFM()
857     /** Accesor for the start ofset*/
858     public Long getStart(){
859       return start;
860     }// getStart()
861     /** Accesor for the end offset*/
862     public Long getEnd(){
863       return end;
864     }// getEnd()
865     /** Mutator for the annotation type */
866     public void setElemName(String anElemName){
867       elemName = anElemName;
868     }// setElemName();
869     /** Mutator for the feature map*/
870     public void setFM(FeatureMap aFm){
871       fm = aFm;
872     }// setFM();
873     /** Mutator for the start offset*/
874     public void setStart(Long aStart){
875       start = aStart;
876     }// setStart();
877     /** Mutator for the end offset*/
878     public void setEnd(Long anEnd){
879       end = anEnd;
880     }// setEnd();
881     /** Accesor for the id*/
882     public Integer getId() {
883       return id;
884     }// End of getId()
885     /** Mutator for the id*/
886     public void setId(Integer anId) {
887       id = anId;
888     }// End of setId()
889 
890     public String toString(){
891       return " [id =" + id +
892       " type=" + elemName +
893       " startNode=" + start+
894       " endNode=" + end+
895       " features="+ fm +"] ";
896     }
897 
898     // Data fields
899     private String elemName = null;
900     private FeatureMap fm = null;
901     private Long start = null;
902     private Long end  = null;
903     private Integer id = null;
904   } // AnnotationObject
905 }//GateFormatXmlDocumentHandler
906 
907