| GateFormatXmlDocumentHandler.java |
1 /*
2 * GateFormatXmlDocumentHandler.java
3 *
4 * Copyright (c) 1998-2005, The University of Sheffield.
5 *
6 * This file is part of GATE (see http://gate.ac.uk/), and is free
7 * software, licenced under the GNU Library General Public License,
8 * Version 2, June 1991 (in the distribution as file licence.html,
9 * and also available at http://gate.ac.uk/gate/licence.html).
10 *
11 * Cristian URSU, 22 Nov 2000
12 *
13 * $Id: GateFormatXmlDocumentHandler.java,v 1.32 2006/02/21 17:31:46 cursu Exp $
14 */
15
16 package gate.xml;
17
18 import java.lang.reflect.Constructor;
19 import java.util.*;
20
21 import org.xml.sax.*;
22 import org.xml.sax.helpers.DefaultHandler;
23
24 import gate.*;
25 import gate.corpora.DocumentContentImpl;
26 import gate.corpora.DocumentImpl;
27 import gate.event.StatusListener;
28 import gate.util.*;
29
30
/**
 * SAX handler that implements the reader for GATE XML documents, i.e.
 * documents saved with the toXml() method of the document implementation.
 */
35 public class GateFormatXmlDocumentHandler extends DefaultHandler{
36 /** Debug flag */
37 private static final boolean DEBUG = false;
38
39 /** This is used to capture all data within two tags before calling the actual characters method */
40 private StringBuffer contentBuffer = new StringBuffer("");
41
42 /** This is a variable that shows if characters have been read */
43 private boolean readCharacterStatus = false;
44
45
46 /** An OLD GATE XML format is the one in which Annotations IDs are not present */
47 private static final int OLD = 1;
48 /** A NEW GATE XML format is the one in which Annotations IDs are present */
49 private static final int NEW = 2;
50 /** This value signifies that the document being read can be either OLD or NEW*/
51 private static final int UNDEFINED = 0;
52
53 /** In the beginning we don't know the type of GATE XML format that we read.
54 * We need to be able to read both types, but not a mixture of them
55 */
56 private int gateXmlFormatType = UNDEFINED;
57
58 /** A Set recording every annotation ID read from the XML file.
59 * It is used to check the consistency of the annotations being read.
60 * At the end we need the maximum ID in order to set the annotation
61 * ID generator on the document. This is why we need a TreeSet.
62 */
63 private TreeSet annotationIdSet = new TreeSet();
64
65 /**
66 */
67 public GateFormatXmlDocumentHandler(gate.Document aDocument){
68 // This string contains the plain text (the text without markup)
69 tmpDocContent = new StringBuffer(aDocument.getContent().size().intValue());
70
71 // Colector is used later to transform all custom objects into annotation
72 // objects
73 colector = new LinkedList();
74
75 // The Gate document
76 doc = aDocument;
77 currentAnnotationSet = doc.getAnnotations();
78 }//GateFormatXmlDocumentHandler
79
80 /**
81 * This method is called when the SAX parser encounts the beginning of the
82 * XML document.
83 */
84 public void startDocument() throws org.xml.sax.SAXException {
85 }// startDocument
86
87 /**
88 * This method is called when the SAX parser encounts the end of the
89 * XML document.
90 * Here we set the content of the gate Document to be the one generated
91 * inside this class (tmpDocContent).
92 * After that we use the colector to generate all the annotation reffering
93 * this new gate document.
94 */
95 public void endDocument() throws org.xml.sax.SAXException {
96
97 // replace the document content with the one without markups
98 doc.setContent(new DocumentContentImpl(tmpDocContent.toString()));
99 //long docSize = doc.getContent().size().longValue();
100
101 // If annotations were present in the NEW GATE XML document format,
102 // set the document generator to start from th next MAX Annot ID value
103 if (gateXmlFormatType == NEW && !annotationIdSet.isEmpty()){
104 // Because annotationIdSet is a TreeSet its elements are already sorted.
105 // The last element will contain the maximum value
106 Integer maxAnnotID = (Integer) annotationIdSet.last();
107 // Set the document generator to start from the maxAnnotID value
108 ((DocumentImpl)doc).setNextAnnotationId(maxAnnotID.intValue() + 1);
109 // Dispose of the annotationIdSet
110 annotationIdSet = null;
111 }//fi
112
113 // fire the status listener
114 fireStatusChangedEvent("Total elements: " + elements);
115
116 }// endDocument
117
118 /**
119 * This method is called when the SAX parser encounts the beginning of an
120 * XML element.
121 */
122 public void startElement (String uri, String qName, String elemName,
123 Attributes atts) throws SAXException {
124
125 // call characterActions
126 if(readCharacterStatus) {
127 readCharacterStatus = false;
128 charactersAction(new String(contentBuffer).toCharArray(),0,contentBuffer.length());
129 }
130
131 // Inform the progress listener to fire only if no of elements processed
132 // so far is a multiple of ELEMENTS_RATE
133 if ((++elements % ELEMENTS_RATE) == 0 )
134 fireStatusChangedEvent("Processed elements : " + elements);
135
136 // Set the curent element being processed
137 currentElementStack.add(elemName);
138
139 if("AnnotationSet".equals(elemName))
140 processAnnotationSetElement(atts);
141
142 if("Annotation".equals(elemName))
143 processAnnotationElement(atts);
144
145 if("Feature".equals(elemName))
146 processFeatureElement(atts);
147
148 if("Name".equals(elemName))
149 processNameElement(atts);
150
151 if("Value".equals(elemName))
152 processValueElement(atts);
153
154 if("Node".equals(elemName))
155 processNodeElement(atts);
156 }// startElement
157
158 /**
159 * This method is called when the SAX parser encounts the end of an
160 * XML element.
161 */
162 public void endElement (String uri, String qName, String elemName )
163 throws SAXException{
164
165 // call characterActions
166 if(readCharacterStatus) {
167 readCharacterStatus = false;
168 charactersAction(new String(contentBuffer).toCharArray(),0,contentBuffer.length());
169 }
170
171 currentElementStack.pop();
172 // Deal with Annotation
173 if ("Annotation".equals(elemName)){
174 if (currentFeatureMap == null)
175 currentFeatureMap = Factory.newFeatureMap();
176 currentAnnot.setFM(currentFeatureMap);
177 colector.add(currentAnnot);
178 // Reset current Annot and current featue map
179 currentAnnot = null;
180 currentFeatureMap = null;
181 return;
182 }// End if
183 // Deal with Value
184 if ("Value".equals(elemName) && "Feature".equals(
185 (String)currentElementStack.peek())){
186 // If the Value tag was empty, then an empty string will be created.
187 if (currentFeatureValue == null) currentFeatureValue = "";
188 }// End if
189 // Deal with Feature
190 if ("Feature".equals(elemName)){
191 if(currentFeatureName == null){
192 // Cannot add the (key,value) pair to the map
193 // One of them is null something was wrong in the XML file.
194 throw new GateSaxException("A feature name was empty." +
195 "The annotation that cause it is " +
196 currentAnnot +
197 ".Please check the document with a text editor before trying again.");
198 }else {
199 if (currentFeatureMap == null){
200 // The XMl file was somehow altered and a start Feature wasn't found.
201 throw new GateSaxException("Document not consistent. A start"+
202 " feature element is missing. " +
203 "The annotation that cause it is " +
204 currentAnnot +
205 "Please check the document with a text editor before trying again.");
206 }// End if
207 // Create the appropiate feature name and values
208 // If those object cannot be created, their string representation will
209 // be used.
210 currentFeatureMap.put(createFeatKey(),createFeatValue());
211 // currentFeatureMap.put(currentFeatureName,currentFeatureValue);
212 // Reset current key
213 currentFeatureKeyClassName = null;
214 currentFeatureKeyItemClassName = null;
215 currentFeatureName = null;
216 // Reset current value
217 currentFeatureValueClassName = null;
218 currentFeatureValueItemClassName = null;
219 currentFeatureValue = null;
220 }// End if
221 // Reset the Name & Value pair.
222 currentFeatureName = null;
223 currentFeatureValue = null;
224 return;
225 }//End if
226 // Deal GateDocumentFeatures
227 if ("GateDocumentFeatures".equals(elemName)){
228 if (currentFeatureMap == null)
229 currentFeatureMap = Factory.newFeatureMap();
230 doc.setFeatures(currentFeatureMap);
231 currentFeatureMap = null;
232 return;
233 }// End if
234
235 // Deal with AnnotationSet
236 if ("AnnotationSet".equals(elemName)){
237 // Create and add annotations to the currentAnnotationSet
238 Iterator iterator = colector.iterator();
239 while (iterator.hasNext()){
240 AnnotationObject annot = (AnnotationObject) iterator.next();
241 // Clear the annot from the colector
242 iterator.remove();
243
244 // Create a new annotation and add it to the annotation set
245 try{
246
247 // This is the result of a code-fix.The XML writter has been modified
248 // to serialize the annotation ID.In order to keep backward compatibility
249 // with previously saved documents we had to keep the old code(where the id
250 // is not added) in place.
251 // If the document presents a mixture of the two formats, then error is signaled
252
253 // Check if the Annotation ID is present or not
254 if (annot.getId() == null){
255 //Annotation without ID. We assume the OLD format.
256
257 // If we previously detected a NEW format, then we have a mixture of the two
258 if (gateXmlFormatType == NEW)
259 // Signal the error to the user
260 throw new GateSaxException("Found an annotation without ID while " +
261 "previous annotations had one." + "The NEW GATE XML document format requires" +
262 " all annotations to have an UNIQUE ID." +
263 " The offending annotation was of [type=" + annot.getElemName() +
264 ", startOffset=" + annot.getStart() +
265 ", endOffset=" + annot.getEnd() + "]");
266
267 // We are reading OLD format document
268 gateXmlFormatType = OLD;
269 currentAnnotationSet.add( annot.getStart(),
270 annot.getEnd(),
271 annot.getElemName(),
272 annot.getFM());
273 }else{
274 // Annotation with ID. We assume the NEW format
275
276 // If we previously detected an OLD format, then it means we have a mixture of the two
277 if (gateXmlFormatType == OLD)
278 // Signal the error to the user
279 throw new GateSaxException("Found an annotation with ID while " +
280 "previous annotations didn't have one." + "The OLD GATE XML" +
281 "document format requires all annotations NOT to have an ID." +
282 " The offending annotation was of [Id=" + annot.getId() +
283 ", type=" + annot.getElemName() +
284 ", startOffset=" + annot.getStart() +
285 ", endOffset=" + annot.getEnd() + "]");
286
287 gateXmlFormatType = NEW;
288 // Test for the unicity of the annotation ID being used
289 // If the ID is not Unique, the method will throw an exception
290 testAnnotationIdUnicity(annot.getId());
291
292 // Add the annotation
293 currentAnnotationSet.add( annot.getId(),
294 annot.getStart(),
295 annot.getEnd(),
296 annot.getElemName(),
297 annot.getFM());
298 }
299 }catch (gate.util.InvalidOffsetException e){
300 throw new GateSaxException(e);
301 }// End try
302 }// End while
303 // The colector is empty and ready for the next AnnotationSet
304 return;
305 }// End if
306
307
308 }//endElement
309
310 /**
311 * This method is called when the SAX parser encounts text in the XML doc.
312 * Here we calculate the end indices for all the elements present inside the
313 * stack and update with the new values.
314 */
315 public void characters(char [] text,int start,int length) throws SAXException {
316 if(!readCharacterStatus) {
317 contentBuffer = new StringBuffer(new String(text,start,length));
318 } else {
319 contentBuffer.append(new String(text,start,length));
320 }
321 readCharacterStatus = true;
322 }
323
324 /**
325 * This method is called when all characters between specific tags have been read completely
326 */
327 public void charactersAction( char[] text,int start,int length) throws SAXException{
328 // Create a string object based on the reported text
329 String content = new String(text, start, length);
330 if ("TextWithNodes".equals((String)currentElementStack.peek())){
331 processTextOfTextWithNodesElement(content);
332 return;
333 }// End if
334 if ("Name".equals((String)currentElementStack.peek())){
335 processTextOfNameElement(content);
336 return;
337 }// End if
338 if ("Value".equals((String)currentElementStack.peek())){
339 //if (currentFeatureName != null && "string".equals(currentFeatureName) &&
340 //currentAnnot!= null && "Token".equals(currentAnnot.getElemName()) &&
341 //currentAnnot.getEnd().longValue() == 1063)
342 //System.out.println("Content=" + content + " start="+ start + " length=" + length);
343 processTextOfValueElement(content);
344 return;
345 }// End if
346 }//characters
347
348 /**
349 * This method is called when the SAX parser encounts white spaces
350 */
351 public void ignorableWhitespace(char ch[],int start,int length) throws
352 SAXException{
353 }//ignorableWhitespace
354
355 /**
356 * Error method.We deal with this exception inside SimpleErrorHandler class
357 */
358 public void error(SAXParseException ex) throws SAXException {
359 // deal with a SAXParseException
360 // see SimpleErrorhandler class
361 _seh.error(ex);
362 }//error
363
364 /**
365 * FatalError method.
366 */
367 public void fatalError(SAXParseException ex) throws SAXException {
368 // deal with a SAXParseException
369 // see SimpleErrorhandler class
370 _seh.fatalError(ex);
371 }//fatalError
372
373 /**
374 * Warning method comment.
375 */
376 public void warning(SAXParseException ex) throws SAXException {
377 // deal with a SAXParseException
378 // see SimpleErrorhandler class
379 _seh.warning(ex);
380 }//warning
381
382 // Custom methods section
383
384
385 /** This method deals with a AnnotationSet element. */
386 private void processAnnotationSetElement(Attributes atts){
387 if (atts != null){
388 for (int i = 0; i < atts.getLength(); i++) {
389 // Extract name and value
390 String attName = atts.getLocalName(i);
391 String attValue = atts.getValue(i);
392 if ("Name".equals(attName))
393 currentAnnotationSet = doc.getAnnotations(attValue);
394 }// End for
395 }// End if
396 }//processAnnotationSetElement
397
398 /** This method deals with the start of a Name element*/
399 private void processNameElement(Attributes atts){
400 if (atts == null) return;
401 currentFeatureKeyClassName = atts.getValue("className");
402 currentFeatureKeyItemClassName = atts.getValue("itemClassName");
403 }// End processNameElement();
404
405 /** This method deals with the start of a Value element*/
406 private void processValueElement(Attributes atts){
407 if (atts == null) return;
408 currentFeatureValueClassName = atts.getValue("className");
409 currentFeatureValueItemClassName = atts.getValue("itemClassName");
410 }// End processValueElement();
411
412 /** This method deals with a Annotation element. */
413 private void processAnnotationElement(Attributes atts){
414 if (atts != null){
415 currentAnnot = new AnnotationObject();
416 for (int i = 0; i < atts.getLength(); i++) {
417 // Extract name and value
418 String attName = atts.getLocalName(i);
419 String attValue = atts.getValue(i);
420
421 if ("Id".equals(attName))
422 currentAnnot.setId(new Integer(attValue));
423
424 if ("Type".equals(attName))
425 currentAnnot.setElemName(attValue);
426
427 try{
428 if ("StartNode".equals(attName)){
429 Integer id = new Integer(attValue);
430 Long offset = (Long)id2Offset.get(id);
431 if (offset == null){
432 throw new GateRuntimeException("Couldn't found Node with id = " +
433 id +
434 ".It was specified in annot " +
435 currentAnnot+
436 " as a start node!" +
437 "Check the document with a text editor or something"+
438 " before trying again.");
439
440 }else
441 currentAnnot.setStart(offset);
442 }// Endif
443 if ("EndNode".equals(attName)){
444 Integer id = new Integer(attValue);
445 Long offset = (Long) id2Offset.get(id);
446 if (offset == null){
447 throw new GateRuntimeException("Couldn't found Node with id = " +
448 id+
449 ".It was specified in annot " +
450 currentAnnot+
451 " as a end node!" +
452 "Check the document with a text editor or something"+
453 " before trying again.");
454 }else
455 currentAnnot.setEnd(offset);
456 }// End if
457 } catch (NumberFormatException e){
458 throw new GateRuntimeException("Offsets problems.Couldn't create"+
459 " Integers from" + " id[" +
460 attValue + "]) in annot " +
461 currentAnnot+
462 "Check the document with a text editor or something,"+
463 " before trying again");
464 }// End try
465 }// End For
466 }// End if
467 }//processAnnotationElement
468
469 /** This method deals with a Features element. */
470 private void processFeatureElement(Attributes atts){
471 // The first time feature is calle it will create a features map.
472 if (currentFeatureMap == null)
473 currentFeatureMap = Factory.newFeatureMap();
474 }//processFeatureElement
475
476 /** This method deals with a Node element. */
477 private void processNodeElement(Attributes atts){
478 if (atts != null){
479 for (int i = 0; i < atts.getLength(); i++) {
480 // Extract name and value
481 String attName = atts.getLocalName(i);
482 String attValue = atts.getValue(i);
483 //System.out.println("Node : " + attName + "=" +attValue);
484 if ("id".equals(attName)){
485 try{
486 Integer id = new Integer(attValue);
487 id2Offset.put(id,new Long(tmpDocContent.length()));
488 }catch(NumberFormatException e){
489 throw new GateRuntimeException("Coudn't create a node from " +
490 attValue + " Expected an integer.");
491 }// End try
492 }// End if
493 }// End for
494 }// End if
495 }// processNodeElement();
496
497 /** This method deals with a Text belonging to TextWithNodes element. */
498 private void processTextOfTextWithNodesElement(String text){
499 text = recoverNewLineSequence(text);
500 tmpDocContent.append(text);
501 }//processTextOfTextWithNodesElement
502
503 /** Restore new line as in the original document if needed */
504 private String recoverNewLineSequence(String text) {
505 String result = text;
506
507 // check for new line
508 if(text.indexOf('\n') != -1) {
509 String newLineType =
510 (String) doc.getFeatures().get(GateConstants.DOCUMENT_NEW_LINE_TYPE);
511
512 if("LF".equalsIgnoreCase(newLineType)) {
513 newLineType = null;
514 }
515
516 // exit with the same text if the change isn't necessary
517 if(newLineType == null) return result;
518
519 String newLine = "\n";
520 if("CRLF".equalsIgnoreCase(newLineType)) {
521 newLine = "\r\n";
522 }
523 if("CR".equalsIgnoreCase(newLineType)) {
524 newLine = "\r";
525 }
526 if("LFCR".equalsIgnoreCase(newLineType)) {
527 newLine = "\n\r";
528 }
529
530 StringBuffer buff = new StringBuffer(text);
531 int index = text.lastIndexOf('\n');
532 while(index != -1) {
533 buff.replace(index, index+1, newLine);
534 index = text.lastIndexOf('\n', index-1);
535 } // while
536 result = buff.toString();
537 } // if
538
539 return result;
540 } // recoverNewLineSequence(String text)
541
542 /** This method deals with a Text belonging to Name element. */
543 private void processTextOfNameElement(String text) throws GateSaxException{
544 if (currentFeatureMap == null)
545 throw new GateSaxException("GATE xml format processing error:" +
546 " Found a Name element that is not enclosed into a Feature one while" +
547 " analyzing the annotation " +
548 currentAnnot +
549 "Please check the document with a text editor or something before" +
550 " trying again.");
551 else{
552 // In the entities case, characters() gets called separately for each
553 // entity so the text needs to be appended.
554 if (currentFeatureName == null)
555 currentFeatureName = text;
556 else
557 currentFeatureName = currentFeatureName + text;
558 }// End If
559 }//processTextOfNameElement();
560
561 /** This method deals with a Text belonging to Value element. */
562 private void processTextOfValueElement(String text) throws GateSaxException{
563 if (currentFeatureMap == null)
564 throw new GateSaxException("GATE xml format processing error:" +
565 " Found a Value element that is not enclosed into a Feature one while" +
566 " analyzing the annotation " +
567 currentAnnot+
568 "Please check the document with a text editor or something before" +
569 " trying again.");
570 else{
571 // In the entities case, characters() gets called separately for each
572 // entity so the text needs to be appended.
573 if (currentFeatureValue == null)
574 currentFeatureValue = text;
575 else
576 currentFeatureValue = currentFeatureValue + text;
577 }// End If
578 }//processTextOfValueElement();
579
580 /** Creates a feature key using this information:
581 * currentFeatureKeyClassName, currentFeatureKeyItemClassName,
582 * currentFeatureName. See createFeatObject() method for more details.
583 */
584 private Object createFeatKey(){
585 return createFeatObject(currentFeatureKeyClassName,
586 currentFeatureKeyItemClassName,
587 currentFeatureName);
588 }//createFeatKey()
589
590 /** Creates a feature value using this information:
591 * currentFeatureValueClassName, currentFeatureValueItemClassName,
592 * currentFeatureValue. See createFeatObject() method for more details.
593 */
594 private Object createFeatValue(){
595 return createFeatObject(currentFeatureValueClassName,
596 currentFeatureValueItemClassName,
597 currentFeatureValue);
598 }//createFeatValue()
599
600 /** This method tries to reconstruct an object given its class name and its
601 * string representation. If the object is a Collection then the items
602 * from its string representation must be separated by a ";". In that
603 * case, the currentFeatureValueItemClassName is used to create items
604 * belonging to this class.
605 * @param aFeatClassName represents the name of the class of
606 * the feat object being created. If it is null then the javaLang.String will
607 * be used as default.
608 * @param aFeatItemClassName is it used only if aFeatClassName is a
609 * collection.If it is null then java.lang.String will be used as default;
610 * @param aFeatStringRepresentation sais it all
611 * @return an Object created from aFeatClassName and its
612 * aFeatStringRepresentation. If not possible, then aFeatStringRepresentation
613 * is returned.
614 * @throws GateRuntimeException If it can't create an item, that
615 * does not comply with its class definition, to add to the
616 * collection.
617 */
618 private Object createFeatObject( String aFeatClassName,
619 String aFeatItemClassName,
620 String aFeatStringRepresentation){
621 // If the string rep is null then the object will be null;
622 if (aFeatStringRepresentation == null) return null;
623 if (aFeatClassName == null) aFeatClassName = "java.lang.String";
624 if (aFeatItemClassName == null) aFeatItemClassName = "java.lang.String";
625 Class currentFeatClass = null;
626 try{
627 currentFeatClass = Gate.getClassLoader().loadClass(aFeatClassName);
628 }catch (ClassNotFoundException cnfex){
629 return aFeatStringRepresentation;
630 }// End try
631 if (java.util.Collection.class.isAssignableFrom(currentFeatClass)){
632 Class itemClass = null;
633 Collection featObject = null;
634 try{
635 featObject = (Collection) currentFeatClass.newInstance();
636 try{
637 itemClass = Gate.getClassLoader().loadClass(aFeatItemClassName);
638 }catch(ClassNotFoundException cnfex){
639 Out.prln("Warning: Item class "+ aFeatItemClassName + " not found."+
640 "Adding items as Strings to the feature called \"" + currentFeatureName
641 + "\" in the annotation " + currentAnnot);
642 itemClass = java.lang.String.class;
643 }// End try
644 // Let's detect if itemClass takes a constructor with a String as param
645 Class[] paramsArray = new Class[1];
646 paramsArray[0] = java.lang.String.class;
647 Constructor itemConstructor = null;
648 boolean addItemAsString = false;
649 try{
650 itemConstructor = itemClass.getConstructor(paramsArray);
651 }catch (NoSuchMethodException nsme){
652 addItemAsString = true;
653 }catch (SecurityException se){
654 addItemAsString = true;
655 }// End try
656 StringTokenizer strTok = new StringTokenizer(
657 aFeatStringRepresentation,";");
658 Object[] params = new Object[1];
659 Object itemObj = null;
660 while (strTok.hasMoreTokens()){
661 String itemStrRep = strTok.nextToken();
662 if (addItemAsString) featObject.add(itemStrRep);
663 else{
664 params[0] = itemStrRep;
665 try{
666 itemObj = itemConstructor.newInstance(params);
667 }catch (Exception e){
668 throw new GateRuntimeException("An item("+
669 itemStrRep +
670 ") does not comply with its class" +
671 " definition("+aFeatItemClassName+").Happened while tried to"+
672 " add feature: " +
673 aFeatStringRepresentation + " to the annotation " + currentAnnot);
674 }// End try
675 featObject.add(itemObj);
676 }// End if
677 }// End while
678 }catch(InstantiationException instex ){
679 return aFeatStringRepresentation;
680 }catch (IllegalAccessException iae){
681 return aFeatStringRepresentation;
682 }// End try
683 return featObject;
684 }// End if
685 // If currentfeatClass is not a Collection,test to see if
686 // it has a constructor that takes a String as param
687 Class[] params = new Class[1];
688 params[0] = java.lang.String.class;
689 try{
690 Constructor featConstr = currentFeatClass.getConstructor(params);
691 Object[] featConstrParams = new Object[1];
692 featConstrParams[0] = aFeatStringRepresentation;
693 Object featObject = featConstr.newInstance(featConstrParams);
694 return featObject;
695 } catch(Exception e){
696 return aFeatStringRepresentation;
697 }// End try
698 }// createFeatObject()
699
700 /**
701 * This method tests if the Annotation ID has been used previously (in which case
702 * will rase an exception) and also adds the ID being tested to the annotationIdSet
703 * @param anAnnotId An Integer representing an annotation ID to be tested
704 * @throws GateSaxException if there is already an annotation wit the same ID
705 */
706 private void testAnnotationIdUnicity(Integer anAnnotId) throws GateSaxException{
707
708 if (annotationIdSet.contains(anAnnotId))
709 throw new GateSaxException("Found two or possibly more annotations with" +
710 " the same ID! The offending ID was " + anAnnotId );
711 else annotationIdSet.add(anAnnotId);
712 }// End of testAnnotationIdUnicity()
713
714
715 /**
716 * This method is called when the SAX parser encounts a comment
717 * It works only if the XmlDocumentHandler implements a
718 * com.sun.parser.LexicalEventListener
719 */
720 public void comment(String text) throws SAXException {
721 }//comment
722
723 /**
724 * This method is called when the SAX parser encounts a start of a CDATA
725 * section
726 * It works only if the XmlDocumentHandler implements a
727 * com.sun.parser.LexicalEventListener
728 */
729 public void startCDATA()throws SAXException {
730 }//startCDATA
731
732 /**
733 * This method is called when the SAX parser encounts the end of a CDATA
734 * section.
735 * It works only if the XmlDocumentHandler implements a
736 * com.sun.parser.LexicalEventListener
737 */
738 public void endCDATA() throws SAXException {
739 }//endCDATA
740
741 /**
742 * This method is called when the SAX parser encounts a parsed Entity
743 * It works only if the XmlDocumentHandler implements a
744 * com.sun.parser.LexicalEventListener
745 */
746 public void startParsedEntity(String name) throws SAXException {
747 }//startParsedEntity
748
749 /**
750 * This method is called when the SAX parser encounts a parsed entity and
751 * informs the application if that entity was parsed or not
752 * It's working only if the CustomDocumentHandler implements a
753 * com.sun.parser.LexicalEventListener
754 */
755 public void endParsedEntity(String name, boolean included)throws SAXException{
756 }//endParsedEntity
757
758 //StatusReporter Implementation
759
760 /**
761 * This methos is called when a listener is registered with this class
762 */
763 public void addStatusListener(StatusListener listener){
764 myStatusListeners.add(listener);
765 }//addStatusListener
766 /**
767 * This methos is called when a listener is removed
768 */
769 public void removeStatusListener(StatusListener listener){
770 myStatusListeners.remove(listener);
771 }//removeStatusListener
772 /**
773 * This methos is called whenever we need to inform the listener about an
774 * event.
775 */
776 protected void fireStatusChangedEvent(String text){
777 Iterator listenersIter = myStatusListeners.iterator();
778 while(listenersIter.hasNext())
779 ((StatusListener)listenersIter.next()).statusChanged(text);
780 }//fireStatusChangedEvent
781
782 // XmlDocumentHandler member data
783
784 /** This constant indicates when to fire the status listener.
785 * This listener will add an overhead and we don't want a big overhead.
786 * It will be callled from ELEMENTS_RATE to ELEMENTS_RATE
787 */
788 final static int ELEMENTS_RATE = 128;
789
790 /** This object indicates what to do when the parser encounts an error */
791 private SimpleErrorHandler _seh = new SimpleErrorHandler();
792
793 /** The content of the XML document, without any tag */
794 private StringBuffer tmpDocContent = new StringBuffer("");
795
796 /** A gate document */
797 private gate.Document doc = null;
798
799 /** Listeners for status report */
800 protected List myStatusListeners = new LinkedList();
801
802 /** This reports the the number of elements that have beed processed so far*/
803 private int elements = 0;
804
805 /** We need a colection to retain all the CustomObjects that will be
806 * transformed into annotation over the gate document...
807 * At the end of every annotation set read the objects in the colector are
808 * transformed into annotations...
809 */
810 private List colector = null;
811 /** Maps nodes Ids to their offset in the document text. Those offsets will
812 * be used when creating annotations
813 */
814 private Map id2Offset = new TreeMap();
815 /** Holds the current element read.*/
816 private Stack currentElementStack = new Stack();
817 /** This inner objects maps an annotation object. When an annotation from the
818 * xml document was read this structure is filled out
819 */
820 private AnnotationObject currentAnnot = null;
821 /** A map holding current annotation's features*/
822 private FeatureMap currentFeatureMap = null;
823 /** A key of the current feature*/
824 private String currentFeatureName = null;
825 /** The value of the current feature*/
826 private String currentFeatureValue = null;
827 /** The class name of the key in the current feature*/
828 private String currentFeatureKeyClassName = null;
829 /** If the key is a collection then we need to know the class name of the
830 * items present in this collection. The next field holds just that.
831 */
832 private String currentFeatureKeyItemClassName = null;
833 /** The class name for the value in the current feature*/
834 private String currentFeatureValueClassName = null;
835 /** If the value is a collection then we need to know the class name of the
836 * items present in this collection. The next field holds just that.
837 */
838 private String currentFeatureValueItemClassName = null;
839 /** the current annotation set that is being created and filled with
840 * annotations
841 */
842 private AnnotationSet currentAnnotationSet = null;
843
844 /** An inner class modeling the information contained by an annotation.*/
845 class AnnotationObject {
846 /** Constructor */
847 public AnnotationObject(){}//AnnotationObject
848
849 /** Accesor for the annotation type modeled here as ElemName */
850 public String getElemName(){
851 return elemName;
852 }//getElemName
853 /** Accesor for the feature map*/
854 public FeatureMap getFM(){
855 return fm;
856 }// getFM()
857 /** Accesor for the start ofset*/
858 public Long getStart(){
859 return start;
860 }// getStart()
861 /** Accesor for the end offset*/
862 public Long getEnd(){
863 return end;
864 }// getEnd()
865 /** Mutator for the annotation type */
866 public void setElemName(String anElemName){
867 elemName = anElemName;
868 }// setElemName();
869 /** Mutator for the feature map*/
870 public void setFM(FeatureMap aFm){
871 fm = aFm;
872 }// setFM();
873 /** Mutator for the start offset*/
874 public void setStart(Long aStart){
875 start = aStart;
876 }// setStart();
877 /** Mutator for the end offset*/
878 public void setEnd(Long anEnd){
879 end = anEnd;
880 }// setEnd();
881 /** Accesor for the id*/
882 public Integer getId() {
883 return id;
884 }// End of getId()
885 /** Mutator for the id*/
886 public void setId(Integer anId) {
887 id = anId;
888 }// End of setId()
889
890 public String toString(){
891 return " [id =" + id +
892 " type=" + elemName +
893 " startNode=" + start+
894 " endNode=" + end+
895 " features="+ fm +"] ";
896 }
897
898 // Data fields
899 private String elemName = null;
900 private FeatureMap fm = null;
901 private Long start = null;
902 private Long end = null;
903 private Integer id = null;
904 } // AnnotationObject
905 }//GateFormatXmlDocumentHandler
906
907