1
15
16 package gate.corpora;
17
18 import java.io.IOException;
19 import java.net.URL;
20 import java.util.*;
21
22 import gate.*;
23 import gate.annotation.AnnotationSetImpl;
24 import gate.creole.AbstractLanguageResource;
25 import gate.creole.ResourceInstantiationException;
26 import gate.event.*;
27 import gate.util.*;
28
29
131 public class DocumentImpl
132 extends AbstractLanguageResource implements TextualDocument, CreoleListener,
133 DatastoreListener {
134
135 private static final boolean DEBUG = false;
136
137
140 private Boolean preserveOriginalContent = new Boolean(false);
141
142
146 private Boolean collectRepositioningInfo = new Boolean(false);
147
148
153 private Annotation crossedOverAnnotation = null;
154
155
/**
 * Default constructor: starts the document off with an empty string
 * content and a default-constructed {@link DocumentContentImpl}.
 */
public DocumentImpl() {
  this.stringContent = "";
  this.content = new DocumentContentImpl();
}
161
162 public FeatureMap getFeatures() {
163 if (features == null) {
164 features = new SimpleFeatureMapImpl();
165 }
166 return features;
167 }
168
169
170 public Resource init() throws ResourceInstantiationException {
171 if(sourceUrl == null) {
173 if(stringContent == null) {
174 throw new ResourceInstantiationException(
175 "The sourceURL and document's content were null."
176 );
177 }
178
179 content = new DocumentContentImpl(stringContent);
180 getFeatures().put("gate.SourceURL", "created from String");
181 } else {
182 try {
183 content = new DocumentContentImpl(
184 sourceUrl, getEncoding(), sourceUrlStartOffset, sourceUrlEndOffset);
185 getFeatures().put("gate.SourceURL", sourceUrl.toExternalForm());
186 } catch(IOException e) {
187 throw new ResourceInstantiationException("DocumentImpl.init: " + e);
188 }
189 }
190
191 if(preserveOriginalContent.booleanValue() && content != null) {
192 String originalContent = new String(
193 ((DocumentContentImpl) content).getOriginalContent());
194 getFeatures().put(GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME,
195 originalContent);
196 }
198 if(getMarkupAware().booleanValue()) {
200 DocumentFormat docFormat =
201 DocumentFormat.getDocumentFormat(this, sourceUrl);
202 try {
203 if(docFormat != null){
204 StatusListener sListener = (StatusListener)
205 gate.gui.MainFrame.getListeners().
206 get("gate.event.StatusListener");
207 if(sListener != null) docFormat.addStatusListener(sListener);
208
209 docFormat.setShouldCollectRepositioning(collectRepositioningInfo);
211
212 if(docFormat.getShouldCollectRepositioning().booleanValue()) {
213 RepositioningInfo info = new RepositioningInfo();
215
216 String origContent = (String) getFeatures().get(
217 GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME);
218
219 RepositioningInfo ampCodingInfo = new RepositioningInfo();
220 if(origContent != null) {
221 boolean shouldCorrectCR = docFormat instanceof XmlDocumentFormat;
222 collectInformationForAmpCodding(origContent, ampCodingInfo,
223 shouldCorrectCR);
224 if(docFormat instanceof HtmlDocumentFormat) {
225 collectInformationForWS(origContent, ampCodingInfo);
226 } }
229 docFormat.unpackMarkup(this, info, ampCodingInfo);
230
231 if(origContent != null
232 && docFormat instanceof XmlDocumentFormat) {
233 correctRepositioningForCRLFInXML(origContent, info);
235 }
237 getFeatures().put(
238 GateConstants.DOCUMENT_REPOSITIONING_INFO_FEATURE_NAME, info);
239 }
240 else {
241 docFormat.unpackMarkup(this);
243 }
244 docFormat.removeStatusListener(sListener);
245 } } catch(DocumentFormatException e) {
247 throw new ResourceInstantiationException(
248 "Couldn't unpack markup in document " + (sourceUrl != null ? sourceUrl.toExternalForm() : "") +
249 "!", e);
250 }
251 }
253
262 return this;
263 }
265
268 private void correctRepositioningForCRLFInXML(String content,
269 RepositioningInfo info) {
270 int index = -1;
271
272 do {
273 index = content.indexOf("\r\n", index+1);
274 if(index != -1) {
275 info.correctInformationOriginalMove(index, 1);
276 } } while(index != -1);
278 }
280
293 private void collectInformationForAmpCodding(String content,
294 RepositioningInfo info,
295 boolean shouldCorrectCR) {
296
297 if(content == null || info == null) return;
298
299 int ampIndex = -1;
300 int semiIndex;
301
302 do {
303 ampIndex = content.indexOf('&', ampIndex+1);
304 if(ampIndex != -1) {
305 semiIndex = content.indexOf(';', ampIndex+1);
306 if(semiIndex != -1 && (semiIndex-ampIndex) < 8) {
308 info.addPositionInfo(ampIndex, semiIndex-ampIndex+1, 0, 1);
309 }
310 else {
311 int maxEnd = Math.min(ampIndex+8, content.length());
314 String ampCandidate = content.substring(ampIndex, maxEnd);
315 int ampCodingSize = analyseAmpCodding(ampCandidate);
316
317 if(ampCodingSize != -1) {
318 info.addPositionInfo(ampIndex, ampCodingSize, 0, 1);
319 }
321 } } } while (ampIndex != -1);
324
325 int index = -1;
328
329 if(shouldCorrectCR) {
330 do {
331 index = content.indexOf("\r\n", index+1);
332 if(index != -1) {
333 info.correctInformationOriginalMove(index, -1);
334 } } while(index != -1);
336 } }
339
343 private int analyseAmpCodding(String content) {
344 int result = -1;
345
346 try {
347 char ch = content.charAt(1);
348
349 switch(ch) {
350 case 'l' : case 'L' : if(content.charAt(2) == 't' || content.charAt(2) == 'T') {
353 result = 3;
354 } break;
356 case 'g' : case 'G' : if(content.charAt(2) == 't' || content.charAt(2) == 'T') {
359 result = 3;
360 } break;
362 case 'a' : case 'A' : if(content.substring(2, 4).equalsIgnoreCase("mp")) {
365 result = 4;
366 } break;
368 case 'q' : case 'Q' : if(content.substring(2, 5).equalsIgnoreCase("uot")) {
371 result = 5;
372 } break;
374 case '#' : int endIndex = 2;
376 boolean hexCoded = false;
377 if(content.charAt(2) == 'x' || content.charAt(2) == 'X') {
378 ++endIndex;
380 hexCoded = true;
381 }
383 while (endIndex < 8
384 && isNumber(content.charAt(endIndex), hexCoded) ) {
385 ++endIndex;
386 } result = endIndex;
388 break;
389 } } catch (StringIndexOutOfBoundsException ex) {
391 }
394 return result;
395 }
397
398 private boolean isNumber(char ch, boolean hex) {
399 if(ch >= '0' && ch <= '9') return true;
400
401 if(hex) {
402 if(ch >= 'A' && ch <= 'F') return true;
403 if(ch >= 'a' && ch <= 'f') return true;
404 }
406 return false;
407 }
409
415 private void collectInformationForWS(String content, RepositioningInfo info) {
416
417 if(content == null || info == null) return;
418
419 char ch;
421 int startWS, endWS;
422
423 startWS = endWS = -1;
424 int contentLength = content.length();
425
426 for(int i=0; i<contentLength; ++i) {
427 ch = content.charAt(i);
428
429 if(ch <= ' ') {
431 if(startWS == -1) {
432 startWS = i;
433 } endWS = i;
435 }
436 else {
437 if(endWS - startWS > 0) {
438 info.addPositionInfo(
440 (long)startWS, (long)(endWS - startWS + 1), 0, 1);
441 } startWS = endWS = -1;
444 } } }
448
449 public void cleanup() {
450
451 defaultAnnots = null;
452 if ( (namedAnnotSets != null) && (!namedAnnotSets.isEmpty()))
453 namedAnnotSets.clear();
454 if (DEBUG) Out.prln("Document cleanup called");
455 if (this.lrPersistentId != null)
456 Gate.getCreoleRegister().removeCreoleListener(this);
457 if(this.getDataStore() != null)
458 this.getDataStore().removeDatastoreListener(this);
459 }
461
462
463 public URL getSourceUrl() { return sourceUrl; }
464
465
466 public void setSourceUrl(URL sourceUrl) {
467 this.sourceUrl = sourceUrl;
468 }
470
473 public Long[] getSourceUrlOffsets() {
474 Long[] sourceUrlOffsets = new Long[2];
475 sourceUrlOffsets[0] = sourceUrlStartOffset;
476 sourceUrlOffsets[1] = sourceUrlEndOffset;
477 return sourceUrlOffsets;
478 }
480
485 public void setPreserveOriginalContent(Boolean b) {
486 preserveOriginalContent = b;
487 }
489
493 public Boolean getPreserveOriginalContent() {
494 return preserveOriginalContent;
495 }
497
505 public void setCollectRepositioningInfo(Boolean b) {
506 collectRepositioningInfo = b;
507 }
509
517 public Boolean getCollectRepositioningInfo() {
518 return collectRepositioningInfo;
519 }
521
525 public Long getSourceUrlStartOffset() { return sourceUrlStartOffset; }
526
527
531 public void setSourceUrlStartOffset(Long sourceUrlStartOffset) {
532 this.sourceUrlStartOffset = sourceUrlStartOffset;
533 }
535
539 public Long getSourceUrlEndOffset() { return sourceUrlEndOffset; }
540
541
545 public void setSourceUrlEndOffset(Long sourceUrlEndOffset) {
546 this.sourceUrlEndOffset = sourceUrlEndOffset;
547 }
549
550 public DocumentContent getContent() { return content; }
551
552
553 public void setContent(DocumentContent content) {
554 this.content = content;
555 this.stringContent = content.toString();
556 }
557
558
559 public String getEncoding() {
560 if(encoding == null || encoding.trim().length() == 0){
562 encoding = java.nio.charset.Charset.forName(
564 System.getProperty("file.encoding")).name();
565 }
566 return encoding;
567 }
568
569
570 public void setEncoding(String encoding) { this.encoding = encoding; }
571
572
575 public AnnotationSet getAnnotations() {
576 if(defaultAnnots == null){
577 defaultAnnots = new AnnotationSetImpl(this);
578 fireAnnotationSetAdded(new DocumentEvent(
579 this, DocumentEvent.ANNOTATION_SET_ADDED, null));
580 } return defaultAnnots;
582 }
584
588 public AnnotationSet getAnnotations(String name) {
589 if(name == null) return getAnnotations();
590 if(namedAnnotSets == null)
591 namedAnnotSets = new HashMap();
592 AnnotationSet namedSet = (AnnotationSet) namedAnnotSets.get(name);
593
594 if(namedSet == null) {
595 namedSet = new AnnotationSetImpl(this, name);
596 namedAnnotSets.put(name, namedSet);
597
598 DocumentEvent evt = new DocumentEvent(
599 this, DocumentEvent.ANNOTATION_SET_ADDED, name
600 );
601 fireAnnotationSetAdded(evt);
602 }
603 return namedSet;
604 }
606
613 public void setMarkupAware(Boolean newMarkupAware) {
614 this.markupAware = newMarkupAware;
615 }
616
617
621 public Boolean getMarkupAware() { return markupAware; }
622
623
629 public String toXml(Set aSourceAnnotationSet){
630 return toXml(aSourceAnnotationSet, true);
631 }
632
633
648 public String toXml(Set aSourceAnnotationSet, boolean includeFeatures){
649
650 if(hasOriginalContentFeatures()) {
651 return saveAnnotationSetAsXmlInOrig(aSourceAnnotationSet,includeFeatures);
652 }
654 AnnotationSet originalMarkupsAnnotSet =
655 this.getAnnotations(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
656
657 List dumpingList = new ArrayList(originalMarkupsAnnotSet.size());
661
662
670 StatusListener sListener = (StatusListener)
671 gate.gui.MainFrame.getListeners().
672 get("gate.event.StatusListener");
673 if(sListener != null)
677 sListener.statusChanged("Constructing the dumping annotation set.");
678 dumpingList.addAll(originalMarkupsAnnotSet);
680 if (aSourceAnnotationSet != null){
684 Iterator iter = aSourceAnnotationSet.iterator();
685 while (iter.hasNext()){
686 Annotation currentAnnot = (Annotation) iter.next();
687 if(insertsSafety(dumpingList,currentAnnot)){
688 dumpingList.add(currentAnnot);
690 }else if (crossedOverAnnotation != null && DEBUG){
691 try {
692 Out.prln("Warning: Annotations were found to violate the " +
693 "crossed over condition: \n" +
694 "1. [" +
695 getContent().getContent(
696 crossedOverAnnotation.getStartNode().getOffset(),
697 crossedOverAnnotation.getEndNode().getOffset()) +
698 " (" + crossedOverAnnotation.getType() + ": " +
699 crossedOverAnnotation.getStartNode().getOffset() +
700 ";" + crossedOverAnnotation.getEndNode().getOffset() +
701 ")]\n" +
702 "2. [" +
703 getContent().getContent(
704 currentAnnot.getStartNode().getOffset(),
705 currentAnnot.getEndNode().getOffset()) +
706 " (" + currentAnnot.getType() + ": " +
707 currentAnnot.getStartNode().getOffset() +
708 ";" + currentAnnot.getEndNode().getOffset() +
709 ")]\nThe second one will be discarded.\n" );
710 } catch (gate.util.InvalidOffsetException ex) {
711 throw new GateRuntimeException(ex.getMessage());
712 }
713 } } }
717 Collections.sort(dumpingList, new gate.util.OffsetComparator());
719
720 if(sListener != null) sListener.statusChanged("Dumping annotations as XML");
723 StringBuffer xmlDoc = new StringBuffer(
724 DOC_SIZE_MULTIPLICATION_FACTOR*(this.getContent().size().intValue()));
725
726 String mimeType = getFeatures() == null ?
728 null :
729 (String)getFeatures().get("MimeType");
730 boolean wasXML = mimeType != null && mimeType.equalsIgnoreCase("text/xml");
731
732 if(wasXML){
733 xmlDoc.append("<?xml version=\"1.0\" encoding=\"");
734 xmlDoc.append(getEncoding());
735 xmlDoc.append("\" ?>");
736 xmlDoc.append(Strings.getNl());
737 } theRootAnnotation = identifyTheRootAnnotation(dumpingList);
740 if (theRootAnnotation != null){
743 dumpingList.remove(theRootAnnotation);
744 xmlDoc.append(writeStartTag(theRootAnnotation,includeFeatures));
745 } xmlDoc.append(saveAnnotationSetAsXml(dumpingList, includeFeatures));
748 if (theRootAnnotation != null){
751 xmlDoc.append(writeEndTag(theRootAnnotation));
752 }
754 if(sListener != null) sListener.statusChanged("Done.");
755 return xmlDoc.toString();
756 }
758
766 private boolean insertsSafety(AnnotationSet aTargetAnnotSet,
767 Annotation aSourceAnnotation){
768
769 if (aTargetAnnotSet == null || aSourceAnnotation == null) {
770 this.crossedOverAnnotation = null;
771 return false;
772 }
773 if (aSourceAnnotation.getStartNode() == null ||
774 aSourceAnnotation.getStartNode().getOffset()== null) {
775 this.crossedOverAnnotation = null;
776 return false;
777 }
778 if (aSourceAnnotation.getEndNode() == null ||
779 aSourceAnnotation.getEndNode().getOffset()== null) {
780 this.crossedOverAnnotation = null;
781 return false;
782 }
783
784 Long start = aSourceAnnotation.getStartNode().getOffset();
786 Long end = aSourceAnnotation.getEndNode().getOffset();
787 long s2 = start.longValue();
789 long e2 = end.longValue();
790
791 AnnotationSet as = aTargetAnnotSet.get(start,end);
794
795 Iterator it = as.iterator();
798 while(it.hasNext()){
799 Annotation ann = (Annotation) it.next();
800 long s1 = ann.getStartNode().getOffset().longValue();
802 long e1 = ann.getEndNode().getOffset().longValue();
803
804 if (s1<s2 && s2<e1 && e1<e2) {
805 this.crossedOverAnnotation = ann;
806 return false;
807 }
808 if (s2<s1 && s1<e2 && e2<e1) {
809 this.crossedOverAnnotation = ann;
810 return false;
811 }
812 } return true;
814 }
816 private boolean insertsSafety(List aTargetAnnotList,
817 Annotation aSourceAnnotation){
818
819 if (aTargetAnnotList == null || aSourceAnnotation == null) {
820 this.crossedOverAnnotation = null;
821 return false;
822 }
823 if (aSourceAnnotation.getStartNode() == null ||
824 aSourceAnnotation.getStartNode().getOffset()== null) {
825 this.crossedOverAnnotation = null;
826 return false;
827 }
828 if (aSourceAnnotation.getEndNode() == null ||
829 aSourceAnnotation.getEndNode().getOffset()== null) {
830 this.crossedOverAnnotation = null;
831 return false;
832 }
833
834 Long start = aSourceAnnotation.getStartNode().getOffset();
836 Long end = aSourceAnnotation.getEndNode().getOffset();
837 long s2 = start.longValue();
839 long e2 = end.longValue();
840
841 List as = new ArrayList();
844 for (int i=0; i < aTargetAnnotList.size(); i++) {
845 Annotation annot = (Annotation) aTargetAnnotList.get(i);
846 if (annot.getStartNode().getOffset().longValue() >= s2
847 &&
848 annot.getStartNode().getOffset().longValue() <= e2)
849 as.add(annot);
850 else if (annot.getEndNode().getOffset().longValue() >= s2
851 &&
852 annot.getEndNode().getOffset().longValue() <= e2)
853 as.add(annot);
854 }
855
856 Iterator it = as.iterator();
859 while(it.hasNext()){
860 Annotation ann = (Annotation) it.next();
861 long s1 = ann.getStartNode().getOffset().longValue();
863 long e1 = ann.getEndNode().getOffset().longValue();
864
865 if (s1<s2 && s2<e1 && e1<e2) {
866 this.crossedOverAnnotation = ann;
867 return false;
868 }
869 if (s2<s1 && s1<e2 && e2<e1) {
870 this.crossedOverAnnotation = ann;
871 return false;
872 }
873 } return true;
875 }
877
887 private String saveAnnotationSetAsXml(AnnotationSet aDumpAnnotSet,
888 boolean includeFeatures){
889 String content = null;
890 if (this.getContent()== null)
891 content = new String("");
892 else
893 content = this.getContent().toString();
894 StringBuffer docContStrBuff = filterNonXmlChars(new StringBuffer(content));
895 if (aDumpAnnotSet == null) return docContStrBuff.toString();
896
897 TreeMap offsets2CharsMap = new TreeMap();
898 if (this.getContent().size().longValue() != 0){
899 buildEntityMapFromString(content,offsets2CharsMap);
902 }
909 TreeSet offsets = new TreeSet();
911 Iterator iter = aDumpAnnotSet.iterator();
912 while (iter.hasNext()){
913 Annotation annot = (Annotation) iter.next();
914 offsets.add(annot.getStartNode().getOffset());
915 offsets.add(annot.getEndNode().getOffset());
916 }
918 while (!offsets.isEmpty()){
922 Long offset = (Long)offsets.last();
923 offsets.remove(offset);
925 List annotations = getAnnotationsForOffset(aDumpAnnotSet,offset);
929 StringBuffer tmpBuff = new StringBuffer(
932 DOC_SIZE_MULTIPLICATION_FACTOR*(this.getContent().size().intValue()));
933 Stack stack = new Stack();
934 Iterator it = annotations.iterator();
936 while(it.hasNext()){
937 Annotation a = (Annotation) it.next();
938 it.remove();
939 if ( offset.equals(a.getEndNode().getOffset()) ){
941 if ( offset.equals(a.getStartNode().getOffset()) ){
943 if ( null != a.getFeatures().get("isEmptyAndSpan") &&
945 "true".equals((String)a.getFeatures().get("isEmptyAndSpan"))){
946
947 tmpBuff.append(writeStartTag(a, includeFeatures));
949 stack.push(a);
950 }else{
951 tmpBuff.append(writeEmptyTag(a));
953 aDumpAnnotSet.remove(a);
955 } }else{
957 if (!stack.isEmpty()){
960 while(!stack.isEmpty()){
961 Annotation a1 = (Annotation)stack.pop();
962 tmpBuff.append(writeEndTag(a1));
963 } } tmpBuff.append(writeEndTag(a));
966 } }else{
968 if ( offset.equals(a.getStartNode().getOffset()) ){
971 if (!stack.isEmpty()){
974 while(!stack.isEmpty()){
975 Annotation a1 = (Annotation)stack.pop();
976 tmpBuff.append(writeEndTag(a1));
977 } } tmpBuff.append(writeStartTag(a, includeFeatures));
980 aDumpAnnotSet.remove(a);
982 } } }
986 if (!stack.isEmpty()){
988 while(!stack.isEmpty()){
989 Annotation a1 = (Annotation)stack.pop();
990 tmpBuff.append(writeEndTag(a1));
991 } }
994 if (!offsets2CharsMap.isEmpty()){
998 Long offsChar = (Long) offsets2CharsMap.lastKey();
999 while( !offsets2CharsMap.isEmpty() &&
1000 offsChar.intValue() >= offset.intValue()){
1001 docContStrBuff.replace(offsChar.intValue(),offsChar.intValue()+1,
1004 (String)entitiesMap.get((Character)offsets2CharsMap.get(offsChar)));
1005 offsets2CharsMap.remove(offsChar);
1007 if (!offsets2CharsMap.isEmpty())
1009 offsChar = (Long) offsets2CharsMap.lastKey();
1010 } } docContStrBuff.insert(offset.intValue(),tmpBuff.toString());
1014 } while (!offsets2CharsMap.isEmpty()){
1019 Long offsChar = (Long) offsets2CharsMap.lastKey();
1020 docContStrBuff.replace(offsChar.intValue(),offsChar.intValue()+1,
1022 (String)entitiesMap.get((Character)offsets2CharsMap.get(offsChar)));
1023 offsets2CharsMap.remove(offsChar);
1025 } return docContStrBuff.toString();
1027 }
1029 private String saveAnnotationSetAsXml(List aDumpAnnotList,
1030 boolean includeFeatures){
1031 String content = null;
1032 if (this.getContent()== null)
1033 content = new String("");
1034 else
1035 content = this.getContent().toString();
1036 StringBuffer docContStrBuff = filterNonXmlChars(new StringBuffer(content));
1037 if (aDumpAnnotList == null) return docContStrBuff.toString();
1038
1039 StringBuffer resultStrBuff = new StringBuffer(
1040 DOC_SIZE_MULTIPLICATION_FACTOR*(this.getContent().size().intValue()));
1041 Long lastOffset = new Long(0);
1043
1044 TreeMap offsets2CharsMap = new TreeMap();
1045 HashMap annotsForOffset = new HashMap(100);
1046 if (this.getContent().size().longValue() != 0){
1047 buildEntityMapFromString(content,offsets2CharsMap);
1050 }
1057 TreeSet offsets = new TreeSet();
1059 Iterator iter = aDumpAnnotList.iterator();
1060 Annotation annot;
1061 Long start;
1062 Long end;
1063 while (iter.hasNext()){
1064 annot = (Annotation) iter.next();
1065 start = annot.getStartNode().getOffset();
1066 end = annot.getEndNode().getOffset();
1067 offsets.add(start);
1068 offsets.add(end);
1069 if (annotsForOffset.containsKey(start)) {
1070 ((List) annotsForOffset.get(start)).add(annot);
1071 } else {
1072 List newList = new ArrayList(10);
1073 newList.add(annot);
1074 annotsForOffset.put(start, newList);
1075 }
1076 if (annotsForOffset.containsKey(end)) {
1077 ((List) annotsForOffset.get(end)).add(annot);
1078 } else {
1079 List newList = new ArrayList(10);
1080 newList.add(annot);
1081 annotsForOffset.put(end, newList);
1082 }
1083 }
1085 Iterator offsetIt = offsets.iterator();
1089 Long offset;
1090 List annotations;
1091 StringBuffer tmpBuff = new StringBuffer(255);
1093 Stack stack = new Stack();
1094 while (offsetIt.hasNext()){
1095 offset = (Long)offsetIt.next();
1096 annotations = (List) annotsForOffset.get(offset);
1100 annotations = getAnnotationsForOffset(annotations, offset);
1102 tmpBuff.setLength(0);
1104 stack.clear();
1105
1106 Iterator it = annotations.iterator();
1108 Annotation a;
1109 Annotation annStack;
1110 while(it.hasNext()){
1111 a = (Annotation) it.next();
1112 if ( offset.equals(a.getEndNode().getOffset()) ){
1114 if ( offset.equals(a.getStartNode().getOffset()) ){
1116 if ( null != a.getFeatures().get("isEmptyAndSpan") &&
1118 "true".equals((String)a.getFeatures().get("isEmptyAndSpan"))){
1119
1120 tmpBuff.append(writeStartTag(a, includeFeatures));
1122 stack.push(a);
1123 }else{
1124 tmpBuff.append(writeEmptyTag(a));
1126 aDumpAnnotList.remove(a);
1128 } }else{
1130 if (!stack.isEmpty()){
1133 while(!stack.isEmpty()){
1134 annStack = (Annotation)stack.pop();
1135 tmpBuff.append(writeEndTag(annStack));
1136 } } tmpBuff.append(writeEndTag(a));
1139 } }else{
1141 if ( offset.equals(a.getStartNode().getOffset()) ){
1144 if (!stack.isEmpty()){
1147 while(!stack.isEmpty()){
1148 annStack = (Annotation)stack.pop();
1149 tmpBuff.append(writeEndTag(annStack));
1150 } } tmpBuff.append(writeStartTag(a, includeFeatures));
1153 } } }
1158 if (!stack.isEmpty()){
1160 while(!stack.isEmpty()){
1161 annStack = (Annotation)stack.pop();
1162 tmpBuff.append(writeEndTag(annStack));
1163 } }
1166 StringBuffer partText = new StringBuffer();
1168 SortedMap offsetsInRange =
1169 offsets2CharsMap.subMap(lastOffset, offset);
1170 Long tmpOffset;
1171 Long tmpLastOffset = lastOffset;
1172 String replacement;
1173
1174 if(!offsetsInRange.isEmpty()) {
1177 tmpOffset = (Long) offsetsInRange.firstKey();
1178 replacement =
1179 (String)entitiesMap.get((Character)offsets2CharsMap.get(tmpOffset));
1180 partText.append(docContStrBuff.substring(tmpLastOffset.intValue(),
1181 tmpOffset.intValue()));
1182 partText.append(replacement);
1183 tmpLastOffset = new Long(tmpOffset.longValue()+1);
1184 }
1185 partText.append(docContStrBuff.substring(tmpLastOffset.intValue(),
1186 offset.intValue()));
1187 resultStrBuff.append(partText);
1188 resultStrBuff.append(tmpBuff.toString());
1190 lastOffset = offset;
1191 }
1193 StringBuffer partText = new StringBuffer();
1196 SortedMap offsetsInRange =
1197 offsets2CharsMap.subMap(lastOffset, new Long(docContStrBuff.length()));
1198 Long tmpOffset;
1199 Long tmpLastOffset = lastOffset;
1200 String replacement;
1201
1202 if(!offsetsInRange.isEmpty()) {
1206 tmpOffset = (Long) offsetsInRange.firstKey();
1207 replacement =
1208 (String)entitiesMap.get((Character)offsets2CharsMap.get(tmpOffset));
1209 partText.append(docContStrBuff.substring(tmpLastOffset.intValue(),
1210 tmpOffset.intValue()));
1211 partText.append(replacement);
1212 tmpLastOffset = new Long(tmpOffset.longValue()+1);
1213 }
1214 partText.append(docContStrBuff.substring(tmpLastOffset.intValue(),
1215 docContStrBuff.length()));
1216 resultStrBuff.append(partText);
1217
1218 return resultStrBuff.toString();
1219 }
1221
1382
1383
1387 private boolean hasOriginalContentFeatures() {
1388 FeatureMap features = getFeatures();
1389 boolean result = false;
1390
1391 result =
1392 (features.get(GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME) != null)
1393 &&
1394 (features.get(GateConstants.DOCUMENT_REPOSITIONING_INFO_FEATURE_NAME)
1395 != null);
1396
1397 return result;
1398 }
1400
1410 private String saveAnnotationSetAsXmlInOrig(Set aSourceAnnotationSet,
1411 boolean includeFeatures){
1412 StringBuffer docContStrBuff;
1413
1414 String origContent;
1415
1416 origContent =
1417 (String)features.get(GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME);
1418 if(origContent == null) {
1419 origContent = "";
1420 }
1422 long originalContentSize = origContent.length();
1423
1424 RepositioningInfo repositioning = (RepositioningInfo)
1425 getFeatures().get(GateConstants.DOCUMENT_REPOSITIONING_INFO_FEATURE_NAME);
1426
1427 docContStrBuff = new StringBuffer(origContent);
1428 if (aSourceAnnotationSet == null) return docContStrBuff.toString();
1429
1430 StatusListener sListener = (StatusListener)
1431 gate.gui.MainFrame.getListeners().
1432 get("gate.event.StatusListener");
1433
1434 AnnotationSet originalMarkupsAnnotSet =
1435 this.getAnnotations(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
1436 AnnotationSet dumpingSet = new AnnotationSetImpl((Document) this);
1439 if(sListener != null)
1440 sListener.statusChanged("Constructing the dumping annotation set.");
1441 if (aSourceAnnotationSet != null){
1445 Iterator iter = aSourceAnnotationSet.iterator();
1446 Annotation currentAnnot;
1447 while (iter.hasNext()){
1448 currentAnnot = (Annotation) iter.next();
1449 if(insertsSafety(originalMarkupsAnnotSet, currentAnnot)
1450 && insertsSafety(dumpingSet, currentAnnot)){
1451 dumpingSet.add(currentAnnot);
1452 }else{
1453 Out.prln("Warning: Annotation with ID=" + currentAnnot.getId() +
1454 ", startOffset=" + currentAnnot.getStartNode().getOffset() +
1455 ", endOffset=" + currentAnnot.getEndNode().getOffset() +
1456 ", type=" + currentAnnot.getType()+ " was found to violate the" +
1457 " crossed over condition. It will be discarded");
1458 } } }
1462 if(sListener != null) sListener.statusChanged("Dumping annotations as XML");
1465
1466
1471 TreeSet offsets = new TreeSet();
1473 Iterator iter = aSourceAnnotationSet.iterator();
1474 while (iter.hasNext()){
1475 Annotation annot = (Annotation) iter.next();
1476 offsets.add(annot.getStartNode().getOffset());
1477 offsets.add(annot.getEndNode().getOffset());
1478 }
1480 while (!offsets.isEmpty()){
1484 Long offset = (Long)offsets.last();
1485 offsets.remove(offset);
1487 List annotations = getAnnotationsForOffset(aSourceAnnotationSet,offset);
1491 StringBuffer tmpBuff = new StringBuffer("");
1493 Stack stack = new Stack();
1494 Iterator it = annotations.iterator();
1496 Annotation a = null;
1497 while(it.hasNext()) {
1498 a = (Annotation) it.next();
1499 it.remove();
1500 if ( offset.equals(a.getEndNode().getOffset()) ){
1502 if ( offset.equals(a.getStartNode().getOffset()) ){
1504 if ( null != a.getFeatures().get("isEmptyAndSpan") &&
1506 "true".equals((String)a.getFeatures().get("isEmptyAndSpan"))){
1507
1508 tmpBuff.append(writeStartTag(a, includeFeatures, false));
1510 stack.push(a);
1511 }else{
1512 tmpBuff.append(writeEmptyTag(a, false));
1514 aSourceAnnotationSet.remove(a);
1516 } }else{
1518 while(!stack.isEmpty()){
1521 Annotation a1 = (Annotation)stack.pop();
1522 tmpBuff.append(writeEndTag(a1));
1523 } tmpBuff.append(writeEndTag(a));
1525 } }else{
1527 if ( offset.equals(a.getStartNode().getOffset()) ){
1530 while(!stack.isEmpty()){
1533 Annotation a1 = (Annotation)stack.pop();
1534 tmpBuff.append(writeEndTag(a1));
1535 }
1537 tmpBuff.append(writeStartTag(a, includeFeatures, false));
1538 aSourceAnnotationSet.remove(a);
1540 } } }
1544 while(!stack.isEmpty()){
1546 Annotation a1 = (Annotation)stack.pop();
1547 tmpBuff.append(writeEndTag(a1));
1548 }
1550 long originalPosition = -1;
1551 boolean backPositioning =
1552 a != null && offset.equals(a.getEndNode().getOffset());
1553 if ( backPositioning ) {
1554 originalPosition =
1556 repositioning.getOriginalPos(offset.intValue(), true);
1557 }
1559 if(originalPosition == -1) {
1560 originalPosition = repositioning.getOriginalPos(offset.intValue());
1561 }
1563 if(originalPosition != -1 && originalPosition <= originalContentSize ) {
1565 docContStrBuff.insert((int) originalPosition, tmpBuff.toString());
1566 }
1567 else {
1568 Out.prln("Error in the repositioning. The offset ("+offset.intValue()
1569 +") could not be positioned in the original document. \n"
1570 +"Calculated position is: "+originalPosition
1571 +" placed back: "+backPositioning);
1572 }
1574 } if (theRootAnnotation != null)
1576 docContStrBuff.append(writeEndTag(theRootAnnotation));
1577 return docContStrBuff.toString();
1578 }
1580
1589 private List getAnnotationsForOffset(Set aDumpAnnotSet, Long offset){
1590 List annotationList = new LinkedList();
1591 if (aDumpAnnotSet == null || offset == null) return annotationList;
1592 Set annotThatStartAtOffset = new TreeSet(
1593 new AnnotationComparator(ORDER_ON_END_OFFSET,DESC));
1594 Set annotThatEndAtOffset = new TreeSet(
1595 new AnnotationComparator(ORDER_ON_START_OFFSET,DESC));
1596 Set annotThatStartAndEndAtOffset = new TreeSet(
1597 new AnnotationComparator(ORDER_ON_ANNOT_ID,ASC));
1598
1599 Iterator iter = aDumpAnnotSet.iterator();
1602 while(iter.hasNext()){
1603 Annotation ann = (Annotation) iter.next();
1604 if (offset.equals(ann.getStartNode().getOffset())){
1605 if (offset.equals(ann.getEndNode().getOffset()))
1606 annotThatStartAndEndAtOffset.add(ann);
1607 else
1608 annotThatStartAtOffset.add(ann);
1609 }else{
1610 if (offset.equals(ann.getEndNode().getOffset()))
1611 annotThatEndAtOffset.add(ann);
1612 } } annotationList.addAll(annotThatEndAtOffset);
1615 annotThatEndAtOffset = null;
1616 annotationList.addAll(annotThatStartAtOffset);
1617 annotThatStartAtOffset = null;
1618 iter = annotThatStartAndEndAtOffset.iterator();
1619 while(iter.hasNext()){
1620 Annotation ann = (Annotation) iter.next();
1621 Iterator it = annotationList.iterator();
1622 boolean breaked = false;
1623 while (it.hasNext()){
1624 Annotation annFromList = (Annotation) it.next();
1625 if (annFromList.getId().intValue() > ann.getId().intValue()){
1626 annotationList.add(annotationList.indexOf(annFromList),ann);
1627 breaked = true;
1628 break;
1629 } } if (!breaked)
1632 annotationList.add(ann);
1633 iter.remove();
1634 } return annotationList;
1636 }
1638 private List getAnnotationsForOffset(List aDumpAnnotList, Long offset){
1639 List annotationList = new ArrayList();
1640 if (aDumpAnnotList == null || offset == null) return annotationList;
1641 Set annotThatStartAtOffset;
1642 Set annotThatEndAtOffset;
1643 Set annotThatStartAndEndAtOffset;
1644 annotThatStartAtOffset = new TreeSet(
1645 new AnnotationComparator(ORDER_ON_END_OFFSET, DESC));
1646 annotThatEndAtOffset = new TreeSet(
1647 new AnnotationComparator(ORDER_ON_START_OFFSET, DESC));
1648 annotThatStartAndEndAtOffset = new TreeSet(
1649 new AnnotationComparator(ORDER_ON_ANNOT_ID, ASC));
1650
1651 Iterator iter = aDumpAnnotList.iterator();
1654 while(iter.hasNext()){
1655 Annotation ann = (Annotation) iter.next();
1656 if (offset.equals(ann.getStartNode().getOffset())){
1657 if (offset.equals(ann.getEndNode().getOffset()))
1658 annotThatStartAndEndAtOffset.add(ann);
1659 else
1660 annotThatStartAtOffset.add(ann);
1661 }else{
1662 if (offset.equals(ann.getEndNode().getOffset()))
1663 annotThatEndAtOffset.add(ann);
1664 } }
1667 annotationList.addAll(annotThatEndAtOffset);
1668 annotationList.addAll(annotThatStartAtOffset);
1669 annotThatEndAtOffset = null;
1670 annotThatStartAtOffset = null;
1671
1672 iter = annotThatStartAndEndAtOffset.iterator();
1673 while(iter.hasNext()){
1674 Annotation ann = (Annotation) iter.next();
1675 Iterator it = annotationList.iterator();
1676 boolean breaked = false;
1677 while (it.hasNext()){
1678 Annotation annFromList = (Annotation) it.next();
1679 if (annFromList.getId().intValue() > ann.getId().intValue()){
1680 annotationList.add(annotationList.indexOf(annFromList),ann);
1681 breaked = true;
1682 break;
1683 } } if (!breaked)
1686 annotationList.add(ann);
1687 iter.remove();
1688 } return annotationList;
1690 }
1692 private String writeStartTag(Annotation annot, boolean includeFeatures){
1693 return writeStartTag(annot, includeFeatures, true);
1694 }
1696
1697 private String writeStartTag(Annotation annot, boolean includeFeatures,
1698 boolean includeNamespace){
1699 AnnotationSet originalMarkupsAnnotSet =
1700 this.getAnnotations(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
1701
1702 StringBuffer strBuff = new StringBuffer("");
1703 if (annot == null) return strBuff.toString();
1704 if (theRootAnnotation != null && annot.getId().equals(theRootAnnotation.getId())){
1706 if (includeFeatures) {
1710 strBuff.append("<");
1711 strBuff.append(annot.getType());
1712 strBuff.append(" ");
1713 if(includeNamespace) {
1714 strBuff.append(" xmlns:gate=\"http://www.gate.ac.uk\"");
1715 strBuff.append(" gate:");
1716 }
1717 strBuff.append("gateId=\"");
1718 strBuff.append(annot.getId());
1719 strBuff.append("\"");
1720 strBuff.append(" ");
1721 if(includeNamespace) {
1722 strBuff.append("gate:");
1723 }
1724 strBuff.append("annotMaxId=\"");
1725 strBuff.append(nextAnnotationId);
1726 strBuff.append("\"");
1727 strBuff.append(writeFeatures(annot.getFeatures(), includeNamespace));
1728 strBuff.append(">");
1729 }
1730 else if (originalMarkupsAnnotSet.contains(annot)) {
1731 strBuff.append("<");
1732 strBuff.append(annot.getType());
1733 strBuff.append(writeFeatures(annot.getFeatures(), includeNamespace));
1734 strBuff.append(">");
1735 }
1736 else {
1737 strBuff.append("<");
1738 strBuff.append(annot.getType());
1739 strBuff.append(">");
1740 }
1741
1742 }else{
1743 if (includeFeatures) {
1747 strBuff.append("<");
1748 strBuff.append(annot.getType());
1749 strBuff.append(" ");
1750 if(includeNamespace) {
1751 strBuff.append("gate:");
1752 } strBuff.append("gateId=\"");
1754 strBuff.append(annot.getId());
1755 strBuff.append("\"");
1756 strBuff.append(writeFeatures(annot.getFeatures(), includeNamespace));
1757 strBuff.append(">");
1758 }
1759 else if (originalMarkupsAnnotSet.contains(annot)) {
1760 strBuff.append("<");
1761 strBuff.append(annot.getType());
1762 strBuff.append(writeFeatures(annot.getFeatures(), includeNamespace));
1763 strBuff.append(">");
1764 }
1765 else {
1766 strBuff.append("<");
1767 strBuff.append(annot.getType());
1768 strBuff.append(">");
1769 }
1770 } return strBuff.toString();
1772 }
1774
1784 private Annotation identifyTheRootAnnotation(AnnotationSet anAnnotationSet){
1785 if (anAnnotationSet == null) return null;
1786 Node startNode = anAnnotationSet.firstNode();
1789 Node endNode = anAnnotationSet.lastNode();
1790 if (startNode.getOffset().longValue() != 0) return null;
1795 Annotation theRootAnnotation = null;
1797 long start = startNode.getOffset().longValue();
1801 long end = endNode.getOffset().longValue();
1802 for(Iterator it = anAnnotationSet.iterator(); it.hasNext();){
1803 Annotation currentAnnot = (Annotation) it.next();
1804 if (
1807 (start == currentAnnot.getStartNode().getOffset().longValue()) &&
1808 (end == currentAnnot.getEndNode().getOffset().longValue())
1809 ){
1810 if (theRootAnnotation == null)
1812 theRootAnnotation = currentAnnot;
1813 else{
1814 if ( theRootAnnotation.getId().intValue() > currentAnnot.getId().intValue())
1816 theRootAnnotation = currentAnnot;
1817 } } } return theRootAnnotation;
1821 }
1823 private Annotation identifyTheRootAnnotation(List anAnnotationList){
1824 if (anAnnotationList == null || anAnnotationList.isEmpty()) return null;
1825 if(((Annotation)anAnnotationList.get(0)).
1828 getStartNode().getOffset().longValue() > 0) return null;
1829
1830 if (anAnnotationList.size() == 1){
1833 Annotation onlyAnn = (Annotation) anAnnotationList.get(0);
1834 if ( onlyAnn.getEndNode().getOffset().equals( content.size() ) ) return onlyAnn;
1835 return null;
1836 }
1837
1838 long start = 0; long end = 0; for(int i = 0; i < anAnnotationList.size(); i++){
1842 Annotation anAnnotation = (Annotation)anAnnotationList.get(i);
1843 long localEnd = anAnnotation.getEndNode().getOffset().longValue();
1844 if(localEnd > end) end = localEnd;
1845 }
1846
1847 Annotation theRootAnnotation = null;
1851 for(int i = 0; i < anAnnotationList.size(); i++){
1852 Annotation currentAnnot = (Annotation) anAnnotationList.get(i);
1853 long localStart = currentAnnot.getStartNode().getOffset().longValue();
1854 long localEnd = currentAnnot.getEndNode().getOffset().longValue();
1855 if (
1858 (start == localStart) && (end == localEnd)){
1859 if (theRootAnnotation == null) theRootAnnotation = currentAnnot;
1861 else{
1862 if (theRootAnnotation.getId().intValue() > currentAnnot.getId().intValue())
1864 theRootAnnotation = currentAnnot;
1865 } } } return theRootAnnotation;
1869 }
1871
1872
1877 private void buildEntityMapFromString(String aScanString, TreeMap aMapToFill){
1878 if (aScanString == null || aMapToFill == null) return;
1879 if (entitiesMap == null || entitiesMap.isEmpty()){
1880 Err.prln("WARNING: Entities map was not initialised !");
1881 return;
1882 } Iterator entitiesMapIterator = entitiesMap.keySet().iterator();
1885 Character c;
1886 int fromIndex;
1887 while(entitiesMapIterator.hasNext()){
1888 c = (Character) entitiesMapIterator.next();
1889 fromIndex = 0;
1890 while (-1 != fromIndex){
1891 fromIndex = aScanString.indexOf(c.charValue(),fromIndex);
1892 if (-1 != fromIndex){
1893 aMapToFill.put(new Long(fromIndex),c);
1894 fromIndex ++;
1895 } } } }
1900 private String writeEmptyTag(Annotation annot){
1901 return writeEmptyTag(annot, true);
1902 }
1904
1905 private String writeEmptyTag(Annotation annot, boolean includeNamespace){
1906 StringBuffer strBuff = new StringBuffer("");
1907 if (annot == null) return strBuff.toString();
1908
1909 strBuff.append("<");
1910 strBuff.append(annot.getType());
1911
1912 AnnotationSet originalMarkupsAnnotSet =
1913 this.getAnnotations(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
1914 if (! originalMarkupsAnnotSet.contains(annot)) {
1915 strBuff.append(" gateId=\"");
1916 strBuff.append(annot.getId());
1917 strBuff.append("\"");
1918 }
1919 strBuff.append(writeFeatures(annot.getFeatures(),includeNamespace));
1920 strBuff.append("/>");
1921
1922 return strBuff.toString();
1923 }
1925
1926 private String writeEndTag(Annotation annot){
1927 StringBuffer strBuff = new StringBuffer("");
1928 if (annot == null) return strBuff.toString();
1929
1934 strBuff.append("</"+annot.getType()+">");
1935
1936 return strBuff.toString();
1937 }
1939
1940 private String writeFeatures(FeatureMap feat, boolean includeNamespace){
1941 StringBuffer strBuff = new StringBuffer("");
1942 if (feat == null) return strBuff.toString();
1943 Iterator it = feat.keySet().iterator();
1944 while (it.hasNext()){
1945 Object key = it.next();
1946 Object value = feat.get(key);
1947 if ( (key != null) && (value != null) ){
1948 if ("isEmptyAndSpan".equals(key.toString()))
1951 continue;
1952 if( !(String.class.isAssignableFrom(key.getClass()) ||
1953 Number.class.isAssignableFrom(key.getClass()))){
1954
1955 Out.prln("Warning:Found a feature NAME("+key+") that doesn't came"+
1956 " from String or Number.(feature discarded)");
1957 continue;
1958 } if ( !(String.class.isAssignableFrom(value.getClass()) ||
1960 Number.class.isAssignableFrom(value.getClass()) ||
1961 java.util.Collection.class.isAssignableFrom(value.getClass()))){
1962
1963 Out.prln("Warning:Found a feature VALUE("+value+") that doesn't came"+
1964 " from String, Number or Collection.(feature discarded)");
1965 continue;
1966 } if ("matches".equals(key)) {
1968 strBuff.append(" ");
1969 if(includeNamespace) {
1970 strBuff.append("gate:");
1971 }
1972 strBuff.append(
1975 filterNonXmlChars(replaceCharsWithEntities(key.toString())));
1976 strBuff.append("=\"");
1977 }
1978 else {
1979 strBuff.append(" ");
1980 strBuff.append(
1983 filterNonXmlChars(replaceCharsWithEntities(key.toString())));
1984 strBuff.append("=\"");
1985 }
1986 if (java.util.Collection.class.isAssignableFrom(value.getClass())){
1987 Iterator valueIter = ((Collection)value).iterator();
1988 while(valueIter.hasNext()){
1989 Object item = valueIter.next();
1990 if (!(String.class.isAssignableFrom(item.getClass()) ||
1991 Number.class.isAssignableFrom(item.getClass())))
1992 continue;
1993 strBuff.append(
1996 filterNonXmlChars(replaceCharsWithEntities(item.toString())));
1997 strBuff.append(";");
1998 } if (strBuff.charAt(strBuff.length()-1) == ';')
2000 strBuff.deleteCharAt(strBuff.length()-1);
2001 }else{
2002 strBuff.append(
2005 filterNonXmlChars(replaceCharsWithEntities(value.toString())));
2006 } strBuff.append("\"");
2008 } } return strBuff.toString();
2011 }
2013
2018 public String toXml(){
2019 StringBuffer xmlContent = new StringBuffer(
2023 DOC_SIZE_MULTIPLICATION_FACTOR*(getContent().size().intValue()));
2024 xmlContent.append("<?xml version=\"1.0\" encoding=\"");
2026 xmlContent.append(getEncoding());
2027 xmlContent.append("\" ?>");
2028 xmlContent.append(Strings.getNl());
2029
2030 xmlContent.append("<GateDocument>\n");
2032 xmlContent.append("<!-- The document's features-->\n\n");
2033 xmlContent.append("<GateDocumentFeatures>\n");
2034
2035 xmlContent.append(featuresToXml(this.getFeatures()));
2036 xmlContent.append("</GateDocumentFeatures>\n");
2037 xmlContent.append("<!-- The document content area with serialized"+
2038 " nodes -->\n\n");
2039 xmlContent.append("<TextWithNodes>");
2041 xmlContent.append(textWithNodes(this.getContent().toString()));
2042 xmlContent.append("</TextWithNodes>\n");
2043 StatusListener sListener = (StatusListener)
2046 gate.gui.MainFrame.getListeners().
2047 get("gate.event.StatusListener");
2048 if(sListener != null)
2049 sListener.statusChanged("Saving the default annotation set ");
2050 xmlContent.append("<!-- The default annotation set -->\n\n");
2051 xmlContent.append(annotationSetToXml(this.getAnnotations()));
2052 if (namedAnnotSets != null){
2055 Iterator iter = namedAnnotSets.values().iterator();
2056 while(iter.hasNext()){
2057 AnnotationSet annotSet = (AnnotationSet) iter.next();
2058 xmlContent.append("<!-- Named annotation set -->\n\n");
2059 if(sListener != null) sListener.statusChanged("Saving " +
2061 annotSet.getName()+
2062 " annotation set ");
2063 xmlContent.append(annotationSetToXml(annotSet));
2064 } } xmlContent.append("</GateDocument>");
2068 if(sListener != null) sListener.statusChanged("Done !");
2069 return xmlContent.toString();
2071 }
2073
2081 private StringBuffer filterNonXmlChars(StringBuffer aStrBuffer){
2082 if (aStrBuffer == null) return new StringBuffer("");
2083 char space = ' ';
2085 for (int i=aStrBuffer.length()-1;i>=0; i--){
2086 if (!isXmlChar(aStrBuffer.charAt(i)))
2087 aStrBuffer.setCharAt(i, space);
2088 } return aStrBuffer;
2090 }
2092
2096 public static boolean isXmlChar(char ch){
2097 if (ch == 0x9 || ch == 0xA || ch ==0xD) return true;
2098 if ((0x20 <= ch) && (ch <= 0xD7FF)) return true;
2099 if ((0xE000 <= ch) && (ch <= 0xFFFD)) return true;
2100 if ((0x10000 <= ch) && (ch <= 0x10FFFF)) return true;
2101 return false;
2102 }
2104
2109 private String featuresToXml(FeatureMap aFeatureMap){
2110 StringBuffer str = new StringBuffer("");
2111
2112 if (aFeatureMap == null) return str.toString();
2113
2114 Set keySet = aFeatureMap.keySet();
2115 Iterator keyIterator = keySet.iterator();
2116 while(keyIterator.hasNext()){
2117 Object key = keyIterator.next();
2118 Object value = aFeatureMap.get(key);
2119 if ((key != null) && (value != null)){
2120 String keyClassName = null;
2121 String keyItemClassName = null;
2122 String valueClassName = null;
2123 String valueItemClassName = null;
2124 String key2String = key.toString();
2125 String value2String = value.toString();
2126
2127 Object item = null;
2128 if (key instanceof java.lang.String ||
2130 key instanceof java.lang.Number ||
2131 key instanceof java.util.Collection)
2132 keyClassName = key.getClass().getName();
2133
2134 if (value instanceof java.lang.String ||
2136 value instanceof java.lang.Number ||
2137 value instanceof java.util.Collection)
2138 valueClassName = value.getClass().getName();
2139
2140 if (keyClassName == null || valueClassName == null) continue;
2143
2144 if (key instanceof java.util.Collection){
2146 StringBuffer keyStrBuff = new StringBuffer("");
2147 Iterator iter = ((Collection) key).iterator();
2148 if (iter.hasNext()){
2149 item = iter.next();
2150 if (item instanceof java.lang.Number)
2151 keyItemClassName = item.getClass().getName();
2152 else
2153 keyItemClassName = String.class.getName();
2154 keyStrBuff.append(item.toString());
2155 } while (iter.hasNext()){
2157 item = iter.next();
2158 keyStrBuff.append(";" + item.toString());
2159 } key2String = keyStrBuff.toString();
2161 } if (value instanceof java.util.Collection){
2164 StringBuffer valueStrBuff = new StringBuffer("");
2165 Iterator iter = ((Collection) value).iterator();
2166 if (iter.hasNext()){
2167 item = iter.next();
2168 if (item instanceof java.lang.Number)
2169 valueItemClassName = item.getClass().getName();
2170 else
2171 valueItemClassName = String.class.getName();
2172 valueStrBuff.append(item.toString());
2173 } while (iter.hasNext()){
2175 item = iter.next();
2176 valueStrBuff.append(";" + item.toString());
2177 } value2String = valueStrBuff.toString();
2179 } str.append("<Feature>\n <Name");
2181 if (keyClassName != null)
2182 str.append(" className=\""+keyClassName+"\"");
2183 if (keyItemClassName != null)
2184 str.append(" itemClassName=\""+keyItemClassName+"\"");
2185 str.append(">");
2186 str.append(filterNonXmlChars(replaceCharsWithEntities(key2String)));
2187 str.append("</Name>\n <Value");
2188 if (valueClassName != null)
2189 str.append(" className=\"" + valueClassName + "\"");
2190 if (valueItemClassName != null)
2191 str.append(" itemClassName=\"" + valueItemClassName + "\"");
2192 str.append(">");
2193 str.append(filterNonXmlChars(replaceCharsWithEntities(value2String)));
2194 str.append("</Value>\n</Feature>\n");
2195 } } return str.toString();
2198 }
2200
2207 private StringBuffer replaceCharsWithEntities(String anInputString){
2208 if (anInputString == null) return new StringBuffer("");
2209 StringBuffer strBuff = new StringBuffer(anInputString);
2210 for (int i=strBuff.length()-1; i>=0; i--){
2211 Character ch = new Character(strBuff.charAt(i));
2212 if (entitiesMap.keySet().contains(ch)){
2213 strBuff.replace(i,i+1,(String) entitiesMap.get(ch));
2214 } } return strBuff;
2217 }
2219
2225 private String textWithNodes(String aText){
2226 if (aText == null) return new String("");
2227 StringBuffer textWithNodes = filterNonXmlChars(new StringBuffer(aText));
2228
2229 TreeMap offsets2CharsMap = new TreeMap();
2231 if (aText.length()!= 0){
2232 buildEntityMapFromString(aText,offsets2CharsMap);
2234 } TreeSet offsetsSet = new TreeSet();
2237 Iterator annotSetIter = this.getAnnotations().iterator();
2238 while (annotSetIter.hasNext()){
2239 Annotation annot = (Annotation) annotSetIter.next();
2240 offsetsSet.add(annot.getStartNode().getOffset());
2241 offsetsSet.add(annot.getEndNode().getOffset());
2242 } if (namedAnnotSets != null){
2245 Iterator iter = namedAnnotSets.values().iterator();
2246 while(iter.hasNext()){
2247 AnnotationSet annotSet = (AnnotationSet) iter.next();
2248 Iterator iter2 = annotSet.iterator();
2249 while(iter2.hasNext()){
2250 Annotation annotTmp = (Annotation) iter2.next();
2251 offsetsSet.add(annotTmp.getStartNode().getOffset());
2252 offsetsSet.add(annotTmp.getEndNode().getOffset());
2253 } } }
2259 if (offsetsSet.isEmpty()){
2260 return replaceCharsWithEntities(aText).toString();
2261 } while (!offsetsSet.isEmpty()){
2266 Long offset = (Long) offsetsSet.last();
2267 offsetsSet.remove(offset);
2269 int offsetValue = offset.intValue();
2271 String strNode = "<Node id=\"" + offsetValue + "\"/>";
2272 if (!offsets2CharsMap.isEmpty()){
2275 Long offsChar = (Long) offsets2CharsMap.lastKey();
2276 while( !offsets2CharsMap.isEmpty() &&
2277 offsChar.intValue() >= offset.intValue()){
2278 textWithNodes.replace(offsChar.intValue(),offsChar.intValue()+1,
2281 (String)entitiesMap.get((Character)offsets2CharsMap.get(offsChar)));
2282 offsets2CharsMap.remove(offsChar);
2285 if (!offsets2CharsMap.isEmpty())
2287 offsChar = (Long) offsets2CharsMap.lastKey();
2288 } } textWithNodes.insert(offsetValue,strNode);
2292 } while (!offsets2CharsMap.isEmpty()){
2297 Long offsChar = (Long) offsets2CharsMap.lastKey();
2298 textWithNodes.replace(offsChar.intValue(),offsChar.intValue()+1,
2300 (String)entitiesMap.get((Character)offsets2CharsMap.get(offsChar)));
2301 offsets2CharsMap.remove(offsChar);
2303 } return textWithNodes.toString();
2305 }
2307
2312 private String annotationSetToXml(AnnotationSet anAnnotationSet){
2313 StringBuffer str = new StringBuffer("");
2314
2315 if (anAnnotationSet == null){
2316 str.append("<AnnotationSet>\n");
2317 str.append("</AnnotationSet>\n");
2318 return str.toString();
2319 } if (anAnnotationSet.getName() == null)
2321 str.append("<AnnotationSet>\n");
2322 else str.append("<AnnotationSet Name=\"" + anAnnotationSet.getName()+
2323 "\" >\n");
2324 Iterator iterator = anAnnotationSet.iterator();
2326 while (iterator.hasNext()){
2327 Annotation annot = (Annotation) iterator.next();
2328 str.append("<Annotation " + "Id=\"" + annot.getId() +
2329 "\" Type=\"" + annot.getType() +
2330 "\" StartNode=\"" + annot.getStartNode().getOffset() +
2331 "\" EndNode=\"" + annot.getEndNode().getOffset() + "\">\n");
2332 str.append(featuresToXml(annot.getFeatures()));
2333 str.append("</Annotation>\n");
2334 }
2336 str.append("</AnnotationSet>\n");
2337 return str.toString();
2338 }
2340
2342 public Map getNamedAnnotationSets() {
2343 return namedAnnotSets;
2344 }
2346
2348 public Set getAnnotationSetNames(){
2349 return namedAnnotSets.keySet();
2350 }
2351
2352
2353
2358 public void removeAnnotationSet(String name){
2359 Object removed = namedAnnotSets.remove(name);
2360 if(removed != null){
2361 fireAnnotationSetRemoved(
2362 new DocumentEvent(this, DocumentEvent.ANNOTATION_SET_REMOVED, name));
2363 }
2364 }
2365
2366
2367 public void edit(Long start, Long end, DocumentContent replacement)
2368 throws InvalidOffsetException
2369 {
2370 if(! isValidOffsetRange(start, end))
2371 throw new InvalidOffsetException();
2372
2373 if(content != null)
2374 ((DocumentContentImpl) content).edit(start, end, replacement);
2375
2376 if(defaultAnnots != null)
2377 ((AnnotationSetImpl) defaultAnnots).edit(start, end, replacement);
2378
2379 if(namedAnnotSets != null) {
2380 Iterator iter = namedAnnotSets.values().iterator();
2381 while(iter.hasNext())
2382 ((AnnotationSetImpl) iter.next()).edit(start, end, replacement);
2383 }
2384 fireContentEdited(new DocumentEvent(this, DocumentEvent.CONTENT_EDITED,
2386 start, end));
2387 }
2389
2392 public boolean isValidOffset(Long offset) {
2393 if(offset == null)
2394 return false;
2395
2396 long o = offset.longValue();
2397 if(o > getContent().size().longValue() || o < 0)
2398 return false;
2399
2400 return true;
2401 }
2403
2407 public boolean isValidOffsetRange(Long start, Long end) {
2408 return
2409 isValidOffset(start) && isValidOffset(end) &&
2410 start.longValue() <= end.longValue();
2411 }
2413
2414 public void setNextAnnotationId(int aNextAnnotationId){
2415 nextAnnotationId = aNextAnnotationId;
2416 }
2418
2419 public Integer getNextAnnotationId() {
2420 return new Integer(nextAnnotationId++);
2421 }
2423
2424 public Integer getNextNodeId() { return new Integer(nextNodeId++); }
2425
2426
2427 public int compareTo(Object o) throws ClassCastException {
2428 DocumentImpl other = (DocumentImpl) o;
2429 return getOrderingString().compareTo(other.getOrderingString());
2430 }
2432
2435 protected String getOrderingString() {
2436 if(sourceUrl == null) return toString();
2437
2438 StringBuffer orderingString = new StringBuffer(sourceUrl.toString());
2439 if(sourceUrlStartOffset != null && sourceUrlEndOffset != null) {
2440 orderingString.append(sourceUrlStartOffset.toString());
2441 orderingString.append(sourceUrlEndOffset.toString());
2442 }
2443
2444 return orderingString.toString();
2445 }
2447
2448 protected int nextAnnotationId = 0;
2449
2450
2451 protected int nextNodeId = 0;
2452
2453 protected URL sourceUrl;
2454
2455
2456
2457
2458 protected DocumentContent content;
2459
2460
2461 protected String encoding = null;
2462
2463
2465
2469
2471
2474 private Annotation theRootAnnotation = null;
2475
2476
2480 private final int DOC_SIZE_MULTIPLICATION_FACTOR = 2;
2481
2482
2485 private final int ORDER_ON_START_OFFSET = 0;
2486
2489 private final int ORDER_ON_END_OFFSET = 1;
2490
2493 private final int ORDER_ON_ANNOT_ID = 2;
2494
2497 private final int ASC = 3;
2498
2501 private final int DESC = -3;
2502
2503
2506 private static Map entitiesMap = null;
2507 static{
2509 entitiesMap = new HashMap();
2510 entitiesMap.put(new Character('<'),"<");
2511 entitiesMap.put(new Character('>'),">");
2512 entitiesMap.put(new Character('&'),"&");
2513 entitiesMap.put(new Character('\''),"'");
2514 entitiesMap.put(new Character('"'),""");
2515 entitiesMap.put(new Character((char)160)," ");
2516 entitiesMap.put(new Character((char)169),"©");
2517 }
2519
2522
2524
2527 protected Long sourceUrlStartOffset;
2528
2529
2532 protected Long sourceUrlEndOffset;
2533
2534
2535 protected AnnotationSet defaultAnnots;
2536
2537
2538 protected Map namedAnnotSets;
2539
2540
2545 private String stringContent;
2546
2547
2555 public String getStringContent() { return stringContent; }
2556
2557
2565 public void setStringContent(String stringContent) {
2566 this.stringContent = stringContent;
2567 }
2569
2570 protected Boolean markupAware = new Boolean(false);
2571
2596
2597 public String toString() {
2598 String n = Strings.getNl();
2599 StringBuffer s = new StringBuffer("DocumentImpl: " + n);
2600 s.append(" content:" + content + n);
2601 s.append(" defaultAnnots:" + defaultAnnots + n);
2602 s.append(" encoding:" + encoding + n);
2603 s.append(" features:" + features + n);
2604 s.append(" markupAware:" + markupAware + n);
2605 s.append(" namedAnnotSets:" + namedAnnotSets + n);
2606 s.append(" nextAnnotationId:" + nextAnnotationId + n);
2607 s.append(" nextNodeId:" + nextNodeId + n);
2608 s.append(" sourceUrl:" + sourceUrl + n);
2609 s.append(" sourceUrlStartOffset:" + sourceUrlStartOffset + n);
2610 s.append(" sourceUrlEndOffset:" + sourceUrlEndOffset + n);
2611 s.append(n);
2612
2613 return s.toString();
2614 }
2616
2617 static final long serialVersionUID = -8456893608311510260L;
2618
2619
2620 class AnnotationComparator implements java.util.Comparator {
2621 int orderOn = -1;
2622 int orderType = ASC;
2623
2626 public AnnotationComparator(int anOrderOn, int anOrderType){
2627 orderOn = anOrderOn;
2628 orderType = anOrderType;
2629 }
2631
2632 public int compare(Object o1, Object o2){
2633 Annotation a1 = (Annotation) o1;
2634 Annotation a2 = (Annotation) o2;
2635 if (orderOn == ORDER_ON_START_OFFSET){
2637 int result = a1.getStartNode().getOffset().compareTo(
2638 a2.getStartNode().getOffset());
2639 if (orderType == ASC){
2640 if (result == 0)
2643 return a1.getId().compareTo(a2.getId());
2644 return result;
2645 }else{
2646 if (result == 0)
2648 return - (a1.getId().compareTo(a2.getId()));
2649 return -result;
2650 } }
2653 if (orderOn == ORDER_ON_END_OFFSET){
2655 int result = a1.getEndNode().getOffset().compareTo(
2656 a2.getEndNode().getOffset());
2657 if (orderType == ASC){
2658 if (result == 0)
2661 return - (a1.getId().compareTo(a2.getId()));
2662 return result;
2663 }else{
2664 if (result == 0)
2667 return a1.getId().compareTo(a2.getId());
2668 return - result;
2669 } }
2672 if (orderOn == ORDER_ON_ANNOT_ID){
2674 if (orderType == ASC)
2675 return a1.getId().compareTo(a2.getId());
2676 else
2677 return -(a1.getId().compareTo(a2.getId()));
2678 } return 0;
2680 } }
2683
2684 private transient Vector documentListeners;
2685 private transient Vector gateListeners;
2686
2687 public synchronized void removeDocumentListener(DocumentListener l) {
2688 if (documentListeners != null && documentListeners.contains(l)) {
2689 Vector v = (Vector) documentListeners.clone();
2690 v.removeElement(l);
2691 documentListeners = v;
2692 }
2693 }
2694 public synchronized void addDocumentListener(DocumentListener l) {
2695 Vector v = documentListeners == null ? new Vector(2) : (Vector) documentListeners.clone();
2696 if (!v.contains(l)) {
2697 v.addElement(l);
2698 documentListeners = v;
2699 }
2700 }
2701
2702 protected void fireAnnotationSetAdded(DocumentEvent e) {
2703 if (documentListeners != null) {
2704 Vector listeners = documentListeners;
2705 int count = listeners.size();
2706 for (int i = 0; i < count; i++) {
2707 ((DocumentListener) listeners.elementAt(i)).annotationSetAdded(e);
2708 }
2709 }
2710 }
2711
2712 protected void fireAnnotationSetRemoved(DocumentEvent e) {
2713 if (documentListeners != null) {
2714 Vector listeners = documentListeners;
2715 int count = listeners.size();
2716 for (int i = 0; i < count; i++) {
2717 ((DocumentListener) listeners.elementAt(i)).annotationSetRemoved(e);
2718 }
2719 }
2720 }
2721
2722 protected void fireContentEdited(DocumentEvent e) {
2723 if (documentListeners != null) {
2724 Vector listeners = documentListeners;
2725 int count = listeners.size();
2726 for (int i = 0; i < count; i++) {
2727 ((DocumentListener) listeners.elementAt(i)).contentEdited(e);
2728 }
2729 }
2730 }
2731
2732 public void resourceLoaded(CreoleEvent e) {
2733 }
2734 public void resourceUnloaded(CreoleEvent e) {
2735 }
2736 public void datastoreOpened(CreoleEvent e) {
2737 }
2738 public void datastoreCreated(CreoleEvent e) {
2739 }
2740 public void resourceRenamed(Resource resource, String oldName,
2741 String newName){
2742 }
2743 public void datastoreClosed(CreoleEvent e) {
2744 if (! e.getDatastore().equals(this.getDataStore()))
2745 return;
2746 Factory.deleteResource(this);
2749 }
2750 public void setLRPersistenceId(Object lrID) {
2751 super.setLRPersistenceId( lrID);
2752 Gate.getCreoleRegister().addCreoleListener(this);
2755 }
2756 public void resourceAdopted(DatastoreEvent evt) {
2757 }
2758 public void resourceDeleted(DatastoreEvent evt) {
2759 if(! evt.getSource().equals(this.getDataStore()))
2760 return;
2761 if(evt.getResourceID().equals(this.getLRPersistenceId()))
2764 Factory.deleteResource(this);
2765 }
2766 public void resourceWritten(DatastoreEvent evt) {
2767 }
2768 public void setDataStore(DataStore dataStore) throws gate.persist.PersistenceException {
2769 super.setDataStore( dataStore);
2770 if (this.dataStore != null)
2771 this.dataStore.addDatastoreListener(this);
2772 }
2773
2774
2779 public void setDefaultAnnotations(AnnotationSet defaultAnnotations) {
2780 defaultAnnots = defaultAnnotations;
2781 }
2782
2783}