Document.java |
/*
 *  Document.java
 *
 *  Copyright (c) 1998-2005, The University of Sheffield.
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  Hamish Cunningham, 19/Jan/2000
 *
 *  $Id: Document.java,v 1.41 2005/01/11 13:51:30 ian Exp $
 */

package gate;

import java.net.URL;
import java.util.Map;
import java.util.Set;

import gate.event.DocumentListener;
import gate.util.InvalidOffsetException;


/** Represents the commonalities between all sorts of documents.
  * Declares the parameter names used when configuring a document, accessors
  * for the document's source offsets and processing flags, XML export,
  * content editing and listener registration.
  */
public interface Document extends SimpleDocument {

  /**
   * The parameter name that determines whether or not a document is markup
   * aware.
   */
  public static final String
    DOCUMENT_MARKUP_AWARE_PARAMETER_NAME = "markupAware";

  /**
   * The parameter name <code>"encoding"</code>.
   * NOTE(review): presumably names the character encoding used to read the
   * document content; confirm against the implementing resource.
   */
  public static final String
    DOCUMENT_ENCODING_PARAMETER_NAME = "encoding";

  /**
   * The parameter name <code>"preserveOriginalContent"</code>, named after
   * the property handled by {@link #setPreserveOriginalContent(Boolean)}.
   */
  public static final String
    DOCUMENT_PRESERVE_CONTENT_PARAMETER_NAME = "preserveOriginalContent";

  /**
   * The parameter name <code>"stringContent"</code>.
   * NOTE(review): presumably used to supply the document content directly
   * as a string; confirm against the implementing resource.
   */
  public static final String
    DOCUMENT_STRING_CONTENT_PARAMETER_NAME = "stringContent";

  /**
   * The parameter name <code>"collectRepositioningInfo"</code>, named after
   * the property handled by {@link #setCollectRepositioningInfo(Boolean)}.
   */
  public static final String
    DOCUMENT_REPOSITIONING_PARAMETER_NAME = "collectRepositioningInfo";

  /**
   * The parameter name <code>"sourceUrlStartOffset"</code>, named after
   * the property handled by {@link #setSourceUrlStartOffset(Long)}.
   */
  public static final String
    DOCUMENT_START_OFFSET_PARAMETER_NAME = "sourceUrlStartOffset";

  /**
   * The parameter name <code>"sourceUrlEndOffset"</code>, named after
   * the property handled by {@link #setSourceUrlEndOffset(Long)}.
   */
  public static final String
    DOCUMENT_END_OFFSET_PARAMETER_NAME = "sourceUrlEndOffset";

  /** Documents may be packed within files; in this case an optional pair of
    * offsets refers to the location of the document.
    *
    * @return the pair of offsets. NOTE(review): element order and the value
    * returned when no offsets are set are not specified by this interface;
    * confirm against the implementation.
    */
  public Long[] getSourceUrlOffsets();

  /** Documents may be packed within files; in this case an optional pair of
    * offsets refers to the location of the document. This method gets the
    * start offset.
    *
    * @return the start offset.
    */
  public Long getSourceUrlStartOffset();

  /** Documents may be packed within files; in this case an optional pair of
    * offsets refers to the location of the document. This method gets the
    * end offset.
    *
    * @return the end offset.
    */
  public Long getSourceUrlEndOffset();

  /** Returns a map with the named annotation sets.
    *
    * @return a map of the named annotation sets. The map is declared as a
    * raw type, so key and value types are not specified by this interface.
    */
  public Map getNamedAnnotationSets();

  /** Make the document markup-aware. This will trigger the creation
    * of a DocumentFormat object at Document initialisation time; the
    * DocumentFormat object will unpack the markup in the Document and
    * add it as annotations. Documents are <B>not</B> markup-aware by default.
    *
    * @param b markup awareness status.
    */
  public void setMarkupAware(Boolean b);

  /** Get the markup awareness status of the Document.
    *
    * @return whether the Document is markup aware.
    */
  public Boolean getMarkupAware();

  /**
   * Allow/disallow preserving of the original document content.
   * If <B>true</B>, the original content will be retrieved from
   * the DocumentContent object and preserved as a document feature.
   *
   * @param b whether the original content should be preserved.
   */
  public void setPreserveOriginalContent(Boolean b);

  /** Get the content-preservation status of the Document.
    *
    * @return whether the Document should preserve its original content.
    */
  public Boolean getPreserveOriginalContent();

  /**
   * Allow/disallow collecting of repositioning information.
   * If <B>true</B>, the information will be retrieved and preserved
   * as a document feature.<BR>
   * Preserving repositioning information makes it possible to convert
   * coordinates between the original document content and the text
   * extracted from the document.
   *
   * @param b whether repositioning information should be collected.
   */
  public void setCollectRepositioningInfo(Boolean b);

  /** Get the status of collecting and preserving repositioning information
    * for the Document.<BR>
    * Preserving repositioning information makes it possible to convert
    * coordinates between the original document content and the text
    * extracted from the document.
    *
    * @return whether the Document should collect and preserve information.
    */
  public Boolean getCollectRepositioningInfo();

  /** Returns a GateXml document. This document is actually a serialization of
    * a Gate Document in XML.
    *
    * @return a string representing a Gate Xml document.
    */
  public String toXml();

  /** Returns an XML document aiming to preserve the original markup
    * (the original markup will be in the same place and format as it was
    * before processing the document) and to include (if possible)
    * the annotations specified in the aSourceAnnotationSet.
    * <b>Warning:</b> Annotations from the aSourceAnnotationSet will be lost
    * if they would cause a crossed-over situation.
    *
    * @param aSourceAnnotationSet is an annotation set containing all the
    * annotations that will be combined with the original markup set.
    * @param includeFeatures determines whether or not features and gate IDs
    * of the annotations should be included as attributes on the tags.
    * If false, then only the annotation types are exported as tags, with no
    * attributes.
    * @return a string representing an XML document containing the original
    * markup + dumped annotations from the aSourceAnnotationSet.
    */
  public String toXml(Set aSourceAnnotationSet, boolean includeFeatures);

  /**
   * Equivalent to toXml(aSourceAnnotationSet, true).
   *
   * @param aSourceAnnotationSet is an annotation set containing all the
   * annotations that will be combined with the original markup set.
   * @return a string representing an XML document containing the original
   * markup + dumped annotations from the aSourceAnnotationSet.
   */
  public String toXml(Set aSourceAnnotationSet);

  /** Make changes to the content.
    *
    * @param start start offset of the edit. NOTE(review): presumably the
    * start of the span being replaced; confirm against the implementation.
    * @param end end offset of the edit. NOTE(review): presumably the end of
    * the span being replaced; confirm against the implementation.
    * @param replacement the content used for the edit.
    * @throws InvalidOffsetException NOTE(review): presumably raised when the
    * given offsets are not valid for this document; the exact conditions
    * are defined by the implementation.
    */
  public void edit(Long start, Long end, DocumentContent replacement)
    throws InvalidOffsetException;

  /**
   * Adds a {@link gate.event.DocumentListener} to this document.
   * All the registered listeners will be notified of changes that occurred
   * to the document.
   *
   * @param l the listener to register.
   */
  public void addDocumentListener(DocumentListener l);

  /**
   * Removes one of the previously registered document listeners.
   *
   * @param l the listener to remove.
   */
  public void removeDocumentListener(DocumentListener l);


  /** Documents may be packed within files; in this case an optional pair of
    * offsets refers to the location of the document. This method sets the
    * end offset.
    *
    * @param sourceUrlEndOffset the end offset.
    */
  public void setSourceUrlEndOffset(Long sourceUrlEndOffset);


  /** Documents may be packed within files; in this case an optional pair of
    * offsets refers to the location of the document. This method sets the
    * start offset.
    *
    * @param sourceUrlStartOffset the start offset.
    */
  public void setSourceUrlStartOffset(Long sourceUrlStartOffset);

} // interface Document