1   /*
2    *  Document.java
3    *
4    *  Copyright (c) 1998-2005, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Hamish Cunningham, 19/Jan/2000
12   *
13   *  $Id: Document.java,v 1.41 2005/01/11 13:51:30 ian Exp $
14   */
15  
16  package gate;
17  
18  import java.net.URL;
19  import java.util.Map;
20  import java.util.Set;
21  
22  import gate.event.DocumentListener;
23  import gate.util.InvalidOffsetException;
24  
25  
26  /** Represents the commonalities between all sorts of documents.
27   */
28  public interface Document extends SimpleDocument {
29  
30    /**
31    * The parameter name that determines whether or not a document is markup aware
32    */
33    public static final String
34      DOCUMENT_MARKUP_AWARE_PARAMETER_NAME = "markupAware";
35  
36    public static final String
37      DOCUMENT_ENCODING_PARAMETER_NAME = "encoding";
38  
39    public static final String
40      DOCUMENT_PRESERVE_CONTENT_PARAMETER_NAME = "preserveOriginalContent";
41  
42    public static final String
43      DOCUMENT_STRING_CONTENT_PARAMETER_NAME = "stringContent";
44  
45    public static final String
46      DOCUMENT_REPOSITIONING_PARAMETER_NAME = "collectRepositioningInfo";
47  
48    public static final String
49      DOCUMENT_START_OFFSET_PARAMETER_NAME = "sourceUrlStartOffset";
50  
51    public static final String
52      DOCUMENT_END_OFFSET_PARAMETER_NAME = "sourceUrlEndOffset";
53  
54    /** Documents may be packed within files; in this case an optional pair of
55     *  offsets refer to the location of the document.
56     */
57    public Long[] getSourceUrlOffsets();
58  
59    /** Documents may be packed within files; in this case an optional pair of
60     *  offsets refer to the location of the document. This method gets the
61     *  start offset.
62     */
63    public Long getSourceUrlStartOffset();
64  
65    /** Documents may be packed within files; in this case an optional pair of
66     *  offsets refer to the location of the document. This method gets the
67     *  end offset.
68     */
69    public Long getSourceUrlEndOffset();
70  
71    /** Returns a map with the named annotation sets
72      */
73    public Map getNamedAnnotationSets();
74  
75    /** Make the document markup-aware. This will trigger the creation
76     *  of a DocumentFormat object at Document initialisation time; the
77     *  DocumentFormat object will unpack the markup in the Document and
78     *  add it as annotations. Documents are <B>not</B> markup-aware by default.
79     *
80     *  @param b markup awareness status.
81     */
82    public void setMarkupAware(Boolean b);
83  
84    /** Get the markup awareness status of the Document.
85     *
86     *  @return whether the Document is markup aware.
87     */
88    public Boolean getMarkupAware();
89  
90    /**
91     * Allow/disallow preserving of the original document content.
92     * If is <B>true</B> the original content will be retrieved from
93     * the DocumentContent object and preserved as document feature.
94     */
95    public void setPreserveOriginalContent(Boolean b);
96  
97    /** Get the preserving of content status of the Document.
98     *
99     *  @return whether the Document should preserve it's original content.
100    */
101   public Boolean getPreserveOriginalContent();
102 
103   /**
104    *  Allow/disallow collecting of repositioning information.
105    *  If is <B>true</B> information will be retrieved and preserved
106    *  as document feature.<BR>
107    *  Preserving of repositioning information give the possibilities
108    *  for converting of coordinates between the original document content and
109    *  extracted from the document text.
110    */
111   public void setCollectRepositioningInfo(Boolean b);
112 
113   /** Get the collectiong and preserving of repositioning information
114    *  for the Document. <BR>
115    *  Preserving of repositioning information give the possibilities
116    *  for converting of coordinates between the original document content and
117    *  extracted from the document text.
118    *
119    *  @return whether the Document should collect and preserve information.
120    */
121   public Boolean getCollectRepositioningInfo();
122 
123   /** Returns a GateXml document. This document is actually a serialization of
124    *  a Gate Document in XML.
125     * @return a string representing a Gate Xml document
126     */
127   public String toXml();
128 
129   /** Returns an XML document aming to preserve the original markups(
130     * the original markup will be in the same place and format as it was
131     * before processing the document) and include (if possible)
132     * the annotations specified in the aSourceAnnotationSet.
133     * <b>Warning:</b> Annotations from the aSourceAnnotationSet will be lost
134     * if they will cause a crosed over situation.
135     * @param aSourceAnnotationSet is an annotation set containing all the
136     * annotations that will be combined with the original marup set.
137     * @param includeFeatures determines whether or not features and gate IDs
138     * of the annotations should be included as attributes on the tags or not.
139     * If false, then only the annotation types are exported as tags, with no
140     * attributes.
141     * @return a string representing an XML document containing the original
142     * markup + dumped annotations form the aSourceAnnotationSet
143     */
144   public String toXml(Set aSourceAnnotationSet, boolean includeFeatures);
145 
146   /**
147    * Equivalent to toXml(aSourceAnnotationSet, true).
148    */
149   public String toXml(Set aSourceAnnotationSet);
150 
151   /** Make changes to the content.
152    */
153   public void edit(Long start, Long end, DocumentContent replacement)
154     throws InvalidOffsetException;
155 
156   /**
157    * Adds a {@link gate.event.DocumentListener} to this document.
158    * All the registered listeners will be notified of changes occured to the
159    * document.
160    */
161   public void addDocumentListener(DocumentListener l);
162 
163   /**
164    * Removes one of the previously registered document listeners.
165    */
166   public void removeDocumentListener(DocumentListener l);
167 
168 
169   /** Documents may be packed within files; in this case an optional pair of
170     * offsets refer to the location of the document. This method sets the
171     * end offset.
172     */
173   public void setSourceUrlEndOffset(Long sourceUrlEndOffset);
174 
175 
176   /** Documents may be packed within files; in this case an optional pair of
177     * offsets refer to the location of the document. This method sets the
178     * start offset.
179     */
180   public void setSourceUrlStartOffset(Long sourceUrlStartOffset);
181 
182 } // interface Document
183 
184