1   /*
2    *  RtfDocumentFormat.java
3    *
4    *  Copyright (c) 1998-2005, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Cristian URSU, 26/July/2000
12   *
13   *  $Id: RtfDocumentFormat.java,v 1.21 2005/02/14 16:32:30 valyt Exp $
14   */
15  
16  package gate.corpora;
17  
18  import java.io.*;
19  
20  import javax.swing.text.*;
21  import javax.swing.text.rtf.RTFEditorKit;
22  
23  import gate.Resource;
24  import gate.creole.ResourceInstantiationException;
25  import gate.util.DocumentFormatException;
26  //import org.w3c.www.mime.*;
27  
28  /** The format of Documents. Subclasses of DocumentFormat know about
29    * particular MIME types and how to unpack the information in any
30    * markup or formatting they contain into GATE annotations. Each MIME
31    * type has its own subclass of DocumentFormat, e.g. XmlDocumentFormat,
32    * RtfDocumentFormat, MpegDocumentFormat. These classes register themselves
33    * with a static index residing here when they are constructed. Static
34    * getDocumentFormat methods can then be used to get the appropriate
35    * format class for a particular document.
36    */
37  public class RtfDocumentFormat extends TextualDocumentFormat{
38  
39    /** Debug flag */
40    private static final boolean DEBUG = false;
41  
42    /** Default construction */
43    public RtfDocumentFormat() { super(); }
44  
45    /** Unpack the markup in the document. This converts markup from the
46      * native format (e.g.RTF) into annotations in GATE format.
47      * Uses the markupElementsMap to determine which elements to convert, and
48      * what annotation type names to use.
49      * It always tryes to parse te doc's content. It doesn't matter if the
50      * sourceUrl is null or not.
51      *
52      * @param doc The gate document you want to parse.
53      *
54      */
55    public void unpackMarkup(gate.Document doc) throws DocumentFormatException {
56  
57      if ( (doc == null) ||
58           (doc.getSourceUrl() == null && doc.getContent() == null)){
59  
60        throw new DocumentFormatException(
61                 "GATE document is null or no content found. Nothing to parse!");
62      }// End if
63  
64      // create a RTF editor kit
65      RTFEditorKit aRtfEditorkit = new RTFEditorKit();
66  
67      // create a Styled Document
68      // NOTE that RTF Kit works only with Systled Document interface
69      StyledDocument styledDoc = new DefaultStyledDocument();
70  
71      try {
72        // get an Input stream from the gate document
73        InputStream in = (doc.getSourceUrl() == null) ?
74           (new ByteArrayInputStream(doc.getContent().toString().getBytes())) :
75            doc.getSourceUrl().openStream();
76        aRtfEditorkit.read(in, styledDoc, 0);
77        // replace the document content with the one without markups
78        doc.setContent(new DocumentContentImpl(
79                                        styledDoc.getText(0,styledDoc.getLength())
80                                              )
81                      );
82      } catch (BadLocationException e) {
83        throw new DocumentFormatException(e);
84      } catch (IOException e){
85        throw new DocumentFormatException("I/O exception for " +
86                                          doc.getSourceUrl().toExternalForm(),e);
87      }
88    } // unpackMarkup(doc)
89  
90    /** Initialise this resource, and return it. */
91    public Resource init() throws ResourceInstantiationException{
92      // Register RTF mime type
93      MimeType mime = new MimeType("text","rtf");
94      // Register the class handler for this mime type
95      mimeString2ClassHandlerMap.put(mime.getType()+ "/" + mime.getSubtype(),
96                                                                            this);
97      // Register the mime type with mine string
98      mimeString2mimeTypeMap.put(mime.getType() + "/" + mime.getSubtype(), mime);
99      // Register file sufixes for this mime type
100     suffixes2mimeTypeMap.put("rtf",mime);
101     // Register magic numbers for this mime type
102     magic2mimeTypeMap.put("{\\rtf1",mime);
103     // Set the mimeType for this language resource
104     setMimeType(mime);
105     return this;
106   }// init()
107 }// class RtfDocumentFormat
108