1
15
16 package gate.corpora;
17
18 import java.io.IOException;
19 import java.util.Iterator;
20
21 import gate.*;
22 import gate.creole.ResourceInstantiationException;
23 import gate.email.EmailDocumentHandler;
24 import gate.event.StatusListener;
25 import gate.util.DocumentFormatException;
26 import gate.util.InvalidOffsetException;
27
28
30
39 public class EmailDocumentFormat extends TextualDocumentFormat
40 {
41
42 private static final boolean DEBUG = false;
43
44
45 public EmailDocumentFormat() { super();}
46
47
57
58 public void unpackMarkup(gate.Document doc) throws DocumentFormatException{
59 if ( (doc == null) ||
60 (doc.getSourceUrl() == null && doc.getContent() == null)){
61
62 throw new DocumentFormatException(
63 "GATE document is null or no content found. Nothing to parse!");
64 }
66 setNewLineProperty(doc);
67
68 EmailDocumentHandler emailDocHandler = null;
70 emailDocHandler = new gate.email.EmailDocumentHandler(
71 doc,
72 this.markupElementsMap,
73 this.element2StringMap);
74 StatusListener statusListener = new StatusListener(){
75 public void statusChanged(String text) {
76 fireStatusChanged(text);
78 } };
80 emailDocHandler.addStatusListener(statusListener);
82 try{
83 emailDocHandler.annotateMessages();
85 AnnotationSet bodyAnnotations = doc.getAnnotations(
87 GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME).get("body");
88 if (bodyAnnotations != null && !bodyAnnotations.isEmpty()){
89 Iterator iter = bodyAnnotations.iterator();
90 while(iter.hasNext()){
91 Annotation a = (Annotation)iter.next();
92 annotateParagraphs(doc,a.getStartNode().getOffset().intValue(),
93 a.getEndNode().getOffset().intValue(),
94 GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
95 } } } catch (IOException e){
98 throw new DocumentFormatException("Couldn't create a buffered reader ",e);
99 } catch (InvalidOffsetException e){
100 throw new DocumentFormatException(e);
101 }finally{
102 emailDocHandler.removeStatusListener(statusListener);
103 } }
106
107 public Resource init() throws ResourceInstantiationException{
108 MimeType mime = new MimeType("text","email");
110 mimeString2ClassHandlerMap.put(mime.getType()+ "/" + mime.getSubtype(),
112 this);
113 mimeString2mimeTypeMap.put(mime.getType() + "/" + mime.getSubtype(), mime);
115 suffixes2mimeTypeMap.put("eml",mime);
117 suffixes2mimeTypeMap.put("email",mime);
118 suffixes2mimeTypeMap.put("mail",mime);
119 magic2mimeTypeMap.put("Subject:",mime);
121 setMimeType(mime);
123 return this;
124 }}
127