1   
2   /*
3    *  RDFFormatExporter.java
4    *
5    *  Copyright (c) 1998-2005, The University of Sheffield.
6    *
7    *  This file is part of GATE (see http://gate.ac.uk/), and is free
8    *  software, licenced under the GNU Library General Public License,
9    *  Version 2, June 1991 (in the distribution as file licence.html,
10   *  and also available at http://gate.ac.uk/gate/licence.html).
11   *
12   *  Marin Dimitrov, 07/May/2002
13   *
14   *  $Id: RDFFormatExporter.java,v 1.17 2005/09/19 14:00:21 valyt Exp $
15   */
16  
17  package gate.creole;
18  
19  import java.io.FileWriter;
20  import java.io.Writer;
21  import java.net.URL;
22  import java.util.*;
23  import com.hp.hpl.jena.ontology.daml.*;
24  import com.hp.hpl.jena.rdf.model.ModelFactory;
25  import com.hp.hpl.jena.rdf.model.RDFWriter;
26  import com.hp.hpl.jena.rdf.model.impl.RDFWriterFImpl;
27  
28  import junit.framework.Assert;
29  
30  import gate.*;
31  
32  
33  public class RDFFormatExporter extends AbstractLanguageAnalyser {
34  
35    private static final int DAML_EXPORT = 0;
36    private static final int RDF_EXPORT = 1;
37  
38    private static final String[] EXPORT_FORMATS = {"DAML+OIL","RDF"};
39    private static final String[] EXPORT_EXTS = {"daml","rdf"};
40  
41    private static final String ONTOGAZ_CLASS_FEATURE = "class";
42    private static final String ONTOGAZ_ONTOLOGY_FEATURE = "ontology";
43  
44    /** Debug flag */
45    private static final boolean DEBUG = false;
46  
47    private int exportFormat;
48  
49    /** This list of strings represents the entities type that will be exported*/
50    private List exportedTypes = null;
51  
52    private URL exportFilePath = null;
53  
54    private URL ontologyLocation = null;
55  
56    private String annotationSetName = null;
57  
58    public RDFFormatExporter() {
59    }
60  
61    /** Java bean style mutator for exportedTypes */
62    public void setExportedTypes(List anExportedTypesList){
63      exportedTypes = anExportedTypesList;
64    }// setExportedTypes();
65  
66  
67    /** Java bean style accesor for exportedTypes */
68    public List getExportedTypes(){
69      return exportedTypes;
70    }// getExportedTypes()
71  
72    /** Java bean style mutator for exportedTypes */
73    public void setExportFormat(String format){
74  
75      Assert.assertTrue(format.equalsIgnoreCase(EXPORT_FORMATS[DAML_EXPORT]) ||
76                        format.equalsIgnoreCase(EXPORT_FORMATS[RDF_EXPORT]));
77  
78      if (format.equalsIgnoreCase(EXPORT_FORMATS[DAML_EXPORT])) {
79        this.exportFormat = DAML_EXPORT;
80      }
81      else if (format.equalsIgnoreCase(EXPORT_FORMATS[RDF_EXPORT])) {
82        this.exportFormat = RDF_EXPORT;
83      }
84      else {
85        Assert.fail();
86      }
87  
88    }// setExportedTypes();
89  
90    /** Java bean style mutator for exportedTypes */
91    public String getExportFormat() {
92      return EXPORT_FORMATS[this.exportFormat];
93    }// setExportedTypes();
94  
95    /** Java bean style mutator for exportFilePath */
96    public void setExportFilePath(URL anExportFilePath){
97      exportFilePath = anExportFilePath;
98    }// setExportFilePath();
99  
100   /** Java bean style accesor for exportFilePath */
101   public URL getExportFilePath(){
102     return exportFilePath;
103   }// getDtdFileName()
104 
105   /** Java bean style mutator for exportFilePath */
106   public void setOntology(URL _ontologyLocation){
107     ontologyLocation = _ontologyLocation;
108   }// setExportFilePath();
109 
110   /** Java bean style accesor for exportFilePath */
111   public URL getOntology(){
112     return ontologyLocation;
113   }// getDtdFileName()
114 
115   /** Java bean style accessor for annotationSetName */
116   public String getAnnotationSetName() {
117     return annotationSetName;
118   } //getAnnotationSetName
119 
120 
121   /** Java bean style mutator for annotaionSetName */
122   public void setAnnotationSetName(String annotationSetName) {
123     this.annotationSetName = annotationSetName;
124   }
125 
126   /** Initialise this resource, and returns it. */
127   public gate.Resource init() throws ResourceInstantiationException {
128     return this;
129   } // init()
130 
131 
132   /** Run the resource and does the entire export process*/
133   public void execute() throws ExecutionException{
134 
135     // Check if the thing can be run
136     if(document == null) {
137       throw new ExecutionException("No document found to export in APF format!");
138     }
139 
140     /* Commented by Niraj to include support for annotationSetName where all the
141      * annotations should be exported incase exportedTypes is null
142     if (exportedTypes == null) {
143       throw new ExecutionException("No export types found.");
144     }*/
145 
146 //    StringBuffer rdfDoc = new StringBuffer(10*(document.getContent().size().intValue()));
147 
148     String exportFilePathStr = null;
149 
150     if (exportFilePath == null) {
151       exportFilePathStr = new String(document.getSourceUrl().getFile() + "." +
152                                     EXPORT_EXTS[this.exportFormat]);
153     }
154     else {
155       exportFilePathStr = new String(exportFilePath.getPath()+
156                                     "/" +
157                                     document.getName() + "." +
158                                     EXPORT_EXTS[this.exportFormat]);
159     }
160 //System.out.println("export path:" +exportFilePathStr);
161     // Prepare to write into the xmlFile
162     FileWriter  writer = null;
163     try{
164       writer = new FileWriter(exportFilePathStr,false);
165       annotations2ontology(writer);
166       writer.flush();
167       writer.close();
168     }catch (Exception e){
169       throw new ExecutionException(e);
170     }// End try
171 
172   } // execute()
173 
174   private void annotations2ontology(Writer output) throws Exception {
175 
176     DAMLModel ontologyModel, instanceModel;
177     HashMap ontologies = new HashMap();
178     HashMap instanceMatches = new HashMap();
179     HashSet instanceNames = new HashSet();
180 
181       ontologyModel = ModelFactory.createDAMLModel();;
182       instanceModel = ModelFactory.createDAMLModel();
183 
184       Assert.assertNotNull(ontologyModel);
185       Assert.assertNotNull(instanceModel);
186 
187       //final settings of the model
188       DAMLOntology onto = instanceModel.createDAMLOntology("");
189       onto.prop_comment().addValue("autogenerated from GATE RDFFormatExporter");
190       onto.prop_versionInfo().addValue("1.0");
191 
192       Assert.assertNotNull(this.ontologyLocation);
193       ontologyModel.read(this.ontologyLocation.toString());
194 
195       //get a mapping: class name to DAML class
196       HashMap ontologyMap = ontology2hashmap(ontologyModel);
197       Assert.assertNotNull(ontologyMap);
198 
199       //add the mapping to the ontologies hashmap
200       //key is ontology URL as generated by the OntoGaz
201       ontologies.put(this.ontologyLocation.toString(),ontologyMap);
202 
203       if (null == ontologyModel) {
204         throw new ExecutionException("cannot read ontology");
205       }
206 
207       HashMap defaultClasses = new HashMap((int)ontologyModel.size()/5);
208       Iterator itClasses = ontologyModel.listDAMLClasses();
209       while (itClasses.hasNext()) {
210         DAMLClass cls = (DAMLClass)itClasses.next();
211         String className = cls.getLocalName();
212         if (null != className) {
213           defaultClasses.put(className.toLowerCase(),cls);
214         }
215       }
216 
217       //* Addition by Niraj to include AnnotationSet Support */
218       AnnotationSet inputAs = (annotationSetName == null ||
219                                annotationSetName.length() == 0) ?
220                                document.getAnnotations() :
221                                document.getAnnotations(annotationSetName);
222 
223       // see if exportedTypes is null
224       Iterator itTypes = (exportedTypes == null || exportedTypes.size() == 0) ?
225                        inputAs.getAllTypes().iterator() : exportedTypes.iterator();
226 
227       //Iterator itTypes = this.exportedTypes.iterator();
228       // End of addition
229 
230       while (itTypes.hasNext()) {
231 
232         String type = (String)itTypes.next();
233         AnnotationSet as = 
234             (this.annotationSetName != null && 
235              this.annotationSetName.length() > 0) ?
236             this.document.getAnnotations(this.annotationSetName).get(type) :
237             this.document.getAnnotations().get(type);
238 
239         
240         if (null == as || true == as.isEmpty()) {
241           continue;
242         }
243 
244         Iterator itAnnotations = as.iterator();
245         while (itAnnotations.hasNext()) {
246 
247           Annotation ann = (Annotation)itAnnotations.next();
248           Assert.assertTrue(ann.getType().equals(type));
249 
250           FeatureMap features = ann.getFeatures();
251           String annClass = (String)features.get(ONTOGAZ_CLASS_FEATURE);
252           String annOntology = (String)features.get(ONTOGAZ_ONTOLOGY_FEATURE);
253           DAMLClass damlClass = null;
254 
255           if (null == annClass) {
256             //no ontological info
257             //try to get proper class from the default ontology
258             if (defaultClasses.containsKey(ann.getType().toLowerCase())) {
259               //bingo
260               //we have a class with the name of the annotation's type
261               damlClass = (DAMLClass)defaultClasses.get(ann.getType().toLowerCase());
262               Assert.assertNotNull(damlClass);
263             }
264             else {
265               continue;
266             }
267           }
268           else {
269             //ontological info available
270             //is this a new ontology?
271             if (false == ontologies.containsKey(annOntology)) {
272               //oops, new ontology:
273               //1. create model for it
274               //2. create class name 2 daml class mapping
275               //3. add it to hashmap
276 
277               //1.
278               DAMLModel model = ModelFactory.createDAMLModel();
279               model.read(annOntology);
280 
281               //2.
282               //create mapping between class names and DAML classes
283               HashMap name2class = ontology2hashmap(model);
284               Assert.assertNotNull(name2class);
285 
286               //3.
287               ontologies.put(annOntology,model);
288             }
289 
290             //get the class of the annotation
291             damlClass = (DAMLClass)((HashMap)ontologies.get(annOntology)).get(annClass);
292             Assert.assertNotNull(damlClass);
293           }
294 
295           String instanceName = this.document.getContent().getContent(
296                                                                   ann.getStartNode().getOffset(),
297                                                                   ann.getEndNode().getOffset())
298                                 .toString();
299           Assert.assertNotNull(instanceName);
300 
301           //create instance of proper type only if new
302           if (instanceNames.contains(instanceName)) {
303             continue;
304           }
305 
306           DAMLInstance annInstance = instanceModel.createDAMLInstance(damlClass,instanceName);
307           instanceNames.add(instanceName);
308 
309           //check orhtographic matches
310           List matches = (List)ann.getFeatures().get("matches");
311           if (null != matches) {
312             //try to get equiv instance
313             if (instanceMatches.containsKey(matches)) {
314               DAMLInstance equivInstance = (DAMLInstance)instanceMatches.get(matches);
315 
316               //make sure we don't have duplicated name
317               annInstance.prop_sameIndividualAs().add(equivInstance);
318             }
319             else {
320               //first entry of the coref chain
321               instanceMatches.put(matches,annInstance);
322             }
323           }
324 
325 
326         }//while
327       }//while
328 
329       //print the model into file
330       instanceModel.setNsPrefix("gate",this.ontologyLocation.toString()+"#");
331       instanceModel.writeAll(output, "RDF/XML-ABBREV", null);
332   }
333 
334   private HashMap ontology2hashmap(DAMLModel ontology) throws Exception {
335 
336     HashMap result = null;
337 
338     //0.
339     Assert.assertNotNull(ontology);
340 
341 
342     result = new HashMap((int)ontology.size()/5);
343 
344     //1.Iterate classes
345     Iterator itClasses = ontology.listDAMLClasses();
346     while (itClasses.hasNext()) {
347       DAMLClass clazz = (DAMLClass)itClasses.next();
348       //Assert.assertNotNull(clazz.getLocalName());
349       if (null != clazz.getLocalName()) {
350         result.put(clazz.getLocalName(),clazz);
351       }
352 
353     }
354 
355     return result;
356   }
357 }